diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..bccdb92 Binary files /dev/null and b/.DS_Store differ diff --git a/Cargo.lock b/Cargo.lock index 56cda25..387f93a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -99,6 +99,16 @@ dependencies = [ "clap_derive", ] +[[package]] +name = "clap-verbosity-flag" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1eef05769009513df2eb1c3b4613e7fad873a14c600ff025b08f250f59fee7de" +dependencies = [ + "clap", + "log", +] + [[package]] name = "clap_builder" version = "4.4.2" @@ -242,6 +252,23 @@ dependencies = [ "version_check", ] +[[package]] +name = "getrandom" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + [[package]] name = "heck" version = "0.4.1" @@ -254,12 +281,31 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown", +] + [[package]] name = "indoc" version = "2.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2c785eefb63ebd0e33416dfcb8d6da0bf27ce752843a45632a67bf10d4d4b5c4" +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.11.0" @@ -281,6 +327,12 @@ version = "0.2.148" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9cdc71e17332e86d2e1d38c1f99edcb6288ee11b815fb1a4b049eaa2114d369b" +[[package]] +name = "log" +version = "0.4.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" + [[package]] name = "matrixmultiply" version = "0.3.7" @@ -335,6 +387,30 @@ dependencies = [ "rayon", ] +[[package]] +name = "ndarray-stats" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af5a8477ac96877b5bd1fd67e0c28736c12943aba24eda92b127e036b0c8f400" +dependencies = [ + "indexmap", + "itertools 0.10.5", + "ndarray", + "noisy_float", + "num-integer", + "num-traits", + "rand", +] + +[[package]] +name = "noisy_float" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "978fe6e6ebc0bf53de533cd456ca2d9de13de13856eda1518a285d7705a213af" +dependencies = [ + "num-traits", +] + [[package]] name = "nom" version = "7.1.3" @@ -478,6 +554,12 @@ dependencies = [ "sha2", ] +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + [[package]] name = "proc-macro2" version = "1.0.67" @@ -509,6 +591,36 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + [[package]] name = "rawpointer" version = "0.2.1" @@ -538,15 +650,17 @@ dependencies = [ ] [[package]] -name = "rsParser-r" +name = "rsParser" version = "0.1.0" dependencies = [ "anyhow", "clap", + "clap-verbosity-flag", "flate2", "indoc", - "itertools", + "itertools 0.11.0", "ndarray", + "ndarray-stats", "nom", "nom-derive", "npyz", @@ -695,6 +809,12 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + [[package]] name = "windows-sys" version = "0.48.0" diff --git a/Cargo.toml b/Cargo.toml index 356faa6..bf6ec47 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "rsParser-r" +name = "rsParser" version = "0.1.0" edition = "2021" @@ -20,4 +20,9 @@ num-traits = "0.2.16" thiserror = "1.0.48" anyhow = "1.0.75" npyz = "0.8.1" -itertools = "0.11.0" \ No newline at end of file +itertools = "0.11.0" +clap-verbosity-flag = "2.0.1" +ndarray-stats = "0.5.1" + +[profile.release] +debug = true \ No newline at end of file diff --git a/a.py b/a.py deleted file mode 100644 index 28f105a..0000000 --- a/a.py +++ /dev/null @@ -1,5 +0,0 @@ -import numpy as np - -a = np.load("/Users/ruomu/Desktop/a.npy").astype(np.int8) - -pass \ No newline at end of file diff --git a/package_linux.sh b/package_linux.sh new file mode 100644 index 0000000..2513bde --- /dev/null +++ b/package_linux.sh @@ -0,0 +1,7 @@ +if [[ $(echo $0 | awk '/^\//') == $0 ]]; then + ABSPATH=$(dirname $0) +else + ABSPATH=$PWD/$(dirname $0) +fi +cd ${ABSPATH} +cargo build --release --target x86_64-unknown-linux-musl \ No newline at end of file diff --git a/src/app.rs b/src/app.rs index d657f8a..5cf4428 100644 --- a/src/app.rs +++ b/src/app.rs @@ -27,7 +27,13 @@ impl App { #[command(name = "rsParser")] #[command(author = "K.Tsuki . ")] #[command(version = "1.0")] -#[command(about = "A fictional versioning CLI", long_about = None)] +#[command(about = "A fictional versioning CLI", long_about = " +The rsParser is a powerful command-line utility tailored +for internal product data analysis needs. With its versatile +features, this tool empowers users to efficiently process and +analyze product data in various formats, facilitating data-driven +decision-making and enhancing operational efficiency. +")] pub struct Cli { #[command(subcommand)] pub command: Commands, @@ -51,6 +57,8 @@ pub enum Commands { path: Vec, #[arg(short = 'o', long = "output", required = false)] output_path: Option, + #[arg(short = 'v', long = "verbose", required = false)] + verbose: bool, }, } diff --git a/src/cmds/parse.rs b/src/cmds/parse.rs index fba5ee9..7eda199 100644 --- a/src/cmds/parse.rs +++ b/src/cmds/parse.rs @@ -21,13 +21,14 @@ pub enum OutputType { } pub struct ParseCmdConfig { + pub verbose: bool, pub paths: Vec, pub output_path: Option, output_type: OutputType, } impl ParseCmdConfig { - pub fn new(paths: Vec, output_path: Option) -> Self { + pub fn new(paths: Vec, output_path: Option, v: bool) -> Self { let t = if let Some(o) = &output_path { match o.extension().unwrap().to_str().unwrap() { "npy" => OutputType::Npy, @@ -42,6 +43,7 @@ impl ParseCmdConfig { paths: paths, output_path: output_path, output_type: t, + verbose: v, } } } @@ -52,8 +54,8 @@ impl Cmd for ParseCmd { type Config = ParseCmdConfig; fn parse(&self, app: &App, config: Self::Config) -> AResult { - let only_header = config.output_type == OutputType::None; - println!("only_header:{}", only_header); + let only_header = !config.verbose && config.output_type == OutputType::None; + // println!("only_header:{}", only_header); let paths = &config.paths; let _data = paths .par_iter() @@ -84,7 +86,6 @@ impl Cmd for ParseCmd { .unwrap(); writer.extend(v.iter()).unwrap(); writer.finish().unwrap(); - }); } _ => {} diff --git a/src/main.rs b/src/main.rs index 9069f49..cd7b535 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,6 +4,7 @@ mod error; mod parse; mod printer; mod utils; + use crate::app::App; use app::{Cli, Commands}; use clap::Parser; @@ -16,10 +17,9 @@ fn main() { let app = App::new(config); match args.command { - Commands::Parse { path, output_path } => { - let parse_conf = ParseCmdConfig::new(path, output_path); - let p = ParseCmd; - p.parse(&app, parse_conf).unwrap().unwrap(); + Commands::Parse { path, output_path, verbose } => { + let parse_conf = ParseCmdConfig::new(path, output_path, verbose); + ParseCmd.parse(&app, parse_conf).unwrap().unwrap(); } } } diff --git a/src/parse.rs b/src/parse.rs index 10ce14f..90da584 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -1,12 +1,16 @@ use nom::bytes::complete::*; -use nom::error::{ParseError, ErrorKind}; +use nom::error::{ErrorKind, ParseError}; use nom::multi::count; -use nom::number::complete::{be_f32, be_i32, be_i8, be_u32, be_u8, le_f32, le_i32, le_i8, le_u8, le_u32}; +use nom::number::complete::{ + be_f32, be_i32, be_i8, be_u32, be_u8, le_f32, le_i32, le_i8, le_u32, le_u8, +}; use nom::sequence::tuple; use nom::IResult; use serde::{Deserialize, Serialize}; use std::any::Any; use std::str::from_utf8; +use ndarray::{ArrayD, ShapeBuilder}; +use ndarray_stats::QuantileExt; #[derive(Deserialize, Serialize, Debug)] pub struct HeaderJson { @@ -31,6 +35,12 @@ pub struct BlockInfo { pub dimension_values: Vec>, } +#[derive(Debug)] +pub struct StatInfo { + pub max_value: f64, + pub min_value: f64, +} + #[derive(Copy, Clone, Debug)] enum Endian { Big, @@ -46,6 +56,7 @@ pub struct CommonHeader { pub struct Block { pub block_info: BlockInfo, + pub stat_info: Option, pub data: Option>, } @@ -88,11 +99,10 @@ fn common_parse(input: &[u8]) -> IResult<&[u8], CommonHeader> { fn block_parse<'a, Error: ParseError<&'a [u8]>>( only_header: bool, endian: Endian, -) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Block, Error> -{ +) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Block, Error> { let l_p = match endian { - Endian::Big => be_i32, - Endian::Little => le_i32, + Endian::Big => be_u32, + Endian::Little => le_u32, }; move |input| { @@ -109,47 +119,133 @@ fn block_parse<'a, Error: ParseError<&'a [u8]>>( let (next, _data) = take(l2 as usize)(next)?; let mut d: Option> = None; + let mut stat = None; if !only_header { let c = j.dimension_size.iter().fold(1, |i, j| i * (*j)) as usize; + let mut converted_v: Option>; d = match j.value_type.as_str() { "b" => { - let (_, v) = count(match endian { - Endian::Big => be_i8, - _ => le_i8 - }, c)(_data)?; + let (_, v) = count( + match endian { + Endian::Big => be_i8, + _ => le_i8, + }, + c, + )(_data)?; + let v = ArrayD::from_shape_vec((&j.dimension_size.clone()).clone().into_shape(), v).unwrap(); + let f64_v = v.mapv(|i| { + if i == j.fill_value as i8 { + f64::NAN + } else { + i as f64 + } + }); + + stat = Some(StatInfo { + min_value: f64_v.min_skipnan().clone(), + max_value: f64_v.max_skipnan().clone(), + }); + Some(Box::new(v)) } "B" => { - let (_, v) = count(match endian { - Endian::Big => be_u8, - _ => le_u8 - }, c)(_data)?; + let (_, v) = count( + match endian { + Endian::Big => be_u8, + _ => le_u8, + }, + c, + )(_data)?; + let v = ArrayD::from_shape_vec((&j.dimension_size).clone().into_shape(), v).unwrap(); + let f64_v = v.mapv(|i| { + if i == j.fill_value as u8 { + f64::NAN + } else { + i as f64 + } + }); + + stat = Some(StatInfo { + min_value: f64_v.min_skipnan().clone(), + max_value: f64_v.max_skipnan().clone(), + }); Some(Box::new(v)) } "i" => { - let (_, v) = count(match endian { - Endian::Big => be_i32, - _ => le_i32 - }, c)(_data)?; + let (_, v) = count( + match endian { + Endian::Big => be_i32, + _ => le_i32, + }, + c, + )(_data)?; + let v = ArrayD::from_shape_vec((&j.dimension_size).clone().into_shape(), v).unwrap(); + let f64_v = v.mapv(|i| { + if i == j.fill_value as i32 { + f64::NAN + } else { + i as f64 + } + }); + + stat = Some(StatInfo { + min_value: f64_v.min_skipnan().clone(), + max_value: f64_v.max_skipnan().clone(), + }); Some(Box::new(v)) } "u" => { - let (_, v) = count(match endian { - Endian::Big => be_u32, - _ => le_u32 - }, c)(_data)?; + let (_, v) = count( + match endian { + Endian::Big => be_u32, + _ => le_u32, + }, + c, + )(_data)?; + let v = ArrayD::from_shape_vec((&j.dimension_size).clone().into_shape(), v).unwrap(); + let f64_v = v.mapv(|i| { + if i == j.fill_value as u32 { + f64::NAN + } else { + i as f64 + } + }); + + stat = Some(StatInfo { + min_value: f64_v.min_skipnan().clone(), + max_value: f64_v.max_skipnan().clone(), + }); Some(Box::new(v)) } "f" => { - let (_, v) = count(match endian { - Endian::Big => be_f32, - _ => le_f32 - }, c)(_data)?; + let (_, v) = count( + match endian { + Endian::Big => be_f32, + _ => le_f32, + }, + c, + )(_data)?; + let v = ArrayD::from_shape_vec((&j.dimension_size).clone().into_shape(), v).unwrap(); + let f64_v = v.mapv(|i| { + if i == j.fill_value as f32 { + f64::NAN + } else { + i as f64 + } + }); + + stat = Some(StatInfo { + min_value: f64_v.min_skipnan().clone(), + max_value: f64_v.max_skipnan().clone(), + }); Some(Box::new(v)) } _ => { - return Err(nom::Err::Failure(Error::from_error_kind(next, ErrorKind::Alpha))); + return Err(nom::Err::Failure(Error::from_error_kind( + next, + ErrorKind::Alpha, + ))); } }; } @@ -159,6 +255,7 @@ fn block_parse<'a, Error: ParseError<&'a [u8]>>( Block { block_info: j, data: d, + stat_info: stat, }, )) } diff --git a/src/printer.rs b/src/printer.rs index 66330a1..1b30ccc 100644 --- a/src/printer.rs +++ b/src/printer.rs @@ -1,11 +1,17 @@ use crate::parse::Parsed; use indoc::formatdoc; +use num_traits::Num; use std::fmt::{Display, Formatter}; use std::string::String; -use num_traits::Num; -fn print_vec_in_e(v:&Vec) -> String { - return v.iter().map(|v| {return format!("{:.2}",*v);}).collect::>().join(","); +fn print_vec_in_e(v: &Vec) -> String { + return v + .iter() + .map(|v| { + return format!("{:.2}", *v); + }) + .collect::>() + .join(","); } fn typechar_to_name(input_type: &str) -> &'static str { @@ -15,7 +21,7 @@ fn typechar_to_name(input_type: &str) -> &'static str { "f" => "f (float 32)", "i" => "i (int 32)", "u" => "u (uint 32)", - _ => "" + _ => "", } } @@ -26,9 +32,8 @@ impl Display for Parsed { .iter() .enumerate() .map(|(index, i)| { - - - return formatdoc! {" + if let Some(_data) = i.data.as_ref() {} + let mut result_format = formatdoc! {" \0 Block {}: value_name: {} value_des: {} @@ -57,6 +62,21 @@ impl Display for Parsed { print_vec_in_e(&i.block_info.dimension_res), i.block_info.fill_value, }; + + if let Some(stat) = i.stat_info.as_ref() { + let stat_info_format = formatdoc! {" + \0 + Stat Info: + min value: {} + max value: {} + ", + stat.min_value, + stat.max_value + }; + result_format.push_str(stat_info_format.as_str()); + } + + return result_format; }) .collect(); let message = fv.join("\n"); diff --git a/src/utils.rs b/src/utils.rs index da910ef..fb4bb0e 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,4 +1,6 @@ use rayon::prelude::*; + +use crate::parse::Block; pub fn is_gz(data: &[u8; 2]) -> bool { *data == [0x1f, 0x8b] } \ No newline at end of file