Compare commits

...

3 Commits

Author SHA1 Message Date
c76af97f58 close release debug, add new stat information 2023-10-13 13:19:47 +08:00
7463a2c743 close release debug, add new stat information 2023-10-13 13:19:20 +08:00
b2febe579e add stat info parse 2023-10-08 15:55:04 +08:00
11 changed files with 344 additions and 52 deletions

BIN
.DS_Store vendored Normal file

Binary file not shown.

124
Cargo.lock generated
View File

@ -99,6 +99,16 @@ dependencies = [
"clap_derive",
]
[[package]]
name = "clap-verbosity-flag"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1eef05769009513df2eb1c3b4613e7fad873a14c600ff025b08f250f59fee7de"
dependencies = [
"clap",
"log",
]
[[package]]
name = "clap_builder"
version = "4.4.2"
@ -242,6 +252,23 @@ dependencies = [
"version_check",
]
[[package]]
name = "getrandom"
version = "0.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "hashbrown"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
[[package]]
name = "heck"
version = "0.4.1"
@ -254,12 +281,31 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b"
[[package]]
name = "indexmap"
version = "1.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
dependencies = [
"autocfg",
"hashbrown",
]
[[package]]
name = "indoc"
version = "2.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c785eefb63ebd0e33416dfcb8d6da0bf27ce752843a45632a67bf10d4d4b5c4"
[[package]]
name = "itertools"
version = "0.10.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
dependencies = [
"either",
]
[[package]]
name = "itertools"
version = "0.11.0"
@ -281,6 +327,12 @@ version = "0.2.148"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9cdc71e17332e86d2e1d38c1f99edcb6288ee11b815fb1a4b049eaa2114d369b"
[[package]]
name = "log"
version = "0.4.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
[[package]]
name = "matrixmultiply"
version = "0.3.7"
@ -335,6 +387,30 @@ dependencies = [
"rayon",
]
[[package]]
name = "ndarray-stats"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af5a8477ac96877b5bd1fd67e0c28736c12943aba24eda92b127e036b0c8f400"
dependencies = [
"indexmap",
"itertools 0.10.5",
"ndarray",
"noisy_float",
"num-integer",
"num-traits",
"rand",
]
[[package]]
name = "noisy_float"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "978fe6e6ebc0bf53de533cd456ca2d9de13de13856eda1518a285d7705a213af"
dependencies = [
"num-traits",
]
[[package]]
name = "nom"
version = "7.1.3"
@ -478,6 +554,12 @@ dependencies = [
"sha2",
]
[[package]]
name = "ppv-lite86"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
[[package]]
name = "proc-macro2"
version = "1.0.67"
@ -509,6 +591,36 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"libc",
"rand_chacha",
"rand_core",
]
[[package]]
name = "rand_chacha"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [
"getrandom",
]
[[package]]
name = "rawpointer"
version = "0.2.1"
@ -538,15 +650,17 @@ dependencies = [
]
[[package]]
name = "rsParser-r"
name = "rsParser"
version = "0.1.0"
dependencies = [
"anyhow",
"clap",
"clap-verbosity-flag",
"flate2",
"indoc",
"itertools",
"itertools 0.11.0",
"ndarray",
"ndarray-stats",
"nom",
"nom-derive",
"npyz",
@ -695,6 +809,12 @@ version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "windows-sys"
version = "0.48.0"

View File

@ -1,5 +1,5 @@
[package]
name = "rsParser-r"
name = "rsParser"
version = "0.1.0"
edition = "2021"
@ -21,3 +21,8 @@ thiserror = "1.0.48"
anyhow = "1.0.75"
npyz = "0.8.1"
itertools = "0.11.0"
clap-verbosity-flag = "2.0.1"
ndarray-stats = "0.5.1"
[profile.release]
debug = false

5
a.py
View File

@ -1,5 +0,0 @@
# Scratch script: loads a saved NumPy array from disk and casts it to int8.
import numpy as np
# The path is a hard-coded absolute location on the author's machine.
a = np.load("/Users/ruomu/Desktop/a.npy").astype(np.int8)
# No-op statement; presumably kept as a line to set a debugger breakpoint on — TODO confirm.
pass

7
package_linux.sh Normal file
View File

@ -0,0 +1,7 @@
#!/usr/bin/env bash
# Builds the release binary for the x86_64-unknown-linux-musl target.
#
# The script first changes into the directory that contains it, so it can be
# invoked from any working directory.

# Resolve the script's directory. An invocation path that starts with "/" is
# already absolute; anything else is taken relative to the current directory.
# All expansions are quoted so paths containing spaces or glob characters work.
case "$0" in
    /*) ABSPATH=$(dirname -- "$0") ;;
    *)  ABSPATH="$PWD/$(dirname -- "$0")" ;;
esac

# Abort instead of building in whatever directory we happen to be in.
cd -- "$ABSPATH" || exit 1

cargo build --release --target x86_64-unknown-linux-musl

View File

@ -27,7 +27,13 @@ impl App {
#[command(name = "rsParser")]
#[command(author = "K.Tsuki . <tsuki@keitsuki.top>")]
#[command(version = "1.0")]
#[command(about = "A fictional versioning CLI", long_about = None)]
#[command(about = "A fictional versioning CLI", long_about = "
The rsParser is a powerful command-line utility tailored
for internal product data analysis needs. With its versatile
features, this tool empowers users to efficiently process and
analyze product data in various formats, facilitating data-driven
decision-making and enhancing operational efficiency.
")]
pub struct Cli {
#[command(subcommand)]
pub command: Commands,
@ -51,6 +57,8 @@ pub enum Commands {
path: Vec<PathBuf>,
#[arg(short = 'o', long = "output", required = false)]
output_path: Option<PathBuf>,
#[arg(short = 'v', long = "verbose", required = false)]
verbose: bool,
},
}

View File

@ -21,13 +21,14 @@ pub enum OutputType {
}
/// Settings for one invocation of the parse command.
pub struct ParseCmdConfig {
/// Verbose flag. `ParseCmd::parse` computes `only_header` as
/// `!verbose && output_type == OutputType::None`, so setting this disables
/// the header-only mode even when no output file is given.
pub verbose: bool,
/// Input files; `ParseCmd::parse` walks them with a parallel iterator.
pub paths: Vec<PathBuf>,
/// Optional destination file. `new` inspects its extension to pick the
/// output type (for example `"npy"` selects `OutputType::Npy`).
pub output_path: Option<PathBuf>,
// Output format chosen by `new`; private to this module.
output_type: OutputType,
}
impl ParseCmdConfig {
pub fn new(paths: Vec<PathBuf>, output_path: Option<PathBuf>) -> Self {
pub fn new(paths: Vec<PathBuf>, output_path: Option<PathBuf>, v: bool) -> Self {
let t = if let Some(o) = &output_path {
match o.extension().unwrap().to_str().unwrap() {
"npy" => OutputType::Npy,
@ -42,6 +43,7 @@ impl ParseCmdConfig {
paths: paths,
output_path: output_path,
output_type: t,
verbose: v,
}
}
}
@ -52,8 +54,8 @@ impl Cmd for ParseCmd {
type Config = ParseCmdConfig;
fn parse(&self, app: &App, config: Self::Config) -> AResult<Self::Output, Self::Exception> {
let only_header = config.output_type == OutputType::None;
println!("only_header:{}", only_header);
let only_header = !config.verbose && config.output_type == OutputType::None;
// println!("only_header:{}", only_header);
let paths = &config.paths;
let _data = paths
.par_iter()
@ -84,7 +86,6 @@ impl Cmd for ParseCmd {
.unwrap();
writer.extend(v.iter()).unwrap();
writer.finish().unwrap();
});
}
_ => {}

View File

@ -4,11 +4,13 @@ mod error;
mod parse;
mod printer;
mod utils;
use crate::app::App;
use app::{Cli, Commands};
use clap::Parser;
use cmds::parse::{ParseCmd, ParseCmdConfig};
use cmds::Cmd;
use std::time::Instant;
fn main() {
let args = Cli::parse();
@ -16,10 +18,11 @@ fn main() {
let app = App::new(config);
match args.command {
Commands::Parse { path, output_path } => {
let parse_conf = ParseCmdConfig::new(path, output_path);
let p = ParseCmd;
p.parse(&app, parse_conf).unwrap().unwrap();
Commands::Parse { path, output_path, verbose } => {
let parse_conf = ParseCmdConfig::new(path, output_path, verbose);
let start = Instant::now();
ParseCmd.parse(&app, parse_conf).unwrap().unwrap();
println!("elapse time: {}", start.elapsed().as_secs());
}
}
}

View File

@ -1,12 +1,16 @@
use nom::bytes::complete::*;
use nom::error::{ParseError, ErrorKind};
use nom::error::{ErrorKind, ParseError};
use nom::multi::count;
use nom::number::complete::{be_f32, be_i32, be_i8, be_u32, be_u8, le_f32, le_i32, le_i8, le_u8, le_u32};
use nom::number::complete::{
be_f32, be_i32, be_i8, be_u32, be_u8, le_f32, le_i32, le_i8, le_u32, le_u8,
};
use nom::sequence::tuple;
use nom::IResult;
use serde::{Deserialize, Serialize};
use std::any::Any;
use std::str::from_utf8;
use ndarray::{ArrayD, ShapeBuilder};
use ndarray::parallel::prelude::*;
#[derive(Deserialize, Serialize, Debug)]
pub struct HeaderJson {
@ -31,6 +35,12 @@ pub struct BlockInfo {
pub dimension_values: Vec<Vec<f64>>,
}
/// Minimum and maximum of a block's decoded values.
///
/// The parser computes both bounds in the block's native element type,
/// skipping elements equal to the block's fill value, and then widens them to
/// `f64`.
///
/// Note: the parser seeds its scan with the element type's `MAX` / `MIN`, so
/// when every element equals the fill value (or the block is empty) those
/// seeds are stored unchanged and `min_value` ends up greater than
/// `max_value`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct StatInfo {
    /// Largest non-fill value found in the block.
    pub max_value: f64,
    /// Smallest non-fill value found in the block.
    pub min_value: f64,
}
#[derive(Copy, Clone, Debug)]
enum Endian {
Big,
@ -46,6 +56,7 @@ pub struct CommonHeader {
/// One parsed block: its metadata plus, optionally, statistics and the
/// decoded values.
pub struct Block {
/// Metadata describing the block.
pub block_info: BlockInfo,
/// Min/max summary of the values; `block_parse` leaves this as `None` when
/// it is called with `only_header` set.
pub stat_info: Option<StatInfo>,
/// Decoded values, type-erased. `block_parse` stores an `ndarray::ArrayD`
/// whose element type follows the block's `value_type`
/// ("b" i8, "B" u8, "i" i32, "u" u32, "f" f32); `None` when only the header
/// was parsed.
pub data: Option<Box<dyn Any + Sync + Send>>,
}
@ -88,11 +99,10 @@ fn common_parse(input: &[u8]) -> IResult<&[u8], CommonHeader> {
fn block_parse<'a, Error: ParseError<&'a [u8]>>(
only_header: bool,
endian: Endian,
) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Block, Error>
{
) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Block, Error> {
let l_p = match endian {
Endian::Big => be_i32,
Endian::Little => le_i32,
Endian::Big => be_u32,
Endian::Little => le_u32,
};
move |input| {
@ -109,47 +119,167 @@ fn block_parse<'a, Error: ParseError<&'a [u8]>>(
let (next, _data) = take(l2 as usize)(next)?;
let mut d: Option<Box<dyn Any + Send + Sync>> = None;
let mut stat = None;
if !only_header {
let c = j.dimension_size.iter().fold(1, |i, j| i * (*j)) as usize;
// let mut converted_v: Option<ArrayD<f64>>;
d = match j.value_type.as_str() {
"b" => {
let (_, v) = count(match endian {
Endian::Big => be_i8,
_ => le_i8
}, c)(_data)?;
let (_, v) = count(
match endian {
Endian::Big => be_i8,
_ => le_i8,
},
c,
)(_data)?;
let v = ArrayD::from_shape_vec((&j.dimension_size.clone()).clone().into_shape(), v).unwrap();
let (min_value, max_value) = v.iter().fold((i8::MAX, i8::MIN), |(min, max), i| {
if *i == j.fill_value as i8 {
return (min, max);
}
let mut g = (min, max);
if *i < min {
g.0 = *i;
}
if *i > max {
g.1 = *i;
}
return g;
});
stat = Some(StatInfo {
min_value: min_value as f64,
max_value: max_value as f64,
});
Some(Box::new(v))
}
"B" => {
let (_, v) = count(match endian {
Endian::Big => be_u8,
_ => le_u8
}, c)(_data)?;
let (_, v) = count(
match endian {
Endian::Big => be_u8,
_ => le_u8,
},
c,
)(_data)?;
let v = ArrayD::from_shape_vec((&j.dimension_size).clone().into_shape(), v).unwrap();
let (min_value, max_value) = v.iter().fold((u8::MAX, u8::MIN), |(min, max), i| {
if *i == j.fill_value as u8 {
return (min, max);
}
let mut g = (min, max);
if *i < min {
g.0 = *i;
}
if *i > max {
g.1 = *i;
}
return g;
});
stat = Some(StatInfo {
min_value: min_value as f64,
max_value: max_value as f64,
});
Some(Box::new(v))
}
"i" => {
let (_, v) = count(match endian {
Endian::Big => be_i32,
_ => le_i32
}, c)(_data)?;
let (_, v) = count(
match endian {
Endian::Big => be_i32,
_ => le_i32,
},
c,
)(_data)?;
let v = ArrayD::from_shape_vec((&j.dimension_size).clone().into_shape(), v).unwrap();
let (min_value, max_value) = v.iter().fold((i32::MAX, i32::MIN), |(min, max), i| {
if *i == j.fill_value as i32 {
return (min, max);
}
let mut g = (min, max);
if *i < min {
g.0 = *i;
}
if *i > max {
g.1 = *i;
}
return g;
});
stat = Some(StatInfo {
min_value: min_value as f64,
max_value: max_value as f64,
});
Some(Box::new(v))
}
"u" => {
let (_, v) = count(match endian {
Endian::Big => be_u32,
_ => le_u32
}, c)(_data)?;
let (_, v) = count(
match endian {
Endian::Big => be_u32,
_ => le_u32,
},
c,
)(_data)?;
let v = ArrayD::from_shape_vec((&j.dimension_size).clone().into_shape(), v).unwrap();
let (min_value, max_value) = v.iter().fold((u32::MAX, u32::MIN), |(min, max), i| {
if *i == j.fill_value as u32 {
return (min, max);
}
let mut g = (min, max);
if *i < min {
g.0 = *i;
}
if *i > max {
g.1 = *i;
}
return g;
});
stat = Some(StatInfo {
min_value: min_value as f64,
max_value: max_value as f64,
});
Some(Box::new(v))
}
"f" => {
let (_, v) = count(match endian {
Endian::Big => be_f32,
_ => le_f32
}, c)(_data)?;
let (_, v) = count(
match endian {
Endian::Big => be_f32,
_ => le_f32,
},
c,
)(_data)?;
let v = ArrayD::from_shape_vec((&j.dimension_size).clone().into_shape(), v).unwrap();
let (min_value, max_value) = v.iter().fold((f32::MAX, f32::MIN), |(min, max), i| {
if *i == j.fill_value as f32 {
return (min, max);
}
let mut g = (min, max);
if *i < min {
g.0 = *i;
}
if *i > max {
g.1 = *i;
}
return g;
});
stat = Some(StatInfo {
min_value: min_value as f64,
max_value: max_value as f64,
});
Some(Box::new(v))
}
_ => {
return Err(nom::Err::Failure(Error::from_error_kind(next, ErrorKind::Alpha)));
return Err(nom::Err::Failure(Error::from_error_kind(
next,
ErrorKind::Alpha,
)));
}
};
}
@ -159,6 +289,7 @@ fn block_parse<'a, Error: ParseError<&'a [u8]>>(
Block {
block_info: j,
data: d,
stat_info: stat,
},
))
}

View File

@ -1,11 +1,17 @@
use crate::parse::Parsed;
use indoc::formatdoc;
use num_traits::Num;
use std::fmt::{Display, Formatter};
use std::string::String;
use num_traits::Num;
fn print_vec_in_e<T:Num + Display>(v:&Vec<T>) -> String {
return v.iter().map(|v| {return format!("{:.2}",*v);}).collect::<Vec<String>>().join(",");
/// Renders every element of `v` with two digits of precision (`{:.2}`) and
/// joins the pieces with commas, without spaces.
fn print_vec_in_e<T: Num + Display>(v: &Vec<T>) -> String {
    let mut rendered: Vec<String> = Vec::with_capacity(v.len());
    for item in v {
        rendered.push(format!("{:.2}", *item));
    }
    rendered.join(",")
}
fn typechar_to_name(input_type: &str) -> &'static str {
@ -15,7 +21,7 @@ fn typechar_to_name(input_type: &str) -> &'static str {
"f" => "f (float 32)",
"i" => "i (int 32)",
"u" => "u (uint 32)",
_ => ""
_ => "",
}
}
@ -26,9 +32,8 @@ impl Display for Parsed {
.iter()
.enumerate()
.map(|(index, i)| {
return formatdoc! {"
if let Some(_data) = i.data.as_ref() {}
let mut result_format = formatdoc! {"
\0 Block {}:
value_name: {}
value_des: {}
@ -57,6 +62,21 @@ impl Display for Parsed {
print_vec_in_e(&i.block_info.dimension_res),
i.block_info.fill_value,
};
if let Some(stat) = i.stat_info.as_ref() {
let stat_info_format = formatdoc! {"
\0
Stat Info:
min value: {}
max value: {}
",
stat.min_value,
stat.max_value
};
result_format.push_str(stat_info_format.as_str());
}
return result_format;
})
.collect();
let message = fv.join("\n");

View File

@ -1,4 +1,6 @@
use rayon::prelude::*;
use crate::parse::Block;
/// Returns `true` when `data` is exactly the two-byte gzip magic number
/// (`0x1f`, `0x8b`).
pub fn is_gz(data: &[u8; 2]) -> bool {
    matches!(data, [0x1f, 0x8b])
}