diff --git a/Cargo.lock b/Cargo.lock index c317f4c..56cda25 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -68,6 +68,21 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "byteorder" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" + [[package]] name = "cfg-if" version = "1.0.0" @@ -120,6 +135,15 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +[[package]] +name = "cpufeatures" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a17b76ff3a4162b0b27f354a0c87015ddad39d35f9c0c36607a3bdd175dde1f1" +dependencies = [ + "libc", +] + [[package]] name = "crc32fast" version = "1.3.2" @@ -172,6 +196,26 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + [[package]] name = "either" version = "1.9.0" @@ -188,6 +232,16 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "heck" version = "0.4.1" @@ -206,6 +260,15 @@ version = "2.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2c785eefb63ebd0e33416dfcb8d6da0bf27ce752843a45632a67bf10d4d4b5c4" +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.9" @@ -304,6 +367,28 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "npyz" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5d7d7b4142b8ad36ba84122b91491276a4e9f7104e7f51c8c56f16a7a3825ef" +dependencies = [ + "byteorder", + "num-bigint", + "py_literal", +] + +[[package]] +name = "num-bigint" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "608e7659b5c3d7cba262d894801b9ec9d00de989e8a82bd4bef91d08da45cdc0" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + [[package]] name = "num-complex" version = "0.4.4" @@ -342,6 +427,57 @@ dependencies = [ "libc", ] +[[package]] +name = "once_cell" +version = "1.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" + +[[package]] +name = "pest" +version = "2.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7a4d085fd991ac8d5b05a147b437791b4260b76326baf0fc60cf7c9c27ecd33" +dependencies = [ + "memchr", + "thiserror", + "ucd-trie", +] + +[[package]] +name = "pest_derive" +version = "2.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2bee7be22ce7918f641a33f08e3f43388c7656772244e2bbb2477f44cc9021a" +dependencies = [ + "pest", + "pest_generator", +] + +[[package]] +name = "pest_generator" +version = "2.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1511785c5e98d79a05e8a6bc34b4ac2168a0e3e92161862030ad84daa223141" +dependencies = [ + "pest", + "pest_meta", + "proc-macro2", + "quote", + "syn 2.0.33", +] + +[[package]] +name = "pest_meta" +version = "2.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b42f0394d3123e33353ca5e1e89092e533d2cc490389f2bd6131c43c634ebc5f" +dependencies = [ + "once_cell", + "pest", + "sha2", +] + [[package]] name = "proc-macro2" version = "1.0.67" @@ -351,6 +487,19 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "py_literal" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "102df7a3d46db9d3891f178dcc826dc270a6746277a9ae6436f8d29fd490a8e1" +dependencies = [ + "num-bigint", + "num-complex", + "num-traits", + "pest", + "pest_derive", +] + [[package]] name = "quote" version = "1.0.33" @@ -396,9 +545,11 @@ dependencies = [ "clap", "flate2", "indoc", + "itertools", "ndarray", "nom", "nom-derive", + "npyz", "num-traits", "rayon", "serde", @@ -455,6 +606,17 @@ dependencies = [ "serde", ] +[[package]] +name = "sha2" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479fb9d862239e610720565ca91403019f2f00410f1864c5aa7479b950a76ed8" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "strsim" version = "0.10.0" @@ -503,6 +665,18 @@ dependencies = [ "syn 2.0.33", ] +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + +[[package]] +name = "ucd-trie" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9" + [[package]] name = "unicode-ident" version = "1.0.12" @@ -515,6 +689,12 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + [[package]] name = "windows-sys" version = "0.48.0" diff --git a/Cargo.toml b/Cargo.toml index 3a7fb07..356faa6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,3 +19,5 @@ indoc = "2.0.3" num-traits = "0.2.16" thiserror = "1.0.48" anyhow = "1.0.75" +npyz = "0.8.1" +itertools = "0.11.0" \ No newline at end of file diff --git a/a.py b/a.py new file mode 100644 index 0000000..28f105a --- /dev/null +++ b/a.py @@ -0,0 +1,5 @@ +import numpy as np + +a = np.load("/Users/ruomu/Desktop/a.npy").astype(np.int8) + +pass \ No newline at end of file diff --git a/src/app.rs b/src/app.rs index 72f17c6..d657f8a 100644 --- a/src/app.rs +++ b/src/app.rs @@ -1,57 +1,94 @@ -use crate::parse::{Parsed, parse}; use crate::error::{AResult, BrokenError}; +use crate::parse::{parse, Parsed}; use crate::utils::is_gz; -use rayon::prelude::{*, IndexedParallelIterator}; +pub use clap::{command, Args, Parser, Subcommand, ValueEnum}; use flate2::read::GzDecoder; -use std::path::PathBuf; +use rayon::prelude::{IndexedParallelIterator, *}; use std::fs::File; use std::io::{Read, Seek, SeekFrom}; +use std::path::PathBuf; -type AAResult = AResult; -pub struct App { - paths: Vec, - config: AppConfig, -} +pub struct App(AppConfig); #[derive(Copy, Clone, Debug, PartialOrd, PartialEq)] pub struct AppConfig { - pub only_header: bool, - pub multi_threading: bool, + pub multi_threading: usize, } impl App { - pub fn new(paths: Vec, config: AppConfig) -> Result { - Ok(Self { paths, config }) + pub fn new(config: AppConfig) -> Self { + Self(config) } +} - pub fn parse(&self) ->AAResult> { - let paths = &self.paths; - let config = &self.config; - let datas = paths.par_iter().map(|p| { - let d = Self::data_prepare(p); - match d { - Ok(_d) => parse(&_d, config.only_header).map(|(_,b)| b).map_err(|e| e.into()), - Err(e) => Err(e.into()) - } - }).collect::, BrokenError>>()?; - Ok((datas,())) - } +/// A fictional versioning CLI +#[derive(Debug, Parser)] // requires `derive` feature +#[command(name = "rsParser")] +#[command(author = "K.Tsuki . ")] +#[command(version = "1.0")] +#[command(about = "A fictional versioning CLI", long_about = None)] +pub struct Cli { + #[command(subcommand)] + pub command: Commands, + #[arg(short = 't', long = "thread", default_value_t = 4)] + thread_count: usize, +} - fn data_prepare(path: &PathBuf) -> Result, std::io::Error> { - let mut f = File::open(path)?; - let mut buf = Vec::new(); - { - let mut magic = [0; 2]; - f.read_exact(&mut magic)?; - f.seek(SeekFrom::Start(0))?; - if is_gz(&magic) { - let mut d = GzDecoder::new(f); - d.read_to_end(&mut buf)?; - } else { - f.read_to_end(&mut buf)?; - } +impl Cli { + pub fn appConfig(&self) -> AppConfig { + AppConfig { + multi_threading: self.thread_count, } - Ok(buf) } -} \ No newline at end of file +} + +#[derive(Debug, Subcommand)] +pub enum Commands { + #[command(arg_required_else_help = true)] + Parse { + #[arg(required = true)] + path: Vec, + #[arg(short = 'o', long = "output", required = false)] + output_path: Option, + }, +} + +#[derive(ValueEnum, Copy, Clone, Debug, PartialEq, Eq)] +enum ColorWhen { + Always, + Auto, + Never, +} + +impl std::fmt::Display for ColorWhen { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.to_possible_value() + .expect("no values are skipped") + .get_name() + .fmt(f) + } +} + +#[derive(Debug, Args)] +#[command(args_conflicts_with_subcommands = true)] +struct StashArgs { + #[command(subcommand)] + command: Option, + + #[command(flatten)] + push: StashPushArgs, +} + +#[derive(Debug, Subcommand)] +enum StashCommands { + Push(StashPushArgs), + Pop { stash: Option }, + Apply { stash: Option }, +} + +#[derive(Debug, Args)] +struct StashPushArgs { + #[arg(short, long)] + message: Option, +} diff --git a/src/cmds/mod.rs b/src/cmds/mod.rs new file mode 100644 index 0000000..bb6f96d --- /dev/null +++ b/src/cmds/mod.rs @@ -0,0 +1,9 @@ +use crate::{error::AResult, app::{App, AppConfig}}; +pub mod parse; + +pub trait Cmd { + type Output; + type Exception; + type Config; + fn parse(&self,app:&App,config:Self::Config) -> AResult; +} \ No newline at end of file diff --git a/src/cmds/parse.rs b/src/cmds/parse.rs new file mode 100644 index 0000000..fba5ee9 --- /dev/null +++ b/src/cmds/parse.rs @@ -0,0 +1,165 @@ +use super::Cmd; +use crate::app::App; +use crate::error::{AAResult, AResult, BrokenError}; +use crate::parse::{parse, Parsed}; +use crate::utils::is_gz; +use flate2::read::GzDecoder; +use ndarray::{self, ArrayD}; +use npyz::{self, write_options::WriterBuilder}; +use rayon::prelude::{IndexedParallelIterator, *}; +use std::fs::File; +use std::io::{self, BufWriter, Read, Seek, SeekFrom}; +use std::path::PathBuf; + +pub struct ParseCmd; + +#[derive(PartialOrd, PartialEq)] +pub enum OutputType { + Npy, + Npz, + None, +} + +pub struct ParseCmdConfig { + pub paths: Vec, + pub output_path: Option, + output_type: OutputType, +} + +impl ParseCmdConfig { + pub fn new(paths: Vec, output_path: Option) -> Self { + let t = if let Some(o) = &output_path { + match o.extension().unwrap().to_str().unwrap() { + "npy" => OutputType::Npy, + "npz" => OutputType::Npz, + _ => OutputType::None, + } + } else { + OutputType::None + }; + + Self { + paths: paths, + output_path: output_path, + output_type: t, + } + } +} + +impl Cmd for ParseCmd { + type Exception = (); + type Output = (); + type Config = ParseCmdConfig; + + fn parse(&self, app: &App, config: Self::Config) -> AResult { + let only_header = config.output_type == OutputType::None; + println!("only_header:{}", only_header); + let paths = &config.paths; + let _data = paths + .par_iter() + .map(|p| { + let d = Self::data_prepare(p); + match d { + Ok(_d) => parse(&_d, only_header) + .map(|(_, b)| b) + .map_err(|e| e.into()), + Err(e) => Err(e.into()), + } + }) + .collect::, BrokenError>>()?; + + _data.iter().for_each(|x| println!("{}", x)); + + match &config.output_type { + OutputType::Npy => { + self.npy_parse(_data)?.first().unwrap().into_iter().for_each(|v| { + let file = + BufWriter::new(File::create("/Users/ruomu/Desktop/a.npy").unwrap()); + let shape = v.shape().into_iter().map(|c| *c as u64).collect::>(); + let mut writer = npyz::WriteOptions::new() + .default_dtype() + .shape(&shape) + .writer(file) + .begin_nd() + .unwrap(); + writer.extend(v.iter()).unwrap(); + writer.finish().unwrap(); + + }); + } + _ => {} + } + Ok(Ok(())) + } +} + +impl ParseCmd { + fn data_prepare(path: &PathBuf) -> Result, std::io::Error> { + let mut f = File::open(path)?; + let mut buf = Vec::new(); + { + let mut magic = [0; 2]; + f.read_exact(&mut magic)?; + f.seek(SeekFrom::Start(0))?; + if is_gz(&magic) { + let mut d = GzDecoder::new(f); + d.read_to_end(&mut buf)?; + } else { + f.read_to_end(&mut buf)?; + } + } + Ok(buf) + } + + fn npy_parse(&self, data: Vec) -> Result>>, BrokenError> { + let c = data + .into_par_iter() + .map(|d| { + d.blocks + .into_par_iter() + .map(|mut _block| { + let t = &_block.block_info.value_type; + let shape: &[usize] = &_block.block_info.dimension_size; + let offset = &_block.block_info.value_offset; + let scale = &_block.block_info.value_scale; + if let Some(b) = _block.data.as_mut() { + match t.as_str() { + "b" => { + let v = b.downcast_ref::>().unwrap(); + let _m = ndarray::ArrayView::from_shape(shape, v).unwrap(); + return Ok(_m.mapv(|x| (x as f32 - *offset) / *scale)); + } + "B" => { + let v = b.downcast_ref::>().unwrap(); + let _m = ndarray::ArrayView::from_shape(shape, v).unwrap(); + return Ok(_m.mapv(|x| (x as f32 - *offset) / *scale)); + } + "i" => { + let v = b.downcast_ref::>().unwrap(); + let _m = ndarray::ArrayView::from_shape(shape, v).unwrap(); + return Ok(_m.mapv(|x| (x as f32 - *offset) / *scale)); + } + "u" => { + let v = b.downcast_ref::>().unwrap(); + let _m = ndarray::ArrayView::from_shape(shape, v).unwrap(); + return Ok(_m.mapv(|x| (x as f32 - *offset) / *scale)); + } + "f" => { + let v = b.downcast_ref::>().unwrap(); + let _m = ndarray::ArrayView::from_shape(shape, v).unwrap(); + return Ok(_m.mapv(|x| (x as f32 - *offset) / *scale)); + } + _ => { + return Err(BrokenError::ParseError); + } + } + } + return Err(BrokenError::ParseError); + }) + .collect::, BrokenError>>() + }) + .collect::, BrokenError>>()?; + + Ok(c) + } +} diff --git a/src/error.rs b/src/error.rs index ec2fec3..860d778 100644 --- a/src/error.rs +++ b/src/error.rs @@ -15,4 +15,6 @@ impl From> for BrokenError { } } -pub type AResult = Result<(O, E), BrokenError>; \ No newline at end of file +pub type AResult = Result, BrokenError>; + +pub type AAResult = AResult; \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 2557e91..9069f49 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,90 +1,25 @@ -mod parse; mod app; +mod cmds; +mod error; +mod parse; mod printer; mod utils; -mod error; - -use std::io::prelude::*; -use flate2::read::GzDecoder; -use std::fs::File; -use std::io::BufReader; -use std::path::PathBuf; - -use clap::{Args, command, Parser, Subcommand, ValueEnum}; -use parse::parse; - -use crate::app::{App, AppConfig}; - -/// A fictional versioning CLI -#[derive(Debug, Parser)] // requires `derive` feature -#[command(name = "rsParser")] -#[command(author = "K.Tsuki . ")] -#[command(version = "1.0")] -#[command(about = "A fictional versioning CLI", long_about = None)] -struct Cli { - #[command(subcommand)] - command: Commands, -} - -#[derive(Debug, Subcommand)] -enum Commands { - #[command(arg_required_else_help = true)] - Parse { - #[arg(required = true)] - path: PathBuf, - #[arg(short = 'd', long = "only-header")] - header: bool, - }, -} - -#[derive(ValueEnum, Copy, Clone, Debug, PartialEq, Eq)] -enum ColorWhen { - Always, - Auto, - Never, -} - -impl std::fmt::Display for ColorWhen { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - self.to_possible_value() - .expect("no values are skipped") - .get_name() - .fmt(f) - } -} - -#[derive(Debug, Args)] -#[command(args_conflicts_with_subcommands = true)] -struct StashArgs { - #[command(subcommand)] - command: Option, - - #[command(flatten)] - push: StashPushArgs, -} - -#[derive(Debug, Subcommand)] -enum StashCommands { - Push(StashPushArgs), - Pop { stash: Option }, - Apply { stash: Option }, -} - -#[derive(Debug, Args)] -struct StashPushArgs { - #[arg(short, long)] - message: Option, -} +use crate::app::App; +use app::{Cli, Commands}; +use clap::Parser; +use cmds::parse::{ParseCmd, ParseCmdConfig}; +use cmds::Cmd; fn main() { let args = Cli::parse(); + let config = args.appConfig(); + let app = App::new(config); + match args.command { - Commands::Parse { path, header } => { - let config = AppConfig{ - only_header:header, - multi_threading:true - }; - let app = App::new(vec![path], config).unwrap(); + Commands::Parse { path, output_path } => { + let parse_conf = ParseCmdConfig::new(path, output_path); + let p = ParseCmd; + p.parse(&app, parse_conf).unwrap().unwrap(); } } -} \ No newline at end of file +} diff --git a/src/parse.rs b/src/parse.rs index 3202861..10ce14f 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -25,7 +25,7 @@ pub struct BlockInfo { pub dimension_des: Vec, pub dimension_start: Vec, pub dimension_end: Vec, - pub dimension_size: Vec, + pub dimension_size: Vec, pub dimension_res: Vec, pub fill_value: f64, pub dimension_values: Vec>, diff --git a/src/printer.rs b/src/printer.rs index 86b09fd..66330a1 100644 --- a/src/printer.rs +++ b/src/printer.rs @@ -8,6 +8,17 @@ fn print_vec_in_e(v:&Vec) -> String { return v.iter().map(|v| {return format!("{:.2}",*v);}).collect::>().join(","); } +fn typechar_to_name(input_type: &str) -> &'static str { + match input_type { + "b" => "b (int 8)", + "B" => "B (uint 8)", + "f" => "f (float 32)", + "i" => "i (int 32)", + "u" => "u (uint 32)", + _ => "" + } +} + impl Display for Parsed { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { let fv: Vec = self @@ -19,18 +30,18 @@ impl Display for Parsed { return formatdoc! {" \0 Block {}: - value_name: {}, - value_des: {}, - dimension: {}, - value_offset: {}, - value_scale: {}, - value_type: {}, - dimension_des: {:?}, - dimension_start: {}, - dimension_end: {}, - dimension_size: {}, - dimension_res: {}, - fill_value: {}, + value_name: {} + value_des: {} + dimension: {} + value_offset: {} + value_scale: {} + value_type: {} + dimension_des: {:?} + dimension_start: {} + dimension_end: {} + dimension_size: {} + dimension_res: {} + fill_value: {} ", index+1, i.block_info.value_name, @@ -38,7 +49,7 @@ impl Display for Parsed { i.block_info.dimension, i.block_info.value_offset, i.block_info.value_scale, - i.block_info.value_type, + typechar_to_name(i.block_info.value_type.as_str()), i.block_info.dimension_des, print_vec_in_e(&i.block_info.dimension_start), print_vec_in_e(&i.block_info.dimension_end), @@ -52,7 +63,7 @@ impl Display for Parsed { write!( f, - "File Infomations: \n filetime: {}\n block num: {}\n\nBlocks:\n{}", + "File Infomations: \n filetime: {}\n block num: {}\n\nBlocks:\n{}", self.common_info.header_info.file_time, self.common_info.header_info.block_num, message ) } diff --git a/src/utils.rs b/src/utils.rs index 5b898e7..da910ef 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,3 +1,4 @@ -pub fn is_gz(data:&[u8;2]) -> bool{ +use rayon::prelude::*; +pub fn is_gz(data: &[u8; 2]) -> bool { *data == [0x1f, 0x8b] -} +} \ No newline at end of file