add npy parse

This commit is contained in:
sleptworld 2023-09-18 01:01:49 +08:00
parent ffedf08dfd
commit 00d209b24a
11 changed files with 485 additions and 138 deletions

180
Cargo.lock generated
View File

@ -68,6 +68,21 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "block-buffer"
version = "0.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
dependencies = [
"generic-array",
]
[[package]]
name = "byteorder"
version = "1.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
[[package]]
name = "cfg-if"
version = "1.0.0"
@ -120,6 +135,15 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
[[package]]
name = "cpufeatures"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a17b76ff3a4162b0b27f354a0c87015ddad39d35f9c0c36607a3bdd175dde1f1"
dependencies = [
"libc",
]
[[package]]
name = "crc32fast"
version = "1.3.2"
@ -172,6 +196,26 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "crypto-common"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
dependencies = [
"generic-array",
"typenum",
]
[[package]]
name = "digest"
version = "0.10.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
dependencies = [
"block-buffer",
"crypto-common",
]
[[package]]
name = "either"
version = "1.9.0"
@ -188,6 +232,16 @@ dependencies = [
"miniz_oxide",
]
[[package]]
name = "generic-array"
version = "0.14.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
dependencies = [
"typenum",
"version_check",
]
[[package]]
name = "heck"
version = "0.4.1"
@ -206,6 +260,15 @@ version = "2.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c785eefb63ebd0e33416dfcb8d6da0bf27ce752843a45632a67bf10d4d4b5c4"
[[package]]
name = "itertools"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "1.0.9"
@ -304,6 +367,28 @@ dependencies = [
"syn 1.0.109",
]
[[package]]
name = "npyz"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c5d7d7b4142b8ad36ba84122b91491276a4e9f7104e7f51c8c56f16a7a3825ef"
dependencies = [
"byteorder",
"num-bigint",
"py_literal",
]
[[package]]
name = "num-bigint"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "608e7659b5c3d7cba262d894801b9ec9d00de989e8a82bd4bef91d08da45cdc0"
dependencies = [
"autocfg",
"num-integer",
"num-traits",
]
[[package]]
name = "num-complex"
version = "0.4.4"
@ -342,6 +427,57 @@ dependencies = [
"libc",
]
[[package]]
name = "once_cell"
version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
[[package]]
name = "pest"
version = "2.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7a4d085fd991ac8d5b05a147b437791b4260b76326baf0fc60cf7c9c27ecd33"
dependencies = [
"memchr",
"thiserror",
"ucd-trie",
]
[[package]]
name = "pest_derive"
version = "2.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2bee7be22ce7918f641a33f08e3f43388c7656772244e2bbb2477f44cc9021a"
dependencies = [
"pest",
"pest_generator",
]
[[package]]
name = "pest_generator"
version = "2.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d1511785c5e98d79a05e8a6bc34b4ac2168a0e3e92161862030ad84daa223141"
dependencies = [
"pest",
"pest_meta",
"proc-macro2",
"quote",
"syn 2.0.33",
]
[[package]]
name = "pest_meta"
version = "2.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b42f0394d3123e33353ca5e1e89092e533d2cc490389f2bd6131c43c634ebc5f"
dependencies = [
"once_cell",
"pest",
"sha2",
]
[[package]]
name = "proc-macro2"
version = "1.0.67"
@ -351,6 +487,19 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "py_literal"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "102df7a3d46db9d3891f178dcc826dc270a6746277a9ae6436f8d29fd490a8e1"
dependencies = [
"num-bigint",
"num-complex",
"num-traits",
"pest",
"pest_derive",
]
[[package]]
name = "quote"
version = "1.0.33"
@ -396,9 +545,11 @@ dependencies = [
"clap",
"flate2",
"indoc",
"itertools",
"ndarray",
"nom",
"nom-derive",
"npyz",
"num-traits",
"rayon",
"serde",
@ -455,6 +606,17 @@ dependencies = [
"serde",
]
[[package]]
name = "sha2"
version = "0.10.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "479fb9d862239e610720565ca91403019f2f00410f1864c5aa7479b950a76ed8"
dependencies = [
"cfg-if",
"cpufeatures",
"digest",
]
[[package]]
name = "strsim"
version = "0.10.0"
@ -503,6 +665,18 @@ dependencies = [
"syn 2.0.33",
]
[[package]]
name = "typenum"
version = "1.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"
[[package]]
name = "ucd-trie"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9"
[[package]]
name = "unicode-ident"
version = "1.0.12"
@ -515,6 +689,12 @@ version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "windows-sys"
version = "0.48.0"

View File

@ -19,3 +19,5 @@ indoc = "2.0.3"
num-traits = "0.2.16"
thiserror = "1.0.48"
anyhow = "1.0.75"
npyz = "0.8.1"
itertools = "0.11.0"

5
a.py Normal file
View File

@ -0,0 +1,5 @@
# Scratch script: loads a NumPy array from a hard-coded local file and casts it to int8.
import numpy as np
# NOTE(review): the absolute path is machine-specific; this only runs where that file exists.
a = np.load("/Users/ruomu/Desktop/a.npy").astype(np.int8)
# No-op statement; `a` is not used again in the visible script.
pass

View File

@ -1,57 +1,94 @@
use crate::parse::{Parsed, parse};
use crate::error::{AResult, BrokenError};
use crate::parse::{parse, Parsed};
use crate::utils::is_gz;
use rayon::prelude::{*, IndexedParallelIterator};
pub use clap::{command, Args, Parser, Subcommand, ValueEnum};
use flate2::read::GzDecoder;
use std::path::PathBuf;
use rayon::prelude::{IndexedParallelIterator, *};
use std::fs::File;
use std::io::{Read, Seek, SeekFrom};
use std::path::PathBuf;
type AAResult<O> = AResult<O,()>;
pub struct App {
paths: Vec<PathBuf>,
config: AppConfig,
}
pub struct App(AppConfig);
#[derive(Copy, Clone, Debug, PartialOrd, PartialEq)]
pub struct AppConfig {
pub only_header: bool,
pub multi_threading: bool,
pub multi_threading: usize,
}
impl App {
pub fn new(paths: Vec<PathBuf>, config: AppConfig) -> Result<Self, BrokenError> {
Ok(Self { paths, config })
}
pub fn parse(&self) ->AAResult<Vec<Parsed>> {
let paths = &self.paths;
let config = &self.config;
let datas = paths.par_iter().map(|p| {
let d = Self::data_prepare(p);
match d {
Ok(_d) => parse(&_d, config.only_header).map(|(_,b)| b).map_err(|e| e.into()),
Err(e) => Err(e.into())
}
}).collect::<Result<Vec<_>, BrokenError>>()?;
Ok((datas,()))
}
fn data_prepare(path: &PathBuf) -> Result<Vec<u8>, std::io::Error> {
let mut f = File::open(path)?;
let mut buf = Vec::new();
{
let mut magic = [0; 2];
f.read_exact(&mut magic)?;
f.seek(SeekFrom::Start(0))?;
if is_gz(&magic) {
let mut d = GzDecoder::new(f);
d.read_to_end(&mut buf)?;
} else {
f.read_to_end(&mut buf)?;
}
}
Ok(buf)
pub fn new(config: AppConfig) -> Self {
Self(config)
}
}
/// A fictional versioning CLI
// Top-level command-line definition parsed with clap's derive API.
// NOTE(review): the doc comment and the `about` text look like leftovers from a
// clap example rather than a description of this tool - confirm before release.
#[derive(Debug, Parser)] // requires `derive` feature
#[command(name = "rsParser")]
#[command(author = "K.Tsuki . <tsuki@keitsuki.top>")]
#[command(version = "1.0")]
#[command(about = "A fictional versioning CLI", long_about = None)]
pub struct Cli {
// The selected subcommand (see `Commands`).
#[command(subcommand)]
pub command: Commands,
// Value of `-t` / `--thread`; defaults to 4. It is copied into
// `AppConfig::multi_threading` by `Cli::appConfig`.
#[arg(short = 't', long = "thread", default_value_t = 4)]
thread_count: usize,
}
impl Cli {
    /// Builds the application-wide [`AppConfig`] from the parsed arguments.
    ///
    /// The `--thread` value is copied into `AppConfig::multi_threading`.
    pub fn app_config(&self) -> AppConfig {
        AppConfig {
            multi_threading: self.thread_count,
        }
    }

    /// camelCase alias of [`Cli::app_config`], kept so existing callers keep compiling.
    // The name violates Rust's snake_case convention; the lint is silenced here
    // instead of renaming because callers outside this block use this spelling.
    #[allow(non_snake_case)]
    pub fn appConfig(&self) -> AppConfig {
        self.app_config()
    }
}
// Subcommands accepted by the CLI. Plain `//` comments are used on purpose:
// `///` doc comments on clap derive items become part of the generated help text.
#[derive(Debug, Subcommand)]
pub enum Commands {
// `parse`: takes one or more input paths and an optional output path.
#[command(arg_required_else_help = true)]
Parse {
// Input file paths; at least one is required.
#[arg(required = true)]
path: Vec<PathBuf>,
// Optional destination given with `-o` / `--output`.
#[arg(short = 'o', long = "output", required = false)]
output_path: Option<PathBuf>,
},
}
// Three-state colour setting usable as a clap value enum.
// NOTE(review): nothing in the visible source refers to this type; it looks like
// a leftover from a clap example - confirm whether it can be removed.
#[derive(ValueEnum, Copy, Clone, Debug, PartialEq, Eq)]
enum ColorWhen {
Always,
Auto,
Never,
}
impl std::fmt::Display for ColorWhen {
    /// Writes the name that clap associates with this variant.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Every variant is expected to map to a possible value; a missing one is a bug.
        let possible = self.to_possible_value().expect("no values are skipped");
        let name = possible.get_name();
        name.fmt(f)
    }
}
// Argument group holding an optional stash subcommand plus flattened push arguments.
// NOTE(review): not referenced by `Commands` or anything else in the visible
// source; presumably left over from a clap example - confirm before keeping.
#[derive(Debug, Args)]
#[command(args_conflicts_with_subcommands = true)]
struct StashArgs {
// Explicit subcommand, if one was given.
#[command(subcommand)]
command: Option<StashCommands>,
// Push arguments accepted directly on this group.
#[command(flatten)]
push: StashPushArgs,
}
// Subcommands nested under `StashArgs`.
// NOTE(review): only referenced from `StashArgs`, which itself appears unused
// in the visible source.
#[derive(Debug, Subcommand)]
enum StashCommands {
Push(StashPushArgs),
Pop { stash: Option<String> },
Apply { stash: Option<String> },
}
// Arguments of the stash `push` subcommand: a single optional message.
// NOTE(review): only reachable through `StashArgs` / `StashCommands`, which
// appear unused in the visible source.
#[derive(Debug, Args)]
struct StashPushArgs {
// Optional message; `short, long` lets clap derive the flag names from the field name.
#[arg(short, long)]
message: Option<String>,
}

9
src/cmds/mod.rs Normal file
View File

@ -0,0 +1,9 @@
use crate::{error::AResult, app::{App, AppConfig}};
pub mod parse;
/// Interface implemented by each CLI command.
///
/// An implementor picks its own configuration, success and exception types and
/// is run through [`Cmd::parse`].
pub trait Cmd {
/// Value produced when the command succeeds.
type Output;
/// Command-specific failure value carried in the second type parameter of `AResult`.
type Exception;
/// Per-invocation settings, passed to [`Cmd::parse`] by value.
type Config;
/// Runs the command against `app` with the given `config`.
///
/// The result is wrapped in the crate's `AResult` alias (defined in `crate::error`).
fn parse(&self,app:&App,config:Self::Config) -> AResult<Self::Output,Self::Exception>;
}

165
src/cmds/parse.rs Normal file
View File

@ -0,0 +1,165 @@
use super::Cmd;
use crate::app::App;
use crate::error::{AAResult, AResult, BrokenError};
use crate::parse::{parse, Parsed};
use crate::utils::is_gz;
use flate2::read::GzDecoder;
use ndarray::{self, ArrayD};
use npyz::{self, write_options::WriterBuilder};
use rayon::prelude::{IndexedParallelIterator, *};
use std::fs::File;
use std::io::{self, BufWriter, Read, Seek, SeekFrom};
use std::path::PathBuf;
/// Stateless command type for `parse`; its behaviour lives in its `Cmd` impl and helper methods.
pub struct ParseCmd;
/// Output format selected from the extension of the requested output path.
#[derive(Debug, PartialOrd, PartialEq)]
pub enum OutputType {
    /// The output path ends in `.npy`.
    Npy,
    /// The output path ends in `.npz`.
    Npz,
    /// No output path was given, or its extension is missing or not recognised.
    None,
}

/// Settings for one run of the `parse` command.
pub struct ParseCmdConfig {
    /// Input files to parse.
    pub paths: Vec<PathBuf>,
    /// Destination supplied by the caller, if any.
    pub output_path: Option<PathBuf>,
    /// Format derived from `output_path` by [`ParseCmdConfig::new`].
    output_type: OutputType,
}

impl ParseCmdConfig {
    /// Creates a configuration and derives the output format from `output_path`.
    ///
    /// The extension is matched case-sensitively against `npy` and `npz`.
    /// A missing path, a path without an extension, a non-UTF-8 extension or
    /// any other extension all yield [`OutputType::None`] instead of panicking.
    pub fn new(paths: Vec<PathBuf>, output_path: Option<PathBuf>) -> Self {
        let extension = output_path
            .as_deref()
            .and_then(|path| path.extension())
            .and_then(|ext| ext.to_str());

        let output_type = match extension {
            Some("npy") => OutputType::Npy,
            Some("npz") => OutputType::Npz,
            _ => OutputType::None,
        };

        Self {
            paths,
            output_path,
            output_type,
        }
    }
}
impl Cmd for ParseCmd {
type Exception = ();
type Output = ();
type Config = ParseCmdConfig;
fn parse(&self, app: &App, config: Self::Config) -> AResult<Self::Output, Self::Exception> {
let only_header = config.output_type == OutputType::None;
println!("only_header:{}", only_header);
let paths = &config.paths;
let _data = paths
.par_iter()
.map(|p| {
let d = Self::data_prepare(p);
match d {
Ok(_d) => parse(&_d, only_header)
.map(|(_, b)| b)
.map_err(|e| e.into()),
Err(e) => Err(e.into()),
}
})
.collect::<Result<Vec<_>, BrokenError>>()?;
_data.iter().for_each(|x| println!("{}", x));
match &config.output_type {
OutputType::Npy => {
self.npy_parse(_data)?.first().unwrap().into_iter().for_each(|v| {
let file =
BufWriter::new(File::create("/Users/ruomu/Desktop/a.npy").unwrap());
let shape = v.shape().into_iter().map(|c| *c as u64).collect::<Vec<_>>();
let mut writer = npyz::WriteOptions::new()
.default_dtype()
.shape(&shape)
.writer(file)
.begin_nd()
.unwrap();
writer.extend(v.iter()).unwrap();
writer.finish().unwrap();
});
}
_ => {}
}
Ok(Ok(()))
}
}
impl ParseCmd {
fn data_prepare(path: &PathBuf) -> Result<Vec<u8>, std::io::Error> {
let mut f = File::open(path)?;
let mut buf = Vec::new();
{
let mut magic = [0; 2];
f.read_exact(&mut magic)?;
f.seek(SeekFrom::Start(0))?;
if is_gz(&magic) {
let mut d = GzDecoder::new(f);
d.read_to_end(&mut buf)?;
} else {
f.read_to_end(&mut buf)?;
}
}
Ok(buf)
}
fn npy_parse(&self, data: Vec<Parsed>) -> Result<Vec<Vec<ArrayD<f32>>>, BrokenError> {
let c = data
.into_par_iter()
.map(|d| {
d.blocks
.into_par_iter()
.map(|mut _block| {
let t = &_block.block_info.value_type;
let shape: &[usize] = &_block.block_info.dimension_size;
let offset = &_block.block_info.value_offset;
let scale = &_block.block_info.value_scale;
if let Some(b) = _block.data.as_mut() {
match t.as_str() {
"b" => {
let v = b.downcast_ref::<Vec<i8>>().unwrap();
let _m = ndarray::ArrayView::from_shape(shape, v).unwrap();
return Ok(_m.mapv(|x| (x as f32 - *offset) / *scale));
}
"B" => {
let v = b.downcast_ref::<Vec<u8>>().unwrap();
let _m = ndarray::ArrayView::from_shape(shape, v).unwrap();
return Ok(_m.mapv(|x| (x as f32 - *offset) / *scale));
}
"i" => {
let v = b.downcast_ref::<Vec<i32>>().unwrap();
let _m = ndarray::ArrayView::from_shape(shape, v).unwrap();
return Ok(_m.mapv(|x| (x as f32 - *offset) / *scale));
}
"u" => {
let v = b.downcast_ref::<Vec<u32>>().unwrap();
let _m = ndarray::ArrayView::from_shape(shape, v).unwrap();
return Ok(_m.mapv(|x| (x as f32 - *offset) / *scale));
}
"f" => {
let v = b.downcast_ref::<Vec<f32>>().unwrap();
let _m = ndarray::ArrayView::from_shape(shape, v).unwrap();
return Ok(_m.mapv(|x| (x as f32 - *offset) / *scale));
}
_ => {
return Err(BrokenError::ParseError);
}
}
}
return Err(BrokenError::ParseError);
})
.collect::<Result<Vec<_>, BrokenError>>()
})
.collect::<Result<Vec<_>, BrokenError>>()?;
Ok(c)
}
}

View File

@ -15,4 +15,6 @@ impl<T> From<Err<T>> for BrokenError {
}
}
pub type AResult<O, E> = Result<(O, E), BrokenError>;
pub type AResult<O, E> = Result<Result<O,E>, BrokenError>;
pub type AAResult<O> = AResult<O, ()>;

View File

@ -1,90 +1,25 @@
mod parse;
mod app;
mod cmds;
mod error;
mod parse;
mod printer;
mod utils;
mod error;
use std::io::prelude::*;
use flate2::read::GzDecoder;
use std::fs::File;
use std::io::BufReader;
use std::path::PathBuf;
use clap::{Args, command, Parser, Subcommand, ValueEnum};
use parse::parse;
use crate::app::{App, AppConfig};
/// A fictional versioning CLI
#[derive(Debug, Parser)] // requires `derive` feature
#[command(name = "rsParser")]
#[command(author = "K.Tsuki . <tsuki@keitsuki.top>")]
#[command(version = "1.0")]
#[command(about = "A fictional versioning CLI", long_about = None)]
struct Cli {
#[command(subcommand)]
command: Commands,
}
#[derive(Debug, Subcommand)]
enum Commands {
#[command(arg_required_else_help = true)]
Parse {
#[arg(required = true)]
path: PathBuf,
#[arg(short = 'd', long = "only-header")]
header: bool,
},
}
#[derive(ValueEnum, Copy, Clone, Debug, PartialEq, Eq)]
enum ColorWhen {
Always,
Auto,
Never,
}
impl std::fmt::Display for ColorWhen {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.to_possible_value()
.expect("no values are skipped")
.get_name()
.fmt(f)
}
}
#[derive(Debug, Args)]
#[command(args_conflicts_with_subcommands = true)]
struct StashArgs {
#[command(subcommand)]
command: Option<StashCommands>,
#[command(flatten)]
push: StashPushArgs,
}
#[derive(Debug, Subcommand)]
enum StashCommands {
Push(StashPushArgs),
Pop { stash: Option<String> },
Apply { stash: Option<String> },
}
#[derive(Debug, Args)]
struct StashPushArgs {
#[arg(short, long)]
message: Option<String>,
}
use crate::app::App;
use app::{Cli, Commands};
use clap::Parser;
use cmds::parse::{ParseCmd, ParseCmdConfig};
use cmds::Cmd;
fn main() {
let args = Cli::parse();
let config = args.appConfig();
let app = App::new(config);
match args.command {
Commands::Parse { path, header } => {
let config = AppConfig{
only_header:header,
multi_threading:true
};
let app = App::new(vec![path], config).unwrap();
Commands::Parse { path, output_path } => {
let parse_conf = ParseCmdConfig::new(path, output_path);
let p = ParseCmd;
p.parse(&app, parse_conf).unwrap().unwrap();
}
}
}

View File

@ -25,7 +25,7 @@ pub struct BlockInfo {
pub dimension_des: Vec<String>,
pub dimension_start: Vec<f64>,
pub dimension_end: Vec<f64>,
pub dimension_size: Vec<u32>,
pub dimension_size: Vec<usize>,
pub dimension_res: Vec<f64>,
pub fill_value: f64,
pub dimension_values: Vec<Vec<f64>>,

View File

@ -8,6 +8,17 @@ fn print_vec_in_e<T:Num + Display>(v:&Vec<T>) -> String {
return v.iter().map(|v| {return format!("{:.2}",*v);}).collect::<Vec<String>>().join(",");
}
/// Maps a one-letter value-type code to its human-readable label.
///
/// Known codes are `b`, `B`, `f`, `i` and `u`; any other input yields an empty string.
fn typechar_to_name(input_type: &str) -> &'static str {
    // (code, label) pairs, looked up by exact match on the code.
    const LABELS: [(&str, &str); 5] = [
        ("b", "b (int 8)"),
        ("B", "B (uint 8)"),
        ("f", "f (float 32)"),
        ("i", "i (int 32)"),
        ("u", "u (uint 32)"),
    ];

    LABELS
        .iter()
        .find(|entry| entry.0 == input_type)
        .map_or("", |entry| entry.1)
}
impl Display for Parsed {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
let fv: Vec<String> = self
@ -19,18 +30,18 @@ impl Display for Parsed {
return formatdoc! {"
\0 Block {}:
value_name: {},
value_des: {},
dimension: {},
value_offset: {},
value_scale: {},
value_type: {},
dimension_des: {:?},
dimension_start: {},
dimension_end: {},
dimension_size: {},
dimension_res: {},
fill_value: {},
value_name: {}
value_des: {}
dimension: {}
value_offset: {}
value_scale: {}
value_type: {}
dimension_des: {:?}
dimension_start: {}
dimension_end: {}
dimension_size: {}
dimension_res: {}
fill_value: {}
",
index+1,
i.block_info.value_name,
@ -38,7 +49,7 @@ impl Display for Parsed {
i.block_info.dimension,
i.block_info.value_offset,
i.block_info.value_scale,
i.block_info.value_type,
typechar_to_name(i.block_info.value_type.as_str()),
i.block_info.dimension_des,
print_vec_in_e(&i.block_info.dimension_start),
print_vec_in_e(&i.block_info.dimension_end),
@ -52,7 +63,7 @@ impl Display for Parsed {
write!(
f,
"File Infomations: \n filetime: {}\n block num: {}\n\nBlocks:\n{}",
"File Infomations: \n filetime: {}\n block num: {}\n\nBlocks:\n{}",
self.common_info.header_info.file_time, self.common_info.header_info.block_num, message
)
}

View File

@ -1,3 +1,4 @@
pub fn is_gz(data:&[u8;2]) -> bool{
use rayon::prelude::*;
/// Returns `true` when the two bytes are exactly `0x1f 0x8b`.
pub fn is_gz(data: &[u8; 2]) -> bool {
    matches!(data, [0x1f, 0x8b])
}