diff --git a/Cargo.lock b/Cargo.lock index 892d8d3..1b37fe6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,12 +8,155 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys", +] + +[[package]] +name = "anyhow" +version = "1.0.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" + +[[package]] +name = "boomphf" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "617e2d952880a00583ddb9237ac3965732e8df6a92a8e7bcc054100ec467ec3b" +dependencies = [ + "crossbeam-utils", + "log", + "rayon", + "serde", + "wyhash", +] + +[[package]] +name = "bumpalo" +version = "3.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" + +[[package]] +name = "bytecheck" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0caa33a2c0edca0419d15ac723dff03f1956f7978329b1e3b5fdaaaed9d3ca8b" +dependencies = [ + "bytecheck_derive", + "ptr_meta", + "rancor", + "simdutf8", +] + +[[package]] +name = "bytecheck_derive" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89385e82b5d1821d2219e0b095efa2cc1f246cbf99080f3be46a1a85c0d392d9" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" + [[package]] name = "cfg-if" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" +[[package]] +name = "clap" +version = "4.5.53" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9e340e012a1bf4935f5282ed1436d1489548e8f72308207ea5df0e23d2d03f8" +dependencies = [ + "clap_builder", +] + +[[package]] +name = "clap_builder" +version = "4.5.53" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d76b5d13eaa18c901fd2f7fca939fefe3a0727a953561fefdf3b2922b8569d00" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_lex" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + [[package]] name = "crc32fast" version = "1.5.0" @@ -23,6 +166,31 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + [[package]] name = "csv" version = "1.4.0" @@ -44,6 +212,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + [[package]] name = "encoding_rs" version = "0.8.35" @@ -53,6 +227,12 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + [[package]] name = "flate2" version = "1.1.5" @@ -64,10 +244,67 @@ dependencies = [ ] [[package]] -name = "itoa" -version = "1.0.15" +name = "gen" +version = "0.1.0" +dependencies = [ + "anyhow", + "boomphf", + "byteorder", + "clap", + "csv", + "rkyv", + "serde", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + +[[package]] +name = "indexmap" +version = "2.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" +dependencies = [ + "equivalent", + "hashbrown 0.16.1", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itoa" +version = "1.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ee5b5339afb4c41626dde77b7a611bd4f2c202b897852b4bcf5d03eddc61010" + +[[package]] +name = "js-sys" +version = "0.3.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] name = "memchr" @@ -85,6 +322,26 @@ dependencies = [ "simd-adler32", ] +[[package]] +name = "munge" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e17401f259eba956ca16491461b6e8f72913a0a114e39736ce404410f915a0c" +dependencies = [ + "munge_macro", +] + +[[package]] +name = "munge_macro" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4568f25ccbd45ab5d5603dc34318c1ec56b117531781260002151b8530a9f931" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "nom" version = "8.0.0" @@ -94,6 +351,18 @@ dependencies = [ "memchr", ] +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + [[package]] name = "proc-macro2" version = "1.0.103" @@ -103,6 +372,26 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "ptr_meta" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b9a0cf95a1196af61d4f1cbdab967179516d9a4a4312af1f31948f8f6224a79" +dependencies = [ + "ptr_meta_derive", +] + +[[package]] +name = "ptr_meta_derive" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7347867d0a7e1208d93b46767be83e2b8f978c3dad35f775ac8d8847551d6fe1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "quote" version = "1.0.42" @@ -112,6 +401,41 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rancor" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a063ea72381527c2a0561da9c80000ef822bdd7c3241b1cc1b12100e3df081ee" +dependencies = [ + "ptr_meta", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" + +[[package]] +name = "rayon" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "rbufr" version = "0.1.0" @@ -125,10 +449,55 @@ dependencies = [ ] [[package]] -name = "ryu" -version = "1.0.20" +name = "rend" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +checksum = "cadadef317c2f20755a64d7fdc48f9e7178ee6b0e1f7fce33fa60f1d68a276e6" +dependencies = [ + "bytecheck", +] + +[[package]] +name = "rkyv" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35a640b26f007713818e9a9b65d34da1cf58538207b052916a83d80e43f3ffa4" +dependencies = [ + "bytecheck", + "bytes", + "hashbrown 0.15.5", + "indexmap", + "munge", + "ptr_meta", + "rancor", + "rend", + "rkyv_derive", + "tinyvec", + "uuid", +] + +[[package]] +name = "rkyv_derive" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd83f5f173ff41e00337d97f6572e416d022ef8a19f371817259ae960324c482" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "ryu" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62049b2877bf12821e8f9ad256ee38fdc31db7387ec2d3b3f403024de2034aea" [[package]] name = "serde" @@ -166,6 +535,18 @@ version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" +[[package]] +name = "simdutf8" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + [[package]] name = "syn" version = "2.0.111" @@ -197,8 +578,108 @@ dependencies = [ "syn", ] +[[package]] +name = "tinyvec" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + [[package]] name = "unicode-ident" version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "uuid" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2e054861b4bd027cd373e18e8d8d8e6548085000e41290d95ce0c373a654b4a" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "wyhash" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf6e163c25e3fac820b4b453185ea2dea3b6a3e0a721d4d23d75bd33734c295" +dependencies = [ + "rand_core", +] diff --git a/Cargo.toml b/Cargo.toml index b2545e2..1b26e76 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,16 +1,2 @@ -[package] -name = "rbufr" -version = "0.1.0" -edition = "2024" - -[lib] -name = "librbufr" -path = "src/lib.rs" - -[dependencies] -csv = "1.4.0" -encoding_rs = "0.8.35" -flate2 = "1.1.5" -nom = "8.0.0" -serde = { version = "1.0.228", features = ["derive"] } -thiserror = "2.0.17" +[workspace] +members = ["rbufr", "gen"] diff --git a/gen/Cargo.toml b/gen/Cargo.toml new file mode 100644 index 0000000..1b200fa --- /dev/null +++ b/gen/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "gen" +version = "0.1.0" +edition = "2024" + +[dependencies] +anyhow = "1.0.100" +boomphf = { version = "0.6.0", features = ["serde"] } +byteorder = "1.5.0" +clap = "4.5.53" +csv = "1.4.0" +rkyv = { version = "0.8.12" } +serde = { version = "1.0.228", features = ["derive"] } diff --git a/gen/src/btable.rs b/gen/src/btable.rs new file mode 100644 index 0000000..c4e4930 --- /dev/null +++ b/gen/src/btable.rs @@ -0,0 +1,109 @@ +use crate::{FXY, TableEntryLoader}; +use anyhow::{Context, Result}; +use std::path::Path; + +pub struct BTableCsvLoader { + entries: Vec, +} + +#[derive(Debug, serde::Deserialize)] +pub struct RawBTableEntry { + #[serde(rename = "ClassNo")] + pub class_no: String, + #[serde(rename = "ClassName_en")] + pub class_name_en: String, + #[serde(rename = "FXY")] + pub fxy: String, + #[serde(rename = "ElementName_en")] + pub element_name_en: String, + #[serde(rename = "BUFR_Unit")] + pub bufr_unit: String, + #[serde(rename = "BUFR_Scale")] + pub bufr_scale: u32, + #[serde(rename = "BUFR_ReferenceValue")] + pub bufr_reference_value: u32, + #[serde(rename = "BUFR_DataWidth_Bits")] + pub bufr_datawidth_bits: u32, + #[serde(rename = "CREX_Unit")] + pub crex_unit: String, + #[serde(rename = "CREX_Scale")] + pub crex_scale: u32, + #[serde(rename = "CREX_DataWidth_Char")] + pub crex_datawidth_char: u32, + #[serde(rename = "Note_en")] + pub note_en: Option, + #[serde(rename = "noteIDs")] + pub note_ids: Option, + #[serde(rename = "Status")] + pub status: String, +} + +pub struct BTableEntry { + fxy: FXY, + class_name_en: String, + element_name_en: String, + bufr_unit: String, + bufr_scale: u32, + bufr_reference_value: u32, + bufr_datawidth_bits: u32, + note_en: Option, + note_ids: Option, + status: String, +} + +impl BTableCsvLoader { + pub fn new() -> Self { + BTableCsvLoader { + entries: Vec::new(), + } + } + + pub fn from_wmo_csv>(path: P) -> Result { + let mut loader = Self::new(); + let table_loader = crate::TableLoader::new(); + table_loader.load_table(path, &mut loader)?; + Ok(loader) + } + + pub fn entries(&self) -> &[BTableEntry] { + &self.entries + } +} + +impl TableEntryLoader for BTableCsvLoader { + type RawEntry = RawBTableEntry; + + fn process_entry(&mut self, raw: Self::RawEntry) -> anyhow::Result<()> { + // Parse FXY string (e.g., "001001") to u32 + let fxy = FXY::from_str(&raw.fxy)?; + + let entry = BTableEntry { + fxy, + class_name_en: raw.class_name_en, + element_name_en: raw.element_name_en, + bufr_unit: raw.bufr_unit, + bufr_scale: raw.bufr_scale, + bufr_reference_value: raw.bufr_reference_value, + bufr_datawidth_bits: raw.bufr_datawidth_bits, + note_en: raw.note_en, + note_ids: raw.note_ids, + status: raw.status, + }; + + self.entries.push(entry); + Ok(()) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_btable_csv_loader() { + let _loader = BTableCsvLoader::from_wmo_csv( + "/Users/tsuki/projects/rbufr/BUFR4/BUFRCREX_TableB_en_00.csv", + ) + .unwrap(); + } +} diff --git a/gen/src/dtable.rs b/gen/src/dtable.rs new file mode 100644 index 0000000..b49425e --- /dev/null +++ b/gen/src/dtable.rs @@ -0,0 +1,98 @@ +use crate::{FXY, TableEntryLoader}; + +pub struct DTableCsvLoader { + current_chain: Option, + entries: Vec, +} + +// Category,CategoryOfSequences_en,FXY1,Title_en,SubTitle_en,FXY2,ElementName_en,ElementDescription_en,Note_en,noteIDs,Status +#[derive(Debug, serde::Deserialize)] +pub struct RawDTableEntry { + #[serde(rename = "Category")] + pub category: String, + #[serde(rename = "CategoryOfSequences_en")] + pub category_of_sequences_en: String, + #[serde(rename = "FXY1")] + pub fxy1: String, + #[serde(rename = "Title_en")] + pub title_en: Option, + #[serde(rename = "SubTitle_en")] + pub subtitle_en: Option, + #[serde(rename = "FXY2")] + pub fxy2: String, + #[serde(rename = "ElementName_en")] + pub element_name_en: Option, + #[serde(rename = "ElementDescription_en")] + pub element_description_en: Option, + #[serde(rename = "Note_en")] + pub note_en: Option, + #[serde(rename = "noteIDs")] + pub note_ids: String, + #[serde(rename = "Status")] + pub status: String, +} + +impl TableEntryLoader for DTableCsvLoader { + type RawEntry = RawDTableEntry; + + fn process_entry(&mut self, raw: Self::RawEntry) -> anyhow::Result<()> { + // Process the raw entry as needed + if self.current_chain.is_none() { + let entry = DTableEntry { + fxy: FXY::from_str(&raw.fxy1)?, + fxy_chain: vec![FXY::from_str(&raw.fxy2)?], + category: raw.category, + category_of_sequences_en: raw.category_of_sequences_en, + title_en: raw.title_en, + subtitle_en: raw.subtitle_en, + note_en: raw.note_en, + note_ids: raw.note_ids, + status: raw.status, + }; + self.current_chain = Some(entry); + } else { + let fxy = FXY::from_str(&raw.fxy1)?; + if self.current_chain.as_ref().unwrap().fxy != fxy { + self.entries.push(self.current_chain.take().unwrap()); + let entry = DTableEntry { + fxy, + fxy_chain: vec![FXY::from_str(&raw.fxy2)?], + category: raw.category, + category_of_sequences_en: raw.category_of_sequences_en, + title_en: raw.title_en, + subtitle_en: raw.subtitle_en, + note_en: raw.note_en, + note_ids: raw.note_ids, + status: raw.status, + }; + self.current_chain = Some(entry); + } else { + self.current_chain + .as_mut() + .unwrap() + .fxy_chain + .push(FXY::from_str(&raw.fxy2)?); + } + } + Ok(()) + } + + fn finish(&mut self) -> anyhow::Result<()> { + if let Some(entry) = self.current_chain.take() { + self.entries.push(entry); + } + Ok(()) + } +} + +pub struct DTableEntry { + fxy: FXY, + fxy_chain: Vec, + category: String, + category_of_sequences_en: String, + title_en: Option, + subtitle_en: Option, + note_en: Option, + note_ids: String, + status: String, +} diff --git a/gen/src/lib.rs b/gen/src/lib.rs new file mode 100644 index 0000000..21c4437 --- /dev/null +++ b/gen/src/lib.rs @@ -0,0 +1,364 @@ +mod btable; +mod dtable; +mod utils; +use anyhow::Context; +use boomphf::Mphf; +use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; +use rkyv::Archive; +use rkyv::ser::serializers::AllocSerializer; +use serde::de::DeserializeOwned; +use serde::{Deserialize, Serialize}; +use std::fmt::Debug; +use std::fs::File; +use std::io::{Read, Seek, SeekFrom, Write}; +use std::path::Path; + +use csv::{DeserializeRecordsIter, ReaderBuilder}; + +pub struct TableLoader; + +impl TableLoader { + pub fn load_table, T: TableEntryLoader>( + &self, + path: P, + loader: &mut T, + ) -> anyhow::Result<()> { + let mut rdr = ReaderBuilder::new() + .has_headers(true) + .delimiter(b',') + .from_path(path)?; + + for result in rdr.deserialize() { + let record: T::RawEntry = result?; + loader.process_entry(record)?; + } + + Ok(()) + } +} + +pub trait TableEntryLoader: Sized { + /// The raw CSV entry type that will be deserialized + type RawEntry: for<'de> serde::Deserialize<'de> + Debug; + + type TableEntry: TableEntry; + + /// Process a single entry from the CSV file + fn process_entry(&mut self, raw: Self::RawEntry) -> anyhow::Result<()>; + + fn finish(&mut self) -> anyhow::Result<()> { + Ok(()) + } +} + +pub trait TableEntry: Serialize + DeserializeOwned + Debug + Clone { + fn fxy(&self) -> FXY; +} + +#[derive(Serialize, Deserialize)] +struct MphMetadata { + mphf: Mphf, + offsets: Vec, +} + +struct BufrTableMph { + mphf: Mphf, + offsets: Vec, + data_file: File, + _marker: std::marker::PhantomData, +} + +impl BufrTableMph { + /// 构建 MPH 表 + fn build(entries: &Vec, output_path: &str) -> std::io::Result { + let keys: Vec = entries.iter().map(|e| e.fxy()).collect(); + + let mphf = Mphf::new(2.0, &keys); + + let mut sorted_entries: Vec<(usize, T::TableEntry)> = entries + .into_iter() + .map(|e| { + let hash = mphf.hash(&e.fxy()) as usize; + (hash, e) + }) + .collect(); + sorted_entries.sort_by_key(|(hash, _)| *hash); + + let data_path = format!("{}.data", output_path); + let mut data_file = File::create(&data_path)?; + let mut offsets = Vec::with_capacity(sorted_entries.len()); + + for (_, entry) in sorted_entries { + let offset = data_file.stream_position()?; + offsets.push(offset); + + // rkyv 序列化 + let mut serializer = AllocSerializer::<256>::default(); + serializer.serialize_value(&entry).unwrap(); + let bytes = serializer.into_serializer().into_inner(); + + // 写入长度 + 数据 + data_file.write_u32::(bytes.len() as u32)?; + data_file.write_all(&bytes)?; + } + + // 5. 保存元数据 + let meta_path = format!("{}.meta", output_path); + let meta = MphMetadata { + mphf: mphf.clone(), + offsets: offsets.clone(), + }; + + // 元数据仍然用 bincode 或者也可以用 rkyv + let mut meta_serializer = AllocSerializer::<4096>::default(); + meta_serializer.serialize_value(&meta).unwrap(); + let meta_bytes = meta_serializer.into_serializer().into_inner(); + std::fs::write(meta_path, meta_bytes)?; + + Ok(Self { + mphf, + offsets, + data_file: File::open(data_path)?, + _marker: std::marker::PhantomData, + }) + } + + /// 从磁盘加载 + fn load(path: &str) -> std::io::Result { + let meta_bytes = std::fs::read(format!("{}.meta", path))?; + + // rkyv 反序列化元数据(零拷贝) + let archived = unsafe { rkyv::archived_root::(&meta_bytes) }; + + // 如果需要拥有的版本,可以 deserialize + let meta: MphMetadata = archived.deserialize(&mut rkyv::Infallible).unwrap(); + + let data_file = File::open(format!("{}.data", path))?; + + Ok(Self { + mphf: meta.mphf, + offsets: meta.offsets, + data_file, + _marker: std::marker::PhantomData, + }) + } + + /// 查找条目(零拷贝读取) + fn get(&mut self, fxy: FXY) -> std::io::Result> { + let hash = self.mphf.hash(&fxy) as usize; + let offset = match self.offsets.get(hash) { + Some(&off) => off, + None => return Ok(None), + }; + + // 读取数据 + self.data_file.seek(SeekFrom::Start(offset))?; + let len = self.data_file.read_u32::()? as usize; + let mut buffer = vec![0u8; len]; + self.data_file.read_exact(&mut buffer)?; + + // rkyv 零拷贝访问 + let archived = unsafe { rkyv::archived_root::(&buffer) }; + + // 校验归档数据(可选,生产环境推荐) + #[cfg(feature = "validation")] + { + use rkyv::validation::validators::DefaultValidator; + rkyv::check_archived_root::(&buffer) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?; + } + + // 反序列化为拥有的类型 + let entry: T::TableEntry = archived.deserialize(&mut rkyv::Infallible).unwrap(); + + if entry.fxy == fxy { + Ok(Some(entry)) + } else { + Ok(None) + } + } + + /// 零拷贝引用访问(更快!) + fn get_archived(&mut self, fxy: u32) -> std::io::Result>> { + let hash = self.mphf.hash(&fxy) as usize; + let offset = match self.offsets.get(hash) { + Some(&off) => off, + None => return Ok(None), + }; + + self.data_file.seek(SeekFrom::Start(offset))?; + let len = self.data_file.read_u32::()? as usize; + let mut buffer = vec![0u8; len]; + self.data_file.read_exact(&mut buffer)?; + + Ok(Some(buffer)) + } +} + +#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)] +pub struct FXY { + pub f: u16, + pub x: u16, + pub y: u16, +} + +impl FXY { + pub fn from_str(fxy_str: &str) -> anyhow::Result { + // let bytes = fxy_str.as_bytes(); + + if fxy_str.len() != 6 { + return Err(anyhow::anyhow!("Invalid FXY string length: {}", fxy_str)); + } + + let f = fxy_str[0..2] + .parse::() + .with_context(|| format!("Failed to parse F from FXY: {}", fxy_str))?; + + let x = fxy_str[2..4] + .parse::() + .with_context(|| format!("Failed to parse X from FXY: {}", fxy_str))?; + let y = fxy_str[4..6] + .parse::() + .with_context(|| format!("Failed to parse Y from FXY: {}", fxy_str))?; + + Ok(FXY { f, x, y }) + } +} + +pub struct BUFRTableMPH {} + +impl BUFRTableMPH { + pub fn build_from_csv, T: TableEntryLoader>( + mut loader: T, + csv_path: P, + output_path: P, + ) -> anyhow::Result { + TableLoader.load_table(csv_path, &mut loader)?; + loader.finish()?; + + Ok(BUFRTableMPH {}) + } + + pub fn load_from_disk>(path: P) -> anyhow::Result { + // Placeholder for loading MPH table from disk + Ok(BUFRTableMPH {}) + } + + pub fn lookup(&self, fxy: FXY) -> anyhow::Result> { + // Placeholder for looking up an entry by FXY + Ok(None) + } +} + +// #[derive(Serialize, Deserialize, Debug)] +// struct TableEntry { +// fxy: u32, // F-X-Y descriptor (e.g., 001001) +// name: String, // "WMO Block Number" +// unit: String, // "Numeric" +// scale: i8, +// reference: i32, +// data_width: u8, +// } + +// struct BufrTableMph { +// mphf: Mphf, // The MPH function +// offsets: Vec, // Byte offsets into the data file +// data_file: File, // Memory-mapped or regular file +// } + +// impl BufrTableMph { +// /// Build MPH table from entries +// fn build(entries: Vec, output_path: &str) -> std::io::Result { +// // 1. Extract keys (FXY descriptors) +// let keys: Vec = entries.iter().map(|e| e.fxy).collect(); + +// // 2. Build MPH function with gamma=2.0 (space/time tradeoff) +// let mphf = Mphf::new(2.0, &keys); + +// // 3. Create sorted entries by MPH hash +// let mut sorted_entries: Vec<(usize, TableEntry)> = entries +// .into_iter() +// .map(|e| { +// let hash = mphf.hash(&e.fxy) as usize; +// (hash, e) +// }) +// .collect(); +// sorted_entries.sort_by_key(|(hash, _)| *hash); + +// // 4. Write binary data file and collect offsets +// let data_path = format!("{}.data", output_path); +// let mut data_file = File::create(&data_path)?; +// let mut offsets = Vec::with_capacity(sorted_entries.len()); + +// for (_, entry) in sorted_entries { +// let offset = data_file.stream_position()?; +// offsets.push(offset); + +// // Write entry in compact binary format +// let serialized = bincode::serialize(&entry).unwrap(); +// data_file.write_u32::(serialized.len() as u32)?; +// data_file.write_all(&serialized)?; +// } + +// // 5. Write MPH metadata +// let meta_path = format!("{}.meta", output_path); +// let meta = MphMetadata { mphf, offsets }; +// let meta_bytes = bincode::serialize(&meta).unwrap(); +// std::fs::write(meta_path, meta_bytes)?; + +// Ok(Self { +// mphf: meta.mphf, +// offsets: meta.offsets, +// data_file: File::open(data_path)?, +// }) +// } + +// /// Load from disk +// fn load(path: &str) -> std::io::Result { +// let meta_bytes = std::fs::read(format!("{}.meta", path))?; +// let meta: MphMetadata = bincode::deserialize(&meta_bytes).unwrap(); +// let data_file = File::open(format!("{}.data", path))?; + +// Ok(Self { +// mphf: meta.mphf, +// offsets: meta.offsets, +// data_file, +// }) +// } + +// /// Lookup entry by FXY descriptor +// fn get(&mut self, fxy: u32) -> std::io::Result> { +// // 1. Hash the key to get index +// let hash = self.mphf.hash(&fxy) as usize; + +// // 2. Get offset from array +// let offset = match self.offsets.get(hash) { +// Some(&off) => off, +// None => return Ok(None), +// }; + +// // 3. Seek and read +// self.data_file.seek(SeekFrom::Start(offset))?; +// let len = self.data_file.read_u32::()? as usize; +// let mut buffer = vec![0u8; len]; +// self.data_file.read_exact(&mut buffer)?; + +// // 4. Deserialize +// let entry: TableEntry = bincode::deserialize(&buffer).unwrap(); + +// // Verify key matches (MPH guarantees unique index, but verify correctness) +// if entry.fxy == fxy { +// Ok(Some(entry)) +// } else { +// Ok(None) +// } +// } +// } + +// #[derive(Serialize, Deserialize)] +// struct MphMetadata { +// mphf: Mphf, +// offsets: Vec, +// } + +// use byteorder::{ReadBytesExt, WriteBytesExt}; diff --git a/src/main.rs b/gen/src/main.rs similarity index 100% rename from src/main.rs rename to gen/src/main.rs diff --git a/gen/src/utils.rs b/gen/src/utils.rs new file mode 100644 index 0000000..93e559d --- /dev/null +++ b/gen/src/utils.rs @@ -0,0 +1,11 @@ +pub(crate) fn fxy_str_to_u32(fxy: &str) -> Option { + let bytes = fxy.as_bytes(); + if bytes.len() != 5 || bytes[2] != b'-' { + return None; + } + + let class_no = (bytes[0] as char).to_digit(10)? * 10 + (bytes[1] as char).to_digit(10)?; + let element_no = (bytes[3] as char).to_digit(10)? * 100 + (bytes[4] as char).to_digit(10)? * 10; + + Some(class_no * 1000 + element_no) +} diff --git a/rbufr/Cargo.lock b/rbufr/Cargo.lock new file mode 100644 index 0000000..f3d7535 --- /dev/null +++ b/rbufr/Cargo.lock @@ -0,0 +1,208 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "csv" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde_core", +] + +[[package]] +name = "csv-core" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782" +dependencies = [ + "memchr", +] + +[[package]] +name = "encoding_rs" +version = "0.8.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "flate2" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "gen" +version = "0.1.0" + +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + +[[package]] +name = "nom" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405" +dependencies = [ + "memchr", +] + +[[package]] +name = "proc-macro2" +version = "1.0.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rbufr" +version = "0.1.0" +dependencies = [ + "csv", + "encoding_rs", + "flate2", + "nom", + "serde", + "thiserror", +] + +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "simd-adler32" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" + +[[package]] +name = "syn" +version = "2.0.111" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "2.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" diff --git a/rbufr/Cargo.toml b/rbufr/Cargo.toml new file mode 100644 index 0000000..b2545e2 --- /dev/null +++ b/rbufr/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "rbufr" +version = "0.1.0" +edition = "2024" + +[lib] +name = "librbufr" +path = "src/lib.rs" + +[dependencies] +csv = "1.4.0" +encoding_rs = "0.8.35" +flate2 = "1.1.5" +nom = "8.0.0" +serde = { version = "1.0.228", features = ["derive"] } +thiserror = "2.0.17" diff --git a/src/block.rs b/rbufr/src/block.rs similarity index 100% rename from src/block.rs rename to rbufr/src/block.rs diff --git a/src/discriptor_table.rs b/rbufr/src/discriptor_table.rs similarity index 100% rename from src/discriptor_table.rs rename to rbufr/src/discriptor_table.rs diff --git a/src/discriptor_table/btable.rs b/rbufr/src/discriptor_table/btable.rs similarity index 100% rename from src/discriptor_table/btable.rs rename to rbufr/src/discriptor_table/btable.rs diff --git a/src/discriptor_table/dtable.rs b/rbufr/src/discriptor_table/dtable.rs similarity index 100% rename from src/discriptor_table/dtable.rs rename to rbufr/src/discriptor_table/dtable.rs diff --git a/src/errors.rs b/rbufr/src/errors.rs similarity index 100% rename from src/errors.rs rename to rbufr/src/errors.rs diff --git a/src/lib.rs b/rbufr/src/lib.rs similarity index 100% rename from src/lib.rs rename to rbufr/src/lib.rs diff --git a/rbufr/src/main.rs b/rbufr/src/main.rs new file mode 100644 index 0000000..e7a11a9 --- /dev/null +++ b/rbufr/src/main.rs @@ -0,0 +1,3 @@ +fn main() { + println!("Hello, world!"); +} diff --git a/src/parser.rs b/rbufr/src/parser.rs similarity index 100% rename from src/parser.rs rename to rbufr/src/parser.rs diff --git a/src/structs/bit.rs b/rbufr/src/structs/bit.rs similarity index 100% rename from src/structs/bit.rs rename to rbufr/src/structs/bit.rs diff --git a/src/structs/mod.rs b/rbufr/src/structs/mod.rs similarity index 100% rename from src/structs/mod.rs rename to rbufr/src/structs/mod.rs diff --git a/src/structs/versions/mod.rs b/rbufr/src/structs/versions/mod.rs similarity index 100% rename from src/structs/versions/mod.rs rename to rbufr/src/structs/versions/mod.rs diff --git a/src/structs/versions/v2.rs b/rbufr/src/structs/versions/v2.rs similarity index 100% rename from src/structs/versions/v2.rs rename to rbufr/src/structs/versions/v2.rs diff --git a/src/structs/versions/v4.rs b/rbufr/src/structs/versions/v4.rs similarity index 100% rename from src/structs/versions/v4.rs rename to rbufr/src/structs/versions/v4.rs diff --git a/tables/bufr/bufrtabb_11.csv b/rbufr/tables/bufr/bufrtabb_11.csv similarity index 100% rename from tables/bufr/bufrtabb_11.csv rename to rbufr/tables/bufr/bufrtabb_11.csv diff --git a/tables/bufr/bufrtabd_11.csv b/rbufr/tables/bufr/bufrtabd_11.csv similarity index 100% rename from tables/bufr/bufrtabd_11.csv rename to rbufr/tables/bufr/bufrtabd_11.csv