This commit is contained in:
Tsuki 2025-12-23 09:19:03 +08:00
parent e606448b0a
commit fc44344a01
25 changed files with 1311 additions and 22 deletions

493
Cargo.lock generated
View File

@ -8,12 +8,155 @@ version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
[[package]]
name = "anstream"
version = "0.6.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a"
dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
"is_terminal_polyfill",
"utf8parse",
]
[[package]]
name = "anstyle"
version = "1.0.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78"
[[package]]
name = "anstyle-parse"
version = "0.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2"
dependencies = [
"utf8parse",
]
[[package]]
name = "anstyle-query"
version = "1.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc"
dependencies = [
"windows-sys",
]
[[package]]
name = "anstyle-wincon"
version = "3.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d"
dependencies = [
"anstyle",
"once_cell_polyfill",
"windows-sys",
]
[[package]]
name = "anyhow"
version = "1.0.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
[[package]]
name = "boomphf"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "617e2d952880a00583ddb9237ac3965732e8df6a92a8e7bcc054100ec467ec3b"
dependencies = [
"crossbeam-utils",
"log",
"rayon",
"serde",
"wyhash",
]
[[package]]
name = "bumpalo"
version = "3.19.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510"
[[package]]
name = "bytecheck"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0caa33a2c0edca0419d15ac723dff03f1956f7978329b1e3b5fdaaaed9d3ca8b"
dependencies = [
"bytecheck_derive",
"ptr_meta",
"rancor",
"simdutf8",
]
[[package]]
name = "bytecheck_derive"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89385e82b5d1821d2219e0b095efa2cc1f246cbf99080f3be46a1a85c0d392d9"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "byteorder"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]]
name = "bytes"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3"
[[package]]
name = "cfg-if"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
[[package]]
name = "clap"
version = "4.5.53"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c9e340e012a1bf4935f5282ed1436d1489548e8f72308207ea5df0e23d2d03f8"
dependencies = [
"clap_builder",
]
[[package]]
name = "clap_builder"
version = "4.5.53"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d76b5d13eaa18c901fd2f7fca939fefe3a0727a953561fefdf3b2922b8569d00"
dependencies = [
"anstream",
"anstyle",
"clap_lex",
"strsim",
]
[[package]]
name = "clap_lex"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
[[package]]
name = "colorchoice"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
[[package]]
name = "crc32fast"
version = "1.5.0"
@ -23,6 +166,31 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
dependencies = [
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
[[package]]
name = "csv"
version = "1.4.0"
@ -44,6 +212,12 @@ dependencies = [
"memchr",
]
[[package]]
name = "either"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
[[package]]
name = "encoding_rs"
version = "0.8.35"
@ -53,6 +227,12 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "equivalent"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
[[package]]
name = "flate2"
version = "1.1.5"
@ -64,10 +244,67 @@ dependencies = [
]
[[package]]
name = "itoa"
version = "1.0.15"
name = "gen"
version = "0.1.0"
dependencies = [
"anyhow",
"boomphf",
"byteorder",
"clap",
"csv",
"rkyv",
"serde",
]
[[package]]
name = "hashbrown"
version = "0.15.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
[[package]]
name = "hashbrown"
version = "0.16.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
[[package]]
name = "indexmap"
version = "2.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2"
dependencies = [
"equivalent",
"hashbrown 0.16.1",
]
[[package]]
name = "is_terminal_polyfill"
version = "1.70.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
[[package]]
name = "itoa"
version = "1.0.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ee5b5339afb4c41626dde77b7a611bd4f2c202b897852b4bcf5d03eddc61010"
[[package]]
name = "js-sys"
version = "0.3.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8"
dependencies = [
"once_cell",
"wasm-bindgen",
]
[[package]]
name = "log"
version = "0.4.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
[[package]]
name = "memchr"
@ -85,6 +322,26 @@ dependencies = [
"simd-adler32",
]
[[package]]
name = "munge"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e17401f259eba956ca16491461b6e8f72913a0a114e39736ce404410f915a0c"
dependencies = [
"munge_macro",
]
[[package]]
name = "munge_macro"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4568f25ccbd45ab5d5603dc34318c1ec56b117531781260002151b8530a9f931"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "nom"
version = "8.0.0"
@ -94,6 +351,18 @@ dependencies = [
"memchr",
]
[[package]]
name = "once_cell"
version = "1.21.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
[[package]]
name = "once_cell_polyfill"
version = "1.70.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
[[package]]
name = "proc-macro2"
version = "1.0.103"
@ -103,6 +372,26 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "ptr_meta"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b9a0cf95a1196af61d4f1cbdab967179516d9a4a4312af1f31948f8f6224a79"
dependencies = [
"ptr_meta_derive",
]
[[package]]
name = "ptr_meta_derive"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7347867d0a7e1208d93b46767be83e2b8f978c3dad35f775ac8d8847551d6fe1"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "quote"
version = "1.0.42"
@ -112,6 +401,41 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "rancor"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a063ea72381527c2a0561da9c80000ef822bdd7c3241b1cc1b12100e3df081ee"
dependencies = [
"ptr_meta",
]
[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
[[package]]
name = "rayon"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
dependencies = [
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
dependencies = [
"crossbeam-deque",
"crossbeam-utils",
]
[[package]]
name = "rbufr"
version = "0.1.0"
@ -125,10 +449,55 @@ dependencies = [
]
[[package]]
name = "ryu"
version = "1.0.20"
name = "rend"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
checksum = "cadadef317c2f20755a64d7fdc48f9e7178ee6b0e1f7fce33fa60f1d68a276e6"
dependencies = [
"bytecheck",
]
[[package]]
name = "rkyv"
version = "0.8.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35a640b26f007713818e9a9b65d34da1cf58538207b052916a83d80e43f3ffa4"
dependencies = [
"bytecheck",
"bytes",
"hashbrown 0.15.5",
"indexmap",
"munge",
"ptr_meta",
"rancor",
"rend",
"rkyv_derive",
"tinyvec",
"uuid",
]
[[package]]
name = "rkyv_derive"
version = "0.8.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd83f5f173ff41e00337d97f6572e416d022ef8a19f371817259ae960324c482"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "rustversion"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
[[package]]
name = "ryu"
version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62049b2877bf12821e8f9ad256ee38fdc31db7387ec2d3b3f403024de2034aea"
[[package]]
name = "serde"
@ -166,6 +535,18 @@ version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2"
[[package]]
name = "simdutf8"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e"
[[package]]
name = "strsim"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "syn"
version = "2.0.111"
@ -197,8 +578,108 @@ dependencies = [
"syn",
]
[[package]]
name = "tinyvec"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa"
dependencies = [
"tinyvec_macros",
]
[[package]]
name = "tinyvec_macros"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
[[package]]
name = "unicode-ident"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
[[package]]
name = "utf8parse"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "uuid"
version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2e054861b4bd027cd373e18e8d8d8e6548085000e41290d95ce0c373a654b4a"
dependencies = [
"js-sys",
"wasm-bindgen",
]
[[package]]
name = "wasm-bindgen"
version = "0.2.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd"
dependencies = [
"cfg-if",
"once_cell",
"rustversion",
"wasm-bindgen-macro",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40"
dependencies = [
"bumpalo",
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4"
dependencies = [
"unicode-ident",
]
[[package]]
name = "windows-link"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
[[package]]
name = "windows-sys"
version = "0.61.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
dependencies = [
"windows-link",
]
[[package]]
name = "wyhash"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf6e163c25e3fac820b4b453185ea2dea3b6a3e0a721d4d23d75bd33734c295"
dependencies = [
"rand_core",
]

View File

@ -1,16 +1,2 @@
[package]
name = "rbufr"
version = "0.1.0"
edition = "2024"
[lib]
name = "librbufr"
path = "src/lib.rs"
[dependencies]
csv = "1.4.0"
encoding_rs = "0.8.35"
flate2 = "1.1.5"
nom = "8.0.0"
serde = { version = "1.0.228", features = ["derive"] }
thiserror = "2.0.17"
[workspace]
members = ["rbufr", "gen"]
# Both members use edition 2024. A virtual workspace does not inherit the
# resolver from its members, so opt in explicitly instead of silently
# falling back to resolver "1".
resolver = "3"

13
gen/Cargo.toml Normal file
View File

@ -0,0 +1,13 @@
[package]
name = "gen"
version = "0.1.0"
edition = "2024"
[dependencies]
anyhow = "1.0.100"
boomphf = { version = "0.6.0", features = ["serde"] }
byteorder = "1.5.0"
clap = "4.5.53"
csv = "1.4.0"
rkyv = { version = "0.8.12" }
serde = { version = "1.0.228", features = ["derive"] }

109
gen/src/btable.rs Normal file
View File

@ -0,0 +1,109 @@
use crate::{FXY, TableEntryLoader};
use anyhow::{Context, Result};
use std::path::Path;
/// Collects the Table B entries produced while a CSV file is being read.
///
/// `Default` yields an empty loader, equivalent to `BTableCsvLoader::new()`.
#[derive(Default)]
pub struct BTableCsvLoader {
    /// Entries in the order their rows were processed.
    entries: Vec<BTableEntry>,
}
/// One row of the WMO Table B CSV, exactly as deserialized.
///
/// Every field is bound to the CSV column named in its `serde(rename)`
/// attribute. Scale and reference-value columns are signed: the WMO tables
/// contain negative scales and negative reference values, which an unsigned
/// field would reject at deserialization time.
#[derive(Debug, serde::Deserialize)]
pub struct RawBTableEntry {
    #[serde(rename = "ClassNo")]
    pub class_no: String,
    #[serde(rename = "ClassName_en")]
    pub class_name_en: String,
    /// Descriptor as text; parsed with `FXY::from_str` during processing.
    #[serde(rename = "FXY")]
    pub fxy: String,
    #[serde(rename = "ElementName_en")]
    pub element_name_en: String,
    #[serde(rename = "BUFR_Unit")]
    pub bufr_unit: String,
    #[serde(rename = "BUFR_Scale")]
    pub bufr_scale: i32,
    #[serde(rename = "BUFR_ReferenceValue")]
    pub bufr_reference_value: i32,
    #[serde(rename = "BUFR_DataWidth_Bits")]
    pub bufr_datawidth_bits: u32,
    #[serde(rename = "CREX_Unit")]
    pub crex_unit: String,
    #[serde(rename = "CREX_Scale")]
    pub crex_scale: i32,
    #[serde(rename = "CREX_DataWidth_Char")]
    pub crex_datawidth_char: u32,
    #[serde(rename = "Note_en")]
    pub note_en: Option<String>,
    #[serde(rename = "noteIDs")]
    pub note_ids: Option<String>,
    #[serde(rename = "Status")]
    pub status: String,
}

/// A processed Table B entry: the parsed descriptor plus the BUFR-related
/// columns of the raw row. The CREX columns and the class number are not kept.
#[derive(Debug, Clone)]
pub struct BTableEntry {
    fxy: FXY,
    class_name_en: String,
    element_name_en: String,
    bufr_unit: String,
    /// Signed, mirroring `RawBTableEntry::bufr_scale`.
    bufr_scale: i32,
    /// Signed, mirroring `RawBTableEntry::bufr_reference_value`.
    bufr_reference_value: i32,
    bufr_datawidth_bits: u32,
    note_en: Option<String>,
    note_ids: Option<String>,
    status: String,
}
impl BTableCsvLoader {
pub fn new() -> Self {
BTableCsvLoader {
entries: Vec::new(),
}
}
pub fn from_wmo_csv<P: AsRef<Path>>(path: P) -> Result<Self> {
let mut loader = Self::new();
let table_loader = crate::TableLoader::new();
table_loader.load_table(path, &mut loader)?;
Ok(loader)
}
pub fn entries(&self) -> &[BTableEntry] {
&self.entries
}
}
impl TableEntryLoader for BTableCsvLoader {
type RawEntry = RawBTableEntry;
fn process_entry(&mut self, raw: Self::RawEntry) -> anyhow::Result<()> {
// Parse FXY string (e.g., "001001") to u32
let fxy = FXY::from_str(&raw.fxy)?;
let entry = BTableEntry {
fxy,
class_name_en: raw.class_name_en,
element_name_en: raw.element_name_en,
bufr_unit: raw.bufr_unit,
bufr_scale: raw.bufr_scale,
bufr_reference_value: raw.bufr_reference_value,
bufr_datawidth_bits: raw.bufr_datawidth_bits,
note_en: raw.note_en,
note_ids: raw.note_ids,
status: raw.status,
};
self.entries.push(entry);
Ok(())
}
}
#[cfg(test)]
mod test {
    use super::*;

    /// Smoke test: the WMO Table B CSV loads without error.
    ///
    /// The table is looked up relative to this crate instead of through a
    /// developer-specific absolute path.
    #[test]
    fn test_btable_csv_loader() {
        // NOTE(review): assumes the WMO `BUFR4` directory sits at the
        // workspace root, next to `gen/` -- confirm the intended location.
        let csv = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
            .join("../BUFR4/BUFRCREX_TableB_en_00.csv");
        if !csv.exists() {
            // The tables are external data; do not fail machines without them.
            eprintln!(
                "skipping test_btable_csv_loader: {} not found",
                csv.display()
            );
            return;
        }
        let _loader = BTableCsvLoader::from_wmo_csv(&csv).unwrap();
    }
}

98
gen/src/dtable.rs Normal file
View File

@ -0,0 +1,98 @@
use crate::{FXY, TableEntryLoader};
/// Groups consecutive Table D CSV rows that share the same `FXY1` into one
/// `DTableEntry` each.
///
/// The fields are private and the type has no other constructor, so
/// `Default` is the way to obtain an empty loader.
#[derive(Default)]
pub struct DTableCsvLoader {
    /// Sequence currently being assembled; `None` before the first row and
    /// after `finish` has flushed it.
    current_chain: Option<DTableEntry>,
    /// Completed sequences, in the order they were closed.
    entries: Vec<DTableEntry>,
}
/// One row of the Table D CSV, exactly as deserialized.
///
/// Column order of the source file:
/// `Category,CategoryOfSequences_en,FXY1,Title_en,SubTitle_en,FXY2,ElementName_en,ElementDescription_en,Note_en,noteIDs,Status`
///
/// Every field is bound to the column named in its `serde(rename)` attribute.
#[derive(Debug, serde::Deserialize)]
pub struct RawDTableEntry {
    #[serde(rename = "Category")]
    pub category: String,
    #[serde(rename = "CategoryOfSequences_en")]
    pub category_of_sequences_en: String,
    /// Descriptor of the sequence this row belongs to.
    #[serde(rename = "FXY1")]
    pub fxy1: String,
    #[serde(rename = "Title_en")]
    pub title_en: Option<String>,
    #[serde(rename = "SubTitle_en")]
    pub subtitle_en: Option<String>,
    /// Descriptor contributed to the sequence by this row.
    #[serde(rename = "FXY2")]
    pub fxy2: String,
    #[serde(rename = "ElementName_en")]
    pub element_name_en: Option<String>,
    #[serde(rename = "ElementDescription_en")]
    pub element_description_en: Option<String>,
    #[serde(rename = "Note_en")]
    pub note_en: Option<String>,
    #[serde(rename = "noteIDs")]
    pub note_ids: String,
    #[serde(rename = "Status")]
    pub status: String,
}
// NOTE(review): the `TableEntryLoader` trait in lib.rs also declares an
// associated `TableEntry` type; this impl does not define one yet.
impl TableEntryLoader for DTableCsvLoader {
    type RawEntry = RawDTableEntry;

    /// Folds one CSV row into the sequence under construction.
    ///
    /// Rows whose `FXY1` equals the open sequence's descriptor only append
    /// their `FXY2` to it. Any other row closes the open sequence (if any) and
    /// opens a new one, whose descriptive columns are taken from that row.
    ///
    /// # Errors
    /// Fails when `FXY1` or `FXY2` cannot be parsed.
    fn process_entry(&mut self, raw: Self::RawEntry) -> anyhow::Result<()> {
        let sequence = FXY::from_str(&raw.fxy1)?;

        // Same sequence as the previous row: extend it and stop here.
        if let Some(open) = self.current_chain.as_mut() {
            if open.fxy == sequence {
                open.fxy_chain.push(FXY::from_str(&raw.fxy2)?);
                return Ok(());
            }
        }

        // A different sequence starts (or this is the first row): flush the
        // previous one before opening the new chain.
        if let Some(done) = self.current_chain.take() {
            self.entries.push(done);
        }
        let first_member = FXY::from_str(&raw.fxy2)?;
        self.current_chain = Some(DTableEntry {
            fxy: sequence,
            fxy_chain: vec![first_member],
            category: raw.category,
            category_of_sequences_en: raw.category_of_sequences_en,
            title_en: raw.title_en,
            subtitle_en: raw.subtitle_en,
            note_en: raw.note_en,
            note_ids: raw.note_ids,
            status: raw.status,
        });
        Ok(())
    }

    /// Flushes the sequence still under construction into `entries`.
    fn finish(&mut self) -> anyhow::Result<()> {
        if let Some(entry) = self.current_chain.take() {
            self.entries.push(entry);
        }
        Ok(())
    }
}
/// A Table D sequence: its own descriptor plus the descriptors it expands to.
///
/// The descriptive columns come from the first CSV row of the sequence.
#[derive(Debug, Clone)]
pub struct DTableEntry {
    /// Descriptor of the sequence itself (CSV column `FXY1`).
    fxy: FXY,
    /// Member descriptors (CSV column `FXY2`), in row order.
    fxy_chain: Vec<FXY>,
    category: String,
    category_of_sequences_en: String,
    title_en: Option<String>,
    subtitle_en: Option<String>,
    note_en: Option<String>,
    note_ids: String,
    status: String,
}

364
gen/src/lib.rs Normal file
View File

@ -0,0 +1,364 @@
mod btable;
mod dtable;
mod utils;
use anyhow::Context;
use boomphf::Mphf;
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use rkyv::Archive;
use rkyv::ser::serializers::AllocSerializer;
use serde::de::DeserializeOwned;
use serde::{Deserialize, Serialize};
use std::fmt::Debug;
use std::fs::File;
use std::io::{Read, Seek, SeekFrom, Write};
use std::path::Path;
use csv::{DeserializeRecordsIter, ReaderBuilder};
/// Stateless CSV reader that feeds deserialized rows to a [`TableEntryLoader`].
pub struct TableLoader;

impl TableLoader {
    /// Reads the comma-separated file at `path`, treating its first row as
    /// the header, and passes each deserialized row to `loader.process_entry`.
    ///
    /// `loader.finish()` is not called here; that is left to the caller.
    ///
    /// # Errors
    /// Stops at, and returns, the first failure: the file cannot be opened,
    /// a row does not deserialize, or `process_entry` reports an error.
    pub fn load_table<P: AsRef<Path>, T: TableEntryLoader>(
        &self,
        path: P,
        loader: &mut T,
    ) -> anyhow::Result<()> {
        let mut builder = ReaderBuilder::new();
        builder.has_headers(true).delimiter(b',');
        let mut reader = builder.from_path(path)?;

        reader
            .deserialize::<T::RawEntry>()
            .try_for_each(|row| -> anyhow::Result<()> { loader.process_entry(row?) })
    }
}
pub trait TableEntryLoader: Sized {
/// The raw CSV entry type that will be deserialized
type RawEntry: for<'de> serde::Deserialize<'de> + Debug;
type TableEntry: TableEntry;
/// Process a single entry from the CSV file
fn process_entry(&mut self, raw: Self::RawEntry) -> anyhow::Result<()>;
fn finish(&mut self) -> anyhow::Result<()> {
Ok(())
}
}
/// A processed table entry that can be looked up by its descriptor.
pub trait TableEntry: Serialize + DeserializeOwned + Debug + Clone {
    /// Descriptor identifying this entry; used as its lookup key.
    fn fxy(&self) -> FXY;
}
/// Lookup metadata stored in the `.meta` file next to the record data.
///
/// NOTE(review): only the serde traits are derived here, while
/// `BufrTableMph` passes this type to rkyv calls -- confirm which
/// serialization format is intended.
#[derive(Serialize, Deserialize)]
struct MphMetadata {
    /// Minimal perfect hash function over the table's descriptors.
    mphf: Mphf<FXY>,
    /// Byte offset of each record in the `.data` file, indexed by hash value.
    offsets: Vec<u64>,
}
/// On-disk table addressed through a minimal perfect hash of the descriptor.
///
/// `T` selects the loader whose `TableEntry` type the records decode to.
struct BufrTableMph<T: TableEntryLoader> {
    /// Minimal perfect hash function over the stored descriptors.
    mphf: Mphf<FXY>,
    /// Byte offset of each record in the data file, indexed by hash value.
    offsets: Vec<u64>,
    /// Open handle on the `.data` file the offsets point into.
    data_file: File,
    /// Ties the table to `T` without storing a value of that type.
    _marker: std::marker::PhantomData<T>,
}
// NOTE(review): `AllocSerializer`, `archived_root`, `check_archived_root` and
// `Infallible` look like rkyv 0.7 names, while gen/Cargo.toml requests rkyv
// 0.8.12, and neither `MphMetadata` nor the `TableEntry` trait requires the
// rkyv traits. The rkyv calls below are kept as written -- confirm the
// intended serialization API before relying on this type.
impl<T: TableEntryLoader> BufrTableMph<T> {
    /// Builds the MPH table for `entries` and writes it to disk.
    ///
    /// `<output_path>.data` receives one record per entry, ordered by hash
    /// value, each as a little-endian `u32` length followed by the serialized
    /// bytes. `<output_path>.meta` receives the serialized [`MphMetadata`].
    ///
    /// # Errors
    /// Returns an I/O error if a file cannot be created or written, if
    /// serialization fails, or if a record exceeds `u32::MAX` bytes.
    fn build(entries: &[T::TableEntry], output_path: &str) -> std::io::Result<Self> {
        let keys: Vec<FXY> = entries.iter().map(|e| e.fxy()).collect();
        let mphf = Mphf::new(2.0, &keys);

        // Pair every entry with its hash and sort, so that the n-th record
        // written is the one whose key hashes to n.
        let mut sorted_entries: Vec<(usize, T::TableEntry)> = entries
            .iter()
            .map(|e| (mphf.hash(&e.fxy()) as usize, e.clone()))
            .collect();
        sorted_entries.sort_by_key(|(hash, _)| *hash);

        let data_path = format!("{}.data", output_path);
        let mut data_file = File::create(&data_path)?;

        let mut offsets = Vec::with_capacity(sorted_entries.len());
        for (_, entry) in sorted_entries {
            offsets.push(data_file.stream_position()?);

            // Serialize the entry with rkyv.
            let mut serializer = AllocSerializer::<256>::default();
            serializer.serialize_value(&entry).map_err(|e| {
                std::io::Error::new(std::io::ErrorKind::InvalidData, format!("{e:?}"))
            })?;
            let bytes = serializer.into_serializer().into_inner();

            // Length prefix, then payload. Refuse records whose length does
            // not fit the prefix instead of silently truncating it.
            let len = u32::try_from(bytes.len()).map_err(|_| {
                std::io::Error::new(
                    std::io::ErrorKind::InvalidInput,
                    "serialized entry exceeds u32::MAX bytes",
                )
            })?;
            data_file.write_u32::<LittleEndian>(len)?;
            data_file.write_all(&bytes)?;
        }

        // Persist the metadata through the same serializer type.
        let meta = MphMetadata { mphf, offsets };
        let mut meta_serializer = AllocSerializer::<4096>::default();
        meta_serializer.serialize_value(&meta).map_err(|e| {
            std::io::Error::new(std::io::ErrorKind::InvalidData, format!("{e:?}"))
        })?;
        let meta_bytes = meta_serializer.into_serializer().into_inner();
        std::fs::write(format!("{}.meta", output_path), meta_bytes)?;

        Ok(Self {
            mphf: meta.mphf,
            offsets: meta.offsets,
            data_file: File::open(data_path)?,
            _marker: std::marker::PhantomData,
        })
    }

    /// Loads a table previously written by [`Self::build`] from `<path>.meta`
    /// and `<path>.data`.
    fn load(path: &str) -> std::io::Result<Self> {
        let meta_bytes = std::fs::read(format!("{}.meta", path))?;

        // SAFETY: sound only if `meta_bytes` is an unmodified archive written
        // by `build`; nothing here validates the bytes.
        // NOTE(review): confirm that a buffer returned by `fs::read` meets
        // the archive's alignment requirement.
        let archived = unsafe { rkyv::archived_root::<MphMetadata>(&meta_bytes) };
        // Convert the archived view into an owned value.
        let meta: MphMetadata = archived.deserialize(&mut rkyv::Infallible).unwrap();

        let data_file = File::open(format!("{}.data", path))?;

        Ok(Self {
            mphf: meta.mphf,
            offsets: meta.offsets,
            data_file,
            _marker: std::marker::PhantomData,
        })
    }

    /// Looks up the entry stored for `fxy`.
    ///
    /// Returns `Ok(None)` when `fxy` is not in the table, including the case
    /// where it hashes to a slot that holds a different key.
    fn get(&mut self, fxy: FXY) -> std::io::Result<Option<T::TableEntry>> {
        let Some(buffer) = self.read_record(&fxy)? else {
            return Ok(None);
        };

        // Optional validation of the archived bytes, done before they are
        // accessed. NOTE(review): gen/Cargo.toml declares no `validation`
        // feature, so this block is currently never compiled.
        #[cfg(feature = "validation")]
        {
            rkyv::check_archived_root::<T::TableEntry>(&buffer)
                .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
        }

        // SAFETY: sound only if `buffer` holds a record written by `build`
        // for the same `T::TableEntry` type; without the `validation` feature
        // the bytes are not checked.
        let archived = unsafe { rkyv::archived_root::<T::TableEntry>(&buffer) };
        // Convert the archived view into an owned value.
        let entry: T::TableEntry = archived.deserialize(&mut rkyv::Infallible).unwrap();

        // A key outside the construction set may still hash to an occupied
        // slot, so compare the stored key before returning.
        if entry.fxy() == fxy {
            Ok(Some(entry))
        } else {
            Ok(None)
        }
    }

    /// Returns the raw serialized bytes stored in the slot `fxy` hashes to,
    /// without decoding them.
    ///
    /// The stored key is not compared here, so for a descriptor outside the
    /// table this may return another entry's bytes.
    fn get_archived(&mut self, fxy: FXY) -> std::io::Result<Option<Vec<u8>>> {
        self.read_record(&fxy)
    }

    /// Reads the length-prefixed record in the slot `fxy` hashes to.
    fn read_record(&mut self, fxy: &FXY) -> std::io::Result<Option<Vec<u8>>> {
        // `try_hash`, not `hash`: the key comes from the caller and may not
        // be part of the set the function was built from.
        let Some(slot) = self.mphf.try_hash(fxy) else {
            return Ok(None);
        };
        let Some(&offset) = usize::try_from(slot)
            .ok()
            .and_then(|index| self.offsets.get(index))
        else {
            return Ok(None);
        };

        self.data_file.seek(SeekFrom::Start(offset))?;
        let len = self.data_file.read_u32::<LittleEndian>()? as usize;
        let mut buffer = vec![0u8; len];
        self.data_file.read_exact(&mut buffer)?;
        Ok(Some(buffer))
    }
}
/// A table descriptor split into its `F`, `X` and `Y` components.
///
/// Used as the key when comparing entries and when hashing them.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub struct FXY {
    /// `F` component of the descriptor.
    pub f: u16,
    /// `X` component of the descriptor.
    pub x: u16,
    /// `Y` component of the descriptor.
    pub y: u16,
}
impl FXY {
    /// Parses a six-digit descriptor string such as `"001001"`.
    ///
    /// A BUFR descriptor is written as one digit for `F`, two for `X` and
    /// three for `Y`, so `"301150"` yields `f = 3`, `x = 1`, `y = 150`.
    ///
    /// # Errors
    /// Fails when the string is not exactly six bytes long, contains
    /// non-ASCII characters, or one of the three parts is not a number.
    pub fn from_str(fxy_str: &str) -> anyhow::Result<Self> {
        if fxy_str.len() != 6 {
            return Err(anyhow::anyhow!("Invalid FXY string length: {}", fxy_str));
        }
        // The byte-range slices below would panic in the middle of a
        // multi-byte character, so reject such input up front.
        if !fxy_str.is_ascii() {
            return Err(anyhow::anyhow!(
                "Invalid FXY string (non-ASCII): {}",
                fxy_str
            ));
        }

        let part = |range: std::ops::Range<usize>, name: &str| {
            fxy_str[range]
                .parse::<u16>()
                .with_context(|| format!("Failed to parse {} from FXY: {}", name, fxy_str))
        };

        let f = part(0..1, "F")?;
        let x = part(1..3, "X")?;
        let y = part(3..6, "Y")?;

        Ok(FXY { f, x, y })
    }
}
/// Public entry point for building and querying MPH-backed tables.
///
/// Currently a stub: it carries no state, and loading and lookup are
/// placeholders.
pub struct BUFRTableMPH {}

impl BUFRTableMPH {
    /// Runs `loader` over the CSV file at `csv_path` and finalises it.
    ///
    /// `output_path` is accepted but not used yet; nothing is written.
    pub fn build_from_csv<P: AsRef<Path>, T: TableEntryLoader>(
        mut loader: T,
        csv_path: P,
        output_path: P,
    ) -> anyhow::Result<Self> {
        let reader = TableLoader;
        reader.load_table(csv_path, &mut loader)?;
        loader.finish()?;

        Ok(Self {})
    }

    /// Placeholder: ignores `path` and returns an empty table handle.
    pub fn load_from_disk<P: AsRef<Path>>(path: P) -> anyhow::Result<Self> {
        Ok(Self {})
    }

    /// Placeholder: ignores `fxy` and always reports "not found".
    pub fn lookup(&self, fxy: FXY) -> anyhow::Result<Option<()>> {
        Ok(None)
    }
}
// #[derive(Serialize, Deserialize, Debug)]
// struct TableEntry {
// fxy: u32, // F-X-Y descriptor (e.g., 001001)
// name: String, // "WMO Block Number"
// unit: String, // "Numeric"
// scale: i8,
// reference: i32,
// data_width: u8,
// }
// struct BufrTableMph {
// mphf: Mphf<u32>, // The MPH function
// offsets: Vec<u64>, // Byte offsets into the data file
// data_file: File, // Memory-mapped or regular file
// }
// impl BufrTableMph {
// /// Build MPH table from entries
// fn build(entries: Vec<TableEntry>, output_path: &str) -> std::io::Result<Self> {
// // 1. Extract keys (FXY descriptors)
// let keys: Vec<u32> = entries.iter().map(|e| e.fxy).collect();
// // 2. Build MPH function with gamma=2.0 (space/time tradeoff)
// let mphf = Mphf::new(2.0, &keys);
// // 3. Create sorted entries by MPH hash
// let mut sorted_entries: Vec<(usize, TableEntry)> = entries
// .into_iter()
// .map(|e| {
// let hash = mphf.hash(&e.fxy) as usize;
// (hash, e)
// })
// .collect();
// sorted_entries.sort_by_key(|(hash, _)| *hash);
// // 4. Write binary data file and collect offsets
// let data_path = format!("{}.data", output_path);
// let mut data_file = File::create(&data_path)?;
// let mut offsets = Vec::with_capacity(sorted_entries.len());
// for (_, entry) in sorted_entries {
// let offset = data_file.stream_position()?;
// offsets.push(offset);
// // Write entry in compact binary format
// let serialized = bincode::serialize(&entry).unwrap();
// data_file.write_u32::<byteorder::LittleEndian>(serialized.len() as u32)?;
// data_file.write_all(&serialized)?;
// }
// // 5. Write MPH metadata
// let meta_path = format!("{}.meta", output_path);
// let meta = MphMetadata { mphf, offsets };
// let meta_bytes = bincode::serialize(&meta).unwrap();
// std::fs::write(meta_path, meta_bytes)?;
// Ok(Self {
// mphf: meta.mphf,
// offsets: meta.offsets,
// data_file: File::open(data_path)?,
// })
// }
// /// Load from disk
// fn load(path: &str) -> std::io::Result<Self> {
// let meta_bytes = std::fs::read(format!("{}.meta", path))?;
// let meta: MphMetadata = bincode::deserialize(&meta_bytes).unwrap();
// let data_file = File::open(format!("{}.data", path))?;
// Ok(Self {
// mphf: meta.mphf,
// offsets: meta.offsets,
// data_file,
// })
// }
// /// Lookup entry by FXY descriptor
// fn get(&mut self, fxy: u32) -> std::io::Result<Option<TableEntry>> {
// // 1. Hash the key to get index
// let hash = self.mphf.hash(&fxy) as usize;
// // 2. Get offset from array
// let offset = match self.offsets.get(hash) {
// Some(&off) => off,
// None => return Ok(None),
// };
// // 3. Seek and read
// self.data_file.seek(SeekFrom::Start(offset))?;
// let len = self.data_file.read_u32::<byteorder::LittleEndian>()? as usize;
// let mut buffer = vec![0u8; len];
// self.data_file.read_exact(&mut buffer)?;
// // 4. Deserialize
// let entry: TableEntry = bincode::deserialize(&buffer).unwrap();
// // Verify key matches (MPH guarantees unique index, but verify correctness)
// if entry.fxy == fxy {
// Ok(Some(entry))
// } else {
// Ok(None)
// }
// }
// }
// #[derive(Serialize, Deserialize)]
// struct MphMetadata {
// mphf: Mphf<u32>,
// offsets: Vec<u64>,
// }
// use byteorder::{ReadBytesExt, WriteBytesExt};

11
gen/src/utils.rs Normal file
View File

@ -0,0 +1,11 @@
/// Converts a `"CC-EEE"` string (two-digit class, dash, three-digit element)
/// into the number `CCEEE`, e.g. `"01-001"` becomes `1001`.
///
/// Returns `None` unless the input is exactly six bytes long, has a `-` in
/// the third position and decimal digits everywhere else.
///
/// NOTE(review): the three-digit element width is inferred from the
/// `class * 1000` packing and the hundreds/tens weights of the previous
/// version, which read only two element digits and dropped the units digit.
/// Confirm against the input format this helper is meant for.
pub(crate) fn fxy_str_to_u32(fxy: &str) -> Option<u32> {
    let &[c_tens, c_units, b'-', e_hundreds, e_tens, e_units] = fxy.as_bytes() else {
        return None;
    };
    let digit = |byte: u8| (byte as char).to_digit(10);

    let class_no = digit(c_tens)? * 10 + digit(c_units)?;
    let element_no = digit(e_hundreds)? * 100 + digit(e_tens)? * 10 + digit(e_units)?;
    Some(class_no * 1000 + element_no)
}

208
rbufr/Cargo.lock generated Normal file
View File

@ -0,0 +1,208 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "adler2"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
[[package]]
name = "cfg-if"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
[[package]]
name = "crc32fast"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
dependencies = [
"cfg-if",
]
[[package]]
name = "csv"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938"
dependencies = [
"csv-core",
"itoa",
"ryu",
"serde_core",
]
[[package]]
name = "csv-core"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782"
dependencies = [
"memchr",
]
[[package]]
name = "encoding_rs"
version = "0.8.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3"
dependencies = [
"cfg-if",
]
[[package]]
name = "flate2"
version = "1.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb"
dependencies = [
"crc32fast",
"miniz_oxide",
]
[[package]]
name = "gen"
version = "0.1.0"
[[package]]
name = "itoa"
version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
[[package]]
name = "memchr"
version = "2.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
[[package]]
name = "miniz_oxide"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
dependencies = [
"adler2",
"simd-adler32",
]
[[package]]
name = "nom"
version = "8.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405"
dependencies = [
"memchr",
]
[[package]]
name = "proc-macro2"
version = "1.0.103"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.42"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rbufr"
version = "0.1.0"
dependencies = [
"csv",
"encoding_rs",
"flate2",
"nom",
"serde",
"thiserror",
]
[[package]]
name = "ryu"
version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
[[package]]
name = "serde"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
dependencies = [
"serde_core",
"serde_derive",
]
[[package]]
name = "serde_core"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "simd-adler32"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2"
[[package]]
name = "syn"
version = "2.0.111"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "thiserror"
version = "2.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "2.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "unicode-ident"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"

16
rbufr/Cargo.toml Normal file
View File

@ -0,0 +1,16 @@
[package]
name = "rbufr"
version = "0.1.0"
edition = "2024"
[lib]
name = "librbufr"
path = "src/lib.rs"
[dependencies]
csv = "1.4.0"
encoding_rs = "0.8.35"
flate2 = "1.1.5"
nom = "8.0.0"
serde = { version = "1.0.228", features = ["derive"] }
thiserror = "2.0.17"

3
rbufr/src/main.rs Normal file
View File

@ -0,0 +1,3 @@
/// Placeholder binary entry point: prints a greeting and exits.
fn main() {
    println!("Hello, world!");
}