This commit is contained in:
tsuki 2025-12-23 22:04:44 +08:00
parent fc44344a01
commit 3f2a2bcefc
126 changed files with 2767 additions and 431 deletions

1415
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -1,13 +1,25 @@
[package] [package]
name = "gen" name = "gentools"
version = "0.1.0" version = "0.1.0"
edition = "2024" edition = "2024"
[lib]
name = "genlib"
path = "src/lib.rs"
[[bin]]
name = "gen-ctl"
path = "src/main.rs"
[dependencies] [dependencies]
anyhow = "1.0.100" anyhow = "1.0.100"
boomphf = { version = "0.6.0", features = ["serde"] } bincode = "1.3.3"
ph = "0.10.0"
byteorder = "1.5.0" byteorder = "1.5.0"
clap = "4.5.53" clap = { version = "4.5.53", features = ["derive"] }
csv = "1.4.0" csv = "1.4.0"
glob = "0.3"
memmap2 = "0.9"
rkyv = { version = "0.8.12" } rkyv = { version = "0.8.12" }
serde = { version = "1.0.228", features = ["derive"] } serde = { version = "1.0.228", features = ["derive"] }
binout = "0.3.1"

View File

@ -1,109 +0,0 @@
use crate::{FXY, TableEntryLoader};
use anyhow::{Context, Result};
use std::path::Path;
/// Loader that accumulates WMO Table B entries parsed from a CSV file.
pub struct BTableCsvLoader {
    // Parsed entries, in file order.
    entries: Vec<BTableEntry>,
}
/// One raw row of the WMO BUFRCREX Table B CSV, as deserialized by serde.
///
/// The `serde(rename = ...)` attributes map the CSV column headers onto
/// snake_case field names; fields are kept as close to the source text as
/// possible and converted/validated later in `process_entry`.
#[derive(Debug, serde::Deserialize)]
pub struct RawBTableEntry {
    #[serde(rename = "ClassNo")]
    pub class_no: String,
    #[serde(rename = "ClassName_en")]
    pub class_name_en: String,
    // FXY descriptor as text, e.g. "001001"; parsed into `FXY` later.
    #[serde(rename = "FXY")]
    pub fxy: String,
    #[serde(rename = "ElementName_en")]
    pub element_name_en: String,
    #[serde(rename = "BUFR_Unit")]
    pub bufr_unit: String,
    #[serde(rename = "BUFR_Scale")]
    pub bufr_scale: u32,
    #[serde(rename = "BUFR_ReferenceValue")]
    pub bufr_reference_value: u32,
    #[serde(rename = "BUFR_DataWidth_Bits")]
    pub bufr_datawidth_bits: u32,
    #[serde(rename = "CREX_Unit")]
    pub crex_unit: String,
    #[serde(rename = "CREX_Scale")]
    pub crex_scale: u32,
    #[serde(rename = "CREX_DataWidth_Char")]
    pub crex_datawidth_char: u32,
    #[serde(rename = "Note_en")]
    pub note_en: Option<String>,
    #[serde(rename = "noteIDs")]
    pub note_ids: Option<String>,
    #[serde(rename = "Status")]
    pub status: String,
}
/// Parsed Table B entry kept by the loader.
///
/// Only the BUFR-side columns of the raw row are retained; the CREX columns
/// from `RawBTableEntry` are dropped in `process_entry`.
pub struct BTableEntry {
    // Parsed FXY descriptor (e.g. "001001").
    fxy: FXY,
    class_name_en: String,
    element_name_en: String,
    bufr_unit: String,
    bufr_scale: u32,
    bufr_reference_value: u32,
    bufr_datawidth_bits: u32,
    note_en: Option<String>,
    note_ids: Option<String>,
    status: String,
}
impl BTableCsvLoader {
    /// Creates an empty loader with no entries.
    pub fn new() -> Self {
        Self {
            entries: Vec::new(),
        }
    }

    /// Builds a loader by parsing the WMO Table B CSV at `path`.
    pub fn from_wmo_csv<P: AsRef<Path>>(path: P) -> Result<Self> {
        let mut this = Self::new();
        let driver = crate::TableLoader::new();
        driver.load_table(path, &mut this)?;
        Ok(this)
    }

    /// Returns the parsed entries as a slice, in file order.
    pub fn entries(&self) -> &[BTableEntry] {
        &self.entries
    }
}
impl TableEntryLoader for BTableCsvLoader {
    type RawEntry = RawBTableEntry;

    /// Converts one raw CSV row into a `BTableEntry` and stores it.
    fn process_entry(&mut self, raw: Self::RawEntry) -> anyhow::Result<()> {
        // The FXY descriptor arrives as a zero-padded string such as "001001".
        let descriptor = FXY::from_str(&raw.fxy)?;
        self.entries.push(BTableEntry {
            fxy: descriptor,
            class_name_en: raw.class_name_en,
            element_name_en: raw.element_name_en,
            bufr_unit: raw.bufr_unit,
            bufr_scale: raw.bufr_scale,
            bufr_reference_value: raw.bufr_reference_value,
            bufr_datawidth_bits: raw.bufr_datawidth_bits,
            note_en: raw.note_en,
            note_ids: raw.note_ids,
            status: raw.status,
        });
        Ok(())
    }
}
#[cfg(test)]
mod test {
    use super::*;

    /// Smoke test: parse a real WMO Table B CSV end to end.
    ///
    /// Ignored by default because the original hard-coded a machine-specific
    /// absolute path; set `BUFR_TABLE_B_CSV` to the CSV location to run it.
    #[test]
    #[ignore = "requires a local WMO BUFRCREX Table B CSV file"]
    fn test_btable_csv_loader() {
        let path = std::env::var("BUFR_TABLE_B_CSV")
            .unwrap_or_else(|_| "BUFR4/BUFRCREX_TableB_en_00.csv".to_string());
        let _loader = BTableCsvLoader::from_wmo_csv(&path).unwrap();
    }
}

154
gen/src/fr/btable.rs Normal file
View File

@ -0,0 +1,154 @@
use crate::{FXY, TableEntry, TableEntryLoader};
use rkyv::Archive;
pub struct BTableCsvLoader;
#[derive(Debug)]
pub struct RawBTableEntry {
pub f: u16,
pub x: u16,
pub y: u16,
}
/// Deserializes a string column, mapping the empty string to `None`.
fn deserialize_optional_string<'de, D>(deserializer: D) -> Result<Option<String>, D::Error>
where
    D: serde::Deserializer<'de>,
{
    let raw: String = serde::Deserialize::deserialize(deserializer)?;
    // `filter` drops the value when the cell was empty.
    Ok(Some(raw).filter(|v| !v.is_empty()))
}
// Helper function to deserialize empty strings as None for u32
fn deserialize_optional_u32<'de, D>(deserializer: D) -> Result<Option<u32>, D::Error>
where
D: serde::Deserializer<'de>,
{
let s: String = serde::Deserialize::deserialize(deserializer)?;
if s.is_empty() {
Ok(None)
} else {
s.parse::<u32>().map(Some).map_err(serde::de::Error::custom)
}
}
/// Owned Table B entry stored in (and read back from) the generated table file.
///
/// Derives both serde and rkyv (de)serialization; rkyv is the on-disk format.
#[derive(
    Debug, Clone, serde::Deserialize, serde::Serialize, Archive, rkyv::Serialize, rkyv::Deserialize,
)]
#[rkyv(compare(PartialEq), derive(Debug))]
pub struct BTableEntry {
    // Parsed FXY descriptor, used as the lookup key.
    fxy: FXY,
    class_name_en: String,
    element_name_en: String,
    bufr_unit: String,
    // Signed (i32): scale and reference value may be negative.
    bufr_scale: i32,
    bufr_reference_value: i32,
    bufr_datawidth_bits: u32,
    note_en: Option<String>,
    note_ids: Option<String>,
    status: String,
}
impl BTableEntry {
    /// FXY descriptor (lookup key) of this entry.
    pub fn fxy(&self) -> FXY {
        self.fxy
    }
    /// English class name.
    pub fn class_name_en(&self) -> &str {
        &self.class_name_en
    }
    /// English element name.
    pub fn element_name_en(&self) -> &str {
        &self.element_name_en
    }
    /// BUFR unit string (e.g. "Numeric").
    pub fn bufr_unit(&self) -> &str {
        &self.bufr_unit
    }
    /// BUFR decimal scale factor.
    pub fn bufr_scale(&self) -> i32 {
        self.bufr_scale
    }
    /// BUFR reference value.
    pub fn bufr_reference_value(&self) -> i32 {
        self.bufr_reference_value
    }
    /// BUFR data width in bits.
    pub fn bufr_datawidth_bits(&self) -> u32 {
        self.bufr_datawidth_bits
    }
    /// Optional English note, if the CSV cell was non-empty.
    pub fn note_en(&self) -> Option<&str> {
        self.note_en.as_deref()
    }
    /// Optional note IDs, if the CSV cell was non-empty.
    pub fn note_ids(&self) -> Option<&str> {
        self.note_ids.as_deref()
    }
    /// Status column (e.g. operational state) as recorded in the CSV.
    pub fn status(&self) -> &str {
        &self.status
    }
}
impl std::fmt::Display for BTableEntry {
    /// Formats the entry as a fixed-width table row:
    /// `FXY | element name | unit | scale | reference | width | status`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Truncate on a char boundary: the previous byte slicing
        // (`&s[..37]`) panics if the cut lands inside a multi-byte UTF-8
        // character, and element names/units are not guaranteed ASCII.
        fn truncate(s: &str, max: usize, keep: usize) -> String {
            if s.chars().count() > max {
                let cut: String = s.chars().take(keep).collect();
                format!("{}...", cut)
            } else {
                s.to_string()
            }
        }
        let element_name = truncate(&self.element_name_en, 40, 37);
        let unit = truncate(&self.bufr_unit, 15, 12);
        write!(
            f,
            "{:02}{:02}{:03} | {:<40} | {:<15} | {:>5} | {:>8} | {:>8} | {}",
            self.fxy.f,
            self.fxy.x,
            self.fxy.y,
            element_name,
            unit,
            self.bufr_scale,
            self.bufr_reference_value,
            self.bufr_datawidth_bits,
            self.status
        )
    }
}
impl TableEntryLoader for BTableCsvLoader {
    type RawEntry = RawBTableEntry;
    type TableEntry = BTableEntry;
    const TABLE_TYPE: crate::TableType = crate::TableType::B;

    /// Converts one raw CSV row into an owned `BTableEntry`.
    ///
    /// Table B rows are self-contained, so every row yields `Some(entry)`;
    /// the CREX-specific columns of the raw row are discarded here.
    fn process_entry(&mut self, raw: Self::RawEntry) -> anyhow::Result<Option<Self::TableEntry>> {
        // Parse FXY string (e.g., "001001") to u32
        let fxy = FXY::from_str(&raw.fxy)?;
        let entry = BTableEntry {
            fxy,
            class_name_en: raw.class_name_en,
            element_name_en: raw.element_name_en,
            bufr_unit: raw.bufr_unit,
            bufr_scale: raw.bufr_scale,
            bufr_reference_value: raw.bufr_reference_value,
            bufr_datawidth_bits: raw.bufr_datawidth_bits,
            note_en: raw.note_en,
            note_ids: raw.note_ids,
            status: raw.status,
        };
        Ok(Some(entry))
    }
}
impl TableEntry for BTableEntry {
    /// The FXY descriptor used as the MPH lookup key for this entry.
    fn fxy(&self) -> FXY {
        self.fxy
    }
}

151
gen/src/fr/dtable.rs Normal file
View File

@ -0,0 +1,151 @@
use super::TableEntryLoader;
use csv::StringRecord;
use rkyv::Archive;
/// Streaming loader for WMO Table D rows.
///
/// A Table D sequence spans multiple consecutive CSV rows sharing the same
/// FXY; the loader accumulates the in-progress sequence here and emits it
/// only when a row with a different FXY arrives (or at end of input).
#[derive(Debug, Clone, Default)]
pub struct DTableCsvLoader {
    // Sequence currently being assembled, if any.
    current_chain: Option<DTableEntry>,
}
impl TableEntryLoader for DTableCsvLoader {
    type TableEntry = DTableEntry;
    const TABLE_TYPE: crate::TableType = crate::TableType::D;

    /// Folds consecutive rows sharing an `fxy1` into a single sequence entry.
    ///
    /// The first row of a sequence starts a new chain; later rows with the
    /// same `fxy1` append their `fxy2` to it. The completed chain is returned
    /// only when a row with a different `fxy1` arrives; the last chain is
    /// flushed by `finish()`.
    ///
    /// NOTE(review): `raw.fxy1`, `raw.category`, etc. are field accesses on
    /// `csv::StringRecord`, which exposes no such fields (and `FXY` is not in
    /// this module's visible imports) — this cannot compile as written. The
    /// columns likely need `raw.get(i)` indexing or a dedicated serde row
    /// struct; confirm the intended column mapping before fixing.
    fn process_entry(&mut self, raw: &StringRecord) -> anyhow::Result<Option<Self::TableEntry>> {
        // Process the raw entry as needed
        if self.current_chain.is_none() {
            // Very first row: start the initial chain.
            let entry = DTableEntry {
                fxy: FXY::from_str(&raw.fxy1)?,
                fxy_chain: vec![FXY::from_str(&raw.fxy2)?],
                category: raw.category,
                category_of_sequences_en: raw.category_of_sequences_en,
                title_en: raw.title_en,
                subtitle_en: raw.subtitle_en,
                note_en: raw.note_en,
                note_ids: raw.note_ids,
                status: raw.status,
            };
            self.current_chain = Some(entry);
            return Ok(None);
        } else {
            let fxy = FXY::from_str(&raw.fxy1)?;
            if self.current_chain.as_ref().unwrap().fxy != fxy {
                // First take out the old completed chain
                let finished = self.current_chain.take();
                // Then create and save the new chain
                let entry = DTableEntry {
                    fxy,
                    fxy_chain: vec![FXY::from_str(&raw.fxy2)?],
                    category: raw.category,
                    category_of_sequences_en: raw.category_of_sequences_en,
                    title_en: raw.title_en,
                    subtitle_en: raw.subtitle_en,
                    note_en: raw.note_en,
                    note_ids: raw.note_ids,
                    status: raw.status,
                };
                self.current_chain = Some(entry);
                // Return the old completed chain
                return Ok(finished);
            } else {
                // Same FXY: extend the current chain with this row's fxy2.
                self.current_chain
                    .as_mut()
                    .unwrap()
                    .fxy_chain
                    .push(FXY::from_str(&raw.fxy2)?);
                return Ok(None);
            }
        }
    }

    /// Flushes the final in-progress chain once the CSV is exhausted.
    fn finish(&mut self) -> anyhow::Result<Option<Self::TableEntry>> {
        Ok(self.current_chain.take())
    }
}
/// Owned Table D (sequence) entry stored in the generated table file.
#[derive(
    Debug, Clone, serde::Deserialize, serde::Serialize, Archive, rkyv::Serialize, rkyv::Deserialize,
)]
#[rkyv(compare(PartialEq), derive(Debug))]
pub struct DTableEntry {
    // Sequence descriptor (fxy1 column) — the lookup key.
    fxy: FXY,
    // Descriptors the sequence expands to, in row order (fxy2 column).
    fxy_chain: Vec<FXY>,
    category: String,
    category_of_sequences_en: String,
    title_en: Option<String>,
    subtitle_en: Option<String>,
    note_en: Option<String>,
    // NOTE(review): plain String here but Option<String> in BTableEntry —
    // confirm whether an empty cell should be None for consistency.
    note_ids: String,
    status: String,
}
impl DTableEntry {
    /// Sequence descriptor (lookup key) of this entry.
    pub fn fxy(&self) -> FXY {
        self.fxy
    }
    /// Descriptors the sequence expands to, in file order.
    pub fn fxy_chain(&self) -> &[FXY] {
        &self.fxy_chain
    }
    /// Category code column.
    pub fn category(&self) -> &str {
        &self.category
    }
    /// English category-of-sequences description.
    pub fn category_of_sequences_en(&self) -> &str {
        &self.category_of_sequences_en
    }
    /// Optional English title.
    pub fn title_en(&self) -> Option<&str> {
        self.title_en.as_deref()
    }
    /// Optional English subtitle.
    pub fn subtitle_en(&self) -> Option<&str> {
        self.subtitle_en.as_deref()
    }
    /// Optional English note.
    pub fn note_en(&self) -> Option<&str> {
        self.note_en.as_deref()
    }
    /// Note IDs column (may be empty).
    pub fn note_ids(&self) -> &str {
        &self.note_ids
    }
    /// Status column as recorded in the CSV.
    pub fn status(&self) -> &str {
        &self.status
    }
}
impl std::fmt::Display for DTableEntry {
    /// Formats the entry as: `FXY | title | status | [expanded FXY chain]`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let fxy_chain_str: String = self
            .fxy_chain
            .iter()
            .map(|fxy| format!("{:02}{:02}{:03}", fxy.f, fxy.x, fxy.y))
            .collect::<Vec<_>>()
            .join(", ");
        let title = self.title_en.as_deref().unwrap_or("N/A");
        // Truncate on a char boundary: the previous `&title[..47]` byte slice
        // panics if byte 47 falls inside a multi-byte UTF-8 character.
        let truncated_title = if title.chars().count() > 50 {
            let cut: String = title.chars().take(47).collect();
            format!("{}...", cut)
        } else {
            title.to_string()
        };
        write!(
            f,
            "{:02}{:02}{:03} | {:<50} | {:<12} | [{}]",
            self.fxy.f, self.fxy.x, self.fxy.y, truncated_title, self.status, fxy_chain_str
        )
    }
}
// NOTE(review): only `TableEntryLoader` appears in this module's visible
// imports — confirm `TableEntry` (and `FXY`) are in scope, e.g. via the
// crate root or a prelude.
impl TableEntry for DTableEntry {
    /// The FXY descriptor used as the MPH lookup key for this entry.
    fn fxy(&self) -> FXY {
        self.fxy
    }
}

61
gen/src/fr/mod.rs Normal file
View File

@ -0,0 +1,61 @@
use csv::{ReaderBuilder, StringRecord};
use std::path::Path;
pub mod btable;
pub mod dtable;
/// Drives CSV parsing for semicolon-delimited BUFR table files.
pub struct TableLoader;

impl TableLoader {
    /// Reads `path` as a headerless, semicolon-delimited CSV and feeds each
    /// record to `loader`, collecting every entry it emits.
    ///
    /// Malformed rows are reported on stderr and skipped rather than
    /// aborting the whole load.
    pub fn load_table<P: AsRef<Path>, T: TableEntryLoader>(
        &self,
        path: P,
        loader: &mut T,
    ) -> anyhow::Result<Vec<T::TableEntry>> {
        let mut entries = vec![];
        let mut rdr = ReaderBuilder::new()
            .has_headers(false)
            .delimiter(b';')
            // Strict: every record must have the same field count. (The old
            // comment claimed the opposite of what flexible(false) does.)
            .flexible(false)
            .from_path(path.as_ref())?;
        // has_headers(false) means no header row is skipped, so the first
        // record is line 1 of the file; starting at 0 and incrementing at
        // the top of the loop keeps the warning line numbers accurate
        // (previously they were off by one).
        let mut line_num = 0;
        for result in rdr.records() {
            line_num += 1;
            match result {
                Ok(record) => {
                    // The trait takes the record by reference.
                    if let Some(processed_entry) = loader.process_entry(&record)? {
                        entries.push(processed_entry);
                    }
                }
                Err(e) => {
                    // Log the error but continue processing.
                    eprintln!(
                        "Warning: Skipping line {} in {}: {}",
                        line_num,
                        path.as_ref().display(),
                        e
                    );
                }
            }
        }
        // Let the loader flush a trailing in-progress entry (e.g. the last
        // Table D chain).
        if let Some(processed_entry) = loader.finish()? {
            entries.push(processed_entry);
        }
        Ok(entries)
    }
}
/// Per-table-type logic for turning raw CSV records into table entries.
///
/// NOTE(review): the `TableEntry` bound and the `TableType` constant's type
/// are not in this module's visible `use` list — confirm they are in scope
/// (e.g. re-exported from the crate root).
pub trait TableEntryLoader: Sized {
    /// Owned entry type produced by this loader.
    type TableEntry: TableEntry;
    /// Which BUFR table (B or D) this loader handles.
    const TABLE_TYPE: TableType;
    /// Process a single entry from the CSV file
    fn process_entry(&mut self, raw: &StringRecord) -> anyhow::Result<Option<Self::TableEntry>>;
    /// Emits any entry still being accumulated once the input is exhausted.
    fn finish(&mut self) -> anyhow::Result<Option<Self::TableEntry>> {
        Ok(None)
    }
}

View File

@ -1,19 +1,20 @@
mod btable; // pub mod fr;
mod dtable; pub mod prelude;
mod utils; mod utils;
pub mod wmo;
use anyhow::Context; use anyhow::Context;
use boomphf::Mphf; use memmap2::Mmap;
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; use ph::fmph::GOFunction;
use rkyv::Archive; use rkyv::Archive;
use rkyv::ser::serializers::AllocSerializer; use rkyv::api::high::{HighDeserializer, HighSerializer};
use rkyv::rancor::Error;
use serde::de::DeserializeOwned; use serde::de::DeserializeOwned;
use serde::{Deserialize, Serialize}; use serde::{Deserialize as SerdeDeserialize, Serialize as SerdeSerialize};
use std::fmt::Debug; use std::fmt::Debug;
use std::fs::File; use std::io::Write;
use std::io::{Read, Seek, SeekFrom, Write};
use std::path::Path; use std::path::Path;
use csv::{DeserializeRecordsIter, ReaderBuilder}; use csv::ReaderBuilder;
pub struct TableLoader; pub struct TableLoader;
@ -22,187 +23,265 @@ impl TableLoader {
&self, &self,
path: P, path: P,
loader: &mut T, loader: &mut T,
) -> anyhow::Result<()> { ) -> anyhow::Result<Vec<T::TableEntry>> {
let mut entries = vec![];
let mut rdr = ReaderBuilder::new() let mut rdr = ReaderBuilder::new()
.has_headers(true) .has_headers(true)
.delimiter(b',') .delimiter(b',')
.from_path(path)?; .flexible(true) // Allow variable number of fields
.from_path(path.as_ref())?;
let mut line_num = 1; // Start at 1 for header
for result in rdr.deserialize() { for result in rdr.deserialize() {
let record: T::RawEntry = result?; line_num += 1;
loader.process_entry(record)?; match result {
Ok(record) => {
let record: T::RawEntry = record;
if let Some(processed_entry) = loader.process_entry(record)? {
entries.push(processed_entry);
}
}
Err(e) => {
// Log the error but continue processing
eprintln!(
"Warning: Skipping line {} in {}: {}",
line_num,
path.as_ref().display(),
e
);
}
}
} }
Ok(()) if let Some(processed_entry) = loader.finish()? {
entries.push(processed_entry);
}
Ok(entries)
} }
} }
pub trait TableEntryLoader: Sized { pub trait TableEntryLoader: Sized
where
Self::TableEntry: for<'a> rkyv::Serialize<
HighSerializer<rkyv::util::AlignedVec, rkyv::ser::allocator::ArenaHandle<'a>, Error>,
>,
<Self::TableEntry as Archive>::Archived:
rkyv::Deserialize<Self::TableEntry, HighDeserializer<Error>>,
{
/// The raw CSV entry type that will be deserialized /// The raw CSV entry type that will be deserialized
type RawEntry: for<'de> serde::Deserialize<'de> + Debug; type RawEntry: for<'de> serde::Deserialize<'de> + Debug;
type TableEntry: TableEntry; type TableEntry: TableEntry;
/// Process a single entry from the CSV file const TABLE_TYPE: TableType;
fn process_entry(&mut self, raw: Self::RawEntry) -> anyhow::Result<()>;
fn finish(&mut self) -> anyhow::Result<()> { /// Process a single entry from the CSV file
Ok(()) fn process_entry(&mut self, raw: Self::RawEntry) -> anyhow::Result<Option<Self::TableEntry>>;
fn finish(&mut self) -> anyhow::Result<Option<Self::TableEntry>> {
Ok(None)
} }
} }
pub trait TableEntry: Serialize + DeserializeOwned + Debug + Clone { pub trait TableEntry: SerdeSerialize + DeserializeOwned + Debug + Clone + Archive {
fn fxy(&self) -> FXY; fn fxy(&self) -> FXY;
} }
#[derive(Serialize, Deserialize)]
struct MphMetadata {
mphf: Mphf<FXY>,
offsets: Vec<u64>,
}
struct BufrTableMph<T: TableEntryLoader> { struct BufrTableMph<T: TableEntryLoader> {
mphf: Mphf<FXY>, mphf: GOFunction,
offsets: Vec<u64>, offsets: Vec<u64>,
data_file: File, mmap: Mmap,
_marker: std::marker::PhantomData<T>, _marker: std::marker::PhantomData<T>,
} }
impl<T: TableEntryLoader> BufrTableMph<T> { struct BUFRTableM {
/// 构建 MPH 表 mphf: GOFunction,
fn build(entries: &Vec<T::TableEntry>, output_path: &str) -> std::io::Result<Self> { }
impl<T: TableEntryLoader> BufrTableMph<T>
where
for<'a> T::TableEntry: rkyv::Serialize<
HighSerializer<rkyv::util::AlignedVec, rkyv::ser::allocator::ArenaHandle<'a>, Error>,
>,
<T::TableEntry as Archive>::Archived: rkyv::Deserialize<T::TableEntry, HighDeserializer<Error>>,
{
fn build(entries: Vec<T::TableEntry>, output_path: &str) -> std::io::Result<Self> {
println!("Building MPH table with {} entries...", entries.len());
let keys: Vec<FXY> = entries.iter().map(|e| e.fxy()).collect(); let keys: Vec<FXY> = entries.iter().map(|e| e.fxy()).collect();
let mphf = GOFunction::from_slice(&keys);
let mphf = Mphf::new(2.0, &keys);
let mut sorted_entries: Vec<(usize, T::TableEntry)> = entries let mut sorted_entries: Vec<(usize, T::TableEntry)> = entries
.into_iter() .into_iter()
.map(|e| { .map(|e| (mphf.get(&(e.fxy())).unwrap() as usize, e))
let hash = mphf.hash(&e.fxy()) as usize;
(hash, e)
})
.collect(); .collect();
sorted_entries.sort_by_key(|(hash, _)| *hash); sorted_entries.sort_by_key(|(hash, _)| *hash);
let data_path = format!("{}.data", output_path); // Serialize MPHF to bytes
let mut data_file = File::create(&data_path)?; let mut mphf_bytes = Vec::new();
mphf.write(&mut mphf_bytes)
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, format!("{:?}", e)))?;
// Prepare data entries
let mut data_bytes = Vec::new();
let mut offsets = Vec::with_capacity(sorted_entries.len()); let mut offsets = Vec::with_capacity(sorted_entries.len());
for (_, entry) in sorted_entries { for (_, entry) in sorted_entries {
let offset = data_file.stream_position()?; let offset = data_bytes.len() as u64;
offsets.push(offset); offsets.push(offset);
// rkyv 序列化 let bytes = rkyv::to_bytes::<Error>(&entry)
let mut serializer = AllocSerializer::<256>::default(); .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, format!("{:?}", e)))?;
serializer.serialize_value(&entry).unwrap();
let bytes = serializer.into_serializer().into_inner();
// 写入长度 + 数据 let len = bytes.len() as u32;
data_file.write_u32::<LittleEndian>(bytes.len() as u32)?; data_bytes.write_all(&len.to_le_bytes())?;
data_file.write_all(&bytes)?; data_bytes.write_all(&bytes)?;
} }
// 5. 保存元数据 // Serialize offsets as raw bytes
let meta_path = format!("{}.meta", output_path); let offsets_count = offsets.len() as u32;
let meta = MphMetadata { let mut offsets_bytes = Vec::with_capacity(offsets.len() * 8);
mphf: mphf.clone(), for offset in &offsets {
offsets: offsets.clone(), offsets_bytes.extend_from_slice(&offset.to_le_bytes());
}; }
// 元数据仍然用 bincode 或者也可以用 rkyv // Write merged file
let mut meta_serializer = AllocSerializer::<4096>::default(); let merged_path = format!("{}.bufrtbl", output_path);
meta_serializer.serialize_value(&meta).unwrap(); let mut merged_file = std::fs::File::create(&merged_path)?;
let meta_bytes = meta_serializer.into_serializer().into_inner();
std::fs::write(meta_path, meta_bytes)?; // Write header: mphf_size (u32) + offsets_count (u32)
merged_file.write_all(&(mphf_bytes.len() as u32).to_le_bytes())?;
merged_file.write_all(&offsets_count.to_le_bytes())?;
// Write MPHF data
merged_file.write_all(&mphf_bytes)?;
// Write offsets (already 8-byte aligned since each offset is u64)
merged_file.write_all(&offsets_bytes)?;
// Write table entries data
merged_file.write_all(&data_bytes)?;
println!("Built successfully!");
println!(
" Merged file: {} bytes",
std::fs::metadata(&merged_path)?.len()
);
Self::load(output_path)
}
fn load<P: AsRef<Path>>(path: P) -> std::io::Result<Self> {
let mut path = path.as_ref().to_path_buf();
path.set_extension("bufrtbl");
let merged_file = std::fs::File::open(&path)?;
let mmap = unsafe { Mmap::map(&merged_file)? };
// Read header
let mphf_size = u32::from_le_bytes(mmap[0..4].try_into().unwrap()) as usize;
let offsets_count = u32::from_le_bytes(mmap[4..8].try_into().unwrap()) as usize;
// Calculate section positions
let mphf_start = 8;
let mphf_end = mphf_start + mphf_size;
let offsets_start = mphf_end;
let offsets_size = offsets_count * 8; // each offset is u64 (8 bytes)
let offsets_end = offsets_start + offsets_size;
let data_start = offsets_end;
// Load MPHF
let mphf_bytes = &mmap[mphf_start..mphf_end];
let mphf: GOFunction = GOFunction::read(&mut &mphf_bytes[..])
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, format!("{:?}", e)))?;
// Load offsets
let offsets_bytes = &mmap[offsets_start..offsets_end];
let mut offsets = Vec::with_capacity(offsets_count);
for i in 0..offsets_count {
let offset_bytes = &offsets_bytes[i * 8..(i + 1) * 8];
let offset = u64::from_le_bytes(offset_bytes.try_into().unwrap());
offsets.push(offset + data_start as u64); // Adjust offset for data section
}
Ok(Self { Ok(Self {
mphf, mphf,
offsets, offsets,
data_file: File::open(data_path)?, mmap,
_marker: std::marker::PhantomData, _marker: std::marker::PhantomData,
}) })
} }
/// 从磁盘加载 /// 获取拥有的版本
fn load(path: &str) -> std::io::Result<Self> { fn get(&self, fxy: FXY) -> Option<T::TableEntry> {
let meta_bytes = std::fs::read(format!("{}.meta", path))?; let hash = self.mphf.get(&fxy)? as usize;
let offset = *self.offsets.get(hash)? as usize;
// rkyv 反序列化元数据(零拷贝) let len_bytes = self.mmap.get(offset..offset + 4)?;
let archived = unsafe { rkyv::archived_root::<MphMetadata>(&meta_bytes) }; let len = u32::from_le_bytes(len_bytes.try_into().ok()?) as usize;
// 如果需要拥有的版本,可以 deserialize let data = self.mmap.get(offset + 4..offset + 4 + len)?;
let meta: MphMetadata = archived.deserialize(&mut rkyv::Infallible).unwrap();
let data_file = File::open(format!("{}.data", path))?; let archived =
unsafe { rkyv::access_unchecked::<<T::TableEntry as Archive>::Archived>(data) };
Ok(Self { rkyv::deserialize::<T::TableEntry, Error>(archived).ok()
mphf: meta.mphf,
offsets: meta.offsets,
data_file,
_marker: std::marker::PhantomData,
})
} }
/// 查找条目(零拷贝读取) /// 获取所有条目
fn get(&mut self, fxy: FXY) -> std::io::Result<Option<T::TableEntry>> { fn get_all(&self) -> Vec<T::TableEntry> {
let hash = self.mphf.hash(&fxy) as usize; let mut entries = Vec::new();
let offset = match self.offsets.get(hash) { for offset in &self.offsets {
Some(&off) => off, let offset = *offset as usize;
None => return Ok(None), if let Some(len_bytes) = self.mmap.get(offset..offset + 4) {
}; if let Ok(len_bytes_array) = len_bytes.try_into() {
let len = u32::from_le_bytes(len_bytes_array) as usize;
// 读取数据 if let Some(data) = self.mmap.get(offset + 4..offset + 4 + len) {
self.data_file.seek(SeekFrom::Start(offset))?; let archived = unsafe {
let len = self.data_file.read_u32::<LittleEndian>()? as usize; rkyv::access_unchecked::<<T::TableEntry as Archive>::Archived>(data)
let mut buffer = vec![0u8; len]; };
self.data_file.read_exact(&mut buffer)?; if let Ok(entry) = rkyv::deserialize::<T::TableEntry, Error>(archived) {
entries.push(entry);
// rkyv 零拷贝访问 }
let archived = unsafe { rkyv::archived_root::<T::TableEntry>(&buffer) }; }
}
// 校验归档数据(可选,生产环境推荐) }
#[cfg(feature = "validation")]
{
use rkyv::validation::validators::DefaultValidator;
rkyv::check_archived_root::<T::TableEntry>(&buffer)
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
} }
entries
// 反序列化为拥有的类型
let entry: T::TableEntry = archived.deserialize(&mut rkyv::Infallible).unwrap();
if entry.fxy == fxy {
Ok(Some(entry))
} else {
Ok(None)
}
}
/// 零拷贝引用访问(更快!)
fn get_archived(&mut self, fxy: u32) -> std::io::Result<Option<Vec<u8>>> {
let hash = self.mphf.hash(&fxy) as usize;
let offset = match self.offsets.get(hash) {
Some(&off) => off,
None => return Ok(None),
};
self.data_file.seek(SeekFrom::Start(offset))?;
let len = self.data_file.read_u32::<LittleEndian>()? as usize;
let mut buffer = vec![0u8; len];
self.data_file.read_exact(&mut buffer)?;
Ok(Some(buffer))
} }
} }
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)] #[derive(
Archive,
SerdeSerialize,
SerdeDeserialize,
rkyv::Serialize,
rkyv::Deserialize,
Debug,
PartialEq,
Eq,
Clone,
Copy,
std::hash::Hash,
)]
#[rkyv(compare(PartialEq), derive(Debug))]
pub struct FXY { pub struct FXY {
pub f: u16, pub f: u16,
pub x: u16, pub x: u16,
pub y: u16, pub y: u16,
} }
// // Custom Hash implementation to work around boomphf's overflow bug
// // We implement Hash by converting to u32 first
// impl std::hash::Hash for FXY {
// fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
// // Convert FXY to a simple u32 value to avoid complex hashing
// self.to_u32().hash(state);
// }
// }
impl FXY { impl FXY {
pub fn new(f: u16, x: u16, y: u16) -> Self {
FXY { f, x, y }
}
pub fn from_str(fxy_str: &str) -> anyhow::Result<Self> { pub fn from_str(fxy_str: &str) -> anyhow::Result<Self> {
// let bytes = fxy_str.as_bytes(); // let bytes = fxy_str.as_bytes();
@ -223,142 +302,72 @@ impl FXY {
Ok(FXY { f, x, y }) Ok(FXY { f, x, y })
} }
/// Convert FXY to u32 for use as hash key
/// Format: F (2 bits) | X (6 bits) | Y (8 bits) = 16 bits total
pub fn to_u32(&self) -> u32 {
((self.f as u32) << 14) | ((self.x as u32) << 8) | (self.y as u32)
}
/// Convert u32 back to FXY
pub fn from_u32(value: u32) -> Self {
FXY {
f: ((value >> 14) & 0x3) as u16,
x: ((value >> 8) & 0x3F) as u16,
y: (value & 0xFF) as u16,
}
}
} }
pub struct BUFRTableMPH {} pub struct BUFRTableMPH<T: TableEntryLoader> {
inner: BufrTableMph<T>,
}
impl BUFRTableMPH { impl<T: TableEntryLoader> BUFRTableMPH<T> {
pub fn build_from_csv<P: AsRef<Path>, T: TableEntryLoader>( pub fn build_from_csv<P: AsRef<Path>>(
mut loader: T, mut loader: T,
csv_path: P, csv_path: P,
output_path: P, output_path: P,
) -> anyhow::Result<Self> { ) -> anyhow::Result<Self> {
TableLoader.load_table(csv_path, &mut loader)?; let entries = TableLoader.load_table(csv_path, &mut loader)?;
loader.finish()?; let bhm = BufrTableMph::<T>::build(entries, output_path.as_ref().to_str().unwrap())?;
Ok(BUFRTableMPH {}) Ok(BUFRTableMPH { inner: bhm })
} }
pub fn load_from_disk<P: AsRef<Path>>(path: P) -> anyhow::Result<Self> { pub fn load_from_disk<P: AsRef<Path>>(path: P) -> anyhow::Result<Self> {
// Placeholder for loading MPH table from disk let bhm: BufrTableMph<T> = BufrTableMph::load(path)?;
Ok(BUFRTableMPH {}) Ok(BUFRTableMPH { inner: bhm })
} }
pub fn lookup(&self, fxy: FXY) -> anyhow::Result<Option<()>> { pub fn lookup(&self, fxy: FXY) -> anyhow::Result<Option<T::TableEntry>> {
// Placeholder for looking up an entry by FXY Ok(self.inner.get(fxy))
Ok(None) }
pub fn get_all_entries(&self) -> Vec<T::TableEntry> {
self.inner.get_all()
} }
} }
// #[derive(Serialize, Deserialize, Debug)] #[repr(C)]
// struct TableEntry { #[derive(Debug, Clone, Copy, PartialEq, Eq)]
// fxy: u32, // F-X-Y descriptor (e.g., 001001) pub enum TableType {
// name: String, // "WMO Block Number" B,
// unit: String, // "Numeric" D,
// scale: i8, }
// reference: i32,
// data_width: u8,
// }
// struct BufrTableMph { #[cfg(test)]
// mphf: Mphf<u32>, // The MPH function mod test {
// offsets: Vec<u64>, // Byte offsets into the data file use crate::{BUFRTableMPH, dtable::DTableCsvLoader};
// data_file: File, // Memory-mapped or regular file
// }
// impl BufrTableMph { #[test]
// /// Build MPH table from entries fn test() {
// fn build(entries: Vec<TableEntry>, output_path: &str) -> std::io::Result<Self> { let d_loader = DTableCsvLoader::default();
// // 1. Extract keys (FXY descriptors) BUFRTableMPH::build_from_csv(
// let keys: Vec<u32> = entries.iter().map(|e| e.fxy).collect(); d_loader,
"/Users/xiang.li1/projects/rbufr/BUFR4/BUFR_TableD_en_00.csv",
// // 2. Build MPH function with gamma=2.0 (space/time tradeoff) "test_table_d",
// let mphf = Mphf::new(2.0, &keys); )
.unwrap();
// // 3. Create sorted entries by MPH hash }
// let mut sorted_entries: Vec<(usize, TableEntry)> = entries }
// .into_iter()
// .map(|e| {
// let hash = mphf.hash(&e.fxy) as usize;
// (hash, e)
// })
// .collect();
// sorted_entries.sort_by_key(|(hash, _)| *hash);
// // 4. Write binary data file and collect offsets
// let data_path = format!("{}.data", output_path);
// let mut data_file = File::create(&data_path)?;
// let mut offsets = Vec::with_capacity(sorted_entries.len());
// for (_, entry) in sorted_entries {
// let offset = data_file.stream_position()?;
// offsets.push(offset);
// // Write entry in compact binary format
// let serialized = bincode::serialize(&entry).unwrap();
// data_file.write_u32::<byteorder::LittleEndian>(serialized.len() as u32)?;
// data_file.write_all(&serialized)?;
// }
// // 5. Write MPH metadata
// let meta_path = format!("{}.meta", output_path);
// let meta = MphMetadata { mphf, offsets };
// let meta_bytes = bincode::serialize(&meta).unwrap();
// std::fs::write(meta_path, meta_bytes)?;
// Ok(Self {
// mphf: meta.mphf,
// offsets: meta.offsets,
// data_file: File::open(data_path)?,
// })
// }
// /// Load from disk
// fn load(path: &str) -> std::io::Result<Self> {
// let meta_bytes = std::fs::read(format!("{}.meta", path))?;
// let meta: MphMetadata = bincode::deserialize(&meta_bytes).unwrap();
// let data_file = File::open(format!("{}.data", path))?;
// Ok(Self {
// mphf: meta.mphf,
// offsets: meta.offsets,
// data_file,
// })
// }
// /// Lookup entry by FXY descriptor
// fn get(&mut self, fxy: u32) -> std::io::Result<Option<TableEntry>> {
// // 1. Hash the key to get index
// let hash = self.mphf.hash(&fxy) as usize;
// // 2. Get offset from array
// let offset = match self.offsets.get(hash) {
// Some(&off) => off,
// None => return Ok(None),
// };
// // 3. Seek and read
// self.data_file.seek(SeekFrom::Start(offset))?;
// let len = self.data_file.read_u32::<byteorder::LittleEndian>()? as usize;
// let mut buffer = vec![0u8; len];
// self.data_file.read_exact(&mut buffer)?;
// // 4. Deserialize
// let entry: TableEntry = bincode::deserialize(&buffer).unwrap();
// // Verify key matches (MPH guarantees unique index, but verify correctness)
// if entry.fxy == fxy {
// Ok(Some(entry))
// } else {
// Ok(None)
// }
// }
// }
// #[derive(Serialize, Deserialize)]
// struct MphMetadata {
// mphf: Mphf<u32>,
// offsets: Vec<u64>,
// }
// use byteorder::{ReadBytesExt, WriteBytesExt};

View File

@ -1,3 +1,289 @@
fn main() { use anyhow::{Context, Result};
println!("Hello, world!"); use clap::{Parser, Subcommand};
use genlib::{BUFRTableMPH, btable::BTableCsvLoader, dtable::DTableCsvLoader};
use std::path::{Path, PathBuf};
/// Top-level command-line interface for `gen-ctl`.
#[derive(Parser)]
#[command(name = "gen-ctl")]
#[command(about = "BUFR Table conversion tool", long_about = None)]
struct Cli {
    // The selected subcommand (scan / convert / print).
    #[command(subcommand)]
    command: Commands,
}
/// Subcommands supported by `gen-ctl`.
#[derive(Subcommand)]
enum Commands {
    /// Scan a directory and convert all BUFR tables to MPH format
    Scan {
        /// Input directory containing BUFR CSV files
        #[arg(short, long)]
        input: PathBuf,
        /// Output directory for generated .bufrtbl files
        #[arg(short, long)]
        output: PathBuf,
        /// Table type to process: "d", "b", or "all"
        #[arg(short, long, default_value = "all")]
        table_type: String,
    },
    /// Convert a single BUFR table file
    Convert {
        /// Input CSV file
        #[arg(short, long)]
        input: PathBuf,
        /// Output path (without extension)
        #[arg(short, long)]
        output: PathBuf,
        /// Table type: "d" for Table D, "b" for Table B
        #[arg(short, long)]
        table_type: String,
    },
    /// Print a BUFR table in formatted output
    Print {
        /// Path to .bufrtbl file (without extension)
        #[arg(short, long)]
        input: PathBuf,
        /// Table type: "d" for Table D, "b" for Table B
        #[arg(short, long)]
        table_type: String,
        /// Maximum number of entries to print (optional)
        #[arg(short, long)]
        limit: Option<usize>,
    },
}
/// Entry point: parse arguments and dispatch to the selected subcommand.
fn main() -> Result<()> {
    let cli = Cli::parse();
    // Each handler already returns Result<()>, so the match itself is the
    // function's final expression.
    match cli.command {
        Commands::Scan {
            input,
            output,
            table_type,
        } => scan_and_convert(&input, &output, &table_type),
        Commands::Convert {
            input,
            output,
            table_type,
        } => convert_single_file(&input, &output, &table_type),
        Commands::Print {
            input,
            table_type,
            limit,
        } => print_table(&input, &table_type, limit),
    }
}
fn scan_and_convert(input_dir: &Path, output_dir: &Path, table_type: &str) -> Result<()> {
// Create output directory if it doesn't exist
std::fs::create_dir_all(output_dir).context("Failed to create output directory")?;
println!("Scanning directory: {}", input_dir.display());
println!("Output directory: {}", output_dir.display());
println!("Table type: {}", table_type);
println!();
let mut processed_count = 0;
let mut error_count = 0;
// Scan for BUFR_TableD files
if table_type == "d" || table_type == "all" {
println!("Processing Table D files...");
let pattern = input_dir.join("BUFR_TableD_*.csv");
for entry in glob::glob(pattern.to_str().unwrap()).context("Failed to read glob pattern")? {
match entry {
Ok(path) => {
let mut filename = path.file_stem().unwrap().to_str().unwrap().to_string();
if filename.contains("BUFRCREX") {
filename = filename.replace("BUFRCREX_", "BUFR_");
}
let output_path = output_dir.join(filename);
print!(" Converting {} ... ", path.display());
match convert_table_d(&path, &output_path) {
Ok(_) => {
println!("OK");
processed_count += 1;
}
Err(e) => {
println!("ERROR: {}", e);
error_count += 1;
}
}
}
Err(e) => {
println!("Error reading file: {}", e);
error_count += 1;
}
}
}
println!();
}
// Scan for BUFRCREX_TableB files
if table_type == "b" || table_type == "all" {
println!("Processing Table B files...");
let pattern = input_dir.join("BUFRCREX_TableB_*.csv");
for entry in glob::glob(pattern.to_str().unwrap()).context("Failed to read glob pattern")? {
match entry {
Ok(path) => {
let mut filename = path.file_stem().unwrap().to_str().unwrap().to_string();
if filename.contains("BUFRCREX") {
filename = filename.replace("BUFRCREX_", "BUFR_");
}
let output_path = output_dir.join(filename);
print!(" Converting {} ... ", path.display());
match convert_table_b(&path, &output_path) {
Ok(_) => {
println!("OK");
processed_count += 1;
}
Err(e) => {
eprintln!("ERROR: {}", e);
error_count += 1;
}
}
}
Err(e) => {
eprintln!("Error reading file: {}", e);
error_count += 1;
}
}
}
println!();
}
println!("Summary:");
println!(" Successfully processed: {}", processed_count);
println!(" Errors: {}", error_count);
if error_count > 0 {
anyhow::bail!("Conversion completed with {} errors", error_count);
}
Ok(())
}
/// Converts one CSV table file into binary form, choosing the converter from
/// `table_type` ("d" or "b", case-insensitive).
fn convert_single_file(input_path: &Path, output_path: &Path, table_type: &str) -> Result<()> {
    println!(
        "Converting {} to {}",
        input_path.display(),
        output_path.display()
    );
    match table_type.to_lowercase().as_str() {
        "d" => convert_table_d(input_path, output_path),
        "b" => convert_table_b(input_path, output_path),
        other => anyhow::bail!("Invalid table type: {}. Use 'd' or 'b'", other),
    }?;
    println!("Conversion completed successfully!");
    Ok(())
}
/// Builds a binary Table D file from the given CSV export.
fn convert_table_d(input_path: &Path, output_path: &Path) -> Result<()> {
    let _ = BUFRTableMPH::build_from_csv(DTableCsvLoader::default(), input_path, output_path)?;
    Ok(())
}
/// Builds a binary Table B file from the given CSV export.
fn convert_table_b(input_path: &Path, output_path: &Path) -> Result<()> {
    let _ = BUFRTableMPH::build_from_csv(BTableCsvLoader, input_path, output_path)?;
    Ok(())
}
/// Pretty-prints a compiled table, choosing the printer from `table_type`
/// ("d" or "b", case-insensitive).
fn print_table(input_path: &Path, table_type: &str, limit: Option<usize>) -> Result<()> {
    match table_type.to_lowercase().as_str() {
        "d" => print_table_d(input_path, limit),
        "b" => print_table_b(input_path, limit),
        _ => anyhow::bail!("Invalid table type: {}. Use 'd' or 'b'", table_type),
    }
}
/// Loads a compiled Table D file and prints its entries as a fixed-width
/// table, showing at most `limit` rows when a limit is given.
fn print_table_d(input_path: &Path, limit: Option<usize>) -> Result<()> {
    // NOTE: the previous `use genlib::dtable::DTableEntry;` was removed — the
    // type is never named in this body (entries print via `Display`).
    println!("Loading Table D from: {}", input_path.display());
    let table: BUFRTableMPH<DTableCsvLoader> = BUFRTableMPH::load_from_disk(input_path)?;
    let entries = table.get_all_entries();
    println!("\nTable D Entries (Total: {})", entries.len());
    println!("{}", "=".repeat(140));
    println!(
        "{:<7} | {:<50} | {:<12} | {}",
        "FXY", "Title", "Status", "FXY Chain"
    );
    println!("{}", "-".repeat(140));
    // Clamp the display count to the available entries.
    let shown = limit.unwrap_or(entries.len()).min(entries.len());
    for entry in &entries[..shown] {
        println!("{}", entry);
    }
    if entries.len() > shown {
        println!("\n... ({} more entries omitted)", entries.len() - shown);
    }
    Ok(())
}
fn print_table_b(input_path: &Path, limit: Option<usize>) -> Result<()> {
use genlib::btable::BTableEntry;
println!("Loading Table B from: {}", input_path.display());
let table: BUFRTableMPH<BTableCsvLoader> = BUFRTableMPH::load_from_disk(input_path)?;
let entries = table.get_all_entries();
println!("\nTable B Entries (Total: {})", entries.len());
println!("{}", "=".repeat(120));
println!(
"{:<7} | {:<40} | {:<15} | {:<5} | {:<8} | {:<8} | {}",
"FXY", "Element Name", "Unit", "Scale", "Ref Val", "Width", "Status"
);
println!("{}", "-".repeat(120));
let display_entries = if let Some(max) = limit {
&entries[..entries.len().min(max)]
} else {
&entries[..]
};
for entry in display_entries {
println!("{}", entry);
}
if let Some(max) = limit {
if entries.len() > max {
println!("\n... ({} more entries omitted)", entries.len() - max);
}
}
Ok(())
} }

6
gen/src/prelude.rs Normal file
View File

@ -0,0 +1,6 @@
//! Convenient re-exports for downstream crates.
pub use crate::wmo;
pub use crate::BUFRTableMPH;
pub use crate::FXY;
pub use crate::TableType;

// These aliases were previously commented out, but downstream code
// (e.g. rbufr's `tables.rs` and its tests) imports `BUFRTableB`/`BUFRTableD`
// from this prelude, so they must actually be exported here.

/// Table D store built with the CSV loader for WMO Table D files.
pub type BUFRTableD = crate::BUFRTableMPH<crate::wmo::dtable::DTableCsvLoader>;
/// Table B store built with the CSV loader for WMO BUFR/CREX Table B files.
pub type BUFRTableB = crate::BUFRTableMPH<crate::wmo::btable::BTableCsvLoader>;

179
gen/src/wmo/btable.rs Normal file
View File

@ -0,0 +1,179 @@
use crate::{FXY, TableEntry, TableEntryLoader};
use rkyv::Archive;
/// Stateless loader that turns raw Table B CSV rows into `BTableEntry` values.
pub struct BTableCsvLoader;
/// One raw row of a WMO "BUFRCREX_TableB" CSV export; field names mirror the
/// CSV header verbatim via serde renames. Converted into a `BTableEntry` by
/// `BTableCsvLoader::process_entry`.
#[derive(Debug, serde::Deserialize)]
pub struct RawBTableEntry {
    #[serde(rename = "ClassNo")]
    pub class_no: String,
    #[serde(rename = "ClassName_en")]
    pub class_name_en: String,
    // Descriptor as a 6-digit string, e.g. "001001".
    #[serde(rename = "FXY")]
    pub fxy: String,
    #[serde(rename = "ElementName_en")]
    pub element_name_en: String,
    #[serde(rename = "BUFR_Unit")]
    pub bufr_unit: String,
    #[serde(rename = "BUFR_Scale")]
    pub bufr_scale: i32,
    #[serde(rename = "BUFR_ReferenceValue")]
    pub bufr_reference_value: i32,
    #[serde(rename = "BUFR_DataWidth_Bits")]
    pub bufr_datawidth_bits: u32,
    // NOTE(review): the Option fields below rely on csv/serde mapping empty
    // CSV cells to None; the `deserialize_optional_*` helpers defined later
    // in this file are not wired to any field — confirm whether they were
    // meant to be used via `#[serde(deserialize_with = ...)]`.
    #[serde(rename = "CREX_Unit")]
    pub crex_unit: Option<String>,
    #[serde(rename = "CREX_Scale")]
    pub crex_scale: Option<i32>,
    #[serde(rename = "CREX_DataWidth_Char")]
    pub crex_datawidth_char: Option<u32>,
    #[serde(rename = "Note_en")]
    pub note_en: Option<String>,
    #[serde(rename = "noteIDs")]
    pub note_ids: Option<String>,
    #[serde(rename = "Status")]
    pub status: String,
}
/// Deserializes a string field, mapping the empty string to `None`.
///
/// NOTE(review): currently not referenced by any `#[serde(deserialize_with)]`
/// attribute in this file — confirm whether it should be wired up or removed.
fn deserialize_optional_string<'de, D>(deserializer: D) -> Result<Option<String>, D::Error>
where
    D: serde::Deserializer<'de>,
{
    let raw: String = serde::Deserialize::deserialize(deserializer)?;
    Ok(if raw.is_empty() { None } else { Some(raw) })
}
/// Deserializes a numeric field given as a string, mapping the empty string to
/// `None` and anything else through `u32` parsing.
///
/// NOTE(review): currently not referenced by any `#[serde(deserialize_with)]`
/// attribute in this file — confirm whether it should be wired up or removed.
fn deserialize_optional_u32<'de, D>(deserializer: D) -> Result<Option<u32>, D::Error>
where
    D: serde::Deserializer<'de>,
{
    let raw: String = serde::Deserialize::deserialize(deserializer)?;
    if raw.is_empty() {
        return Ok(None);
    }
    raw.parse::<u32>().map(Some).map_err(serde::de::Error::custom)
}
/// A processed Table B entry as stored in the compiled table: the descriptor
/// plus the BUFR packing parameters and notes kept from the CSV row (the
/// CREX-only columns are dropped).
#[derive(
    Debug, Clone, serde::Deserialize, serde::Serialize, Archive, rkyv::Serialize, rkyv::Deserialize,
)]
#[rkyv(compare(PartialEq), derive(Debug))]
pub struct BTableEntry {
    // Descriptor (F-X-Y) — the lookup key.
    fxy: FXY,
    class_name_en: String,
    element_name_en: String,
    bufr_unit: String,
    bufr_scale: i32,
    bufr_reference_value: i32,
    bufr_datawidth_bits: u32,
    note_en: Option<String>,
    note_ids: Option<String>,
    status: String,
}
impl BTableEntry {
    /// Descriptor (F-X-Y) identifying this entry.
    pub fn fxy(&self) -> FXY {
        self.fxy
    }
    /// English name of the descriptor's class, as given in the source CSV.
    pub fn class_name_en(&self) -> &str {
        &self.class_name_en
    }
    /// English element name.
    pub fn element_name_en(&self) -> &str {
        &self.element_name_en
    }
    /// BUFR unit string from the source CSV.
    pub fn bufr_unit(&self) -> &str {
        &self.bufr_unit
    }
    /// BUFR scale factor from the source CSV.
    pub fn bufr_scale(&self) -> i32 {
        self.bufr_scale
    }
    /// BUFR reference value from the source CSV.
    pub fn bufr_reference_value(&self) -> i32 {
        self.bufr_reference_value
    }
    /// BUFR data width in bits from the source CSV.
    pub fn bufr_datawidth_bits(&self) -> u32 {
        self.bufr_datawidth_bits
    }
    /// Optional English note text, if the CSV row had one.
    pub fn note_en(&self) -> Option<&str> {
        self.note_en.as_deref()
    }
    /// Optional note-ID string, if the CSV row had one.
    pub fn note_ids(&self) -> Option<&str> {
        self.note_ids.as_deref()
    }
    /// Status column from the source CSV (e.g. operational state).
    pub fn status(&self) -> &str {
        &self.status
    }
}
impl std::fmt::Display for BTableEntry {
    /// Renders the entry as one fixed-width table row:
    /// `FXY | name | unit | scale | reference | width | status`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Truncate by characters, not bytes: the previous `&s[..37]` byte
        // slice panics when the cut lands inside a multi-byte UTF-8 sequence
        // (element names and units are not guaranteed to be ASCII).
        fn ellipsize(s: &str, max: usize) -> String {
            if s.chars().count() > max {
                let head: String = s.chars().take(max - 3).collect();
                format!("{}...", head)
            } else {
                s.to_string()
            }
        }
        let element_name = ellipsize(&self.element_name_en, 40);
        let unit = ellipsize(&self.bufr_unit, 15);
        write!(
            f,
            "{:02}{:02}{:03} | {:<40} | {:<15} | {:>5} | {:>8} | {:>8} | {}",
            self.fxy.f,
            self.fxy.x,
            self.fxy.y,
            element_name,
            unit,
            self.bufr_scale,
            self.bufr_reference_value,
            self.bufr_datawidth_bits,
            self.status
        )
    }
}
impl TableEntryLoader for BTableCsvLoader {
type RawEntry = RawBTableEntry;
type TableEntry = BTableEntry;
const TABLE_TYPE: crate::TableType = crate::TableType::B;
fn process_entry(&mut self, raw: Self::RawEntry) -> anyhow::Result<Option<Self::TableEntry>> {
// Parse FXY string (e.g., "001001") to u32
let fxy = FXY::from_str(&raw.fxy)?;
let entry = BTableEntry {
fxy,
class_name_en: raw.class_name_en,
element_name_en: raw.element_name_en,
bufr_unit: raw.bufr_unit,
bufr_scale: raw.bufr_scale,
bufr_reference_value: raw.bufr_reference_value,
bufr_datawidth_bits: raw.bufr_datawidth_bits,
note_en: raw.note_en,
note_ids: raw.note_ids,
status: raw.status,
};
Ok(Some(entry))
}
}
impl TableEntry for BTableEntry {
    /// Lookup key for the table: the entry's descriptor.
    fn fxy(&self) -> FXY {
        self.fxy
    }
}

View File

@ -1,11 +1,11 @@
use crate::{FXY, TableEntryLoader}; use crate::{FXY, TableEntry, TableEntryLoader};
use rkyv::Archive;
#[derive(Debug, Clone, Default)]
pub struct DTableCsvLoader { pub struct DTableCsvLoader {
current_chain: Option<DTableEntry>, current_chain: Option<DTableEntry>,
entries: Vec<DTableEntry>,
} }
// Category,CategoryOfSequences_en,FXY1,Title_en,SubTitle_en,FXY2,ElementName_en,ElementDescription_en,Note_en,noteIDs,Status
#[derive(Debug, serde::Deserialize)] #[derive(Debug, serde::Deserialize)]
pub struct RawDTableEntry { pub struct RawDTableEntry {
#[serde(rename = "Category")] #[serde(rename = "Category")]
@ -21,9 +21,9 @@ pub struct RawDTableEntry {
#[serde(rename = "FXY2")] #[serde(rename = "FXY2")]
pub fxy2: String, pub fxy2: String,
#[serde(rename = "ElementName_en")] #[serde(rename = "ElementName_en")]
pub element_name_en: Option<String>, pub _element_name_en: Option<String>,
#[serde(rename = "ElementDescription_en")] #[serde(rename = "ElementDescription_en")]
pub element_description_en: Option<String>, pub _element_description_en: Option<String>,
#[serde(rename = "Note_en")] #[serde(rename = "Note_en")]
pub note_en: Option<String>, pub note_en: Option<String>,
#[serde(rename = "noteIDs")] #[serde(rename = "noteIDs")]
@ -34,8 +34,10 @@ pub struct RawDTableEntry {
impl TableEntryLoader for DTableCsvLoader { impl TableEntryLoader for DTableCsvLoader {
type RawEntry = RawDTableEntry; type RawEntry = RawDTableEntry;
type TableEntry = DTableEntry;
const TABLE_TYPE: crate::TableType = crate::TableType::D;
fn process_entry(&mut self, raw: Self::RawEntry) -> anyhow::Result<()> { fn process_entry(&mut self, raw: Self::RawEntry) -> anyhow::Result<Option<Self::TableEntry>> {
// Process the raw entry as needed // Process the raw entry as needed
if self.current_chain.is_none() { if self.current_chain.is_none() {
let entry = DTableEntry { let entry = DTableEntry {
@ -50,10 +52,14 @@ impl TableEntryLoader for DTableCsvLoader {
status: raw.status, status: raw.status,
}; };
self.current_chain = Some(entry); self.current_chain = Some(entry);
return Ok(None);
} else { } else {
let fxy = FXY::from_str(&raw.fxy1)?; let fxy = FXY::from_str(&raw.fxy1)?;
if self.current_chain.as_ref().unwrap().fxy != fxy { if self.current_chain.as_ref().unwrap().fxy != fxy {
self.entries.push(self.current_chain.take().unwrap()); // First take out the old completed chain
let finished = self.current_chain.take();
// Then create and save the new chain
let entry = DTableEntry { let entry = DTableEntry {
fxy, fxy,
fxy_chain: vec![FXY::from_str(&raw.fxy2)?], fxy_chain: vec![FXY::from_str(&raw.fxy2)?],
@ -66,25 +72,30 @@ impl TableEntryLoader for DTableCsvLoader {
status: raw.status, status: raw.status,
}; };
self.current_chain = Some(entry); self.current_chain = Some(entry);
// Return the old completed chain
return Ok(finished);
} else { } else {
self.current_chain self.current_chain
.as_mut() .as_mut()
.unwrap() .unwrap()
.fxy_chain .fxy_chain
.push(FXY::from_str(&raw.fxy2)?); .push(FXY::from_str(&raw.fxy2)?);
return Ok(None);
} }
} }
Ok(())
} }
fn finish(&mut self) -> anyhow::Result<()> { fn finish(&mut self) -> anyhow::Result<Option<Self::TableEntry>> {
if let Some(entry) = self.current_chain.take() { Ok(self.current_chain.take())
self.entries.push(entry);
}
Ok(())
} }
} }
#[derive(
Debug, Clone, serde::Deserialize, serde::Serialize, Archive, rkyv::Serialize, rkyv::Deserialize,
)]
#[rkyv(compare(PartialEq), derive(Debug))]
pub struct DTableEntry { pub struct DTableEntry {
fxy: FXY, fxy: FXY,
fxy_chain: Vec<FXY>, fxy_chain: Vec<FXY>,
@ -96,3 +107,71 @@ pub struct DTableEntry {
note_ids: String, note_ids: String,
status: String, status: String,
} }
impl DTableEntry {
    /// Descriptor (F-X-Y) identifying this sequence.
    pub fn fxy(&self) -> FXY {
        self.fxy
    }
    /// The descriptors this sequence expands into, in order.
    pub fn fxy_chain(&self) -> &[FXY] {
        &self.fxy_chain
    }
    /// Category column from the source CSV.
    pub fn category(&self) -> &str {
        &self.category
    }
    /// English category-of-sequences text from the source CSV.
    pub fn category_of_sequences_en(&self) -> &str {
        &self.category_of_sequences_en
    }
    /// Optional English title, if the CSV row had one.
    pub fn title_en(&self) -> Option<&str> {
        self.title_en.as_deref()
    }
    /// Optional English subtitle, if the CSV row had one.
    pub fn subtitle_en(&self) -> Option<&str> {
        self.subtitle_en.as_deref()
    }
    /// Optional English note text, if the CSV row had one.
    pub fn note_en(&self) -> Option<&str> {
        self.note_en.as_deref()
    }
    /// Note-ID string from the source CSV (may be empty).
    pub fn note_ids(&self) -> &str {
        &self.note_ids
    }
    /// Status column from the source CSV (e.g. operational state).
    pub fn status(&self) -> &str {
        &self.status
    }
}
impl std::fmt::Display for DTableEntry {
    /// Renders the sequence as one fixed-width table row:
    /// `FXY | title | status | [expansion chain]`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let fxy_chain_str: String = self
            .fxy_chain
            .iter()
            .map(|fxy| format!("{:02}{:02}{:03}", fxy.f, fxy.x, fxy.y))
            .collect::<Vec<_>>()
            .join(", ");
        let title = self.title_en.as_deref().unwrap_or("N/A");
        // Truncate by characters, not bytes: the previous `&title[..47]` byte
        // slice panics when the cut lands inside a multi-byte UTF-8 sequence.
        let truncated_title = if title.chars().count() > 50 {
            let head: String = title.chars().take(47).collect();
            format!("{}...", head)
        } else {
            title.to_string()
        };
        write!(
            f,
            "{:02}{:02}{:03} | {:<50} | {:<12} | [{}]",
            self.fxy.f, self.fxy.x, self.fxy.y, truncated_title, self.status, fxy_chain_str
        )
    }
}
impl TableEntry for DTableEntry {
    /// Lookup key for the table: the sequence's descriptor.
    fn fxy(&self) -> FXY {
        self.fxy
    }
}

2
gen/src/wmo/mod.rs Normal file
View File

@ -0,0 +1,2 @@
pub mod btable;
pub mod dtable;

View File

@ -14,3 +14,5 @@ flate2 = "1.1.5"
nom = "8.0.0" nom = "8.0.0"
serde = { version = "1.0.228", features = ["derive"] } serde = { version = "1.0.228", features = ["derive"] }
thiserror = "2.0.17" thiserror = "2.0.17"
gentools = { path = "../gen" }
anyhow = "1.0.100"

View File

@ -1,5 +1,8 @@
use genlib::{BUFRTableMPH, TableEntryLoader};
use crate::errors::Result; use crate::errors::Result;
use crate::{discriptor_table::*, structs::versions::BUFRMessage}; use crate::structs::versions::BUFRMessage;
use crate::tables::*;
pub struct MessageBlock { pub struct MessageBlock {
message: BUFRMessage, message: BUFRMessage,
@ -18,32 +21,47 @@ impl MessageBlock {
pub fn load_data(&self) -> Result<()> { pub fn load_data(&self) -> Result<()> {
let table_info = self.message.table_info(); let table_info = self.message.table_info();
let master_table_version = table_info.master_table_version;
let mut b_table_loader = TableLoader::<BTable>::new(); let master_b: BUFRTableB = self.load_first_validable_table(master_table_version)?;
let master_b_sequences = let master_d: BUFRTableD = self.load_first_validable_table(master_table_version)?;
b_table_loader.load_table(TT::Standard, table_info.master_table_version)?;
let mut d_table_loader = TableLoader::<DTable>::new();
let master_d_sequences =
d_table_loader.load_table(TT::Standard, table_info.master_table_version)?;
let local_table_version = table_info.local_table_version as u32; let local_table_version = table_info.local_table_version as u32;
if local_table_version > 0 { if local_table_version > 0 {
let local_b_sequences = b_table_loader.load_table( let local_b: BUFRTableB = TableLoader.load_table(LocalTable::new(
TT::Localized(local_table_version), Some(table_info.subcenter_id),
table_info.local_table_version, table_info.local_table_version,
)?; ))?;
let local_d_sequences = d_table_loader.load_table( let local_d: BUFRTableD = TableLoader.load_table(LocalTable::new(
TT::Localized(local_table_version), Some(table_info.subcenter_id),
table_info.local_table_version, table_info.local_table_version,
)?; ))?;
} }
Ok(()) Ok(())
// master_b_table.load_table(TT::Standard); // master_b_table.load_table(TT::Standard);
} }
fn load_first_validable_table<E: TableEntryLoader>(
&self,
table_version: u8,
) -> Result<BUFRTableMPH<E>> {
(0..=table_version)
.rev()
.find_map(|version| {
TableLoader
.load_table(MasterTable::new(version))
.ok()
.inspect(|_| {
if version != table_version {
eprintln!("Falling back to Master Table version {}", version);
}
})
})
.ok_or(crate::errors::Error::TableNotFoundEmpty)
}
} }
pub struct BUFRFile { pub struct BUFRFile {

View File

@ -9,8 +9,11 @@ pub enum Error {
#[error("CSV Error: {0}")] #[error("CSV Error: {0}")]
Csv(#[from] csv::Error), Csv(#[from] csv::Error),
#[error("Table not found for type {0:?}, sub_center {1:?}, version {2}")] #[error("Table not found: {0}")]
TableNotFound(crate::discriptor_table::TableType, Option<u32>, u8), TableNotFound(#[from] anyhow::Error),
#[error("Table not found")]
TableNotFoundEmpty,
#[error("Parse Error: {0}")] #[error("Parse Error: {0}")]
ParseError(String), ParseError(String),

View File

@ -1,5 +1,39 @@
mod block; mod block;
mod discriptor_table; mod tables;
// mod discriptor_table;
mod errors; mod errors;
pub mod parser; pub mod parser;
pub mod structs; pub mod structs;
#[cfg(test)]
mod test {
    /// Smoke test: load a prebuilt Table B and look up descriptor 0-00-001.
    ///
    /// Ignored by default because it depends on a machine-specific absolute
    /// path; run with `cargo test -- --ignored` on a machine that has it.
    #[test]
    #[ignore = "requires a table file at a machine-specific absolute path"]
    fn test() {
        use genlib::prelude::*;
        let bufr = BUFRTableB::load_from_disk(
            "/Users/xiang.li1/projects/rbufr/tables/BUFRCREX_TableB_en_00.bufrtbl",
        )
        .unwrap();
        let entry = bufr.lookup(FXY::new(0, 0, 1)).unwrap().unwrap();
        println!("{:#?}", entry);
    }

    /// Smoke test: parse a sample BUFR file and load tables for each message.
    ///
    /// Ignored by default for the same reason as above.
    #[test]
    #[ignore = "requires a BUFR sample at a machine-specific absolute path"]
    fn test_rb() {
        use crate::parser::Parser;
        let mut parser = Parser::new();
        let parsed_file = parser
            .parse("/Users/xiang.li1/Downloads/36_2025-12-22T11_00_00.bufr")
            .unwrap();
        for msg in parsed_file.messages() {
            println!("{}", msg);
            msg.load_data().unwrap();
        }
    }
}

View File

@ -147,19 +147,3 @@ impl Parser {
Ok(file_block) Ok(file_block)
} }
} }
#[cfg(test)]
mod tests {
use super::Parser;
#[test]
fn test() {
let mut parser = Parser::new();
if let Ok(file) = parser.parse("/Users/xiang.li1/Downloads/36_2025-12-22T11_00_00.bufr") {
for message in file.messages() {
println!("{}", message);
}
}
}
}

102
rbufr/src/tables.rs Normal file
View File

@ -0,0 +1,102 @@
use crate::errors::Result;
pub use genlib::prelude::{BUFRTableB, BUFRTableD, TableType};
use genlib::{TableEntryLoader, prelude::*};
use std::path::PathBuf;
/// Maps a logical table source (master or local) to the on-disk location of
/// its compiled `.bufrtbl` file.
pub trait TableTrait {
    /// Returns the path of the file holding `table_type` for this source.
    fn file_path(&self, table_type: TableType) -> PathBuf;
}
/// A WMO master (standard) table at a specific version.
#[derive(Debug, Clone, Copy)]
pub struct MasterTable {
    // Master table version; becomes part of the on-disk file name.
    version: u8,
}
impl MasterTable {
    /// Creates a handle for master tables of the given `version`.
    pub fn new(version: u8) -> Self {
        MasterTable { version }
    }
}
/// A locally-defined table at a specific version, optionally scoped to a
/// sub-centre.
#[derive(Debug, Clone, Copy)]
pub struct LocalTable {
    // Sub-centre id; `None` is treated as sub-centre 0 in the file name.
    sub_center: Option<u16>,
    // Local table version; becomes part of the on-disk file name.
    version: u8,
}
impl LocalTable {
    /// Creates a handle for local tables of `version`, optionally scoped to
    /// `sub_center`.
    pub fn new(sub_center: Option<u16>, version: u8) -> Self {
        LocalTable {
            sub_center,
            version,
        }
    }
}
impl TableTrait for MasterTable {
    /// Returns `tables/master/BUFR_Table{B|D}_en_{version:02}.bufrtbl`.
    ///
    /// # Panics
    /// Panics for table types other than B or D, which master tables do not
    /// support.
    fn file_path(&self, table_type: TableType) -> PathBuf {
        // The B and D arms previously duplicated the whole path construction,
        // differing only in the table letter; compute the letter once instead.
        let letter = match table_type {
            TableType::B => "B",
            TableType::D => "D",
            _ => {
                unreachable!("Table type not supported for MasterTable")
            }
        };
        let file_name = format!("BUFR_Table{}_en_{:0>2}.bufrtbl", letter, self.version);
        PathBuf::from("tables/master").join(file_name)
    }
}
impl TableTrait for LocalTable {
    /// Returns `tables/local/BUFR_Table{B|D}_sc{NN}_en_{version:02}.bufrtbl`,
    /// with a missing sub-centre rendered as `sc00`.
    ///
    /// # Panics
    /// Panics for table types other than B or D, which local tables do not
    /// support.
    fn file_path(&self, table_type: TableType) -> PathBuf {
        // The B and D arms previously duplicated the whole path construction,
        // differing only in the table letter; compute the letter once instead.
        let letter = match table_type {
            TableType::B => "B",
            TableType::D => "D",
            _ => {
                unreachable!("Table type not supported for LocalTable")
            }
        };
        let sub_center_str = match self.sub_center {
            Some(sc) => format!("sc{:0>2}", sc),
            None => "sc00".to_string(),
        };
        let file_name = format!(
            "BUFR_Table{}_{}_en_{:0>2}.bufrtbl",
            letter, sub_center_str, self.version
        );
        PathBuf::from("tables/local").join(file_name)
    }
}
/// Thin facade that resolves a table source to a path and loads the compiled
/// table from disk.
pub struct TableLoader;

impl TableLoader {
    /// Loads the table of kind `T` for the given source. The file path is
    /// derived from `T::TABLE_TYPE` and the source's naming scheme.
    pub fn load_table<T>(&self, table_type: impl TableTrait) -> Result<BUFRTableMPH<T>>
    where
        T: TableEntryLoader,
    {
        let path = table_type.file_path(T::TABLE_TYPE);
        // `?` converts the underlying anyhow error via `From` on our Error.
        Ok(BUFRTableMPH::<T>::load_from_disk(path)?)
    }
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show More