This commit is contained in:
tsuki 2025-12-23 22:04:44 +08:00
parent fc44344a01
commit 3f2a2bcefc
126 changed files with 2767 additions and 431 deletions

1415
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -1,13 +1,25 @@
[package]
name = "gen"
name = "gentools"
version = "0.1.0"
edition = "2024"
[lib]
name = "genlib"
path = "src/lib.rs"
[[bin]]
name = "gen-ctl"
path = "src/main.rs"
[dependencies]
anyhow = "1.0.100"
boomphf = { version = "0.6.0", features = ["serde"] }
bincode = "1.3.3"
ph = "0.10.0"
byteorder = "1.5.0"
clap = "4.5.53"
clap = { version = "4.5.53", features = ["derive"] }
csv = "1.4.0"
glob = "0.3"
memmap2 = "0.9"
rkyv = { version = "0.8.12" }
serde = { version = "1.0.228", features = ["derive"] }
binout = "0.3.1"

View File

@ -1,109 +0,0 @@
use crate::{FXY, TableEntryLoader};
use anyhow::{Context, Result};
use std::path::Path;
pub struct BTableCsvLoader {
entries: Vec<BTableEntry>,
}
#[derive(Debug, serde::Deserialize)]
pub struct RawBTableEntry {
#[serde(rename = "ClassNo")]
pub class_no: String,
#[serde(rename = "ClassName_en")]
pub class_name_en: String,
#[serde(rename = "FXY")]
pub fxy: String,
#[serde(rename = "ElementName_en")]
pub element_name_en: String,
#[serde(rename = "BUFR_Unit")]
pub bufr_unit: String,
#[serde(rename = "BUFR_Scale")]
pub bufr_scale: u32,
#[serde(rename = "BUFR_ReferenceValue")]
pub bufr_reference_value: u32,
#[serde(rename = "BUFR_DataWidth_Bits")]
pub bufr_datawidth_bits: u32,
#[serde(rename = "CREX_Unit")]
pub crex_unit: String,
#[serde(rename = "CREX_Scale")]
pub crex_scale: u32,
#[serde(rename = "CREX_DataWidth_Char")]
pub crex_datawidth_char: u32,
#[serde(rename = "Note_en")]
pub note_en: Option<String>,
#[serde(rename = "noteIDs")]
pub note_ids: Option<String>,
#[serde(rename = "Status")]
pub status: String,
}
pub struct BTableEntry {
fxy: FXY,
class_name_en: String,
element_name_en: String,
bufr_unit: String,
bufr_scale: u32,
bufr_reference_value: u32,
bufr_datawidth_bits: u32,
note_en: Option<String>,
note_ids: Option<String>,
status: String,
}
impl BTableCsvLoader {
pub fn new() -> Self {
BTableCsvLoader {
entries: Vec::new(),
}
}
pub fn from_wmo_csv<P: AsRef<Path>>(path: P) -> Result<Self> {
let mut loader = Self::new();
let table_loader = crate::TableLoader::new();
table_loader.load_table(path, &mut loader)?;
Ok(loader)
}
pub fn entries(&self) -> &[BTableEntry] {
&self.entries
}
}
impl TableEntryLoader for BTableCsvLoader {
type RawEntry = RawBTableEntry;
fn process_entry(&mut self, raw: Self::RawEntry) -> anyhow::Result<()> {
// Parse FXY string (e.g., "001001") to u32
let fxy = FXY::from_str(&raw.fxy)?;
let entry = BTableEntry {
fxy,
class_name_en: raw.class_name_en,
element_name_en: raw.element_name_en,
bufr_unit: raw.bufr_unit,
bufr_scale: raw.bufr_scale,
bufr_reference_value: raw.bufr_reference_value,
bufr_datawidth_bits: raw.bufr_datawidth_bits,
note_en: raw.note_en,
note_ids: raw.note_ids,
status: raw.status,
};
self.entries.push(entry);
Ok(())
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_btable_csv_loader() {
let _loader = BTableCsvLoader::from_wmo_csv(
"/Users/tsuki/projects/rbufr/BUFR4/BUFRCREX_TableB_en_00.csv",
)
.unwrap();
}
}

154
gen/src/fr/btable.rs Normal file
View File

@ -0,0 +1,154 @@
use crate::{FXY, TableEntry, TableEntryLoader};
use rkyv::Archive;
/// Loader that parses WMO Table B CSV rows into [`BTableEntry`] values.
pub struct BTableCsvLoader;

/// Raw row representation used by the loader.
///
/// NOTE(review): `process_entry` below reads `raw.fxy`, `raw.class_name_en`,
/// etc., which this struct does not declare — this view may be an incomplete
/// diff; confirm the raw-entry definition against the full file.
#[derive(Debug)]
pub struct RawBTableEntry {
    pub f: u16,
    pub x: u16,
    pub y: u16,
}
/// Deserializes a CSV field into `Option<String>`, mapping an empty string
/// (an empty cell in the CSV) to `None`.
fn deserialize_optional_string<'de, D>(deserializer: D) -> Result<Option<String>, D::Error>
where
    D: serde::Deserializer<'de>,
{
    let raw: String = serde::Deserialize::deserialize(deserializer)?;
    // `filter` drops the value exactly when the string is empty.
    Ok(Some(raw).filter(|value| !value.is_empty()))
}
// Helper function to deserialize empty strings as None for u32
fn deserialize_optional_u32<'de, D>(deserializer: D) -> Result<Option<u32>, D::Error>
where
D: serde::Deserializer<'de>,
{
let s: String = serde::Deserialize::deserialize(deserializer)?;
if s.is_empty() {
Ok(None)
} else {
s.parse::<u32>().map(Some).map_err(serde::de::Error::custom)
}
}
#[derive(
    Debug, Clone, serde::Deserialize, serde::Serialize, Archive, rkyv::Serialize, rkyv::Deserialize,
)]
#[rkyv(compare(PartialEq), derive(Debug))]
/// One fully-parsed Table B (element descriptor) entry, serializable with
/// both serde and rkyv.
pub struct BTableEntry {
    // F-X-Y descriptor identifying this element.
    fxy: FXY,
    // English class (category) name.
    class_name_en: String,
    // English element name.
    element_name_en: String,
    // Unit string from the BUFR columns of the WMO table.
    bufr_unit: String,
    // BUFR scale column (signed — can be negative).
    bufr_scale: i32,
    // BUFR reference-value column (signed).
    bufr_reference_value: i32,
    // Encoded data width in bits.
    bufr_datawidth_bits: u32,
    // Optional English note text; None when the CSV cell is empty.
    note_en: Option<String>,
    // Optional note identifiers; None when the CSV cell is empty.
    note_ids: Option<String>,
    // Raw status column string.
    status: String,
}
/// Read-only accessors for the private fields of [`BTableEntry`].
impl BTableEntry {
    /// F-X-Y descriptor identifying this element.
    pub fn fxy(&self) -> FXY {
        self.fxy
    }
    /// English class (category) name.
    pub fn class_name_en(&self) -> &str {
        &self.class_name_en
    }
    /// English element name.
    pub fn element_name_en(&self) -> &str {
        &self.element_name_en
    }
    /// BUFR unit string.
    pub fn bufr_unit(&self) -> &str {
        &self.bufr_unit
    }
    /// BUFR scale column value.
    pub fn bufr_scale(&self) -> i32 {
        self.bufr_scale
    }
    /// BUFR reference-value column.
    pub fn bufr_reference_value(&self) -> i32 {
        self.bufr_reference_value
    }
    /// Encoded data width in bits.
    pub fn bufr_datawidth_bits(&self) -> u32 {
        self.bufr_datawidth_bits
    }
    /// Optional English note text.
    pub fn note_en(&self) -> Option<&str> {
        self.note_en.as_deref()
    }
    /// Optional note identifiers.
    pub fn note_ids(&self) -> Option<&str> {
        self.note_ids.as_deref()
    }
    /// Raw status column string.
    pub fn status(&self) -> &str {
        &self.status
    }
}
impl std::fmt::Display for BTableEntry {
    /// Renders the entry as one fixed-width table row:
    /// `FXY | element name | unit | scale | reference | width | status`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Truncate `s` to at most `max` bytes, appending "..." when cut.
        // The cut point is snapped back to a UTF-8 char boundary: the previous
        // `&s[..37]` byte slice panics when the cut lands inside a multi-byte
        // character (element names/units are not guaranteed to be ASCII).
        fn truncate(s: &str, max: usize) -> String {
            if s.len() <= max {
                return s.to_string();
            }
            let mut cut = max - 3; // leave room for the "..." suffix
            while !s.is_char_boundary(cut) {
                cut -= 1;
            }
            format!("{}...", &s[..cut])
        }
        let element_name = truncate(&self.element_name_en, 40);
        let unit = truncate(&self.bufr_unit, 15);
        write!(
            f,
            "{:02}{:02}{:03} | {:<40} | {:<15} | {:>5} | {:>8} | {:>8} | {}",
            self.fxy.f,
            self.fxy.x,
            self.fxy.y,
            element_name,
            unit,
            self.bufr_scale,
            self.bufr_reference_value,
            self.bufr_datawidth_bits,
            self.status
        )
    }
}
impl TableEntryLoader for BTableCsvLoader {
    type RawEntry = RawBTableEntry;
    type TableEntry = BTableEntry;
    const TABLE_TYPE: crate::TableType = crate::TableType::B;

    /// Converts one raw CSV row into a `BTableEntry`; every valid row yields
    /// exactly one entry (`Ok(Some(..))`).
    ///
    /// NOTE(review): this reads `raw.fxy`, `raw.class_name_en`, etc., but the
    /// `RawBTableEntry` visible above only declares `f`/`x`/`y` — this view
    /// may be an incomplete diff; confirm against the full file.
    fn process_entry(&mut self, raw: Self::RawEntry) -> anyhow::Result<Option<Self::TableEntry>> {
        // Parse FXY string (e.g., "001001") to u32
        let fxy = FXY::from_str(&raw.fxy)?;
        let entry = BTableEntry {
            fxy,
            class_name_en: raw.class_name_en,
            element_name_en: raw.element_name_en,
            bufr_unit: raw.bufr_unit,
            bufr_scale: raw.bufr_scale,
            bufr_reference_value: raw.bufr_reference_value,
            bufr_datawidth_bits: raw.bufr_datawidth_bits,
            note_en: raw.note_en,
            note_ids: raw.note_ids,
            status: raw.status,
        };
        Ok(Some(entry))
    }
}
/// Lookup-key support: a Table B entry is identified by its FXY descriptor.
impl TableEntry for BTableEntry {
    fn fxy(&self) -> FXY {
        self.fxy
    }
}

151
gen/src/fr/dtable.rs Normal file
View File

@ -0,0 +1,151 @@
use super::TableEntryLoader;
use csv::StringRecord;
use rkyv::Archive;
/// Streaming loader for WMO Table D CSVs.
///
/// Table D sequences span several consecutive rows sharing one leading FXY;
/// `current_chain` buffers the sequence being assembled until a row with a
/// different leading FXY arrives (see `process_entry` below).
#[derive(Debug, Clone, Default)]
pub struct DTableCsvLoader {
    // The partially-built sequence entry, if any.
    current_chain: Option<DTableEntry>,
}
impl TableEntryLoader for DTableCsvLoader {
    type TableEntry = DTableEntry;
    const TABLE_TYPE: crate::TableType = crate::TableType::D;

    /// Folds consecutive CSV rows into sequence entries.
    ///
    /// Rows sharing the same leading FXY (`fxy1`) each contribute one chain
    /// element (`fxy2`). A completed entry is only emitted when a row with a
    /// *different* `fxy1` shows up; the final chain is emitted by `finish`.
    ///
    /// NOTE(review): `raw` is a `csv::StringRecord`, yet named fields like
    /// `raw.fxy1` / `raw.category` are accessed on it — this view may be an
    /// incomplete diff; confirm against the full file.
    fn process_entry(&mut self, raw: &StringRecord) -> anyhow::Result<Option<Self::TableEntry>> {
        // Process the raw entry as needed
        if self.current_chain.is_none() {
            // Very first row: start a new chain, nothing to emit yet.
            let entry = DTableEntry {
                fxy: FXY::from_str(&raw.fxy1)?,
                fxy_chain: vec![FXY::from_str(&raw.fxy2)?],
                category: raw.category,
                category_of_sequences_en: raw.category_of_sequences_en,
                title_en: raw.title_en,
                subtitle_en: raw.subtitle_en,
                note_en: raw.note_en,
                note_ids: raw.note_ids,
                status: raw.status,
            };
            self.current_chain = Some(entry);
            return Ok(None);
        } else {
            let fxy = FXY::from_str(&raw.fxy1)?;
            if self.current_chain.as_ref().unwrap().fxy != fxy {
                // First take out the old completed chain
                let finished = self.current_chain.take();
                // Then create and save the new chain
                let entry = DTableEntry {
                    fxy,
                    fxy_chain: vec![FXY::from_str(&raw.fxy2)?],
                    category: raw.category,
                    category_of_sequences_en: raw.category_of_sequences_en,
                    title_en: raw.title_en,
                    subtitle_en: raw.subtitle_en,
                    note_en: raw.note_en,
                    note_ids: raw.note_ids,
                    status: raw.status,
                };
                self.current_chain = Some(entry);
                // Return the old completed chain
                return Ok(finished);
            } else {
                // Same leading FXY: extend the current chain with this row's
                // second descriptor.
                self.current_chain
                    .as_mut()
                    .unwrap()
                    .fxy_chain
                    .push(FXY::from_str(&raw.fxy2)?);
                return Ok(None);
            }
        }
    }

    /// Emits the last buffered chain once the CSV is exhausted.
    fn finish(&mut self) -> anyhow::Result<Option<Self::TableEntry>> {
        Ok(self.current_chain.take())
    }
}
#[derive(
    Debug, Clone, serde::Deserialize, serde::Serialize, Archive, rkyv::Serialize, rkyv::Deserialize,
)]
#[rkyv(compare(PartialEq), derive(Debug))]
/// One fully-assembled Table D (sequence descriptor) entry.
pub struct DTableEntry {
    // Leading FXY identifying the sequence.
    fxy: FXY,
    // Descriptors the sequence expands to, in row order.
    fxy_chain: Vec<FXY>,
    // Category code column.
    category: String,
    // English category-of-sequences description.
    category_of_sequences_en: String,
    // Optional title / subtitle / note columns.
    title_en: Option<String>,
    subtitle_en: Option<String>,
    note_en: Option<String>,
    // Note identifiers column (non-optional here, unlike Table B).
    note_ids: String,
    // Raw status column string.
    status: String,
}
/// Read-only accessors for the private fields of [`DTableEntry`].
impl DTableEntry {
    /// Leading FXY identifying the sequence.
    pub fn fxy(&self) -> FXY {
        self.fxy
    }
    /// Descriptors the sequence expands to, in row order.
    pub fn fxy_chain(&self) -> &[FXY] {
        &self.fxy_chain
    }
    /// Category code column.
    pub fn category(&self) -> &str {
        &self.category
    }
    /// English category-of-sequences description.
    pub fn category_of_sequences_en(&self) -> &str {
        &self.category_of_sequences_en
    }
    /// Optional English title.
    pub fn title_en(&self) -> Option<&str> {
        self.title_en.as_deref()
    }
    /// Optional English subtitle.
    pub fn subtitle_en(&self) -> Option<&str> {
        self.subtitle_en.as_deref()
    }
    /// Optional English note text.
    pub fn note_en(&self) -> Option<&str> {
        self.note_en.as_deref()
    }
    /// Note identifiers column.
    pub fn note_ids(&self) -> &str {
        &self.note_ids
    }
    /// Raw status column string.
    pub fn status(&self) -> &str {
        &self.status
    }
}
impl std::fmt::Display for DTableEntry {
    /// Renders the sequence as one fixed-width row:
    /// `FXY | title | status | [expanded FXY chain]`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let fxy_chain_str: String = self
            .fxy_chain
            .iter()
            .map(|fxy| format!("{:02}{:02}{:03}", fxy.f, fxy.x, fxy.y))
            .collect::<Vec<_>>()
            .join(", ");
        let title = self.title_en.as_deref().unwrap_or("N/A");
        let truncated_title = if title.len() > 50 {
            // Snap the cut to a UTF-8 char boundary: the previous
            // `&title[..47]` byte slice panics when byte 47 lands inside a
            // multi-byte character (titles are not guaranteed to be ASCII).
            let mut cut = 47;
            while !title.is_char_boundary(cut) {
                cut -= 1;
            }
            format!("{}...", &title[..cut])
        } else {
            title.to_string()
        };
        write!(
            f,
            "{:02}{:02}{:03} | {:<50} | {:<12} | [{}]",
            self.fxy.f, self.fxy.x, self.fxy.y, truncated_title, self.status, fxy_chain_str
        )
    }
}
/// Lookup-key support: a Table D entry is identified by its leading FXY.
impl TableEntry for DTableEntry {
    fn fxy(&self) -> FXY {
        self.fxy
    }
}

61
gen/src/fr/mod.rs Normal file
View File

@ -0,0 +1,61 @@
use csv::{ReaderBuilder, StringRecord};
use std::path::Path;
pub mod btable;
pub mod dtable;
/// Stateless driver that streams a semicolon-delimited CSV file through a
/// [`TableEntryLoader`], collecting the completed entries.
pub struct TableLoader;

impl TableLoader {
    /// Reads `path` record by record, feeding each row to `loader` and
    /// collecting every entry it emits, plus the final entry flushed by
    /// [`TableEntryLoader::finish`].
    ///
    /// Malformed rows are reported to stderr and skipped so one bad line does
    /// not abort the whole table.
    pub fn load_table<P: AsRef<Path>, T: TableEntryLoader>(
        &self,
        path: P,
        loader: &mut T,
    ) -> anyhow::Result<Vec<T::TableEntry>> {
        let mut entries = vec![];
        let mut rdr = ReaderBuilder::new()
            .has_headers(false)
            .delimiter(b';')
            // Strict mode: every record must have the same field count.
            // (The previous comment claimed the opposite of `flexible(false)`.)
            .flexible(false)
            .from_path(path.as_ref())?;
        let mut line_num = 1; // Start at 1 for header
        for result in rdr.records() {
            line_num += 1;
            match result {
                Ok(record) => {
                    // `process_entry` takes `&StringRecord` (see the trait
                    // below); the previous code moved `record` by value, which
                    // does not match the trait signature.
                    if let Some(processed_entry) = loader.process_entry(&record)? {
                        entries.push(processed_entry);
                    }
                }
                Err(e) => {
                    // Log the error but continue processing remaining rows.
                    eprintln!(
                        "Warning: Skipping line {} in {}: {}",
                        line_num,
                        path.as_ref().display(),
                        e
                    );
                }
            }
        }
        // Flush any entry still buffered in the loader (e.g. the last
        // Table D chain).
        if let Some(processed_entry) = loader.finish()? {
            entries.push(processed_entry);
        }
        Ok(entries)
    }
}
/// Per-table-type adapter that turns raw CSV records into typed entries.
pub trait TableEntryLoader: Sized {
    /// The fully-parsed entry type this loader produces.
    type TableEntry: TableEntry;
    /// Which BUFR table (B or D) this loader handles.
    const TABLE_TYPE: TableType;
    /// Process a single entry from the CSV file
    ///
    /// Returning `Ok(None)` means the record was consumed but no entry is
    /// complete yet (e.g. a multi-row Table D chain still being assembled).
    fn process_entry(&mut self, raw: &StringRecord) -> anyhow::Result<Option<Self::TableEntry>>;
    /// Called after the last record; flushes any entry still buffered.
    fn finish(&mut self) -> anyhow::Result<Option<Self::TableEntry>> {
        Ok(None)
    }
}

View File

@ -1,19 +1,20 @@
mod btable;
mod dtable;
// pub mod fr;
pub mod prelude;
mod utils;
pub mod wmo;
use anyhow::Context;
use boomphf::Mphf;
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use memmap2::Mmap;
use ph::fmph::GOFunction;
use rkyv::Archive;
use rkyv::ser::serializers::AllocSerializer;
use rkyv::api::high::{HighDeserializer, HighSerializer};
use rkyv::rancor::Error;
use serde::de::DeserializeOwned;
use serde::{Deserialize, Serialize};
use serde::{Deserialize as SerdeDeserialize, Serialize as SerdeSerialize};
use std::fmt::Debug;
use std::fs::File;
use std::io::{Read, Seek, SeekFrom, Write};
use std::io::Write;
use std::path::Path;
use csv::{DeserializeRecordsIter, ReaderBuilder};
use csv::ReaderBuilder;
pub struct TableLoader;
@ -22,187 +23,265 @@ impl TableLoader {
&self,
path: P,
loader: &mut T,
) -> anyhow::Result<()> {
) -> anyhow::Result<Vec<T::TableEntry>> {
let mut entries = vec![];
let mut rdr = ReaderBuilder::new()
.has_headers(true)
.delimiter(b',')
.from_path(path)?;
.flexible(true) // Allow variable number of fields
.from_path(path.as_ref())?;
let mut line_num = 1; // Start at 1 for header
for result in rdr.deserialize() {
let record: T::RawEntry = result?;
loader.process_entry(record)?;
line_num += 1;
match result {
Ok(record) => {
let record: T::RawEntry = record;
if let Some(processed_entry) = loader.process_entry(record)? {
entries.push(processed_entry);
}
}
Err(e) => {
// Log the error but continue processing
eprintln!(
"Warning: Skipping line {} in {}: {}",
line_num,
path.as_ref().display(),
e
);
}
Ok(())
}
}
pub trait TableEntryLoader: Sized {
if let Some(processed_entry) = loader.finish()? {
entries.push(processed_entry);
}
Ok(entries)
}
}
pub trait TableEntryLoader: Sized
where
Self::TableEntry: for<'a> rkyv::Serialize<
HighSerializer<rkyv::util::AlignedVec, rkyv::ser::allocator::ArenaHandle<'a>, Error>,
>,
<Self::TableEntry as Archive>::Archived:
rkyv::Deserialize<Self::TableEntry, HighDeserializer<Error>>,
{
/// The raw CSV entry type that will be deserialized
type RawEntry: for<'de> serde::Deserialize<'de> + Debug;
type TableEntry: TableEntry;
const TABLE_TYPE: TableType;
/// Process a single entry from the CSV file
fn process_entry(&mut self, raw: Self::RawEntry) -> anyhow::Result<()>;
fn process_entry(&mut self, raw: Self::RawEntry) -> anyhow::Result<Option<Self::TableEntry>>;
fn finish(&mut self) -> anyhow::Result<()> {
Ok(())
}
}
pub trait TableEntry: Serialize + DeserializeOwned + Debug + Clone {
fn fxy(&self) -> FXY;
}
#[derive(Serialize, Deserialize)]
struct MphMetadata {
mphf: Mphf<FXY>,
offsets: Vec<u64>,
}
struct BufrTableMph<T: TableEntryLoader> {
mphf: Mphf<FXY>,
offsets: Vec<u64>,
data_file: File,
_marker: std::marker::PhantomData<T>,
}
impl<T: TableEntryLoader> BufrTableMph<T> {
/// 构建 MPH 表
fn build(entries: &Vec<T::TableEntry>, output_path: &str) -> std::io::Result<Self> {
let keys: Vec<FXY> = entries.iter().map(|e| e.fxy()).collect();
let mphf = Mphf::new(2.0, &keys);
let mut sorted_entries: Vec<(usize, T::TableEntry)> = entries
.into_iter()
.map(|e| {
let hash = mphf.hash(&e.fxy()) as usize;
(hash, e)
})
.collect();
sorted_entries.sort_by_key(|(hash, _)| *hash);
let data_path = format!("{}.data", output_path);
let mut data_file = File::create(&data_path)?;
let mut offsets = Vec::with_capacity(sorted_entries.len());
for (_, entry) in sorted_entries {
let offset = data_file.stream_position()?;
offsets.push(offset);
// rkyv 序列化
let mut serializer = AllocSerializer::<256>::default();
serializer.serialize_value(&entry).unwrap();
let bytes = serializer.into_serializer().into_inner();
// 写入长度 + 数据
data_file.write_u32::<LittleEndian>(bytes.len() as u32)?;
data_file.write_all(&bytes)?;
}
// 5. 保存元数据
let meta_path = format!("{}.meta", output_path);
let meta = MphMetadata {
mphf: mphf.clone(),
offsets: offsets.clone(),
};
// 元数据仍然用 bincode 或者也可以用 rkyv
let mut meta_serializer = AllocSerializer::<4096>::default();
meta_serializer.serialize_value(&meta).unwrap();
let meta_bytes = meta_serializer.into_serializer().into_inner();
std::fs::write(meta_path, meta_bytes)?;
Ok(Self {
mphf,
offsets,
data_file: File::open(data_path)?,
_marker: std::marker::PhantomData,
})
}
/// 从磁盘加载
fn load(path: &str) -> std::io::Result<Self> {
let meta_bytes = std::fs::read(format!("{}.meta", path))?;
// rkyv 反序列化元数据(零拷贝)
let archived = unsafe { rkyv::archived_root::<MphMetadata>(&meta_bytes) };
// 如果需要拥有的版本,可以 deserialize
let meta: MphMetadata = archived.deserialize(&mut rkyv::Infallible).unwrap();
let data_file = File::open(format!("{}.data", path))?;
Ok(Self {
mphf: meta.mphf,
offsets: meta.offsets,
data_file,
_marker: std::marker::PhantomData,
})
}
/// 查找条目(零拷贝读取)
fn get(&mut self, fxy: FXY) -> std::io::Result<Option<T::TableEntry>> {
let hash = self.mphf.hash(&fxy) as usize;
let offset = match self.offsets.get(hash) {
Some(&off) => off,
None => return Ok(None),
};
// 读取数据
self.data_file.seek(SeekFrom::Start(offset))?;
let len = self.data_file.read_u32::<LittleEndian>()? as usize;
let mut buffer = vec![0u8; len];
self.data_file.read_exact(&mut buffer)?;
// rkyv 零拷贝访问
let archived = unsafe { rkyv::archived_root::<T::TableEntry>(&buffer) };
// 校验归档数据(可选,生产环境推荐)
#[cfg(feature = "validation")]
{
use rkyv::validation::validators::DefaultValidator;
rkyv::check_archived_root::<T::TableEntry>(&buffer)
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
}
// 反序列化为拥有的类型
let entry: T::TableEntry = archived.deserialize(&mut rkyv::Infallible).unwrap();
if entry.fxy == fxy {
Ok(Some(entry))
} else {
fn finish(&mut self) -> anyhow::Result<Option<Self::TableEntry>> {
Ok(None)
}
}
/// 零拷贝引用访问(更快!)
fn get_archived(&mut self, fxy: u32) -> std::io::Result<Option<Vec<u8>>> {
let hash = self.mphf.hash(&fxy) as usize;
let offset = match self.offsets.get(hash) {
Some(&off) => off,
None => return Ok(None),
pub trait TableEntry: SerdeSerialize + DeserializeOwned + Debug + Clone + Archive {
fn fxy(&self) -> FXY;
}
struct BufrTableMph<T: TableEntryLoader> {
mphf: GOFunction,
offsets: Vec<u64>,
mmap: Mmap,
_marker: std::marker::PhantomData<T>,
}
struct BUFRTableM {
mphf: GOFunction,
}
impl<T: TableEntryLoader> BufrTableMph<T>
where
for<'a> T::TableEntry: rkyv::Serialize<
HighSerializer<rkyv::util::AlignedVec, rkyv::ser::allocator::ArenaHandle<'a>, Error>,
>,
<T::TableEntry as Archive>::Archived: rkyv::Deserialize<T::TableEntry, HighDeserializer<Error>>,
{
fn build(entries: Vec<T::TableEntry>, output_path: &str) -> std::io::Result<Self> {
println!("Building MPH table with {} entries...", entries.len());
let keys: Vec<FXY> = entries.iter().map(|e| e.fxy()).collect();
let mphf = GOFunction::from_slice(&keys);
let mut sorted_entries: Vec<(usize, T::TableEntry)> = entries
.into_iter()
.map(|e| (mphf.get(&(e.fxy())).unwrap() as usize, e))
.collect();
sorted_entries.sort_by_key(|(hash, _)| *hash);
// Serialize MPHF to bytes
let mut mphf_bytes = Vec::new();
mphf.write(&mut mphf_bytes)
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, format!("{:?}", e)))?;
// Prepare data entries
let mut data_bytes = Vec::new();
let mut offsets = Vec::with_capacity(sorted_entries.len());
for (_, entry) in sorted_entries {
let offset = data_bytes.len() as u64;
offsets.push(offset);
let bytes = rkyv::to_bytes::<Error>(&entry)
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, format!("{:?}", e)))?;
let len = bytes.len() as u32;
data_bytes.write_all(&len.to_le_bytes())?;
data_bytes.write_all(&bytes)?;
}
// Serialize offsets as raw bytes
let offsets_count = offsets.len() as u32;
let mut offsets_bytes = Vec::with_capacity(offsets.len() * 8);
for offset in &offsets {
offsets_bytes.extend_from_slice(&offset.to_le_bytes());
}
// Write merged file
let merged_path = format!("{}.bufrtbl", output_path);
let mut merged_file = std::fs::File::create(&merged_path)?;
// Write header: mphf_size (u32) + offsets_count (u32)
merged_file.write_all(&(mphf_bytes.len() as u32).to_le_bytes())?;
merged_file.write_all(&offsets_count.to_le_bytes())?;
// Write MPHF data
merged_file.write_all(&mphf_bytes)?;
// Write offsets (already 8-byte aligned since each offset is u64)
merged_file.write_all(&offsets_bytes)?;
// Write table entries data
merged_file.write_all(&data_bytes)?;
println!("Built successfully!");
println!(
" Merged file: {} bytes",
std::fs::metadata(&merged_path)?.len()
);
Self::load(output_path)
}
fn load<P: AsRef<Path>>(path: P) -> std::io::Result<Self> {
let mut path = path.as_ref().to_path_buf();
path.set_extension("bufrtbl");
let merged_file = std::fs::File::open(&path)?;
let mmap = unsafe { Mmap::map(&merged_file)? };
// Read header
let mphf_size = u32::from_le_bytes(mmap[0..4].try_into().unwrap()) as usize;
let offsets_count = u32::from_le_bytes(mmap[4..8].try_into().unwrap()) as usize;
// Calculate section positions
let mphf_start = 8;
let mphf_end = mphf_start + mphf_size;
let offsets_start = mphf_end;
let offsets_size = offsets_count * 8; // each offset is u64 (8 bytes)
let offsets_end = offsets_start + offsets_size;
let data_start = offsets_end;
// Load MPHF
let mphf_bytes = &mmap[mphf_start..mphf_end];
let mphf: GOFunction = GOFunction::read(&mut &mphf_bytes[..])
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, format!("{:?}", e)))?;
// Load offsets
let offsets_bytes = &mmap[offsets_start..offsets_end];
let mut offsets = Vec::with_capacity(offsets_count);
for i in 0..offsets_count {
let offset_bytes = &offsets_bytes[i * 8..(i + 1) * 8];
let offset = u64::from_le_bytes(offset_bytes.try_into().unwrap());
offsets.push(offset + data_start as u64); // Adjust offset for data section
}
Ok(Self {
mphf,
offsets,
mmap,
_marker: std::marker::PhantomData,
})
}
/// 获取拥有的版本
fn get(&self, fxy: FXY) -> Option<T::TableEntry> {
let hash = self.mphf.get(&fxy)? as usize;
let offset = *self.offsets.get(hash)? as usize;
let len_bytes = self.mmap.get(offset..offset + 4)?;
let len = u32::from_le_bytes(len_bytes.try_into().ok()?) as usize;
let data = self.mmap.get(offset + 4..offset + 4 + len)?;
let archived =
unsafe { rkyv::access_unchecked::<<T::TableEntry as Archive>::Archived>(data) };
rkyv::deserialize::<T::TableEntry, Error>(archived).ok()
}
/// 获取所有条目
fn get_all(&self) -> Vec<T::TableEntry> {
let mut entries = Vec::new();
for offset in &self.offsets {
let offset = *offset as usize;
if let Some(len_bytes) = self.mmap.get(offset..offset + 4) {
if let Ok(len_bytes_array) = len_bytes.try_into() {
let len = u32::from_le_bytes(len_bytes_array) as usize;
if let Some(data) = self.mmap.get(offset + 4..offset + 4 + len) {
let archived = unsafe {
rkyv::access_unchecked::<<T::TableEntry as Archive>::Archived>(data)
};
self.data_file.seek(SeekFrom::Start(offset))?;
let len = self.data_file.read_u32::<LittleEndian>()? as usize;
let mut buffer = vec![0u8; len];
self.data_file.read_exact(&mut buffer)?;
Ok(Some(buffer))
if let Ok(entry) = rkyv::deserialize::<T::TableEntry, Error>(archived) {
entries.push(entry);
}
}
}
}
}
entries
}
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
#[derive(
Archive,
SerdeSerialize,
SerdeDeserialize,
rkyv::Serialize,
rkyv::Deserialize,
Debug,
PartialEq,
Eq,
Clone,
Copy,
std::hash::Hash,
)]
#[rkyv(compare(PartialEq), derive(Debug))]
pub struct FXY {
pub f: u16,
pub x: u16,
pub y: u16,
}
// // Custom Hash implementation to work around boomphf's overflow bug
// // We implement Hash by converting to u32 first
// impl std::hash::Hash for FXY {
// fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
// // Convert FXY to a simple u32 value to avoid complex hashing
// self.to_u32().hash(state);
// }
// }
impl FXY {
pub fn new(f: u16, x: u16, y: u16) -> Self {
FXY { f, x, y }
}
pub fn from_str(fxy_str: &str) -> anyhow::Result<Self> {
// let bytes = fxy_str.as_bytes();
@ -223,142 +302,72 @@ impl FXY {
Ok(FXY { f, x, y })
}
/// Convert FXY to u32 for use as hash key
/// Format: F (2 bits) | X (6 bits) | Y (8 bits) = 16 bits total
pub fn to_u32(&self) -> u32 {
((self.f as u32) << 14) | ((self.x as u32) << 8) | (self.y as u32)
}
pub struct BUFRTableMPH {}
/// Convert u32 back to FXY
pub fn from_u32(value: u32) -> Self {
FXY {
f: ((value >> 14) & 0x3) as u16,
x: ((value >> 8) & 0x3F) as u16,
y: (value & 0xFF) as u16,
}
}
}
impl BUFRTableMPH {
pub fn build_from_csv<P: AsRef<Path>, T: TableEntryLoader>(
pub struct BUFRTableMPH<T: TableEntryLoader> {
inner: BufrTableMph<T>,
}
impl<T: TableEntryLoader> BUFRTableMPH<T> {
pub fn build_from_csv<P: AsRef<Path>>(
mut loader: T,
csv_path: P,
output_path: P,
) -> anyhow::Result<Self> {
TableLoader.load_table(csv_path, &mut loader)?;
loader.finish()?;
let entries = TableLoader.load_table(csv_path, &mut loader)?;
let bhm = BufrTableMph::<T>::build(entries, output_path.as_ref().to_str().unwrap())?;
Ok(BUFRTableMPH {})
Ok(BUFRTableMPH { inner: bhm })
}
pub fn load_from_disk<P: AsRef<Path>>(path: P) -> anyhow::Result<Self> {
// Placeholder for loading MPH table from disk
Ok(BUFRTableMPH {})
let bhm: BufrTableMph<T> = BufrTableMph::load(path)?;
Ok(BUFRTableMPH { inner: bhm })
}
pub fn lookup(&self, fxy: FXY) -> anyhow::Result<Option<()>> {
// Placeholder for looking up an entry by FXY
Ok(None)
pub fn lookup(&self, fxy: FXY) -> anyhow::Result<Option<T::TableEntry>> {
Ok(self.inner.get(fxy))
}
pub fn get_all_entries(&self) -> Vec<T::TableEntry> {
self.inner.get_all()
}
}
// #[derive(Serialize, Deserialize, Debug)]
// struct TableEntry {
// fxy: u32, // F-X-Y descriptor (e.g., 001001)
// name: String, // "WMO Block Number"
// unit: String, // "Numeric"
// scale: i8,
// reference: i32,
// data_width: u8,
// }
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TableType {
B,
D,
}
// struct BufrTableMph {
// mphf: Mphf<u32>, // The MPH function
// offsets: Vec<u64>, // Byte offsets into the data file
// data_file: File, // Memory-mapped or regular file
// }
#[cfg(test)]
mod test {
use crate::{BUFRTableMPH, dtable::DTableCsvLoader};
// impl BufrTableMph {
// /// Build MPH table from entries
// fn build(entries: Vec<TableEntry>, output_path: &str) -> std::io::Result<Self> {
// // 1. Extract keys (FXY descriptors)
// let keys: Vec<u32> = entries.iter().map(|e| e.fxy).collect();
// // 2. Build MPH function with gamma=2.0 (space/time tradeoff)
// let mphf = Mphf::new(2.0, &keys);
// // 3. Create sorted entries by MPH hash
// let mut sorted_entries: Vec<(usize, TableEntry)> = entries
// .into_iter()
// .map(|e| {
// let hash = mphf.hash(&e.fxy) as usize;
// (hash, e)
// })
// .collect();
// sorted_entries.sort_by_key(|(hash, _)| *hash);
// // 4. Write binary data file and collect offsets
// let data_path = format!("{}.data", output_path);
// let mut data_file = File::create(&data_path)?;
// let mut offsets = Vec::with_capacity(sorted_entries.len());
// for (_, entry) in sorted_entries {
// let offset = data_file.stream_position()?;
// offsets.push(offset);
// // Write entry in compact binary format
// let serialized = bincode::serialize(&entry).unwrap();
// data_file.write_u32::<byteorder::LittleEndian>(serialized.len() as u32)?;
// data_file.write_all(&serialized)?;
// }
// // 5. Write MPH metadata
// let meta_path = format!("{}.meta", output_path);
// let meta = MphMetadata { mphf, offsets };
// let meta_bytes = bincode::serialize(&meta).unwrap();
// std::fs::write(meta_path, meta_bytes)?;
// Ok(Self {
// mphf: meta.mphf,
// offsets: meta.offsets,
// data_file: File::open(data_path)?,
// })
// }
// /// Load from disk
// fn load(path: &str) -> std::io::Result<Self> {
// let meta_bytes = std::fs::read(format!("{}.meta", path))?;
// let meta: MphMetadata = bincode::deserialize(&meta_bytes).unwrap();
// let data_file = File::open(format!("{}.data", path))?;
// Ok(Self {
// mphf: meta.mphf,
// offsets: meta.offsets,
// data_file,
// })
// }
// /// Lookup entry by FXY descriptor
// fn get(&mut self, fxy: u32) -> std::io::Result<Option<TableEntry>> {
// // 1. Hash the key to get index
// let hash = self.mphf.hash(&fxy) as usize;
// // 2. Get offset from array
// let offset = match self.offsets.get(hash) {
// Some(&off) => off,
// None => return Ok(None),
// };
// // 3. Seek and read
// self.data_file.seek(SeekFrom::Start(offset))?;
// let len = self.data_file.read_u32::<byteorder::LittleEndian>()? as usize;
// let mut buffer = vec![0u8; len];
// self.data_file.read_exact(&mut buffer)?;
// // 4. Deserialize
// let entry: TableEntry = bincode::deserialize(&buffer).unwrap();
// // Verify key matches (MPH guarantees unique index, but verify correctness)
// if entry.fxy == fxy {
// Ok(Some(entry))
// } else {
// Ok(None)
// }
// }
// }
// #[derive(Serialize, Deserialize)]
// struct MphMetadata {
// mphf: Mphf<u32>,
// offsets: Vec<u64>,
// }
// use byteorder::{ReadBytesExt, WriteBytesExt};
#[test]
fn test() {
let d_loader = DTableCsvLoader::default();
BUFRTableMPH::build_from_csv(
d_loader,
"/Users/xiang.li1/projects/rbufr/BUFR4/BUFR_TableD_en_00.csv",
"test_table_d",
)
.unwrap();
}
}

View File

@ -1,3 +1,289 @@
fn main() {
println!("Hello, world!");
use anyhow::{Context, Result};
use clap::{Parser, Subcommand};
use genlib::{BUFRTableMPH, btable::BTableCsvLoader, dtable::DTableCsvLoader};
use std::path::{Path, PathBuf};
// Top-level argument parser for the `gen-ctl` binary.
// (Intentionally `//` comments here: `///` doc comments on clap-derived items
// become user-visible help text.)
#[derive(Parser)]
#[command(name = "gen-ctl")]
#[command(about = "BUFR Table conversion tool", long_about = None)]
struct Cli {
    // The selected subcommand; see `Commands`.
    #[command(subcommand)]
    command: Commands,
}
// All `gen-ctl` subcommands. NB: the `///` doc comments below double as the
// clap-generated help text, so they are user-visible strings — do not edit
// them casually.
#[derive(Subcommand)]
enum Commands {
    /// Scan a directory and convert all BUFR tables to MPH format
    Scan {
        /// Input directory containing BUFR CSV files
        #[arg(short, long)]
        input: PathBuf,
        /// Output directory for generated .bufrtbl files
        #[arg(short, long)]
        output: PathBuf,
        /// Table type to process: "d", "b", or "all"
        #[arg(short, long, default_value = "all")]
        table_type: String,
    },
    /// Convert a single BUFR table file
    Convert {
        /// Input CSV file
        #[arg(short, long)]
        input: PathBuf,
        /// Output path (without extension)
        #[arg(short, long)]
        output: PathBuf,
        /// Table type: "d" for Table D, "b" for Table B
        #[arg(short, long)]
        table_type: String,
    },
    /// Print a BUFR table in formatted output
    Print {
        /// Path to .bufrtbl file (without extension)
        #[arg(short, long)]
        input: PathBuf,
        /// Table type: "d" for Table D, "b" for Table B
        #[arg(short, long)]
        table_type: String,
        /// Maximum number of entries to print (optional)
        #[arg(short, long)]
        limit: Option<usize>,
    },
}
/// Entry point: parse the command line and dispatch to the selected
/// subcommand, propagating any error into the process exit status.
fn main() -> Result<()> {
    match Cli::parse().command {
        Commands::Scan {
            input,
            output,
            table_type,
        } => scan_and_convert(&input, &output, &table_type),
        Commands::Convert {
            input,
            output,
            table_type,
        } => convert_single_file(&input, &output, &table_type),
        Commands::Print {
            input,
            table_type,
            limit,
        } => print_table(&input, &table_type, limit),
    }
}
fn scan_and_convert(input_dir: &Path, output_dir: &Path, table_type: &str) -> Result<()> {
// Create output directory if it doesn't exist
std::fs::create_dir_all(output_dir).context("Failed to create output directory")?;
println!("Scanning directory: {}", input_dir.display());
println!("Output directory: {}", output_dir.display());
println!("Table type: {}", table_type);
println!();
let mut processed_count = 0;
let mut error_count = 0;
// Scan for BUFR_TableD files
if table_type == "d" || table_type == "all" {
println!("Processing Table D files...");
let pattern = input_dir.join("BUFR_TableD_*.csv");
for entry in glob::glob(pattern.to_str().unwrap()).context("Failed to read glob pattern")? {
match entry {
Ok(path) => {
let mut filename = path.file_stem().unwrap().to_str().unwrap().to_string();
if filename.contains("BUFRCREX") {
filename = filename.replace("BUFRCREX_", "BUFR_");
}
let output_path = output_dir.join(filename);
print!(" Converting {} ... ", path.display());
match convert_table_d(&path, &output_path) {
Ok(_) => {
println!("OK");
processed_count += 1;
}
Err(e) => {
println!("ERROR: {}", e);
error_count += 1;
}
}
}
Err(e) => {
println!("Error reading file: {}", e);
error_count += 1;
}
}
}
println!();
}
// Scan for BUFRCREX_TableB files
if table_type == "b" || table_type == "all" {
println!("Processing Table B files...");
let pattern = input_dir.join("BUFRCREX_TableB_*.csv");
for entry in glob::glob(pattern.to_str().unwrap()).context("Failed to read glob pattern")? {
match entry {
Ok(path) => {
let mut filename = path.file_stem().unwrap().to_str().unwrap().to_string();
if filename.contains("BUFRCREX") {
filename = filename.replace("BUFRCREX_", "BUFR_");
}
let output_path = output_dir.join(filename);
print!(" Converting {} ... ", path.display());
match convert_table_b(&path, &output_path) {
Ok(_) => {
println!("OK");
processed_count += 1;
}
Err(e) => {
eprintln!("ERROR: {}", e);
error_count += 1;
}
}
}
Err(e) => {
eprintln!("Error reading file: {}", e);
error_count += 1;
}
}
}
println!();
}
println!("Summary:");
println!(" Successfully processed: {}", processed_count);
println!(" Errors: {}", error_count);
if error_count > 0 {
anyhow::bail!("Conversion completed with {} errors", error_count);
}
Ok(())
}
/// Converts a single WMO CSV table file into the binary `.bufrtbl` format.
///
/// `table_type` selects the converter: `"d"` for Table D, `"b"` for Table B
/// (case-insensitive). Any other value is rejected with an error.
fn convert_single_file(input_path: &Path, output_path: &Path, table_type: &str) -> Result<()> {
    println!(
        "Converting {} to {}",
        input_path.display(),
        output_path.display()
    );
    // Normalize once, then dispatch; the match yields a Result so a single
    // `?` propagates conversion failures from either table kind.
    let kind = table_type.to_lowercase();
    match kind.as_str() {
        "d" => convert_table_d(input_path, output_path),
        "b" => convert_table_b(input_path, output_path),
        _ => anyhow::bail!("Invalid table type: {}. Use 'd' or 'b'", table_type),
    }?;
    println!("Conversion completed successfully!");
    Ok(())
}
/// Builds the minimal-perfect-hash Table D file from `input_path` CSV and
/// writes it to `output_path`.
fn convert_table_d(input_path: &Path, output_path: &Path) -> Result<()> {
    // The D loader is stateful (it accumulates FXY chains), so start from
    // its Default state for every conversion.
    BUFRTableMPH::build_from_csv(DTableCsvLoader::default(), input_path, output_path)?;
    Ok(())
}
/// Builds the minimal-perfect-hash Table B file from `input_path` CSV and
/// writes it to `output_path`.
fn convert_table_b(input_path: &Path, output_path: &Path) -> Result<()> {
    // BTableCsvLoader is a stateless unit struct, so it is constructed inline.
    BUFRTableMPH::build_from_csv(BTableCsvLoader, input_path, output_path)?;
    Ok(())
}
/// Pretty-prints a previously built `.bufrtbl` file.
///
/// `table_type` selects the printer (`"d"` / `"b"`, case-insensitive);
/// `limit` optionally caps the number of rows shown.
fn print_table(input_path: &Path, table_type: &str, limit: Option<usize>) -> Result<()> {
    let kind = table_type.to_lowercase();
    // Each arm already returns Result<()>, so the match result is returned
    // directly instead of `?`-ing and re-wrapping in Ok(()).
    match kind.as_str() {
        "d" => print_table_d(input_path, limit),
        "b" => print_table_b(input_path, limit),
        _ => anyhow::bail!("Invalid table type: {}. Use 'd' or 'b'", table_type),
    }
}
/// Loads a serialized Table D file and pretty-prints its entries in a
/// fixed-width layout (FXY | Title | Status | FXY Chain).
///
/// `limit` caps how many entries are printed; anything beyond the cap is
/// summarized as an "omitted" count. Returns an error if the table file
/// cannot be loaded.
fn print_table_d(input_path: &Path, limit: Option<usize>) -> Result<()> {
    // The previous version imported `genlib::dtable::DTableEntry` here but
    // never referenced it; the entries print via their Display impl.
    println!("Loading Table D from: {}", input_path.display());
    let table: BUFRTableMPH<DTableCsvLoader> = BUFRTableMPH::load_from_disk(input_path)?;
    let entries = table.get_all_entries();
    println!("\nTable D Entries (Total: {})", entries.len());
    println!("{}", "=".repeat(140));
    println!(
        "{:<7} | {:<50} | {:<12} | {}",
        "FXY", "Title", "Status", "FXY Chain"
    );
    println!("{}", "-".repeat(140));
    // Show at most `limit` entries; with no limit, `shown` equals the total
    // and the "omitted" branch below never fires.
    let shown = limit.map_or(entries.len(), |max| entries.len().min(max));
    for entry in &entries[..shown] {
        println!("{}", entry);
    }
    if entries.len() > shown {
        println!("\n... ({} more entries omitted)", entries.len() - shown);
    }
    Ok(())
}
/// Loads a serialized Table B file and pretty-prints its entries in a
/// fixed-width layout (FXY | Element Name | Unit | Scale | Ref Val | Width | Status).
///
/// `limit` caps how many entries are printed; anything beyond the cap is
/// summarized as an "omitted" count. Returns an error if the table file
/// cannot be loaded.
fn print_table_b(input_path: &Path, limit: Option<usize>) -> Result<()> {
    // The previous version imported `genlib::btable::BTableEntry` here but
    // never referenced it; the entries print via their Display impl.
    println!("Loading Table B from: {}", input_path.display());
    let table: BUFRTableMPH<BTableCsvLoader> = BUFRTableMPH::load_from_disk(input_path)?;
    let entries = table.get_all_entries();
    println!("\nTable B Entries (Total: {})", entries.len());
    println!("{}", "=".repeat(120));
    println!(
        "{:<7} | {:<40} | {:<15} | {:<5} | {:<8} | {:<8} | {}",
        "FXY", "Element Name", "Unit", "Scale", "Ref Val", "Width", "Status"
    );
    println!("{}", "-".repeat(120));
    // Show at most `limit` entries; with no limit, `shown` equals the total
    // and the "omitted" branch below never fires.
    let shown = limit.map_or(entries.len(), |max| entries.len().min(max));
    for entry in &entries[..shown] {
        println!("{}", entry);
    }
    if entries.len() > shown {
        println!("\n... ({} more entries omitted)", entries.len() - shown);
    }
    Ok(())
}

6
gen/src/prelude.rs Normal file
View File

@ -0,0 +1,6 @@
//! Convenience re-exports for downstream crates (`use genlib::prelude::*`).
pub use crate::wmo;
// NOTE(review): rbufr's tables.rs does `pub use genlib::prelude::{BUFRTableB,
// BUFRTableD, ...}`, but the aliases below are commented out here. If they now
// live elsewhere (e.g. lib.rs), delete these dead comments; otherwise restore
// them with the current module paths (btable/dtable moved under `wmo`).
// pub type BUFRTableD = crate::BUFRTableMPH<DTableCsvLoader>;
// pub type BUFRTableB = crate::BUFRTableMPH<crate::btable::BTableCsvLoader>;
pub use crate::BUFRTableMPH;
pub use crate::FXY;
pub use crate::TableType;

179
gen/src/wmo/btable.rs Normal file
View File

@ -0,0 +1,179 @@
use crate::{FXY, TableEntry, TableEntryLoader};
use rkyv::Archive;
/// Stateless CSV loader for WMO BUFR Table B rows; each CSV row maps to one
/// complete [`BTableEntry`], so no accumulation state is needed (contrast
/// with the chain-building D-table loader).
pub struct BTableCsvLoader;
/// One raw row of the WMO "BUFRCREX_TableB" CSV export; field names are bound
/// to the CSV header columns via `serde(rename)`.
///
/// The CREX columns may be empty in the source CSV, hence the `Option`
/// types — presumably relying on the csv deserializer mapping empty fields
/// to `None`; TODO confirm (see the unused `deserialize_optional_*` helpers
/// defined below this struct).
#[derive(Debug, serde::Deserialize)]
pub struct RawBTableEntry {
    #[serde(rename = "ClassNo")]
    pub class_no: String,
    #[serde(rename = "ClassName_en")]
    pub class_name_en: String,
    // Descriptor id as a 6-digit string, e.g. "001001" (F=0, X=01, Y=001).
    #[serde(rename = "FXY")]
    pub fxy: String,
    #[serde(rename = "ElementName_en")]
    pub element_name_en: String,
    #[serde(rename = "BUFR_Unit")]
    pub bufr_unit: String,
    // Scale and reference value can be negative in Table B, hence i32.
    #[serde(rename = "BUFR_Scale")]
    pub bufr_scale: i32,
    #[serde(rename = "BUFR_ReferenceValue")]
    pub bufr_reference_value: i32,
    #[serde(rename = "BUFR_DataWidth_Bits")]
    pub bufr_datawidth_bits: u32,
    #[serde(rename = "CREX_Unit")]
    pub crex_unit: Option<String>,
    #[serde(rename = "CREX_Scale")]
    pub crex_scale: Option<i32>,
    #[serde(rename = "CREX_DataWidth_Char")]
    pub crex_datawidth_char: Option<u32>,
    #[serde(rename = "Note_en")]
    pub note_en: Option<String>,
    #[serde(rename = "noteIDs")]
    pub note_ids: Option<String>,
    #[serde(rename = "Status")]
    pub status: String,
}
// Helper function to deserialize empty strings as None.
// NOTE(review): neither helper below is referenced by `RawBTableEntry` —
// no field carries `#[serde(deserialize_with = ...)]` — so both appear to
// be dead code. Either wire them onto the optional CREX fields or remove
// them to silence dead-code warnings.
fn deserialize_optional_string<'de, D>(deserializer: D) -> Result<Option<String>, D::Error>
where
    D: serde::Deserializer<'de>,
{
    let s: String = serde::Deserialize::deserialize(deserializer)?;
    if s.is_empty() { Ok(None) } else { Ok(Some(s)) }
}
// Helper function to deserialize empty strings as None for u32.
// NOTE(review): also unused; if kept, note its Option<u32> return does not
// match `crex_scale: Option<i32>` — a separate i32 variant would be needed.
fn deserialize_optional_u32<'de, D>(deserializer: D) -> Result<Option<u32>, D::Error>
where
    D: serde::Deserializer<'de>,
{
    let s: String = serde::Deserialize::deserialize(deserializer)?;
    if s.is_empty() {
        Ok(None)
    } else {
        s.parse::<u32>().map(Some).map_err(serde::de::Error::custom)
    }
}
/// Processed Table B entry as stored in the generated `.bufrtbl` file.
/// Built from [`RawBTableEntry`]; the CREX-specific CSV columns are
/// intentionally dropped.
///
/// CAUTION: this type is encoded with rkyv (and serde); changing the field
/// set or order may invalidate previously generated table files.
#[derive(
    Debug, Clone, serde::Deserialize, serde::Serialize, Archive, rkyv::Serialize, rkyv::Deserialize,
)]
#[rkyv(compare(PartialEq), derive(Debug))]
pub struct BTableEntry {
    fxy: FXY,                      // descriptor id (F-X-Y)
    class_name_en: String,         // English class name
    element_name_en: String,       // English element name
    bufr_unit: String,             // BUFR unit string
    bufr_scale: i32,               // decimal scale (may be negative)
    bufr_reference_value: i32,     // reference value (may be negative)
    bufr_datawidth_bits: u32,      // encoded field width in bits
    note_en: Option<String>,       // optional English note
    note_ids: Option<String>,      // optional note id list
    status: String,                // WMO status, e.g. "Operational"
}
impl BTableEntry {
    /// Descriptor id (F-X-Y) of this element.
    pub fn fxy(&self) -> FXY {
        self.fxy
    }
    /// English name of the class the element belongs to.
    pub fn class_name_en(&self) -> &str {
        &self.class_name_en
    }
    /// English element name.
    pub fn element_name_en(&self) -> &str {
        &self.element_name_en
    }
    /// BUFR unit string (e.g. code-table / numeric unit names from the CSV).
    pub fn bufr_unit(&self) -> &str {
        &self.bufr_unit
    }
    /// Decimal scale applied when decoding values; may be negative.
    pub fn bufr_scale(&self) -> i32 {
        self.bufr_scale
    }
    /// Reference value added when decoding; may be negative.
    pub fn bufr_reference_value(&self) -> i32 {
        self.bufr_reference_value
    }
    /// Encoded field width in bits.
    pub fn bufr_datawidth_bits(&self) -> u32 {
        self.bufr_datawidth_bits
    }
    /// Optional English note text, if present in the source CSV.
    pub fn note_en(&self) -> Option<&str> {
        self.note_en.as_deref()
    }
    /// Optional note id list, if present in the source CSV.
    pub fn note_ids(&self) -> Option<&str> {
        self.note_ids.as_deref()
    }
    /// WMO status string (e.g. "Operational").
    pub fn status(&self) -> &str {
        &self.status
    }
}
impl std::fmt::Display for BTableEntry {
    /// Renders one fixed-width row matching the header printed by the
    /// `print` command: FXY | name | unit | scale | ref | width | status.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Truncate on char boundaries. The previous code sliced by byte
        // index (`&s[..37]`), which panics whenever the cut lands inside a
        // multi-byte UTF-8 character, and compared byte length to a column
        // width measured in characters.
        fn clip(s: &str, max_chars: usize) -> String {
            if s.chars().count() > max_chars {
                let head: String = s.chars().take(max_chars - 3).collect();
                format!("{}...", head)
            } else {
                s.to_string()
            }
        }
        let element_name = clip(&self.element_name_en, 40);
        let unit = clip(&self.bufr_unit, 15);
        write!(
            f,
            "{:02}{:02}{:03} | {:<40} | {:<15} | {:>5} | {:>8} | {:>8} | {}",
            self.fxy.f,
            self.fxy.x,
            self.fxy.y,
            element_name,
            unit,
            self.bufr_scale,
            self.bufr_reference_value,
            self.bufr_datawidth_bits,
            self.status
        )
    }
}
impl TableEntryLoader for BTableCsvLoader {
    type RawEntry = RawBTableEntry;
    type TableEntry = BTableEntry;
    const TABLE_TYPE: crate::TableType = crate::TableType::B;
    /// Converts one raw CSV row into a stored entry.
    ///
    /// Table B rows are self-contained, so every row yields `Some(entry)`
    /// immediately (unlike Table D, where consecutive rows are accumulated
    /// into an FXY chain before an entry is emitted).
    fn process_entry(&mut self, raw: Self::RawEntry) -> anyhow::Result<Option<Self::TableEntry>> {
        // Parse FXY string (e.g., "001001") into the packed descriptor id;
        // a malformed id aborts the row with an error.
        let fxy = FXY::from_str(&raw.fxy)?;
        let entry = BTableEntry {
            fxy,
            class_name_en: raw.class_name_en,
            element_name_en: raw.element_name_en,
            bufr_unit: raw.bufr_unit,
            bufr_scale: raw.bufr_scale,
            bufr_reference_value: raw.bufr_reference_value,
            bufr_datawidth_bits: raw.bufr_datawidth_bits,
            note_en: raw.note_en,
            note_ids: raw.note_ids,
            status: raw.status,
        };
        Ok(Some(entry))
    }
}
/// Exposes the descriptor id so the MPH table can key entries by FXY.
impl TableEntry for BTableEntry {
    fn fxy(&self) -> FXY {
        self.fxy
    }
}

View File

@ -1,11 +1,11 @@
use crate::{FXY, TableEntryLoader};
use crate::{FXY, TableEntry, TableEntryLoader};
use rkyv::Archive;
#[derive(Debug, Clone, Default)]
pub struct DTableCsvLoader {
current_chain: Option<DTableEntry>,
entries: Vec<DTableEntry>,
}
// Category,CategoryOfSequences_en,FXY1,Title_en,SubTitle_en,FXY2,ElementName_en,ElementDescription_en,Note_en,noteIDs,Status
#[derive(Debug, serde::Deserialize)]
pub struct RawDTableEntry {
#[serde(rename = "Category")]
@ -21,9 +21,9 @@ pub struct RawDTableEntry {
#[serde(rename = "FXY2")]
pub fxy2: String,
#[serde(rename = "ElementName_en")]
pub element_name_en: Option<String>,
pub _element_name_en: Option<String>,
#[serde(rename = "ElementDescription_en")]
pub element_description_en: Option<String>,
pub _element_description_en: Option<String>,
#[serde(rename = "Note_en")]
pub note_en: Option<String>,
#[serde(rename = "noteIDs")]
@ -34,8 +34,10 @@ pub struct RawDTableEntry {
impl TableEntryLoader for DTableCsvLoader {
type RawEntry = RawDTableEntry;
type TableEntry = DTableEntry;
const TABLE_TYPE: crate::TableType = crate::TableType::D;
fn process_entry(&mut self, raw: Self::RawEntry) -> anyhow::Result<()> {
fn process_entry(&mut self, raw: Self::RawEntry) -> anyhow::Result<Option<Self::TableEntry>> {
// Process the raw entry as needed
if self.current_chain.is_none() {
let entry = DTableEntry {
@ -50,10 +52,14 @@ impl TableEntryLoader for DTableCsvLoader {
status: raw.status,
};
self.current_chain = Some(entry);
return Ok(None);
} else {
let fxy = FXY::from_str(&raw.fxy1)?;
if self.current_chain.as_ref().unwrap().fxy != fxy {
self.entries.push(self.current_chain.take().unwrap());
// First take out the old completed chain
let finished = self.current_chain.take();
// Then create and save the new chain
let entry = DTableEntry {
fxy,
fxy_chain: vec![FXY::from_str(&raw.fxy2)?],
@ -66,25 +72,30 @@ impl TableEntryLoader for DTableCsvLoader {
status: raw.status,
};
self.current_chain = Some(entry);
// Return the old completed chain
return Ok(finished);
} else {
self.current_chain
.as_mut()
.unwrap()
.fxy_chain
.push(FXY::from_str(&raw.fxy2)?);
}
}
Ok(())
}
fn finish(&mut self) -> anyhow::Result<()> {
if let Some(entry) = self.current_chain.take() {
self.entries.push(entry);
return Ok(None);
}
Ok(())
}
}
fn finish(&mut self) -> anyhow::Result<Option<Self::TableEntry>> {
Ok(self.current_chain.take())
}
}
#[derive(
Debug, Clone, serde::Deserialize, serde::Serialize, Archive, rkyv::Serialize, rkyv::Deserialize,
)]
#[rkyv(compare(PartialEq), derive(Debug))]
pub struct DTableEntry {
fxy: FXY,
fxy_chain: Vec<FXY>,
@ -96,3 +107,71 @@ pub struct DTableEntry {
note_ids: String,
status: String,
}
impl DTableEntry {
    /// Descriptor id (F-X-Y) of this sequence.
    pub fn fxy(&self) -> FXY {
        self.fxy
    }
    /// Expansion of the sequence: the ordered child descriptors (FXY2 column
    /// of consecutive CSV rows sharing the same FXY1).
    pub fn fxy_chain(&self) -> &[FXY] {
        &self.fxy_chain
    }
    /// Category number from the CSV.
    pub fn category(&self) -> &str {
        &self.category
    }
    /// English category-of-sequences description.
    pub fn category_of_sequences_en(&self) -> &str {
        &self.category_of_sequences_en
    }
    /// Optional English title of the sequence.
    pub fn title_en(&self) -> Option<&str> {
        self.title_en.as_deref()
    }
    /// Optional English subtitle of the sequence.
    pub fn subtitle_en(&self) -> Option<&str> {
        self.subtitle_en.as_deref()
    }
    /// Optional English note text.
    pub fn note_en(&self) -> Option<&str> {
        self.note_en.as_deref()
    }
    /// Note id list (may be an empty string — stored non-optional here).
    pub fn note_ids(&self) -> &str {
        &self.note_ids
    }
    /// WMO status string (e.g. "Operational").
    pub fn status(&self) -> &str {
        &self.status
    }
}
impl std::fmt::Display for DTableEntry {
    /// Renders one fixed-width row matching the header printed by the
    /// `print` command: FXY | Title | Status | [FXY Chain].
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let fxy_chain_str: String = self
            .fxy_chain
            .iter()
            .map(|fxy| format!("{:02}{:02}{:03}", fxy.f, fxy.x, fxy.y))
            .collect::<Vec<_>>()
            .join(", ");
        let title = self.title_en.as_deref().unwrap_or("N/A");
        // Truncate on char boundaries. The previous code sliced by byte
        // index (`&title[..47]`), which panics whenever the cut lands inside
        // a multi-byte UTF-8 character, and compared byte length to a column
        // width measured in characters.
        let truncated_title = if title.chars().count() > 50 {
            let head: String = title.chars().take(47).collect();
            format!("{}...", head)
        } else {
            title.to_string()
        };
        write!(
            f,
            "{:02}{:02}{:03} | {:<50} | {:<12} | [{}]",
            self.fxy.f, self.fxy.x, self.fxy.y, truncated_title, self.status, fxy_chain_str
        )
    }
}
/// Exposes the descriptor id so the MPH table can key entries by FXY.
impl TableEntry for DTableEntry {
    fn fxy(&self) -> FXY {
        self.fxy
    }
}

2
gen/src/wmo/mod.rs Normal file
View File

@ -0,0 +1,2 @@
pub mod btable;
pub mod dtable;

View File

@ -14,3 +14,5 @@ flate2 = "1.1.5"
nom = "8.0.0"
serde = { version = "1.0.228", features = ["derive"] }
thiserror = "2.0.17"
gentools = { path = "../gen" }
anyhow = "1.0.100"

View File

@ -1,5 +1,8 @@
use genlib::{BUFRTableMPH, TableEntryLoader};
use crate::errors::Result;
use crate::{discriptor_table::*, structs::versions::BUFRMessage};
use crate::structs::versions::BUFRMessage;
use crate::tables::*;
pub struct MessageBlock {
message: BUFRMessage,
@ -18,32 +21,47 @@ impl MessageBlock {
pub fn load_data(&self) -> Result<()> {
let table_info = self.message.table_info();
let master_table_version = table_info.master_table_version;
let mut b_table_loader = TableLoader::<BTable>::new();
let master_b_sequences =
b_table_loader.load_table(TT::Standard, table_info.master_table_version)?;
let mut d_table_loader = TableLoader::<DTable>::new();
let master_d_sequences =
d_table_loader.load_table(TT::Standard, table_info.master_table_version)?;
let master_b: BUFRTableB = self.load_first_validable_table(master_table_version)?;
let master_d: BUFRTableD = self.load_first_validable_table(master_table_version)?;
let local_table_version = table_info.local_table_version as u32;
if local_table_version > 0 {
let local_b_sequences = b_table_loader.load_table(
TT::Localized(local_table_version),
let local_b: BUFRTableB = TableLoader.load_table(LocalTable::new(
Some(table_info.subcenter_id),
table_info.local_table_version,
)?;
))?;
let local_d_sequences = d_table_loader.load_table(
TT::Localized(local_table_version),
let local_d: BUFRTableD = TableLoader.load_table(LocalTable::new(
Some(table_info.subcenter_id),
table_info.local_table_version,
)?;
))?;
}
Ok(())
// master_b_table.load_table(TT::Standard);
}
    /// Loads the newest master table available on disk, starting at
    /// `table_version` and falling back one version at a time down to 0.
    ///
    /// A warning is written to stderr whenever a lower version than the one
    /// requested is used. Returns `Error::TableNotFoundEmpty` when no
    /// version loads at all.
    fn load_first_validable_table<E: TableEntryLoader>(
        &self,
        table_version: u8,
    ) -> Result<BUFRTableMPH<E>> {
        (0..=table_version)
            .rev()
            .find_map(|version| {
                // `.ok()` treats any load failure as "this version is
                // missing"; find_map stops at the first version that loads.
                TableLoader
                    .load_table(MasterTable::new(version))
                    .ok()
                    .inspect(|_| {
                        if version != table_version {
                            eprintln!("Falling back to Master Table version {}", version);
                        }
                    })
            })
            .ok_or(crate::errors::Error::TableNotFoundEmpty)
    }
}
pub struct BUFRFile {

View File

@ -9,8 +9,11 @@ pub enum Error {
#[error("CSV Error: {0}")]
Csv(#[from] csv::Error),
#[error("Table not found for type {0:?}, sub_center {1:?}, version {2}")]
TableNotFound(crate::discriptor_table::TableType, Option<u32>, u8),
#[error("Table not found: {0}")]
TableNotFound(#[from] anyhow::Error),
#[error("Table not found")]
TableNotFoundEmpty,
#[error("Parse Error: {0}")]
ParseError(String),

View File

@ -1,5 +1,39 @@
mod block;
mod discriptor_table;
mod tables;
// mod discriptor_table;
mod errors;
pub mod parser;
pub mod structs;
#[cfg(test)]
mod test {
    // NOTE(review): both tests depend on machine-specific absolute paths
    // (/Users/xiang.li1/...), so they only pass on the original author's
    // machine. Consider committing small fixtures and resolving paths via
    // env!("CARGO_MANIFEST_DIR") so CI can run them.
    #[test]
    fn test() {
        use genlib::prelude::*;
        // Loads a pre-built Table B file and looks up descriptor 0-00-001.
        let bufr = BUFRTableB::load_from_disk(
            "/Users/xiang.li1/projects/rbufr/tables/BUFRCREX_TableB_en_00.bufrtbl",
        )
        .unwrap();
        let entry = bufr.lookup(FXY::new(0, 0, 1)).unwrap().unwrap();
        println!("{:#?}", entry);
    }
    #[test]
    fn test_rb() {
        use crate::parser::Parser;
        // Parses a sample BUFR file, then exercises table loading for every
        // message it contains.
        let mut parser = Parser::new();
        let parsed_file = parser
            .parse("/Users/xiang.li1/Downloads/36_2025-12-22T11_00_00.bufr")
            .unwrap();
        for msg in parsed_file.messages() {
            println!("{}", msg);
            msg.load_data().unwrap();
        }
    }
}

View File

@ -147,19 +147,3 @@ impl Parser {
Ok(file_block)
}
}
#[cfg(test)]
mod tests {
use super::Parser;
#[test]
fn test() {
let mut parser = Parser::new();
if let Ok(file) = parser.parse("/Users/xiang.li1/Downloads/36_2025-12-22T11_00_00.bufr") {
for message in file.messages() {
println!("{}", message);
}
}
}
}

102
rbufr/src/tables.rs Normal file
View File

@ -0,0 +1,102 @@
use crate::errors::Result;
pub use genlib::prelude::{BUFRTableB, BUFRTableD, TableType};
use genlib::{TableEntryLoader, prelude::*};
use std::path::PathBuf;
/// Maps a logical table descriptor (master or local, with version and
/// optional sub-center) to the on-disk `.bufrtbl` file for a table type.
pub trait TableTrait {
    /// Returns the path of the serialized table file for `table_type`.
    fn file_path(&self, table_type: TableType) -> PathBuf;
}
/// Identifies a WMO master table by version; file lookup is provided by its
/// [`TableTrait`] impl (files live under `tables/master`).
#[derive(Debug, Clone, Copy)]
pub struct MasterTable {
    version: u8,
}
impl MasterTable {
    /// Creates a descriptor for master-table `version`.
    pub fn new(version: u8) -> Self {
        MasterTable { version }
    }
}
/// Identifies a locally extended table by sub-center and version; file
/// lookup is provided by its [`TableTrait`] impl (files live under
/// `tables/local`, with `sc00` used when no sub-center is given).
#[derive(Debug, Clone, Copy)]
pub struct LocalTable {
    sub_center: Option<u16>,
    version: u8,
}
impl LocalTable {
    /// Creates a descriptor for a local table of `version`, optionally
    /// scoped to `sub_center`.
    pub fn new(sub_center: Option<u16>, version: u8) -> Self {
        LocalTable {
            sub_center,
            version,
        }
    }
}
impl TableTrait for MasterTable {
    /// Master tables live under `tables/master` and are named by the table
    /// letter (B/D) and the zero-padded master-table version, e.g.
    /// `tables/master/BUFR_TableB_en_07.bufrtbl`.
    fn file_path(&self, table_type: TableType) -> PathBuf {
        // The two arms previously duplicated the whole path construction;
        // only the table letter actually differs.
        let letter = match table_type {
            TableType::B => "B",
            TableType::D => "D",
            _ => {
                unreachable!("Table type not supported for MasterTable")
            }
        };
        PathBuf::from("tables/master").join(format!(
            "BUFR_Table{}_en_{:0>2}.bufrtbl",
            letter, self.version
        ))
    }
}
impl TableTrait for LocalTable {
    /// Local tables live under `tables/local` and additionally encode the
    /// sub-center, e.g. `tables/local/BUFR_TableB_sc05_en_01.bufrtbl`.
    fn file_path(&self, table_type: TableType) -> PathBuf {
        // The two arms previously duplicated the whole path construction;
        // only the table letter actually differs.
        let letter = match table_type {
            TableType::B => "B",
            TableType::D => "D",
            _ => {
                unreachable!("Table type not supported for LocalTable")
            }
        };
        // A missing sub-center falls back to "sc00", matching the naming
        // used by the table generator.
        let sub_center_str = match self.sub_center {
            Some(sc) => format!("sc{:0>2}", sc),
            None => "sc00".to_string(),
        };
        PathBuf::from("tables/local").join(format!(
            "BUFR_Table{}_{}_en_{:0>2}.bufrtbl",
            letter, sub_center_str, self.version
        ))
    }
}
/// Thin, stateless facade that resolves a table descriptor to a file path
/// and memory-loads the serialized table.
pub struct TableLoader;

impl TableLoader {
    /// Loads the table described by `table_type` for entry-loader `T`,
    /// using `T::TABLE_TYPE` to select the B/D file name.
    pub fn load_table<T>(&self, table_type: impl TableTrait) -> Result<BUFRTableMPH<T>>
    where
        T: TableEntryLoader,
    {
        let table_file = table_type.file_path(T::TABLE_TYPE);
        // `?` converts the underlying load error through `From`, which is
        // the same conversion the previous `.map_err(|e| e.into())` did.
        Ok(BUFRTableMPH::<T>::load_from_disk(table_file)?)
    }
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show More