This commit is contained in:
tsuki 2025-12-26 23:47:02 +08:00
parent b8b31c4e69
commit c3323841d5
282 changed files with 2133 additions and 4567 deletions

61
Cargo.lock generated
View File

@ -614,8 +614,10 @@ dependencies = [
"glob",
"memmap2",
"ph",
"regex",
"rkyv",
"serde",
"toml",
]
[[package]]
@ -1444,6 +1446,15 @@ dependencies = [
"serde_core",
]
[[package]]
name = "serde_spanned"
version = "0.6.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3"
dependencies = [
"serde",
]
[[package]]
name = "shlex"
version = "1.3.0"
@ -1653,6 +1664,47 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
[[package]]
name = "toml"
version = "0.8.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362"
dependencies = [
"serde",
"serde_spanned",
"toml_datetime",
"toml_edit",
]
[[package]]
name = "toml_datetime"
version = "0.6.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c"
dependencies = [
"serde",
]
[[package]]
name = "toml_edit"
version = "0.22.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a"
dependencies = [
"indexmap",
"serde",
"serde_spanned",
"toml_datetime",
"toml_write",
"winnow",
]
[[package]]
name = "toml_write"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801"
[[package]]
name = "unicode-ident"
version = "1.0.22"
@ -1987,6 +2039,15 @@ version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
[[package]]
name = "winnow"
version = "0.7.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829"
dependencies = [
"memchr",
]
[[package]]
name = "wit-bindgen"
version = "0.46.0"

View File

@ -20,6 +20,12 @@ clap = { version = "4.5.53", features = ["derive"] }
csv = "1.4.0"
glob = "0.3"
memmap2 = "0.9"
regex = "1.11"
rkyv = { version = "0.8.12" }
serde = { version = "1.0.228", features = ["derive"] }
toml = "0.8"
binout = "0.3.1"
[features]
default = ['opera']
opera = []

250
gen/src/config.rs Normal file
View File

@ -0,0 +1,250 @@
use anyhow::{Context, Result};
use regex::Regex;
use serde::{Deserialize, Serialize};
use std::path::Path;
use crate::pattern::{TableFilePattern, TableKind, TableMetadata};
/// Configuration for a single custom table-filename pattern.
///
/// Loaded from TOML as part of [`ScanConfig`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PatternConfig {
/// Human-readable name/description of this pattern (used in logs and error messages)
pub name: String,
/// Regular expression applied to the filename.
/// Which capture group feeds which metadata field is defined by `mapping`;
/// there is no fixed group order.
pub regex: String,
/// Glob pattern used when scanning directories for candidate files
pub glob: String,
/// Mapping of regex capture-group indices to metadata fields
pub mapping: FieldMapping,
}
/// Defines which regex capture group corresponds to which metadata field.
///
/// Group indices follow the `regex` crate convention: group 0 is the whole
/// match, so the first parenthesized group is index 1 (see the mappings in
/// `ScanConfig::default_example` for concrete examples).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FieldMapping {
/// Capture group index for table kind; the captured text must be
/// "b" or "d" (matched case-insensitively)
pub kind_group: usize,
/// Optional capture group index for the table version number
pub version_group: Option<usize>,
/// Optional capture group index for subcenter
pub subcenter_group: Option<usize>,
/// Optional capture group index for center
pub center_group: Option<usize>,
/// Optional capture group index for language
pub language_group: Option<usize>,
/// Whether files matched by this pattern are local tables
/// (as opposed to standard WMO tables)
pub is_local: bool,
}
/// Runtime pattern compiled from a [`PatternConfig`].
pub struct ConfigurablePattern {
// Human-readable description, reported via `description()`
name: String,
// Compiled form of the config's regex string
regex: Regex,
// Glob used for directory scanning, returned by `glob_pattern()`
glob: String,
// Capture-group-to-field mapping consumed by `matches()`
mapping: FieldMapping,
}
impl ConfigurablePattern {
    /// Build a runtime pattern from its configuration.
    ///
    /// # Errors
    /// Fails when the configured regular expression does not compile.
    pub fn from_config(config: &PatternConfig) -> Result<Self> {
        let compiled = Regex::new(&config.regex)
            .with_context(|| format!("Invalid regex pattern: {}", config.regex))?;

        Ok(Self {
            name: config.name.clone(),
            glob: config.glob.clone(),
            mapping: config.mapping.clone(),
            regex: compiled,
        })
    }
}
impl TableFilePattern for ConfigurablePattern {
    /// Match `filename` against the compiled regex and, on success, assemble
    /// [`TableMetadata`] from the capture groups selected by the mapping.
    ///
    /// Returns `None` when the regex does not match, when the kind group is
    /// absent, or when the kind text is neither "b" nor "d"
    /// (case-insensitive).
    fn matches(&self, filename: &str) -> Option<TableMetadata> {
        let caps = self.regex.captures(filename)?;

        // The kind group is mandatory and must spell a known table kind.
        let kind_text = caps.get(self.mapping.kind_group)?.as_str().to_lowercase();
        let kind = match kind_text.as_str() {
            "b" => TableKind::B,
            "d" => TableKind::D,
            _ => return None,
        };

        // Optional numeric fields: an unconfigured mapping, a missing capture,
        // or an unparsable value each degrade gracefully to `None`.
        let version = self
            .mapping
            .version_group
            .and_then(|idx| caps.get(idx))
            .and_then(|m| m.as_str().parse().ok());
        let subcenter = self
            .mapping
            .subcenter_group
            .and_then(|idx| caps.get(idx))
            .and_then(|m| m.as_str().parse().ok());
        let center = self
            .mapping
            .center_group
            .and_then(|idx| caps.get(idx))
            .and_then(|m| m.as_str().parse().ok());

        // Language is kept verbatim (no parsing) when captured.
        let language = self
            .mapping
            .language_group
            .and_then(|idx| caps.get(idx))
            .map(|m| m.as_str().to_string());

        Some(TableMetadata {
            kind,
            version,
            subcenter,
            center,
            language,
            is_local: self.mapping.is_local,
            filename: filename.to_string(),
        })
    }

    fn glob_pattern(&self) -> &str {
        &self.glob
    }

    fn description(&self) -> &str {
        &self.name
    }
}
/// Full configuration file structure.
///
/// Deserialized from a TOML file via [`ScanConfig::load_from_file`]; the
/// `patterns` array may be omitted entirely (serde default = empty list).
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ScanConfig {
/// List of custom patterns
#[serde(default)]
pub patterns: Vec<PatternConfig>,
}
impl ScanConfig {
    /// Load configuration from a TOML file.
    ///
    /// # Errors
    /// Fails when the file cannot be read or when its contents do not parse
    /// as a valid `ScanConfig` TOML document.
    pub fn load_from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
        let path = path.as_ref();
        let content = std::fs::read_to_string(path)
            .with_context(|| format!("Failed to read config file: {}", path.display()))?;
        toml::from_str(&content)
            .with_context(|| format!("Failed to parse config file: {}", path.display()))
    }

    /// Create a default configuration populated with example patterns
    /// (ECMWF- and NCEP-style local table filenames).
    pub fn default_example() -> Self {
        let ecmwf = PatternConfig {
            name: "ECMWF local tables".to_string(),
            regex: r"^ecmwf_table([bd])_v(\d+)\.csv$".to_string(),
            glob: "ecmwf_table*.csv".to_string(),
            mapping: FieldMapping {
                kind_group: 1,
                version_group: Some(2),
                subcenter_group: None,
                center_group: None,
                language_group: None,
                is_local: true,
            },
        };
        let ncep = PatternConfig {
            name: "NCEP local tables".to_string(),
            regex: r"^ncep_bufrtab\.(\d+)\.([bd])$".to_string(),
            glob: "ncep_bufrtab.*".to_string(),
            mapping: FieldMapping {
                // NCEP filenames put the version before the kind letter.
                kind_group: 2,
                version_group: Some(1),
                subcenter_group: None,
                center_group: None,
                language_group: None,
                is_local: true,
            },
        };
        Self {
            patterns: vec![ecmwf, ncep],
        }
    }

    /// Save this configuration to a TOML file (pretty-printed).
    ///
    /// # Errors
    /// Fails when serialization or the filesystem write fails.
    pub fn save_to_file<P: AsRef<Path>>(&self, path: P) -> Result<()> {
        let path = path.as_ref();
        let content = toml::to_string_pretty(self).context("Failed to serialize config")?;
        std::fs::write(path, content)
            .with_context(|| format!("Failed to write config file: {}", path.display()))?;
        Ok(())
    }

    /// Compile every configured pattern into a boxed [`TableFilePattern`].
    ///
    /// # Errors
    /// Fails on the first pattern whose regex does not compile.
    pub fn compile_patterns(&self) -> Result<Vec<Box<dyn TableFilePattern>>> {
        self.patterns
            .iter()
            .map(|config| {
                ConfigurablePattern::from_config(config)
                    .map(|p| Box::new(p) as Box<dyn TableFilePattern>)
                    .with_context(|| format!("Failed to compile pattern: {}", config.name))
            })
            .collect()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A pattern built from config should extract kind and version from
    /// matching filenames and report the configured locality flag.
    #[test]
    fn test_configurable_pattern() {
        let cfg = PatternConfig {
            name: "Test pattern".to_string(),
            regex: r"^test_table([bd])_v(\d+)\.csv$".to_string(),
            glob: "test_table*.csv".to_string(),
            mapping: FieldMapping {
                kind_group: 1,
                version_group: Some(2),
                subcenter_group: None,
                center_group: None,
                language_group: None,
                is_local: true,
            },
        };
        let pattern = ConfigurablePattern::from_config(&cfg).unwrap();

        let b_meta = pattern.matches("test_tableb_v20.csv").unwrap();
        assert_eq!(b_meta.kind, TableKind::B);
        assert_eq!(b_meta.version, Some(20));
        assert!(b_meta.is_local);

        let d_meta = pattern.matches("test_tabled_v15.csv").unwrap();
        assert_eq!(d_meta.kind, TableKind::D);
        assert_eq!(d_meta.version, Some(15));
    }

    /// The example config must round-trip through TOML serialization.
    #[test]
    fn test_config_serialization() {
        let original = ScanConfig::default_example();
        let toml_str = toml::to_string_pretty(&original).unwrap();
        println!("Example config:\n{}", toml_str);
        let round_tripped: ScanConfig = toml::from_str(&toml_str).unwrap();
        assert_eq!(round_tripped.patterns.len(), original.patterns.len());
    }
}

View File

@ -19,43 +19,36 @@ impl EntryLoader for BTableLoader {
type TableType = BTable;
fn process_entry(&mut self, raw: csv::StringRecord) -> anyhow::Result<Option<Self::Output>> {
let f = raw
.get(0)
.ok_or_else(|| anyhow::anyhow!("Missing F field"))?
.parse()?;
let parse_num_field = |index: usize| {
raw.get(index)
.map(|s| {
let mut s = s.to_string();
s.retain(|c| c.is_alphanumeric());
s
})
.ok_or_else(|| anyhow::anyhow!("Missing field at index {}", index))
};
let x = raw
.get(1)
.ok_or_else(|| anyhow::anyhow!("Missing X field"))?
.parse()?;
let parse_field = |index: usize| {
raw.get(index)
.map(|s| {
let s = s.to_string();
s
})
.ok_or_else(|| anyhow::anyhow!("Missing field at index {}", index))
};
let y = raw
.get(2)
.ok_or_else(|| anyhow::anyhow!("Missing Y field"))?
.parse()?;
let f = parse_num_field(0)?.parse()?;
let x = parse_num_field(1)?.parse()?;
let y = parse_num_field(2)?.parse()?;
let fxy = FXY::new(f, x, y);
let class_name_en = raw
.get(3)
.ok_or_else(|| anyhow::anyhow!("Missing Class Name EN"))?
.to_string();
let bufr_unit = raw
.get(4)
.ok_or_else(|| anyhow::anyhow!("Missing BUFR Unit"))?
.to_string();
let bufr_scale = raw
.get(5)
.ok_or_else(|| anyhow::anyhow!("Missing Scaling Field"))?
.parse()?;
let bufr_reference_value = raw
.get(6)
.ok_or_else(|| anyhow::anyhow!("Missing Reference Value Field"))?
.parse()?;
let bufr_datawidth_bits = raw
.get(7)
.ok_or_else(|| anyhow::anyhow!("Missing Datawidth Bits Field"))?
.parse()?;
let class_name_en = parse_field(3)?;
let bufr_unit = parse_field(4)?;
let bufr_scale = parse_num_field(5)?.parse()?;
let bufr_reference_value = parse_num_field(6)?.parse()?;
let bufr_datawidth_bits = parse_num_field(7)?.parse()?;
let entry = BTableEntry {
fxy,

View File

@ -4,10 +4,12 @@ use crate::{
tables::{DTable, DTableEntry},
};
use csv::StringRecord;
use std::collections::HashSet;
#[derive(Default)]
pub struct FRDTableLoader {
current_chain: Option<DTableEntry>,
seen_keys: HashSet<FXY>,
}
impl EntryLoader for FRDTableLoader {
@ -15,38 +17,58 @@ impl EntryLoader for FRDTableLoader {
type TableType = DTable;
fn process_entry(&mut self, raw: StringRecord) -> anyhow::Result<Option<Self::Output>> {
let f = raw
.get(0)
.ok_or_else(|| anyhow::anyhow!("Missing F field"))?
.parse::<u16>()?;
// Skip empty lines
if raw.len() < 6 {
return Ok(None);
}
let x = raw
.get(1)
.ok_or_else(|| anyhow::anyhow!("Missing X field"))?
.parse::<u16>()?;
if raw.iter().all(|s| s.trim().is_empty()) {
return Ok(None);
}
let y = raw
.get(2)
.ok_or_else(|| anyhow::anyhow!("Missing Y field"))?
.parse::<u16>()?;
let parse_field = |index: usize| {
raw.get(index)
.map(|s| {
let mut s = s.to_string();
s.retain(|c| c.is_alphanumeric());
s
})
.filter(|c| !c.is_empty())
.ok_or_else(|| anyhow::anyhow!("Missing field at index {}", index))
};
let f1 = raw
.get(3)
.ok_or_else(|| anyhow::anyhow!("Missing F1 field"))?;
// Check if this is a new sequence (columns 0-2 are not empty) or a continuation line
let is_new_sequence =
parse_field(0).is_ok() && parse_field(1).is_ok() && parse_field(2).is_ok();
let x1 = raw
.get(4)
.ok_or_else(|| anyhow::anyhow!("Missing X1 field"))?;
if is_new_sequence {
// Parse the sequence descriptor (columns 0-2)
let f = parse_field(0)?.parse()?;
let x = parse_field(1)?.parse()?;
let y = parse_field(2)?.parse()?;
let fxy = FXY::new(f, x, y);
let y1 = raw
.get(5)
.ok_or_else(|| anyhow::anyhow!("Missing Y1 field"))?;
// Check for duplicate key and skip if found
if self.seen_keys.contains(&fxy) {
eprintln!(
"Warning: Duplicate sequence descriptor {:?} - skipping",
fxy
);
// Skip duplicate entry - we'll ignore all lines for this sequence
return Ok(None);
}
let fxy1 = FXY::new(f1.parse()?, x1.parse()?, y1.parse()?);
// Parse the first element in the chain (columns 3-5)
let f1 = parse_field(3)?.parse()?;
let x1 = parse_field(4)?.parse()?;
let y1 = parse_field(5)?.parse()?;
let fxy1 = FXY::new(f1, x1, y1);
let fxy = FXY::new(f, x, y);
// Process the raw entry as needed
if self.current_chain.is_none() {
// If we have a current chain, it's finished - return it
let finished = self.current_chain.take();
// Start a new chain
self.seen_keys.insert(fxy);
let entry = DTableEntry {
fxy,
fxy_chain: vec![fxy1],
@ -59,36 +81,37 @@ impl EntryLoader for FRDTableLoader {
status: None,
};
self.current_chain = Some(entry);
return Ok(None);
return Ok(finished);
} else {
let fxy = FXY::new(f, x, y);
if self.current_chain.as_ref().unwrap().fxy != fxy {
// First take out the old completed chain
let finished = self.current_chain.take();
// Then create and save the new chain
let entry = DTableEntry {
fxy,
fxy_chain: vec![fxy1],
category: None,
category_of_sequences_en: None,
title_en: None,
subtitle_en: None,
note_en: None,
note_ids: None,
status: None,
};
self.current_chain = Some(entry);
return Ok(finished);
} else {
self.current_chain.as_mut().unwrap().fxy_chain.push(fxy1);
return Ok(None);
// Continuation line - add to current chain
if self.current_chain.is_none() {
return Err(anyhow::anyhow!(
"Continuation line without a sequence header"
));
}
// Parse the element in the chain (columns 3-5)
let f1 = parse_field(3)?.parse()?;
let x1 = parse_field(4)?.parse()?;
let y1 = parse_field(5)?.parse()?;
let fxy1 = FXY::new(f1, x1, y1);
self.current_chain.as_mut().unwrap().fxy_chain.push(fxy1);
return Ok(None);
}
}
fn finish(&mut self) -> anyhow::Result<Option<Self::Output>> {
Ok(self.current_chain.take())
let result = self.current_chain.take();
if let Some(ref entry) = result {
println!(
"Finishing with sequence: {:?} ({} elements)",
entry.fxy,
entry.fxy_chain.len()
);
}
Ok(result)
}
}

View File

@ -1,6 +1,6 @@
use crate::{
TableConverter,
tables::{TableEntry, TableEntryFull, TableTypeTrait},
tables::{TableEntryFull, TableTypeTrait},
};
use csv::{ReaderBuilder, StringRecord};
pub mod btable;
@ -25,18 +25,28 @@ impl<C: EntryLoader> TableLoader<C> {
let mut rdr = ReaderBuilder::new()
.has_headers(false)
.delimiter(b';')
.flexible(false)
.flexible(true)
.from_path(path)?;
let mut line_num = 1;
for result in rdr.records() {
line_num += 1;
match result {
Ok(record) => {
if let Some(processed_entry) = loader.process_entry(record)? {
entries.push(processed_entry);
Ok(record) => match loader.process_entry(record) {
Ok(Some(processed_entry)) => {
entries.push(processed_entry);
}
}
Err(e) => {
eprintln!(
"Warning: Skipping line {} in {}: {}",
line_num,
path.display(),
e
);
}
_ => {}
},
Err(e) => {
eprintln!(
"Warning: Skipping line {} in {}: {}",

View File

@ -1,4 +1,10 @@
pub mod config;
pub mod fr;
#[cfg(feature = "opera")]
pub mod opera;
#[cfg(feature = "opera")]
pub use opera::bitmap::OPERABitmap;
pub mod pattern;
pub mod prelude;
pub mod tables;
mod utils;
@ -6,14 +12,17 @@ pub mod wmo;
use anyhow::Context;
use memmap2::Mmap;
use ph::fmph::GOFunction;
use rkyv::Archive;
use rkyv::api::high::HighValidator;
use rkyv::bytecheck::CheckBytes;
use rkyv::rancor::Error;
use rkyv::{Archive, Archived, Deserialize, Serialize};
use serde::{Deserialize as SerdeDeserialize, Serialize as SerdeSerialize};
use std::fmt::Debug;
use std::io::Write;
use std::fmt::{Debug, Display};
use std::io::{Cursor, Write};
use std::path::Path;
use crate::tables::{TableEntryFull, TableTypeTrait};
use crate::tables::{TableEntry, TableEntryFull, TableTypeTrait};
use rkyv::{api::high::to_bytes_with_alloc, ser::allocator::Arena};
pub trait TableConverter {
type OutputEntry: TableEntryFull;
@ -27,15 +36,25 @@ pub trait TableConverter {
struct BufrTableMph<T: TableEntryFull> {
mphf: GOFunction,
offsets: Vec<u64>,
mmap: Mmap,
_marker: std::marker::PhantomData<T>,
}
impl<T: TableEntryFull> BufrTableMph<T> {
fn build(entries: Vec<T>, output_path: &str) -> std::io::Result<Self> {
println!("Building MPH table with {} entries...", entries.len());
#[derive(Archive, Deserialize, Serialize, PartialEq)]
#[rkyv(compare(PartialEq))]
struct BUFRTF<T>
where
T: TableEntryFull,
{
pub function_header: Vec<u8>,
pub entries: Vec<T>,
}
impl<T> BUFRTF<T>
where
T: TableEntryFull,
{
fn new(entries: Vec<T>) -> std::io::Result<Self> {
let keys: Vec<FXY> = entries.iter().map(|e| e.fxy()).collect();
let mphf = GOFunction::from_slice(&keys);
let mut sorted_entries: Vec<(usize, T)> = entries
@ -44,134 +63,84 @@ impl<T: TableEntryFull> BufrTableMph<T> {
.collect();
sorted_entries.sort_by_key(|(hash, _)| *hash);
// Serialize MPHF to bytes
let mut mphf_bytes = Vec::new();
mphf.write(&mut mphf_bytes)
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, format!("{:?}", e)))?;
// Prepare data entries
let mut data_bytes = Vec::new();
let mut offsets = Vec::with_capacity(sorted_entries.len());
Ok(Self {
function_header: mphf_bytes,
entries: sorted_entries.into_iter().map(|(_, e)| e).collect(),
})
}
for (_, entry) in sorted_entries {
let offset = data_bytes.len() as u64;
offsets.push(offset);
fn write_to_disk<P: AsRef<Path>>(&self, path: P) -> anyhow::Result<()> {
let path = path.as_ref();
let mut file = std::fs::File::create(path)?;
let bytes = rkyv::to_bytes::<Error>(self)?;
file.write_all(&bytes)?;
Ok(())
}
}
let bytes = rkyv::to_bytes::<Error>(&entry)
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, format!("{:?}", e)))?;
impl<T: TableEntryFull> BufrTableMph<T>
where
<T as Archive>::Archived: for<'a> CheckBytes<HighValidator<'a, Error>>,
{
fn bufrtbl_path<P: AsRef<Path>>(path: P) -> std::path::PathBuf {
let mut path = path.as_ref().to_path_buf();
path.set_extension("bufrtbl");
path
}
let len = bytes.len() as u32;
data_bytes.write_all(&len.to_le_bytes())?;
data_bytes.write_all(&bytes)?;
}
// Serialize offsets as raw bytes
let offsets_count = offsets.len() as u32;
let mut offsets_bytes = Vec::with_capacity(offsets.len() * 8);
for offset in &offsets {
offsets_bytes.extend_from_slice(&offset.to_le_bytes());
}
// Write merged file
let merged_path = format!("{}.bufrtbl", output_path);
let mut merged_file = std::fs::File::create(&merged_path)?;
// Write header: mphf_size (u32) + offsets_count (u32)
merged_file.write_all(&(mphf_bytes.len() as u32).to_le_bytes())?;
merged_file.write_all(&offsets_count.to_le_bytes())?;
// Write MPHF data
merged_file.write_all(&mphf_bytes)?;
// Write offsets (already 8-byte aligned since each offset is u64)
merged_file.write_all(&offsets_bytes)?;
// Write table entries data
merged_file.write_all(&data_bytes)?;
println!("Built successfully!");
println!(
" Merged file: {} bytes",
std::fs::metadata(&merged_path)?.len()
);
fn build<P: AsRef<Path>>(entries: Vec<T>, output_path: P) -> anyhow::Result<Self> {
let output_path = Self::bufrtbl_path(output_path);
let bufrtf = BUFRTF::new(entries)?;
bufrtf.write_to_disk(&output_path)?;
Self::load(output_path)
}
fn load<P: AsRef<Path>>(path: P) -> std::io::Result<Self> {
let mut path = path.as_ref().to_path_buf();
path.set_extension("bufrtbl");
fn load<P: AsRef<Path>>(path: P) -> anyhow::Result<Self> {
let path = Self::bufrtbl_path(path);
let merged_file = std::fs::File::open(&path)?;
let mmap = unsafe { Mmap::map(&merged_file)? };
// Read header
let mphf_size = u32::from_le_bytes(mmap[0..4].try_into().unwrap()) as usize;
let offsets_count = u32::from_le_bytes(mmap[4..8].try_into().unwrap()) as usize;
let archived = rkyv::access::<ArchivedBUFRTF<T>, Error>(&mmap)?;
let function_reader = &archived.function_header[..];
// Calculate section positions
let mphf_start = 8;
let mphf_end = mphf_start + mphf_size;
let offsets_start = mphf_end;
let offsets_size = offsets_count * 8; // each offset is u64 (8 bytes)
let offsets_end = offsets_start + offsets_size;
let data_start = offsets_end;
// Load MPHF
let mphf_bytes = &mmap[mphf_start..mphf_end];
let mphf: GOFunction = GOFunction::read(&mut &mphf_bytes[..])
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, format!("{:?}", e)))?;
// Load offsets
let offsets_bytes = &mmap[offsets_start..offsets_end];
let mut offsets = Vec::with_capacity(offsets_count);
for i in 0..offsets_count {
let offset_bytes = &offsets_bytes[i * 8..(i + 1) * 8];
let offset = u64::from_le_bytes(offset_bytes.try_into().unwrap());
offsets.push(offset + data_start as u64); // Adjust offset for data section
}
let mut cursor = Cursor::new(function_reader);
Ok(Self {
mphf,
offsets,
mphf: GOFunction::read(&mut cursor)?,
mmap,
_marker: std::marker::PhantomData,
})
}
/// 获取拥有的版本
fn get(&self, fxy: FXY) -> Option<T> {
fn get(&self, fxy: FXY) -> Option<&<T as Archive>::Archived> {
let hash = self.mphf.get(&fxy)? as usize;
let offset = *self.offsets.get(hash)? as usize;
let len_bytes = self.mmap.get(offset..offset + 4)?;
let len = u32::from_le_bytes(len_bytes.try_into().ok()?) as usize;
self.archived().ok()?.entries.get(hash)
}
let data = self.mmap.get(offset + 4..offset + 4 + len)?;
let archived = unsafe { rkyv::access_unchecked::<<T as TableEntryFull>::Archived>(data) };
rkyv::deserialize::<T, Error>(archived).ok()
fn archived(&self) -> anyhow::Result<&ArchivedBUFRTF<T>> {
let archived = rkyv::access::<ArchivedBUFRTF<T>, Error>(&self.mmap)?;
Ok(archived)
}
/// 获取所有条目
fn get_all(&self) -> Vec<T> {
let mut entries = Vec::new();
for offset in &self.offsets {
let offset = *offset as usize;
if let Some(len_bytes) = self.mmap.get(offset..offset + 4) {
if let Ok(len_bytes_array) = len_bytes.try_into() {
let len = u32::from_le_bytes(len_bytes_array) as usize;
if let Some(data) = self.mmap.get(offset + 4..offset + 4 + len) {
let archived = unsafe {
rkyv::access_unchecked::<<T as TableEntryFull>::Archived>(data)
};
if let Ok(entry) = rkyv::deserialize::<T, Error>(archived) {
entries.push(entry);
}
}
}
}
fn get_all(&self) -> Vec<&<T as Archive>::Archived> {
if let Ok(archived) = self.archived() {
let mut result = vec![];
archived.entries.iter().for_each(|entry| {
result.push(entry);
});
result
} else {
vec![]
}
entries
}
}
@ -188,7 +157,7 @@ impl<T: TableEntryFull> BufrTableMph<T> {
Copy,
std::hash::Hash,
)]
#[rkyv(compare(PartialEq), derive(Debug))]
#[rkyv(compare(PartialEq), derive(Debug, Clone, Copy))]
pub struct FXY {
pub f: u16,
pub x: u16,
@ -240,7 +209,10 @@ pub struct BUFRTableMPH<T: TableTypeTrait> {
inner: BufrTableMph<T::EntryType>,
}
impl<T: TableTypeTrait> BUFRTableMPH<T> {
impl<T: TableTypeTrait> BUFRTableMPH<T>
where
<T::EntryType as Archive>::Archived: for<'a> CheckBytes<HighValidator<'a, Error>>,
{
pub fn build_from_csv<P: AsRef<Path>, L: TableConverter>(
loader: L,
path: P,
@ -249,25 +221,25 @@ impl<T: TableTypeTrait> BUFRTableMPH<T> {
where
L: TableConverter<OutputEntry = T::EntryType>,
L: TableConverter<TableType = T>,
<T::EntryType as Archive>::Archived: for<'a> CheckBytes<HighValidator<'a, Error>>,
{
let entries = loader.convert(path)?;
let bhm =
BufrTableMph::<T::EntryType>::build(entries, output_path.as_ref().to_str().unwrap())?;
let bhm = BufrTableMph::<T::EntryType>::build(entries, output_path)?;
Ok(BUFRTableMPH { inner: bhm })
}
pub fn get_all_entries(&self) -> Vec<&<T::EntryType as Archive>::Archived> {
self.inner.get_all()
}
pub fn load_from_disk<P: AsRef<Path>>(path: P) -> anyhow::Result<Self> {
let bhm: BufrTableMph<T::EntryType> = BufrTableMph::load(path)?;
let bhm = BufrTableMph::<T::EntryType>::load(path)?;
Ok(BUFRTableMPH { inner: bhm })
}
pub fn lookup(&self, fxy: FXY) -> anyhow::Result<Option<T::EntryType>> {
Ok(self.inner.get(fxy))
}
pub fn get_all_entries(&self) -> Vec<T::EntryType> {
self.inner.get_all()
pub fn lookup(&self, fxy: FXY) -> Option<&<T::EntryType as Archive>::Archived> {
self.inner.get(fxy)
}
}
@ -280,7 +252,32 @@ pub enum TableType {
#[cfg(test)]
mod test {
use crate::{
BUFRTableMPH, BufrTableMph, FXY,
prelude::{BUFRTableB, BUFRTableD},
wmo::{TableLoader, btable::BTableCsvLoader},
};
#[test]
fn test() {}
fn test() {
let table_loader = TableLoader::<BTableCsvLoader>::default();
BUFRTableB::build_from_csv(
table_loader,
"/Users/xiang.li1/projects/rbufr/BUFR4/BUFRCREX_TableB_en_42.csv",
"./test.bufrtbl",
)
.unwrap();
}
#[test]
fn load() {
let table = BUFRTableD::load_from_disk(
"/Users/xiang.li1/projects/rbufr/rbufr/tables/master/BUFR_TableD_16.bufrtbl",
)
.unwrap();
let x = table.lookup(FXY::new(3, 21, 11)).unwrap();
println!("{:#?}", x);
}
}

View File

@ -2,6 +2,8 @@ use anyhow::{Context, Result, anyhow};
use clap::{Parser, Subcommand};
use genlib::{
TableType,
config::ScanConfig,
pattern::{TableKind, TableScanner},
prelude::{BUFRTableB, BUFRTableD},
};
use std::path::{Path, PathBuf};
@ -29,6 +31,14 @@ enum Commands {
/// Table type to process: "d", "b", or "all"
#[arg(short, long, default_value = "all")]
table_type: String,
/// Optional config file with custom patterns
#[arg(short, long)]
config: Option<PathBuf>,
/// Loader type: "auto" (try all), "wmo" (WMO only), "fr" (French only)
#[arg(short, long, default_value = "auto")]
loader: String,
},
/// Convert a single BUFR table file
Convert {
@ -43,6 +53,10 @@ enum Commands {
/// Table type: "d" for Table D, "b" for Table B
#[arg(short, long)]
table_type: String,
/// Loader type: "auto" (try all), "wmo" (WMO only), "fr" (French only)
#[arg(short, long, default_value = "auto")]
loader: String,
},
/// Print a BUFR table in formatted output
Print {
@ -58,6 +72,12 @@ enum Commands {
#[arg(short, long)]
limit: Option<usize>,
},
/// Generate example configuration file
GenConfig {
/// Output path for the configuration file
#[arg(short, long, default_value = "scan-config.toml")]
output: PathBuf,
},
}
fn main() -> Result<()> {
@ -68,15 +88,18 @@ fn main() -> Result<()> {
input,
output,
table_type,
config,
loader,
} => {
scan_and_convert(&input, &output, &table_type)?;
scan_and_convert(&input, &output, &table_type, config.as_deref(), &loader)?;
}
Commands::Convert {
input,
output,
table_type,
loader,
} => {
convert_single_file(&input, &output, &table_type)?;
convert_single_file(&input, &output, &table_type, &loader)?;
}
Commands::Print {
input,
@ -85,50 +108,108 @@ fn main() -> Result<()> {
} => {
print_table(&input, &table_type, limit)?;
}
Commands::GenConfig { output } => {
generate_config_file(&output)?;
}
}
Ok(())
}
fn scan_and_convert(input_dir: &Path, output_dir: &Path, table_type: &str) -> Result<()> {
fn scan_and_convert(
input_dir: &Path,
output_dir: &Path,
table_type: &str,
config_path: Option<&Path>,
loader_type: &str,
) -> Result<()> {
// Create output directory if it doesn't exist
std::fs::create_dir_all(output_dir).context("Failed to create output directory")?;
println!("Scanning directory: {}", input_dir.display());
println!("Output directory: {}", output_dir.display());
println!("Table type: {}", table_type);
println!("Loader type: {}", loader_type);
println!();
// Create scanner with built-in patterns
let mut scanner = TableScanner::new();
// Load custom patterns from config file if provided
if let Some(config_file) = config_path {
println!("Loading custom patterns from: {}", config_file.display());
let config =
ScanConfig::load_from_file(config_file).context("Failed to load config file")?;
let custom_patterns = config
.compile_patterns()
.context("Failed to compile custom patterns")?;
println!("Loaded {} custom patterns", custom_patterns.len());
for pattern in custom_patterns {
scanner.add_pattern(pattern);
}
println!();
}
// Display registered patterns
println!("Registered patterns:");
for pattern in scanner.patterns() {
println!(" - {}", pattern.description());
}
println!();
// Determine which table kinds to process
let kind_filter = match table_type.to_lowercase().as_str() {
"b" => Some(TableKind::B),
"d" => Some(TableKind::D),
"all" => None,
_ => anyhow::bail!("Invalid table type: {}. Use 'b', 'd', or 'all'", table_type),
};
// Scan directory
let files = scanner
.scan_directory(input_dir, kind_filter)
.context("Failed to scan directory")?;
println!("Found {} matching files", files.len());
println!();
let mut processed_count = 0;
let mut error_count = 0;
// Scan for BUFR_TableD files
if table_type == "d" || table_type == "all" {
println!("Processing Table D files...");
let pattern = input_dir.join("BUFR_TableD_*.csv");
for entry in glob::glob(pattern.to_str().unwrap()).context("Failed to read glob pattern")? {
match entry {
Ok(path) => {
let mut filename = path.file_stem().unwrap().to_str().unwrap().to_string();
if filename.contains("BUFRCREX") {
filename = filename.replace("BUFRCREX_", "BUFR_");
}
let output_path = output_dir.join(filename);
// Group files by table kind for organized output
let mut table_b_files = Vec::new();
let mut table_d_files = Vec::new();
print!(" Converting {} ... ", path.display());
match convert_table_d(&path, &output_path) {
Ok(_) => {
println!("OK");
processed_count += 1;
}
Err(e) => {
println!("ERROR: {}", e);
error_count += 1;
}
}
for (path, metadata) in files {
match metadata.kind {
TableKind::B => table_b_files.push((path, metadata)),
TableKind::D => table_d_files.push((path, metadata)),
}
}
// Process Table D files
if !table_d_files.is_empty() {
println!("Processing Table D files ({})...", table_d_files.len());
for (path, metadata) in table_d_files {
let output_name = metadata.output_name();
let output_path = output_dir.join(&output_name);
let file_type = if metadata.is_local { "local" } else { "WMO" };
print!(
" Converting {} ({}) ... ",
path.file_name().unwrap().to_str().unwrap(),
file_type
);
match convert_table_d(&path, &output_path, loader_type) {
Ok(_) => {
println!("OK -> {}", output_name);
processed_count += 1;
}
Err(e) => {
println!("Error reading file: {}", e);
println!("ERROR: {}", e);
error_count += 1;
}
}
@ -136,34 +217,27 @@ fn scan_and_convert(input_dir: &Path, output_dir: &Path, table_type: &str) -> Re
println!();
}
// Scan for BUFRCREX_TableB files
if table_type == "b" || table_type == "all" {
println!("Processing Table B files...");
let pattern = input_dir.join("BUFRCREX_TableB_*.csv");
// Process Table B files
if !table_b_files.is_empty() {
println!("Processing Table B files ({})...", table_b_files.len());
for (path, metadata) in table_b_files {
let output_name = metadata.output_name();
let output_path = output_dir.join(&output_name);
for entry in glob::glob(pattern.to_str().unwrap()).context("Failed to read glob pattern")? {
match entry {
Ok(path) => {
let mut filename = path.file_stem().unwrap().to_str().unwrap().to_string();
if filename.contains("BUFRCREX") {
filename = filename.replace("BUFRCREX_", "BUFR_");
}
let output_path = output_dir.join(filename);
let file_type = if metadata.is_local { "local" } else { "WMO" };
print!(
" Converting {} ({}) ... ",
path.file_name().unwrap().to_str().unwrap(),
file_type
);
print!(" Converting {} ... ", path.display());
match convert_table_b(&path, &output_path) {
Ok(_) => {
println!("OK");
processed_count += 1;
}
Err(e) => {
eprintln!("ERROR: {}", e);
error_count += 1;
}
}
match convert_table_b(&path, &output_path, loader_type) {
Ok(_) => {
println!("OK -> {}", output_name);
processed_count += 1;
}
Err(e) => {
eprintln!("Error reading file: {}", e);
eprintln!("ERROR: {}", e);
error_count += 1;
}
}
@ -182,16 +256,22 @@ fn scan_and_convert(input_dir: &Path, output_dir: &Path, table_type: &str) -> Re
Ok(())
}
fn convert_single_file(input_path: &Path, output_path: &Path, table_type: &str) -> Result<()> {
fn convert_single_file(
input_path: &Path,
output_path: &Path,
table_type: &str,
loader_type: &str,
) -> Result<()> {
println!(
"Converting {} to {}",
input_path.display(),
output_path.display()
);
println!("Loader type: {}", loader_type);
match table_type.to_lowercase().as_str() {
"d" => convert_table_d(input_path, output_path)?,
"b" => convert_table_b(input_path, output_path)?,
"d" => convert_table_d(input_path, output_path, loader_type)?,
"b" => convert_table_b(input_path, output_path, loader_type)?,
_ => anyhow::bail!("Invalid table type: {}. Use 'd' or 'b'", table_type),
}
@ -232,12 +312,29 @@ fn build_fr_d(input_path: &Path, output_path: &Path) -> Result<()> {
BUFRTableD::build_from_csv(loader, input_path, output_path).map(|_| ())
}
fn convert_table_d(input_path: &Path, output_path: &Path) -> Result<()> {
const ATTEMPTS: &[(&str, BuildFn)] = &[
("WMO Table D loader", build_wmo_d),
("FR Table D loader", build_fr_d),
];
run_with_fallbacks(TableType::D, input_path, output_path, ATTEMPTS)
/// Convert a Table D CSV using the requested loader.
///
/// `loader_type` selects the CSV dialect: "wmo" or "fr" force a single
/// loader, while "auto" tries each known loader in order until one succeeds.
///
/// # Errors
/// Fails when the chosen loader cannot parse the input, or when
/// `loader_type` is not one of "auto", "wmo", "fr".
fn convert_table_d(input_path: &Path, output_path: &Path, loader_type: &str) -> Result<()> {
    let requested = loader_type.to_lowercase();
    if requested == "wmo" {
        // WMO loader only.
        build_wmo_d(input_path, output_path)
    } else if requested == "fr" {
        // French loader only.
        build_fr_d(input_path, output_path)
    } else if requested == "auto" {
        // Try every known loader in order until one succeeds.
        const ATTEMPTS: &[(&str, BuildFn)] = &[
            ("WMO Table D loader", build_wmo_d),
            ("FR Table D loader", build_fr_d),
        ];
        run_with_fallbacks(TableType::D, input_path, output_path, ATTEMPTS)
    } else {
        anyhow::bail!(
            "Invalid loader type: {}. Use 'auto', 'wmo', or 'fr'",
            loader_type
        )
    }
}
fn build_wmo_b(input_path: &Path, output_path: &Path) -> Result<()> {
@ -250,12 +347,29 @@ fn build_fr_b(input_path: &Path, output_path: &Path) -> Result<()> {
BUFRTableB::build_from_csv(loader, input_path, output_path).map(|_| ())
}
fn convert_table_b(input_path: &Path, output_path: &Path) -> Result<()> {
const ATTEMPTS: &[(&str, BuildFn)] = &[
("WMO Table B loader", build_wmo_b),
("FR Table B loader", build_fr_b),
];
run_with_fallbacks(TableType::B, input_path, output_path, ATTEMPTS)
/// Convert a Table B CSV using the requested loader.
///
/// `loader_type` selects the CSV dialect: "wmo" or "fr" force a single
/// loader, while "auto" tries each known loader in order until one succeeds.
///
/// # Errors
/// Fails when the chosen loader cannot parse the input, or when
/// `loader_type` is not one of "auto", "wmo", "fr".
fn convert_table_b(input_path: &Path, output_path: &Path, loader_type: &str) -> Result<()> {
    let requested = loader_type.to_lowercase();
    if requested == "wmo" {
        // WMO loader only.
        build_wmo_b(input_path, output_path)
    } else if requested == "fr" {
        // French loader only.
        build_fr_b(input_path, output_path)
    } else if requested == "auto" {
        // Try every known loader in order until one succeeds.
        const ATTEMPTS: &[(&str, BuildFn)] = &[
            ("WMO Table B loader", build_wmo_b),
            ("FR Table B loader", build_fr_b),
        ];
        run_with_fallbacks(TableType::B, input_path, output_path, ATTEMPTS)
    } else {
        anyhow::bail!(
            "Invalid loader type: {}. Use 'auto', 'wmo', or 'fr'",
            loader_type
        )
    }
}
fn print_table(input_path: &Path, table_type: &str, limit: Option<usize>) -> Result<()> {
@ -333,3 +447,33 @@ fn print_table_b(input_path: &Path, limit: Option<usize>) -> Result<()> {
Ok(())
}
/// Write the bundled example scan configuration to `output_path`, then print
/// a short summary of the patterns it contains and how to use the file.
///
/// # Errors
/// Fails when the configuration cannot be written to `output_path`.
fn generate_config_file(output_path: &Path) -> Result<()> {
    println!(
        "Generating example configuration file: {}",
        output_path.display()
    );
    // Build the example configuration and persist it to disk.
    let example = ScanConfig::default_example();
    example
        .save_to_file(output_path)
        .context("Failed to save configuration file")?;
    println!("Configuration file generated successfully!");
    println!();
    println!("The configuration file contains example patterns for:");
    for name in example.patterns.iter().map(|p| &p.name) {
        println!("  - {}", name);
    }
    println!();
    println!("Edit this file to add your own custom patterns.");
    println!(
        "Use it with: gen-ctl scan -i <input> -o <output> -c {}",
        output_path.display()
    );
    Ok(())
}

82
gen/src/opera/bitmap.rs Normal file
View File

@ -0,0 +1,82 @@
use csv::ReaderBuilder;
use std::{fs::File, path::Path};
/// In-memory OPERA bitmap table loaded from a `;`-separated CSV file.
pub struct OPERABitmap {
    // Entries in file order; pre-seeded with defaults (see `default_bitmap`).
    bitmap: Vec<OPERABitmapEntry>,
}
impl OPERABitmap {
    /// Load an OPERA bitmap table from a `;`-separated, headerless CSV file.
    ///
    /// The result starts from the built-in defaults (`default_bitmap`); each
    /// CSV row overwrites the default entry at the same row index, and rows
    /// beyond the default length are appended. Rows are read as
    /// `f;x;y;datawidth_bits`, with whitespace trimmed per field.
    ///
    /// # Errors
    /// Returns an error when the file cannot be opened, a row is missing a
    /// column, or a field fails to parse as a number.
    pub fn load<P: AsRef<Path>>(p: P) -> anyhow::Result<Self> {
        let mut rdr = ReaderBuilder::new()
            .has_headers(false)
            .delimiter(b';')
            .from_reader(File::open(p)?);
        let mut entries = default_bitmap();
        // `enumerate` replaces the hand-maintained line counter; `row_idx`
        // is the zero-based CSV row number.
        for (row_idx, result) in rdr.records().enumerate() {
            let record = result?;
            // Fetch and trim field `idx`, erroring if the column is missing.
            let parse_field = |idx: usize| {
                record
                    .get(idx)
                    .map(|s| s.trim().to_string())
                    .ok_or_else(|| {
                        anyhow::anyhow!("Parse Opera Bitmap File failed at index {}", idx)
                    })
            };
            let entry = OPERABitmapEntry {
                f: parse_field(0)?.parse()?,
                x: parse_field(1)?.parse()?,
                y: parse_field(2)?.parse()?,
                datawidth_bits: parse_field(3)?.parse()?,
            };
            // Overwrite the default at this row, or append past the defaults.
            if let Some(slot) = entries.get_mut(row_idx) {
                *slot = entry;
            } else {
                entries.push(entry);
            }
        }
        Ok(OPERABitmap { bitmap: entries })
    }
}
/// One OPERA bitmap descriptor: an F-X-Y triple plus its data width.
pub struct OPERABitmapEntry {
    pub f: u16,
    pub x: u16,
    pub y: u16,
    // Width of the encoded value for this descriptor, in bits.
    pub datawidth_bits: u8,
}
/// Built-in OPERA bitmap defaults (descriptors 3-21-192 .. 3-21-202) used
/// when a bitmap file does not override them.
fn default_bitmap() -> Vec<OPERABitmapEntry> {
    const VALUES: [(u16, u16, u16, u8); 8] = [
        (3, 21, 192, 1),
        (3, 21, 193, 1),
        (3, 21, 194, 1),
        (3, 21, 195, 1),
        (3, 21, 196, 1),
        (3, 21, 197, 1),
        (3, 21, 200, 2),
        (3, 21, 202, 2),
    ];
    VALUES
        .into_iter()
        .map(|(f, x, y, datawidth_bits)| OPERABitmapEntry {
            f,
            x,
            y,
            datawidth_bits,
        })
        .collect()
}

1
gen/src/opera/mod.rs Normal file
View File

@ -0,0 +1 @@
pub mod bitmap;

535
gen/src/pattern.rs Normal file
View File

@ -0,0 +1,535 @@
use anyhow::{Context, Result};
use regex::Regex;
use std::path::{Path, PathBuf};
/// Represents the type of BUFR table
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TableKind {
    /// Element descriptor table (Table B).
    B,
    /// Sequence descriptor table (Table D).
    D,
}
impl TableKind {
    /// Lowercase single-letter code for this table kind ("b" or "d").
    pub fn as_str(&self) -> &str {
        if matches!(self, TableKind::B) { "b" } else { "d" }
    }
}
/// Metadata extracted from a table filename
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TableMetadata {
    /// Table type (B or D)
    pub kind: TableKind,
    /// Version number (e.g., 35 for BUFRCREX_TableB_en_35.csv)
    pub version: Option<u32>,
    /// Subcenter ID (for local tables)
    pub subcenter: Option<u32>,
    /// Originating center ID (for local tables)
    pub center: Option<u32>,
    /// Language code (e.g., "en")
    pub language: Option<String>,
    /// Whether this is a local table
    pub is_local: bool,
    /// Original filename
    pub filename: String,
}
impl TableMetadata {
    /// Generate an output filename based on metadata
    ///
    /// Naming rules:
    /// - WMO standard tables: BUFR_TableB_{version} or BUFR_TableD_{version}
    ///   Example: BUFR_TableB_14
    ///
    /// - Local tables with subcenter: BUFR_TableB_{subcenter}_{version}
    ///   Example: BUFR_TableB_1_14 (subcenter 1, version 14)
    ///
    /// A missing version falls back to 0. Note that `center` does not
    /// participate in the output name.
    pub fn output_name(&self) -> String {
        let kind = match self.kind {
            TableKind::B => "TableB",
            TableKind::D => "TableD",
        };
        let version = self.version.unwrap_or(0);
        match self.subcenter {
            // Local table: BUFR_Table{B|D}_{subcenter}_{version}
            Some(subcenter) if self.is_local => {
                format!("BUFR_{}_{}_{}", kind, subcenter, version)
            }
            // WMO standard table: BUFR_Table{B|D}_{version}
            _ => format!("BUFR_{}_{}", kind, version),
        }
    }
}
/// A pattern for matching table filenames
pub trait TableFilePattern: Send + Sync {
    /// Try to match a filename and extract metadata
    ///
    /// Returns `None` when the filename does not belong to this pattern.
    fn matches(&self, filename: &str) -> Option<TableMetadata>;

    /// Get a glob pattern for scanning directories
    /// (coarser than `matches`; candidates must still pass `matches`)
    fn glob_pattern(&self) -> &str;

    /// Get a description of this pattern
    fn description(&self) -> &str;
}
/// WMO standard table pattern
/// Examples:
/// - BUFRCREX_TableB_en_35.csv
/// - BUFR_TableD_en_40.csv
#[derive(Debug)]
pub struct WMOPattern {
    regex: Regex,
}

impl Default for WMOPattern {
    fn default() -> Self {
        Self::new()
    }
}

impl WMOPattern {
    /// Build the matcher for `BUFR[CREX]_Table{B|D}_{lang}_{version}.csv`.
    pub fn new() -> Self {
        let regex = Regex::new(r"^(?:BUFR(?:CREX)?)_Table([BD])_([a-z]{2})_(\d+)\.csv$")
            .expect("Invalid regex");
        Self { regex }
    }
}

impl TableFilePattern for WMOPattern {
    fn matches(&self, filename: &str) -> Option<TableMetadata> {
        let caps = self.regex.captures(filename)?;
        // Captures: 1 = table letter, 2 = language code, 3 = version number.
        // The regex restricts capture 1 to "B" or "D".
        let kind = if &caps[1] == "B" { TableKind::B } else { TableKind::D };
        Some(TableMetadata {
            kind,
            version: Some(caps[3].parse().ok()?),
            subcenter: None,
            center: None,
            language: Some(caps[2].to_string()),
            is_local: false,
            filename: filename.to_string(),
        })
    }

    fn glob_pattern(&self) -> &str {
        "*Table[BD]_*.csv"
    }

    fn description(&self) -> &str {
        "WMO standard tables (BUFR_Table[BD]_en_*.csv)"
    }
}
/// Local table pattern
/// Examples:
/// - localtabb_85_20.csv (subcenter 85, version 20)
/// - localtabd_100_5.csv (subcenter 100, version 5)
#[derive(Debug)]
pub struct LocalPattern {
    regex: Regex,
}

impl Default for LocalPattern {
    fn default() -> Self {
        Self::new()
    }
}

impl LocalPattern {
    /// Build the matcher for `localtab{b|d}_{subcenter}_{version}.csv`.
    pub fn new() -> Self {
        let regex = Regex::new(r"^localtab([bd])_(\d+)_(\d+)\.csv$").expect("Invalid regex");
        Self { regex }
    }
}

impl TableFilePattern for LocalPattern {
    fn matches(&self, filename: &str) -> Option<TableMetadata> {
        let caps = self.regex.captures(filename)?;
        // Captures: 1 = table letter, 2 = subcenter id, 3 = version number.
        // The regex restricts capture 1 to "b" or "d".
        let kind = if &caps[1] == "b" { TableKind::B } else { TableKind::D };
        Some(TableMetadata {
            kind,
            version: Some(caps[3].parse().ok()?),
            subcenter: Some(caps[2].parse().ok()?),
            center: None,
            language: None,
            is_local: true,
            filename: filename.to_string(),
        })
    }

    fn glob_pattern(&self) -> &str {
        "localtab[bd]_*.csv"
    }

    fn description(&self) -> &str {
        "Local tables (localtab[bd]_subcenter_version.csv)"
    }
}
/// Old-style master table pattern
/// Examples:
/// - bufrtabb_11.csv (Table B, version 11)
/// - bufrtabd_11.csv (Table D, version 11)
#[derive(Debug)]
pub struct OldMasterPattern {
    regex: Regex,
}

// Consistent with the other patterns (WMO/Local/Custom) and satisfies
// clippy's `new_without_default`.
impl Default for OldMasterPattern {
    fn default() -> Self {
        Self::new()
    }
}

impl OldMasterPattern {
    pub fn new() -> Self {
        // Pattern: bufrtab(b|d)_(\d+)\.csv
        let regex = Regex::new(r"^bufrtab([bd])_(\d+)\.csv$").expect("Invalid regex");
        Self { regex }
    }
}

impl TableFilePattern for OldMasterPattern {
    fn matches(&self, filename: &str) -> Option<TableMetadata> {
        let caps = self.regex.captures(filename)?;
        let kind = match &caps[1] {
            "b" => TableKind::B,
            "d" => TableKind::D,
            _ => return None,
        };
        let version = caps[2].parse().ok()?;
        Some(TableMetadata {
            kind,
            version: Some(version),
            subcenter: None,
            center: None,
            is_local: false,
            language: None,
            filename: filename.to_string(),
        })
    }

    fn glob_pattern(&self) -> &str {
        "bufrtab[bd]_*.csv"
    }

    fn description(&self) -> &str {
        "Old master tables (bufrtab[bd]_version.csv)"
    }
}
/// Custom pattern with flexible center/subcenter
/// Examples:
/// - test_c7_sc85_tableb_v20.csv
/// - data_center_7_scenter_85_tabled_10.csv
#[derive(Debug)]
pub struct CustomPattern {
    regex: Regex,
}

impl Default for CustomPattern {
    fn default() -> Self {
        Self::new()
    }
}

impl CustomPattern {
    pub fn new() -> Self {
        // Case-insensitive pattern:
        //   .*_?c(enter)?_{center}_sc(enter)?_{subcenter}_table(b|d)_v?{version}.csv
        let regex =
            Regex::new(r"(?i).*_?c(?:enter)?_?(\d+)_sc(?:enter)?_?(\d+)_table([bd])_v?(\d+)\.csv$")
                .expect("Invalid regex");
        Self { regex }
    }
}

impl TableFilePattern for CustomPattern {
    fn matches(&self, filename: &str) -> Option<TableMetadata> {
        let caps = self.regex.captures(filename)?;
        // Captures: 1 = center, 2 = subcenter, 3 = table letter, 4 = version.
        let center = caps[1].parse().ok()?;
        let subcenter = caps[2].parse().ok()?;
        // Lowercase because the whole regex is case-insensitive ("B"/"D" match too).
        let kind = match caps[3].to_lowercase().as_str() {
            "b" => TableKind::B,
            "d" => TableKind::D,
            _ => return None,
        };
        let version = caps[4].parse().ok()?;
        Some(TableMetadata {
            kind,
            version: Some(version),
            subcenter: Some(subcenter),
            center: Some(center),
            language: None,
            is_local: true,
            filename: filename.to_string(),
        })
    }

    fn glob_pattern(&self) -> &str {
        "*_c*_sc*_table*_*.csv"
    }

    fn description(&self) -> &str {
        "Custom center/subcenter tables (*_c{center}_sc{subcenter}_table[bd]_v{version}.csv)"
    }
}
/// Scanner that tries multiple patterns
pub struct TableScanner {
    // Patterns are tried in registration order; the first match wins.
    patterns: Vec<Box<dyn TableFilePattern>>,
}

impl Default for TableScanner {
    fn default() -> Self {
        Self::new()
    }
}
impl TableScanner {
    /// Create a scanner pre-loaded with all built-in filename patterns.
    pub fn new() -> Self {
        Self {
            patterns: vec![
                Box::new(WMOPattern::new()),
                Box::new(OldMasterPattern::new()),
                Box::new(LocalPattern::new()),
                Box::new(CustomPattern::new()),
            ],
        }
    }

    /// Create scanner with custom patterns
    pub fn with_patterns(patterns: Vec<Box<dyn TableFilePattern>>) -> Self {
        Self { patterns }
    }

    /// Add a pattern to the scanner
    pub fn add_pattern(&mut self, pattern: Box<dyn TableFilePattern>) {
        self.patterns.push(pattern);
    }

    /// Try to match a filename with any registered pattern; first match wins.
    pub fn match_filename(&self, filename: &str) -> Option<TableMetadata> {
        self.patterns.iter().find_map(|p| p.matches(filename))
    }

    /// Scan a directory for matching files
    ///
    /// Runs every registered pattern's glob over `dir`, keeps files whose
    /// names the pattern accepts, optionally filters by table kind, and
    /// de-duplicates files matched by more than one pattern.
    ///
    /// # Errors
    /// Fails when the directory path is not valid UTF-8 (the `glob` crate
    /// needs a `&str`) or a glob pattern is invalid; unreadable directory
    /// entries are reported as warnings and skipped.
    pub fn scan_directory<P: AsRef<Path>>(
        &self,
        dir: P,
        kind_filter: Option<TableKind>,
    ) -> Result<Vec<(PathBuf, TableMetadata)>> {
        let dir = dir.as_ref();
        let mut results = Vec::new();
        // Try each pattern
        for pattern in &self.patterns {
            let glob_path = dir.join(pattern.glob_pattern());
            // Was `to_str().unwrap()`: return an error instead of panicking
            // on a non-UTF-8 path.
            let glob_str = glob_path
                .to_str()
                .context("Scan directory path is not valid UTF-8")?;
            for entry in glob::glob(glob_str).context("Failed to read glob pattern")? {
                let path = match entry {
                    Ok(path) => path,
                    Err(e) => {
                        eprintln!("Warning: Error reading file entry: {}", e);
                        continue;
                    }
                };
                if let Some(metadata) = path
                    .file_name()
                    .and_then(|f| f.to_str())
                    .and_then(|name| pattern.matches(name))
                {
                    // Apply kind filter if specified
                    if kind_filter.map_or(true, |k| metadata.kind == k) {
                        results.push((path, metadata));
                    }
                }
            }
        }
        // Remove duplicates (same file matched by multiple patterns)
        results.sort_by(|a, b| a.0.cmp(&b.0));
        results.dedup_by(|a, b| a.0 == b.0);
        Ok(results)
    }

    /// Get all registered patterns
    pub fn patterns(&self) -> &[Box<dyn TableFilePattern>] {
        &self.patterns
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_wmo_pattern() {
        let pattern = WMOPattern::new();
        // Valid WMO patterns
        let meta = pattern.matches("BUFRCREX_TableB_en_35.csv").unwrap();
        assert_eq!(meta.kind, TableKind::B);
        assert_eq!(meta.version, Some(35));
        assert_eq!(meta.language, Some("en".to_string()));
        assert!(!meta.is_local);
        let meta = pattern.matches("BUFR_TableD_en_40.csv").unwrap();
        assert_eq!(meta.kind, TableKind::D);
        assert_eq!(meta.version, Some(40));
        assert!(!meta.is_local);
        // Invalid patterns
        // (missing language segment / missing BUFR prefix)
        assert!(pattern.matches("BUFRCREX_TableB_35.csv").is_none());
        assert!(pattern.matches("TableB_en_35.csv").is_none());
    }

    #[test]
    fn test_local_pattern() {
        let pattern = LocalPattern::new();
        // Valid local patterns
        let meta = pattern.matches("localtabb_85_20.csv").unwrap();
        assert_eq!(meta.kind, TableKind::B);
        assert_eq!(meta.subcenter, Some(85));
        assert_eq!(meta.version, Some(20));
        assert!(meta.is_local);
        let meta = pattern.matches("localtabd_100_5.csv").unwrap();
        assert_eq!(meta.kind, TableKind::D);
        assert_eq!(meta.subcenter, Some(100));
        assert_eq!(meta.version, Some(5));
        assert!(meta.is_local);
        // Invalid patterns
        // (wrong stem / missing version segment)
        assert!(pattern.matches("local_table_85_20.csv").is_none());
        assert!(pattern.matches("localtabb_85.csv").is_none());
    }

    #[test]
    fn test_custom_pattern() {
        let pattern = CustomPattern::new();
        // Valid custom patterns
        let meta = pattern.matches("test_c7_sc85_tableb_v20.csv").unwrap();
        assert_eq!(meta.kind, TableKind::B);
        assert_eq!(meta.center, Some(7));
        assert_eq!(meta.subcenter, Some(85));
        assert_eq!(meta.version, Some(20));
        assert!(meta.is_local);
        // Long-form spelling ("center"/"scenter") without the "v" prefix.
        let meta = pattern
            .matches("data_center_7_scenter_85_tabled_10.csv")
            .unwrap();
        assert_eq!(meta.kind, TableKind::D);
        assert_eq!(meta.center, Some(7));
        assert_eq!(meta.subcenter, Some(85));
        assert_eq!(meta.version, Some(10));
    }

    #[test]
    fn test_output_name_generation() {
        // WMO table (no subcenter) - Format: BUFR_TableB_{version}
        let meta = TableMetadata {
            kind: TableKind::B,
            version: Some(14),
            subcenter: None,
            center: None,
            language: Some("en".to_string()),
            is_local: false,
            filename: "BUFRCREX_TableB_en_14.csv".to_string(),
        };
        assert_eq!(meta.output_name(), "BUFR_TableB_14");
        // WMO Table D
        let meta = TableMetadata {
            kind: TableKind::D,
            version: Some(40),
            subcenter: None,
            center: None,
            language: Some("en".to_string()),
            is_local: false,
            filename: "BUFR_TableD_en_40.csv".to_string(),
        };
        assert_eq!(meta.output_name(), "BUFR_TableD_40");
        // Local table with subcenter - Format: BUFR_TableB_{subcenter}_{version}
        let meta = TableMetadata {
            kind: TableKind::B,
            version: Some(14),
            subcenter: Some(1),
            center: None,
            language: None,
            is_local: true,
            filename: "localtabb_1_14.csv".to_string(),
        };
        assert_eq!(meta.output_name(), "BUFR_TableB_1_14");
        // Local table with larger subcenter number
        let meta = TableMetadata {
            kind: TableKind::B,
            version: Some(20),
            subcenter: Some(85),
            center: None,
            language: None,
            is_local: true,
            filename: "localtabb_85_20.csv".to_string(),
        };
        assert_eq!(meta.output_name(), "BUFR_TableB_85_20");
    }

    #[test]
    fn test_scanner() {
        let scanner = TableScanner::new();
        // Should match WMO pattern
        let meta = scanner.match_filename("BUFRCREX_TableB_en_35.csv").unwrap();
        assert_eq!(meta.kind, TableKind::B);
        assert!(!meta.is_local);
        // Should match local pattern
        let meta = scanner.match_filename("localtabb_85_20.csv").unwrap();
        assert_eq!(meta.kind, TableKind::B);
        assert!(meta.is_local);
        // Should match custom pattern
        let meta = scanner
            .match_filename("test_c7_sc85_tableb_v20.csv")
            .unwrap();
        assert_eq!(meta.kind, TableKind::B);
        assert!(meta.is_local);
    }
}

View File

@ -1,16 +1,20 @@
use crate::FXY;
use rkyv::Archive;
use rkyv::api::high::{HighDeserializer, HighSerializer};
use rkyv::api::high::{HighDeserializer, HighSerializer, HighValidator};
use rkyv::bytecheck::CheckBytes;
use rkyv::de::Pool;
use rkyv::rancor::{Error, Strategy};
use serde::Serialize as SerdeSerialize;
use serde::de::DeserializeOwned;
use std::fmt::Debug;
use std::fmt::{Debug, Display};
pub struct BTable;
pub struct DTable;
pub trait TableTypeTrait {
pub trait TableTypeTrait
where
<Self::EntryType as Archive>::Archived: for<'a> CheckBytes<HighValidator<'a, Error>>,
{
type EntryType: TableEntryFull;
const TABLE_TYPE: crate::TableType;
}
@ -27,6 +31,7 @@ impl TableTypeTrait for DTable {
pub trait TableEntry:
SerdeSerialize
+ DeserializeOwned
+ std::fmt::Display
+ Debug
+ Clone
+ Sized
@ -38,17 +43,23 @@ pub trait TableEntry:
fn fxy(&self) -> FXY;
}
// 148 | fn get(&self, fxy: FXY) -> Option<T> where for<'a> <T as TableEntryFull>::Archived: CheckBytes<Strategy<Validator<ArchiveValidator<'a>, SharedValidator>, rkyv::rancor::Error>>
pub trait TableEntryFull: TableEntry {
type Archived: for<'a> rkyv::Deserialize<Self, HighDeserializer<Error>>
+ rkyv::Deserialize<Self, Strategy<Pool, rkyv::rancor::Error>>
+ rkyv::Portable;
+ rkyv::Portable
+ std::fmt::Display
+ for<'a> CheckBytes<HighValidator<'a, Error>>;
}
impl<T> TableEntryFull for T
where
T: TableEntry,
<T as Archive>::Archived: for<'a> rkyv::Deserialize<T, HighDeserializer<Error>>
+ rkyv::Deserialize<T, Strategy<Pool, rkyv::rancor::Error>>,
+ rkyv::Deserialize<T, Strategy<Pool, rkyv::rancor::Error>>
+ std::fmt::Display
+ for<'a> CheckBytes<HighValidator<'a, Error>>,
{
type Archived = <T as Archive>::Archived;
}
@ -112,7 +123,7 @@ impl BTableEntry {
}
}
impl std::fmt::Display for BTableEntry {
impl Display for BTableEntry {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let element_name = if self.element_name_en.len() > 40 {
format!("{}...", &self.element_name_en[..37])
@ -142,6 +153,36 @@ impl std::fmt::Display for BTableEntry {
}
}
impl Display for ArchivedBTableEntry {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let element_name = if self.element_name_en.len() > 40 {
format!("{}...", &self.element_name_en[..37])
} else {
self.element_name_en.to_string()
};
let unit = if self.bufr_unit.len() > 15 {
format!("{}...", &self.bufr_unit[..12])
} else {
self.bufr_unit.to_string()
};
write!(
f,
"{:02}{:02}{:03} | {:<40} | {:<15} | {:>5} | {:>8} | {:>8} | {}",
self.fxy.f,
self.fxy.x,
self.fxy.y,
element_name,
unit,
self.bufr_scale,
self.bufr_reference_value,
self.bufr_datawidth_bits,
self.status.as_deref().unwrap_or("N/A")
)
}
}
#[derive(
Debug, Clone, serde::Deserialize, serde::Serialize, Archive, rkyv::Serialize, rkyv::Deserialize,
)]
@ -225,6 +266,35 @@ impl std::fmt::Display for DTableEntry {
}
}
impl Display for ArchivedDTableEntry {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let fxy_chain_str: String = self
.fxy_chain
.iter()
.map(|fxy| format!("{:02}{:02}{:03}", fxy.f, fxy.x, fxy.y))
.collect::<Vec<_>>()
.join(", ");
let title = self.title_en.as_deref().unwrap_or("N/A");
let truncated_title = if title.len() > 50 {
format!("{}...", &title[..47])
} else {
title.to_string()
};
write!(
f,
"{:02}{:02}{:03} | {:<50} | {:<12} | [{}]",
self.fxy.f,
self.fxy.x,
self.fxy.y,
truncated_title,
self.status.as_deref().unwrap_or("N/A"),
fxy_chain_str
)
}
}
impl TableEntry for DTableEntry {
fn fxy(&self) -> FXY {
self.fxy

View File

@ -39,28 +39,6 @@ pub struct RawBTableEntry {
pub status: Option<String>,
}
// Helper function to deserialize empty strings as None
fn deserialize_optional_string<'de, D>(deserializer: D) -> Result<Option<String>, D::Error>
where
D: serde::Deserializer<'de>,
{
let s: String = serde::Deserialize::deserialize(deserializer)?;
if s.is_empty() { Ok(None) } else { Ok(Some(s)) }
}
// Helper function to deserialize empty strings as None for u32
fn deserialize_optional_u32<'de, D>(deserializer: D) -> Result<Option<u32>, D::Error>
where
D: serde::Deserializer<'de>,
{
let s: String = serde::Deserialize::deserialize(deserializer)?;
if s.is_empty() {
Ok(None)
} else {
s.parse::<u32>().map(Some).map_err(serde::de::Error::custom)
}
}
impl EntryLoader for BTableCsvLoader {
type RawEntry = RawBTableEntry;
type Output = BTableEntry;

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -2,7 +2,8 @@ use genlib::BUFRTableMPH;
use genlib::tables::TableTypeTrait;
use crate::errors::Result;
use crate::structs::versions::BUFRMessage;
use crate::structs::data_parser::DataParser;
use crate::structs::versions::{BUFRMessage, MessageVersion};
use crate::tables::*;
pub struct MessageBlock {
@ -29,20 +30,33 @@ impl MessageBlock {
let local_table_version = table_info.local_table_version as u32;
if local_table_version > 0 {
let local_tables = if local_table_version > 0 {
let local_b: BUFRTableB = TableLoader.load_table(LocalTable::new(
Some(table_info.subcenter_id),
Some(table_info.subcenter_id * 256 + table_info.center_id),
table_info.local_table_version,
))?;
let local_d: BUFRTableD = TableLoader.load_table(LocalTable::new(
Some(table_info.subcenter_id),
Some(table_info.subcenter_id * 256 + table_info.center_id),
table_info.local_table_version,
))?;
}
Some((local_b, local_d))
} else {
None
};
let (local_b, local_d) = if let Some((b, d)) = local_tables {
(Some(b), Some(d))
} else {
(None, None)
};
let mut parser = DataParser::new(master_b, master_d, local_b, local_d);
parser.parse(&self.message)?;
Ok(())
// master_b_table.load_table(TT::Standard);
}
fn load_first_validable_table<E: TableTypeTrait>(

View File

@ -1,162 +0,0 @@
use crate::errors::Result;
use encoding_rs::WINDOWS_1252;
use std::fs;
use std::path::{Path, PathBuf};
mod btable;
mod dtable;
pub use btable::BTable;
pub use dtable::DTable;
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TableType {
A,
B,
C,
D,
}
pub struct BUFRTable;
pub trait TableTrait {
fn file_path(table_type: TableType, sub_center: Option<u32>, table_version: u8) -> PathBuf;
}
impl BUFRTable {
pub fn file_path(table_type: TableType, table_version: u8) -> PathBuf {
let base_dir = Path::new("tables/bufr");
let file_name = match table_type {
TableType::A => format!("bufrtaba_{}.csv", table_version),
TableType::B => format!("bufrtabb_{}.csv", table_version),
TableType::C => format!("bufrtabc_{}.csv", table_version),
TableType::D => format!("bufrtabd_{}.csv", table_version),
};
base_dir.join(file_name)
}
}
pub struct LocalTable;
impl LocalTable {
pub fn file_path(table_type: TableType, sub_center: u32, table_version: u8) -> PathBuf {
let base_dir = Path::new("tables/local");
let file_name = match table_type {
TableType::A => format!("loctaba_{}_{}.csv", sub_center * 256, table_version),
TableType::B => format!("loctabb_{}_{}.csv", sub_center * 256, table_version),
TableType::C => format!("loctabc_{}_{}.csv", sub_center * 256, table_version),
TableType::D => format!("loctabd_{}_{}.csv", sub_center * 256, table_version),
};
base_dir.join(file_name)
}
}
impl TableTrait for BUFRTable {
fn file_path(table_type: TableType, sub_center: Option<u32>, table_version: u8) -> PathBuf {
BUFRTable::file_path(table_type, table_version)
}
}
impl TableTrait for LocalTable {
fn file_path(table_type: TableType, sub_center: Option<u32>, table_version: u8) -> PathBuf {
let sub_center = sub_center.expect("Sub-center must be provided for LocalTable");
LocalTable::file_path(table_type, sub_center, table_version)
}
}
#[derive(Debug, Clone)]
pub struct TableLoader<T: TableT> {
sequences: Vec<T::Seq>,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TT {
Localized(u32), // sub_center
Standard,
}
impl<T: TableT> TableLoader<T> {
pub fn new() -> Self {
Self {
sequences: Vec::new(),
}
}
pub fn load_table(&mut self, table_type: TT, table_version: u8) -> Result<Vec<T::Seq>> {
let table_kind = T::table_type();
let local_table_path = match table_type {
TT::Localized(sc) => LocalTable::file_path(table_kind, sc, table_version),
TT::Standard => BUFRTable::file_path(table_kind, table_version),
};
// Here you would add code to actually load and parse the table from the file at `path`.
println!("Loading table from path: {:?}", local_table_path);
let raw = fs::read(&local_table_path)?;
let fixed = normalize_dashes(raw);
let text = decode_tabd_text(fixed);
let mut table = T::default();
for line in text.lines() {
if let Some(seq) = table.parse_line(line) {
self.sequences.push(seq);
}
}
if let Some(seq) = table.finish() {
self.sequences.push(seq);
}
let mut sequences = Vec::new();
std::mem::swap(&mut sequences, &mut self.sequences);
Ok(sequences)
}
}
fn normalize_dashes(mut bytes: Vec<u8>) -> Vec<u8> {
for b in &mut bytes {
match *b {
0x96 | 0x97 => *b = b'-', // EN / EM dash → '-'
_ => {}
}
}
bytes
}
fn decode_tabd_text(bytes: Vec<u8>) -> String {
let (text, _, _) = WINDOWS_1252.decode(&bytes);
text.into_owned()
}
pub trait TableT: Default {
type Seq;
fn table_type() -> TableType;
fn parse_line(&mut self, line: &str) -> Option<Self::Seq>;
fn finish(&mut self) -> Option<Self::Seq> {
None
}
}
#[derive(Debug, Clone)]
pub struct Descriptor {
pub f: i32,
pub x: i32,
pub y: i32,
}
#[cfg(test)]
mod test {
use crate::discriptor_table::{TableLoader, btable::BTable, dtable::DTable};
#[test]
fn test_read_table() {
// let mut bufr_table: super::Table<super::BUFRTable> = super::Table::new();
let mut bufr_table = TableLoader::<BTable>::new();
bufr_table.load_table(super::TT::Standard, 11).unwrap();
println!("{:#?}", bufr_table.sequences);
}
}

View File

@ -1,51 +0,0 @@
use crate::discriptor_table::{Descriptor, TableT};
#[derive(Debug, Clone, Default)]
pub struct BTable;
#[derive(Debug, Clone)]
pub struct BSeq {
pub d: Descriptor,
pub scale: i32,
pub dw: i32,
pub refval: f32,
pub unit: String,
pub elname: String,
}
impl TableT for BTable {
type Seq = BSeq;
fn table_type() -> super::TableType {
super::TableType::B
}
fn parse_line(&mut self, line: &str) -> Option<Self::Seq> {
let fields: Vec<&str> = line.split(';').collect();
if fields.len() < 8 {
return None;
}
let parse_i = |s: &str| s.parse::<i32>().ok();
let parse_f = |s: &str| s.parse::<f32>().ok();
let f = parse_i(fields[0])?;
let x = parse_i(fields[1])?;
let y = parse_i(fields[2])?;
let name = fields[3];
let unit = fields[4];
let scale = parse_i(fields[5])?;
let refval = parse_f(fields[6])?;
let dw = parse_i(fields[7])?;
Some(BSeq {
d: Descriptor { f, x, y },
scale,
dw,
refval,
unit: unit.to_string(),
elname: name.to_string(),
})
}
}

View File

@ -1,78 +0,0 @@
use crate::discriptor_table::{Descriptor, TableT, TableType};
#[derive(Debug, Clone)]
pub struct DD {
pub f: i32,
pub x: i32,
pub y: i32,
}
#[derive(Debug, Clone)]
pub struct DSeq {
pub d: Descriptor,
pub del: Vec<DD>,
}
#[derive(Debug, Clone, Default)]
pub struct DTable {
current_seq: Option<DSeq>,
}
impl TableT for DTable {
type Seq = DSeq;
fn table_type() -> TableType {
TableType::D
}
fn parse_line(&mut self, line: &str) -> Option<Self::Seq> {
let fields: Vec<&str> = line.split(';').collect();
if fields.len() < 6 {
return None;
}
let parse_i = |s: &str| s.trim().parse::<i32>().unwrap_or(0);
let isf = parse_i(fields[0]);
let isx = parse_i(fields[1]);
let isy = parse_i(fields[2]);
let idf = parse_i(fields[3]);
let idx = parse_i(fields[4]);
let idy = parse_i(fields[5]);
let mut finished_seq = None;
let current = &mut self.current_seq;
if isf == 3 || isx != 0 || isy != 0 {
if let Some(prev) = current.take() {
finished_seq = Some(prev);
}
*current = Some(DSeq {
d: Descriptor {
f: isf,
x: isx,
y: isy,
},
del: Vec::new(),
});
}
if idf != 0 || idx != 0 || idy != 0 {
if let Some(seq) = current.as_mut() {
seq.del.push(DD {
f: idf,
x: idx,
y: idy,
});
}
}
finished_seq
}
fn finish(&mut self) -> Option<Self::Seq> {
self.current_seq.take()
}
}

View File

@ -1,9 +1,9 @@
mod block;
mod tables;
// mod discriptor_table;
mod errors;
mod opera;
pub mod parser;
pub mod structs;
mod tables;
#[cfg(test)]
mod test {
@ -16,7 +16,7 @@ mod test {
)
.unwrap();
let entry = bufr.lookup(FXY::new(0, 0, 1)).unwrap().unwrap();
let entry = bufr.lookup(FXY::new(0, 0, 1)).unwrap();
println!("{:#?}", entry);
}

3
rbufr/src/opera.rs Normal file
View File

@ -0,0 +1,3 @@
//! This module contains functions specific to handling BUFR OPERA files.

/// Placeholder for the OPERA bitmap parser; no behavior is defined yet.
pub struct OperaBitmapParser;

View File

@ -1,6 +1,6 @@
use crate::block::{BUFRFile, MessageBlock};
use crate::errors::Result;
use crate::structs::versions::BUFRMessage;
use crate::{block::BUFRFile, structs::versions::MessageVersion};
use flate2::read::GzDecoder;
use std::{
fs::File,
@ -78,14 +78,6 @@ impl Parser {
let total_length =
u32::from_be_bytes([0, section0_buf[4], section0_buf[5], section0_buf[6]]);
// Limit message size for safety
// if total_length as usize > MAX_MESSAGE_SIZE {
// return Err(crate::errors::Error::ParseError(format!(
// "Message too large: {} bytes",
// total_length
// )));
// }
// Read entire message
let mut message_buf = vec![0u8; total_length as usize];
reader.seek(SeekFrom::Start(offset))?;
@ -131,7 +123,7 @@ impl Parser {
for offset in offsets {
match Self::read_message_at_offset(buf_reader, offset) {
Ok(message_data) => match BUFRMessage::parse(&message_data) {
Ok((_, message)) => {
Ok(message) => {
file_block.push_message(message);
}
Err(e) => {

View File

@ -1,9 +1,16 @@
use std::ops::{AddAssign, Shl, Shr};
use nom::IResult;
use nom::bits::{bits, bytes, complete::take};
pub type BitInput<'a> = (&'a [u8], usize);
pub fn parse_arbitrary_bits(input: BitInput, count: usize) -> IResult<BitInput, u32> {
pub fn parse_arbitrary_bits<
T: From<u8> + AddAssign + Shl<usize, Output = T> + Shr<usize, Output = T>,
>(
input: BitInput,
count: usize,
) -> IResult<BitInput, T> {
take(count)(input)
}
@ -15,7 +22,7 @@ mod test {
fn test() {
let data = [0xA0, 0xA0, 0x01, 0xA0];
let result = parse_arbitrary_bits((&data, 0), 16).unwrap();
let result = parse_arbitrary_bits::<u16>((&data, 0), 16).unwrap();
println!("{:?}", result);
}

View File

@ -0,0 +1,381 @@
use crate::{
errors::{Error, Result},
structs::versions::MessageVersion,
};
use genlib::{
FXY, opera,
prelude::{BUFRTableB, BUFRTableD},
tables::{ArchivedBTableEntry, ArchivedDTableEntry, BTableEntry},
};
/// Decoder state for one BUFR data section: descriptor tables plus the
/// mutable packing overrides accumulated while parsing.
pub struct DataParser {
    // Master (standard) tables — always present.
    master_b: BUFRTableB,
    master_d: BUFRTableD,
    // local
    // Optional locally-defined tables consulted when the master misses.
    local_b: Option<BUFRTableB>,
    local_d: Option<BUFRTableD>,
    // Common State
    // NOTE(review): presumably installed by F=2 operator descriptors to
    // override scale/reference/width — confirm in `deal_with_operator`.
    common_scale: Option<i32>,
    common_ref_value: Option<i32>,
    common_data_width: Option<u32>,
    common_str_width: Option<usize>,
    // Localized State
    local_data_width: Option<u32>,
    // Temporary storage
    temp_operator: Option<u32>,
}
impl DataParser {
/// Build a parser over the mandatory master B/D tables plus optional
/// local tables; all operator/override state starts cleared.
pub fn new(
    master_b: BUFRTableB,
    master_d: BUFRTableD,
    local_b: Option<BUFRTableB>,
    local_d: Option<BUFRTableD>,
) -> Self {
    DataParser {
        master_b,
        master_d,
        local_b,
        local_d,
        common_scale: None,
        common_ref_value: None,
        common_data_width: None,
        common_str_width: None,
        local_data_width: None,
        temp_operator: None,
    }
}
/// Parse the data section of `message` by walking its descriptor list
/// front to back with `parser_inner` until no descriptors remain.
///
/// NOTE(review): a fresh `Vec::new()` accumulator is passed on every loop
/// iteration, so decoded values are dropped here — confirm whether the
/// caller is expected to receive them eventually.
pub fn parse<V: MessageVersion>(&mut self, message: &V) -> Result<()> {
    let data_block = message.data_block()?;
    let mut descriptors = message.descriptors()?;
    let mut data_input = BitInput::new(data_block);
    while !descriptors.is_empty() {
        let (desc, data) = self.parser_inner(&mut Vec::new(), descriptors, data_input)?;
        descriptors = desc;
        data_input = data;
    }
    Ok(())
}
/// Consume the leading descriptor of `descriptors`, pushing any decoded
/// value into `values`, and return the remaining descriptors plus the
/// advanced bit input.
///
/// Dispatch on the descriptor's F value:
/// - 0: element — decode one value via Table B
/// - 1: replication — repeat the next `x` descriptors `y` times
/// - 2: operator — delegate to `deal_with_operator`, then recurse
/// - 3: sequence — splice the Table D expansion in front of the rest
fn parser_inner<'a>(
    &mut self,
    values: &mut Vec<Value>,
    mut descriptors: Vec<genlib::FXY>,
    mut data: BitInput<'a>,
) -> Result<(Vec<genlib::FXY>, BitInput<'a>)> {
    if descriptors.is_empty() {
        return Ok((descriptors, data));
    }
    let des = descriptors[0];
    println!("Processing descriptor {:?}", des);
    match des.f {
        0 => {
            // Element descriptor - parse data
            if let Some(e) = self.lookup_b_descriptor(des) {
                // let (value, remaining) = e.parse(data);
                let (value, remaining) = self.evalute(data, &e)?;
                println!("Parsed value: {}", value);
                values.push(value);
                data = remaining;
                descriptors.remove(0);
            } else {
                return Err(Error::ParseError(format!(
                    "Descriptor {:?} not found in Table B",
                    des
                )));
            }
        }
        1 => {
            // Replication: repeat the next `x` descriptors `y` times.
            // NOTE(review): every iteration receives the same `data`
            // position and only the last iteration's output is kept, so
            // all repetitions appear to re-read the same bits — confirm
            // against `repeat_parser` whether this is intended.
            let genlib::FXY { x, y, .. } = des;
            descriptors.remove(0);
            for i in 0..y {
                let descriptors_clone = descriptors.clone();
                let (cde, cd) =
                    self.repeat_parser(values, descriptors_clone, data, x as usize)?;
                if i == y - 1 {
                    descriptors = cde;
                    data = cd;
                }
            }
        }
        2 => {
            // Operator descriptor: update parser state, then continue with
            // the rest of the descriptor list.
            let data = self.deal_with_operator(values, des, data)?;
            descriptors.remove(0);
            return self.parser_inner(values, descriptors, data);
        }
        3 => {
            // Sequence descriptor: expand via Table D and prepend the
            // expansion to the remaining descriptors.
            if let Some(seq) = self.lookup_d_descriptor(des) {
                let mut fxy_chain: Vec<FXY> = seq
                    .fxy_chain
                    .iter()
                    .map(|f| {
                        let result =
                            FXY::new(f.f.to_native(), f.x.to_native(), f.y.to_native());
                        result
                    })
                    .collect();
                fxy_chain.extend(descriptors[1..].into_iter());
                descriptors = fxy_chain;
            } else {
                return Err(Error::ParseError(format!(
                    "Sequence descriptor {:?} not found in Table D",
                    des
                )));
            }
        }
        _ => {
            return Err(Error::ParseError(format!(
                "Invalid descriptor F value: {}",
                des.f
            )));
        }
    }
    Ok((descriptors, data))
}
fn lookup_b_descriptor(&self, fxy: genlib::FXY) -> Option<&ArchivedBTableEntry> {
self.master_b
.lookup(fxy)
.as_ref()
.map(|entry| {
if FXY::new(
entry.fxy.f.to_native(),
entry.fxy.x.to_native(),
entry.fxy.y.to_native(),
) == fxy
{
Some(*entry)
} else {
None
}
})
.flatten()
.or(self.local_b.as_ref().and_then(|t| t.lookup(fxy)))
}
fn lookup_d_descriptor(&self, fxy: genlib::FXY) -> Option<&ArchivedDTableEntry> {
self.master_d
.lookup(fxy)
.as_ref()
.map(|entry| {
if FXY::new(
entry.fxy.f.to_native(),
entry.fxy.x.to_native(),
entry.fxy.y.to_native(),
) == fxy
{
Some(*entry)
} else {
None
}
})
.flatten()
.or(self.local_d.as_ref().and_then(|t| t.lookup(fxy)))
}
fn repeat_parser<'a>(
&mut self,
values: &mut Vec<Value>,
descriptors: Vec<genlib::FXY>,
data: BitInput<'a>,
count: usize,
) -> Result<(Vec<genlib::FXY>, BitInput<'a>)> {
if count == 0 || descriptors.is_empty() {
return Ok((descriptors, data));
}
let (desc, data) = self.parser_inner(values, descriptors, data)?;
return self.repeat_parser(values, desc, data, count - 1);
}
fn evalute<'a>(
&self,
data: BitInput<'a>,
e: &ArchivedBTableEntry,
) -> Result<(Value, BitInput<'a>)> {
match e.bufr_unit.as_str() {
"CCITT IA5" => {
let total_bytes = self
.common_str_width
.unwrap_or(((e.bufr_datawidth_bits.to_native() as usize) + 7) / 8);
let (s, data) = data.take_string(total_bytes as usize)?;
return Ok((Value::String(s), data));
}
_ => {
let datawidth = self
.common_data_width
.unwrap_or(e.bufr_datawidth_bits.to_native());
let scale = 10f32.powi(-self.common_scale.unwrap_or(e.bufr_scale.to_native()));
let reference_value =
self.common_ref_value
.unwrap_or(e.bufr_reference_value.to_native()) as f32;
let (value, data) = data.get_arbitary_bits(datawidth as usize)?;
let result = (value as f32) / scale + reference_value;
return Ok((Value::Float(result), data));
}
}
}
fn deal_with_operator<'a>(
&mut self,
values: &mut Vec<Value>,
operator: FXY,
data: BitInput<'a>,
) -> Result<(BitInput<'a>)> {
match operator.x {
1 => match operator.y {
0 => {
self.common_data_width = None;
}
_ => {
self.common_data_width = Some(operator.y as u32);
}
},
2 => match operator.y {
0 => {
self.common_scale = None;
}
_ => {
self.common_scale = Some(operator.y as i32);
}
},
3 => match operator.y {
0 => {
self.common_ref_value = None;
}
_ => {
self.common_ref_value = Some(operator.y as i32);
}
},
5 => {
let (string, _data) = data.take_string(operator.y as usize)?;
values.push(Value::String(string));
}
6 => {
let localized_width = operator.y;
self.local_data_width = Some(localized_width as u32);
}
7 => {
self.temp_operator = Some(operator.y as u32);
}
8 => match operator.y {
0 => {
self.common_str_width = None;
}
_ => {
self.common_str_width = Some(operator.y as usize);
}
},
_ => {}
}
Ok(data)
}
// fn seq_parser(descriptors: &[genlib::FXY]) -> Result<()> {}
}
/// A single decoded BUFR data value.
///
/// Numeric Table B elements decode to `Float`; CCITT IA5 elements decode to
/// `String`.  The remaining variants exist for callers that need exact
/// integer or double-precision representations.
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
pub enum Value {
    Float(f32),
    Double(f64),
    Int(i32),
    UInt(u32),
    String(String),
}
impl std::fmt::Display for Value {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Value::Float(v) => write!(f, "{}", v),
Value::Double(v) => write!(f, "{}", v),
Value::Int(v) => write!(f, "{}", v),
Value::UInt(v) => write!(f, "{}", v),
Value::String(v) => write!(f, "{}", v),
}
}
}
/// An immutable bit-level cursor over a byte slice: the remaining bytes
/// plus the number of bits (0..8) already consumed from the first byte.
/// `Copy` makes it cheap to hand out speculative cursors and commit only
/// the one returned by a successful parse.
#[derive(Debug, Clone, Copy)]
pub struct BitInput<'a>(&'a [u8], usize);
impl<'a> BitInput<'a> {
pub fn new(input: &[u8]) -> BitInput {
BitInput(input, 0)
}
pub fn pointer(&self) -> usize {
self.1
}
pub fn take_string(self, nbytes: usize) -> Result<(String, BitInput<'a>)> {
let total_bits = nbytes * 8;
let mut chars = Vec::with_capacity(nbytes);
let mut remaining_input = self;
for _ in 0..nbytes {
let (byte_value, next_input) = remaining_input.get_arbitary_bits(8)?;
chars.push(byte_value as u8);
remaining_input = next_input;
}
let s = String::from_utf8(chars)
.map_err(|_| Error::ParseError("Invalid UTF-8 string".to_string()))?;
Ok((s, remaining_input))
}
pub fn get_arbitary_bits(self, nbits: usize) -> Result<(u32, BitInput<'a>)> {
if nbits == 0 {
return Ok((0, self));
}
let mut value: u32 = 0;
let mut remaining_bits = nbits;
let mut bit_offset = self.1; // Current bit position in the first byte (0-7)
let mut byte_data = self.0; // Remaining bytes
while remaining_bits > 0 {
if byte_data.is_empty() {
return Err(Error::ParseError(
"Not enough data to parse bits".to_string(),
));
}
let current_byte = byte_data[0];
// How many bits we can read from the current byte
let bits_available_in_byte = 8 - bit_offset;
let bits_to_read = remaining_bits.min(bits_available_in_byte);
// Extract bits from the current byte
// Bits are read from MSB to LSB (left to right)
let shift = bits_available_in_byte - bits_to_read;
let mask = ((1u32 << bits_to_read) - 1) as u8;
let extracted_bits = (current_byte >> shift) & mask;
// Add to value
value = (value << bits_to_read) | extracted_bits as u32;
remaining_bits -= bits_to_read;
bit_offset += bits_to_read;
// Move to next byte if we've consumed all 8 bits
if bit_offset >= 8 {
byte_data = &byte_data[1..];
bit_offset = 0;
}
}
Ok((value, BitInput(byte_data, bit_offset)))
}
}

View File

@ -4,6 +4,8 @@ use nom::{
number::complete::{be_u8, be_u16, be_u24},
};
pub mod bit;
pub mod data_parser;
pub(super) mod tools;
pub mod versions;
#[inline]

View File

@ -0,0 +1,28 @@
use crate::errors::{Error, Result};
use crate::structs::bit::{self, BitInput, parse_arbitrary_bits};
use nom::{IResult, Parser, multi::many1};
pub(super) fn parse_descriptors(input: &[u8]) -> Result<Vec<genlib::FXY>> {
parse_descriptors_inner(input)
.map(|(_, v)| v)
.map_err(|_| Error::ParseError(format!("Can't parse descriptors from section3")))
}
/// Repeatedly takes 16-bit FXY groups until fewer than two bytes remain;
/// a trailing lone byte is left unconsumed (presumably section padding —
/// TODO confirm against the section 3 layout).
fn parse_descriptors_inner(mut input: &[u8]) -> IResult<BitInput, Vec<genlib::FXY>> {
    let mut descriptors = Vec::new();
    loop {
        if input.len() < 2 {
            break;
        }
        let ((rest, _), fxy) = take_fxy((input, 0))?;
        descriptors.push(fxy);
        input = rest;
    }
    Ok(((input, 0), descriptors))
}
/// Reads one 16-bit descriptor from the cursor: 2 bits of F, 6 bits of X
/// and 8 bits of Y, in that order.
fn take_fxy(input: BitInput) -> IResult<BitInput, genlib::FXY> {
    let (input, f) = parse_arbitrary_bits(input, 2)?;
    let (input, x) = parse_arbitrary_bits(input, 6)?;
    let (input, y) = parse_arbitrary_bits(input, 8)?;
    Ok((input, genlib::FXY::new(f, x, y)))
}

View File

@ -2,47 +2,127 @@ pub mod v2;
pub mod v4;
pub(super) use super::{skip, skip1, skip2};
use crate::errors::{Error, Result};
use genlib::FXY;
use nom::{
IResult,
bytes::complete::{tag, take},
number::complete::{be_u8, be_u16, be_u24},
};
#[derive(Clone)]
pub enum BUFRMessage {
V2(v2::BUFRMessageV2),
V4(v4::BUFRMessageV4),
macro_rules! message {
($(($version:ident, $t: ty, $v: expr)),+$(,)?) => {
#[derive(Clone)]
pub enum BUFRMessage {
$(
$version($t),
)+
}
impl MessageVersion for BUFRMessage {
fn parse(input: &[u8]) -> Result< Self> {
let (_, section0) = parse_section0(input)?;
match section0.version {
$(
x if x == $v => {
let msg = <$t as MessageVersion>::parse(input)?;
Ok(BUFRMessage::$version(msg))
}
)+
_ => Err(Error::UnsupportedVersion(section0.version)),
}
}
fn description(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
$(
BUFRMessage::$version(msg) => msg.description(f),
)+
}
}
fn table_info(&self) -> TableInfo {
match self {
$(
BUFRMessage::$version(msg) => msg.table_info(),
)+
}
}
fn subcenter_id(&self) -> u16 {
match self {
$(
BUFRMessage::$version(msg) => msg.subcenter_id(),
)+
}
}
fn center_id(&self) -> u16 {
match self {
$(
BUFRMessage::$version(msg) => msg.center_id(),
)+
}
}
fn master_table_version(&self) -> u8 {
match self {
$(
BUFRMessage::$version(msg) => msg.master_table_version(),
)+
}
}
fn local_table_version(&self) -> u8 {
match self {
$(
BUFRMessage::$version(msg) => msg.local_table_version(),
)+
}
}
fn subsets_count(&self) -> u16 {
match self {
$(
BUFRMessage::$version(msg) => msg.subsets_count(),
)+
}
}
fn ndescs(&self) -> usize {
match self {
$(
BUFRMessage::$version(msg) => msg.ndescs(),
)+
}
}
fn descriptors(&self) -> Result<Vec<FXY>> {
match self {
$(
BUFRMessage::$version(msg) => msg.descriptors(),
)+
}
}
fn data_block(&self) -> Result<&[u8]> {
match self {
$(
BUFRMessage::$version(msg) => msg.data_block(),
)+
}
}
}
};
}
impl BUFRMessage {
pub fn parse(input: &[u8]) -> Result<(&[u8], Self)> {
let (_, section0) = parse_section0(input)?;
match section0.version {
2 => {
let (input, msg) = v2::BUFRMessageV2::parse(input)?;
Ok((input, BUFRMessage::V2(msg)))
}
4 => {
let (input, msg) = v4::BUFRMessageV4::parse(input)?;
Ok((input, BUFRMessage::V4(msg)))
}
_ => Err(Error::UnsupportedVersion(section0.version)),
}
}
message!((V2, v2::BUFRMessageV2, 2), (V4, v4::BUFRMessageV4, 4));
impl BUFRMessage {
pub fn version(&self) -> u8 {
match self {
BUFRMessage::V2(_) => 2,
BUFRMessage::V4(_) => 4,
}
}
pub fn table_info(&self) -> TableInfo {
match self {
BUFRMessage::V2(msg) => msg.table_info(),
BUFRMessage::V4(msg) => msg.table_info(),
}
}
}
impl std::fmt::Display for BUFRMessage {
@ -55,11 +135,35 @@ impl std::fmt::Display for BUFRMessage {
}
pub trait MessageVersion: Sized {
fn parse(input: &[u8]) -> IResult<&[u8], Self>;
fn parse(input: &[u8]) -> Result<Self>;
fn description(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result;
fn table_info(&self) -> TableInfo;
fn subcenter_id(&self) -> u16 {
self.table_info().subcenter_id
}
fn center_id(&self) -> u16 {
self.table_info().center_id
}
fn master_table_version(&self) -> u8 {
self.table_info().master_table_version
}
fn local_table_version(&self) -> u8 {
self.table_info().local_table_version
}
fn subsets_count(&self) -> u16;
fn ndescs(&self) -> usize;
fn descriptors(&self) -> Result<Vec<FXY>>;
fn data_block(&self) -> Result<&[u8]>;
}
pub struct TableInfo {

View File

@ -4,7 +4,8 @@ use nom::{
number::complete::{be_u8, be_u16, be_u24},
};
use crate::structs::versions::MessageVersion;
use crate::errors::Result;
use crate::structs::{tools::parse_descriptors, versions::MessageVersion};
use super::skip1;
@ -18,7 +19,7 @@ pub struct BUFRMessageV2 {
}
impl MessageVersion for BUFRMessageV2 {
fn parse(input: &[u8]) -> IResult<&[u8], Self> {
fn parse(input: &[u8]) -> crate::errors::Result<Self> {
let (input, section0) = parse_section0(input)?;
let (input, section1) = parse_section1(input)?;
let (input, section2) = if section1.optional_section_present {
@ -31,16 +32,13 @@ impl MessageVersion for BUFRMessageV2 {
let (input, section4) = parse_section4(input)?;
let (input, _section5) = parse_section5(input)?;
Ok((
input,
BUFRMessageV2 {
section0,
section1,
section2,
section3,
section4,
},
))
Ok(BUFRMessageV2 {
section0,
section1,
section2,
section3,
section4,
})
}
fn description(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
@ -56,6 +54,21 @@ impl MessageVersion for BUFRMessageV2 {
subcenter_id: self.section1.subcentre as u16,
}
}
fn subsets_count(&self) -> u16 {
self.section3.number_of_subsets
}
fn ndescs(&self) -> usize {
self.section3.data.len() / 2
}
fn descriptors(&self) -> Result<Vec<genlib::FXY>> {
parse_descriptors(&self.section3.data)
}
fn data_block(&self) -> Result<&[u8]> {
Ok(&self.section4.data)
}
}
#[derive(Clone)]

View File

@ -1,4 +1,5 @@
use crate::structs::versions::MessageVersion;
use crate::errors::Result;
use crate::structs::{tools::parse_descriptors, versions::MessageVersion};
use nom::{
IResult,
bytes::complete::{tag, take},
@ -18,7 +19,7 @@ pub struct BUFRMessageV4 {
}
impl MessageVersion for BUFRMessageV4 {
fn parse(input: &[u8]) -> IResult<&[u8], Self> {
fn parse(input: &[u8]) -> crate::errors::Result<Self> {
let (input, section0) = parse_section0(input)?;
let (input, section1) = parse_section1(input)?;
let (input, section2) = if section1.optional_section_present {
@ -29,18 +30,15 @@ impl MessageVersion for BUFRMessageV4 {
};
let (input, section3) = parse_section3(input)?;
let (input, section4) = parse_section4(input)?;
let (input, _section5) = parse_section5(input)?;
let (_input, _section5) = parse_section5(input)?;
Ok((
input,
BUFRMessageV4 {
section0,
section1,
section2,
section3,
section4,
},
))
Ok(BUFRMessageV4 {
section0,
section1,
section2,
section3,
section4,
})
}
fn description(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
@ -57,6 +55,22 @@ impl MessageVersion for BUFRMessageV4 {
subcenter_id: self.section1.subcentre as u16,
}
}
fn subsets_count(&self) -> u16 {
self.section3.number_of_subsets
}
fn ndescs(&self) -> usize {
self.section3.data.len() / 2
}
fn descriptors(&self) -> Result<Vec<genlib::FXY>> {
parse_descriptors(&self.section3.data)
}
fn data_block(&self) -> Result<&[u8]> {
Ok(&self.section4.data)
}
}
#[derive(Clone)]

View File

@ -37,13 +37,13 @@ impl TableTrait for MasterTable {
TableType::B => {
let mut base_dir = PathBuf::new();
base_dir.push("tables/master");
let file_name = format!("BUFR_TableB_en_{:0>2}.bufrtbl", self.version);
let file_name = format!("BUFR_TableB_{}.bufrtbl", self.version);
base_dir.join(file_name)
}
TableType::D => {
let mut base_dir = PathBuf::new();
base_dir.push("tables/master");
let file_name = format!("BUFR_TableD_en_{:0>2}.bufrtbl", self.version);
let file_name = format!("BUFR_TableD_{}.bufrtbl", self.version);
base_dir.join(file_name)
}
_ => {
@ -60,26 +60,20 @@ impl TableTrait for LocalTable {
let mut base_dir = PathBuf::new();
base_dir.push("tables/local");
let sub_center_str = match self.sub_center {
Some(sc) => format!("sc{:0>2}", sc),
None => "sc00".to_string(),
Some(sc) => format!("{}", sc),
None => "0".to_string(),
};
let file_name = format!(
"BUFR_TableB_{}_en_{:0>2}.bufrtbl",
sub_center_str, self.version
);
let file_name = format!("BUFR_TableB_{}_{}.bufrtbl", sub_center_str, self.version);
base_dir.join(file_name)
}
TableType::D => {
let mut base_dir = PathBuf::new();
base_dir.push("tables/local");
let sub_center_str = match self.sub_center {
Some(sc) => format!("sc{:0>2}", sc),
None => "sc00".to_string(),
Some(sc) => format!("{}", sc),
None => "0".to_string(),
};
let file_name = format!(
"BUFR_TableD_{}_en_{:0>2}.bufrtbl",
sub_center_str, self.version
);
let file_name = format!("BUFR_TableD_{}_{}.bufrtbl", sub_center_str, self.version);
base_dir.join(file_name)
}
_ => {
@ -97,6 +91,7 @@ impl TableLoader {
T: TableTypeTrait,
{
let path = table_type.file_path(T::TABLE_TYPE);
println!("Loading table from {:?}", path);
BUFRTableMPH::<T>::load_from_disk(path).map_err(|e| e.into())
}
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

File diff suppressed because one or more lines are too long

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show More