This commit is contained in:
tsuki 2026-01-05 23:21:49 +08:00
parent 4fe0a42736
commit c6c15c95da
12 changed files with 1764 additions and 1275 deletions

View File

@ -5,20 +5,11 @@ use std::path::Path;
use crate::pattern::{TableFilePattern, TableKind, TableMetadata}; use crate::pattern::{TableFilePattern, TableKind, TableMetadata};
/// Configuration for custom table patterns
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PatternConfig { pub struct PatternConfig {
/// Name/description of this pattern
pub name: String, pub name: String,
/// Regular expression pattern to match filenames
/// Capture groups should be in order: kind, version, subcenter, center, language
pub regex: String, pub regex: String,
/// Glob pattern for scanning directories
pub glob: String, pub glob: String,
/// Mapping of regex capture groups to metadata fields
pub mapping: FieldMapping, pub mapping: FieldMapping,
} }
@ -182,8 +173,7 @@ impl ScanConfig {
/// Save configuration to a TOML file /// Save configuration to a TOML file
pub fn save_to_file<P: AsRef<Path>>(&self, path: P) -> Result<()> { pub fn save_to_file<P: AsRef<Path>>(&self, path: P) -> Result<()> {
let content = toml::to_string_pretty(self) let content = toml::to_string_pretty(self).context("Failed to serialize config")?;
.context("Failed to serialize config")?;
std::fs::write(path.as_ref(), content) std::fs::write(path.as_ref(), content)
.with_context(|| format!("Failed to write config file: {}", path.as_ref().display()))?; .with_context(|| format!("Failed to write config file: {}", path.as_ref().display()))?;

View File

@ -1,12 +1,14 @@
use std::ops::Deref;
use genlib::BUFRTableMPH; use genlib::BUFRTableMPH;
#[cfg(feature = "opera")] #[cfg(feature = "opera")]
use genlib::prelude::BUFRTableBitMap; use genlib::prelude::BUFRTableBitMap;
use genlib::tables::TableTypeTrait; use genlib::tables::TableTypeTrait;
use crate::decoder::*;
use crate::errors::Result; use crate::errors::Result;
#[cfg(feature = "opera")] #[cfg(feature = "opera")]
use crate::structs::GENCENTER; use crate::structs::GENCENTER;
use crate::structs::data_parser::DataParser;
use crate::structs::versions::{BUFRMessage, MessageVersion}; use crate::structs::versions::{BUFRMessage, MessageVersion};
use crate::tables::*; use crate::tables::*;
@ -20,68 +22,20 @@ impl std::fmt::Display for MessageBlock {
} }
} }
impl Deref for MessageBlock {
type Target = BUFRMessage;
fn deref(&self) -> &Self::Target {
&self.message
}
}
impl MessageBlock { impl MessageBlock {
pub fn new(message: BUFRMessage) -> Self { pub fn new(message: BUFRMessage) -> Self {
MessageBlock { message } MessageBlock { message }
} }
pub fn load_data(&self) -> Result<()> { pub(crate) fn load_first_validable_table<E: TableTypeTrait>(
let table_info = self.message.table_info();
let master_table_version = table_info.master_table_version;
let master_b: BUFRTableB = self.load_first_validable_table(master_table_version)?;
let master_d: BUFRTableD = self.load_first_validable_table(master_table_version)?;
let local_table_version = table_info.local_table_version as u32;
let local_tables = if local_table_version > 0 {
let local_b: BUFRTableB = TableLoader.load_table(LocalTable::new(
Some(table_info.subcenter_id * 256 + table_info.center_id),
table_info.local_table_version,
))?;
let local_d: BUFRTableD = TableLoader.load_table(LocalTable::new(
Some(table_info.subcenter_id * 256 + table_info.center_id),
table_info.local_table_version,
))?;
Some((local_b, local_d))
} else {
None
};
let (local_b, local_d) = if let Some((b, d)) = local_tables {
(Some(b), Some(d))
} else {
(None, None)
};
#[cfg(feature = "opera")]
let opera_bitmap_table = self
.load_opera_bitmap_table(
table_info.center_id,
table_info.subcenter_id,
table_info.local_table_version,
master_table_version,
)
.ok();
let mut parser = DataParser::new(
self.message.version(),
master_b,
master_d,
local_b,
local_d,
#[cfg(feature = "opera")]
opera_bitmap_table,
);
let record = parser.parse(&self.message)?;
Ok(())
}
fn load_first_validable_table<E: TableTypeTrait>(
&self, &self,
table_version: u8, table_version: u8,
) -> Result<BUFRTableMPH<E>> { ) -> Result<BUFRTableMPH<E>> {
@ -101,7 +55,7 @@ impl MessageBlock {
} }
#[cfg(feature = "opera")] #[cfg(feature = "opera")]
fn load_opera_bitmap_table( pub(crate) fn load_opera_bitmap_table(
&self, &self,
subcenter: u16, subcenter: u16,
center: u16, center: u16,
@ -132,7 +86,6 @@ impl BUFRFile {
self.messages.push(MessageBlock::new(message)); self.messages.push(MessageBlock::new(message));
} }
/// Get the number of successfully parsed messages
pub fn message_count(&self) -> usize { pub fn message_count(&self) -> usize {
self.messages.len() self.messages.len()
} }
@ -141,7 +94,6 @@ impl BUFRFile {
self.messages.get(index) self.messages.get(index)
} }
/// Get a reference to all parsed messages
pub fn messages(&self) -> &[MessageBlock] { pub fn messages(&self) -> &[MessageBlock] {
&self.messages &self.messages
} }

1628
rbufr/src/decoder.rs Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,39 +1,11 @@
mod block; pub mod block;
mod errors; pub mod decoder;
mod opera; pub mod errors;
#[cfg(feature = "opera")]
pub mod opera;
pub mod parser; pub mod parser;
pub mod structs; pub mod structs;
mod tables; pub mod tables;
#[cfg(test)] pub use crate::decoder::Decoder;
mod test { pub use crate::parser::*;
#[test]
fn test() {
use genlib::prelude::*;
let bufr = BUFRTableB::load_from_disk(
"/Users/xiang.li1/projects/rbufr/tables/BUFRCREX_TableB_en_00.bufrtbl",
)
.unwrap();
let entry = bufr.lookup(&FXY::new(0, 0, 1)).unwrap();
println!("{:#?}", entry);
}
#[test]
fn test_rb() {
use crate::parser::Parser;
let mut parser = Parser::new();
let parsed_file = parser
.parse("/Users/xiang.li1/Downloads/36_2025-12-22T11_00_00.bufr")
.unwrap();
for msg in parsed_file.messages() {
println!("{}", msg);
msg.load_data().unwrap();
}
}
}

View File

@ -1,6 +1,4 @@
use crate::structs::data_parser::Value; use crate::decoder::Value;
/// This Module contains functions specific to handling BUFR Opera files.
pub struct OperaBitmapParser { pub struct OperaBitmapParser {
values: Vec<Value>, values: Vec<Value>,

View File

@ -9,17 +9,27 @@ use std::{
}; };
const BUFR_PATTERN: &[u8] = b"BUFR"; const BUFR_PATTERN: &[u8] = b"BUFR";
const BUFFER_SIZE: usize = 8192; // 8KB buffer for scanning const BUFFER_SIZE: usize = 8192;
// const MAX_MESSAGE_SIZE: usize = 500_000; // 500KB max message size
pub struct Parser; pub fn parse<P: AsRef<Path>>(path: P) -> Result<BUFRFile> {
let file = File::open(path)?;
let mut reader = BufReader::new(file);
impl Parser { let mut magic_bytes = [0u8; 2];
pub fn new() -> Self { reader.read_exact(&mut magic_bytes)?;
Self reader.seek(SeekFrom::Start(0))?;
if magic_bytes == [0x1F, 0x8B] {
let mut gz_decoder = GzDecoder::new(reader);
let mut bytes = vec![];
gz_decoder.read_to_end(&mut bytes)?;
parse_inner(&mut Cursor::new(bytes))
} else {
reader.seek(SeekFrom::Start(0))?;
parse_inner(&mut reader)
}
} }
/// Find all offsets in the file where "BUFR" appears using streaming approach
fn find_bufr_offsets<R: Read + Seek>(reader: &mut R) -> Result<Vec<u64>> { fn find_bufr_offsets<R: Read + Seek>(reader: &mut R) -> Result<Vec<u64>> {
let mut offsets = Vec::new(); let mut offsets = Vec::new();
let mut buffer = vec![0u8; BUFFER_SIZE]; let mut buffer = vec![0u8; BUFFER_SIZE];
@ -35,12 +45,10 @@ impl Parser {
break; break;
} }
// Create a combined view of overlap + new data
let mut search_buffer = Vec::with_capacity(overlap_len + bytes_read); let mut search_buffer = Vec::with_capacity(overlap_len + bytes_read);
search_buffer.extend_from_slice(&overlap[..overlap_len]); search_buffer.extend_from_slice(&overlap[..overlap_len]);
search_buffer.extend_from_slice(&buffer[..bytes_read]); search_buffer.extend_from_slice(&buffer[..bytes_read]);
// Search for BUFR pattern
for i in 0..search_buffer.len().saturating_sub(BUFR_PATTERN.len() - 1) { for i in 0..search_buffer.len().saturating_sub(BUFR_PATTERN.len() - 1) {
if search_buffer.len() >= i + BUFR_PATTERN.len() if search_buffer.len() >= i + BUFR_PATTERN.len()
&& &search_buffer[i..i + BUFR_PATTERN.len()] == BUFR_PATTERN && &search_buffer[i..i + BUFR_PATTERN.len()] == BUFR_PATTERN
@ -50,11 +58,9 @@ impl Parser {
} }
} }
// Save overlap for next iteration
if bytes_read >= BUFR_PATTERN.len() - 1 { if bytes_read >= BUFR_PATTERN.len() - 1 {
overlap_len = BUFR_PATTERN.len() - 1; overlap_len = BUFR_PATTERN.len() - 1;
overlap[..overlap_len] overlap[..overlap_len].copy_from_slice(&buffer[bytes_read - overlap_len..bytes_read]);
.copy_from_slice(&buffer[bytes_read - overlap_len..bytes_read]);
} else { } else {
overlap_len = bytes_read; overlap_len = bytes_read;
overlap[..overlap_len].copy_from_slice(&buffer[..bytes_read]); overlap[..overlap_len].copy_from_slice(&buffer[..bytes_read]);
@ -66,19 +72,14 @@ impl Parser {
Ok(offsets) Ok(offsets)
} }
/// Read a BUFR message from file at specific offset
fn read_message_at_offset<R: Read + Seek>(reader: &mut R, offset: u64) -> Result<Vec<u8>> { fn read_message_at_offset<R: Read + Seek>(reader: &mut R, offset: u64) -> Result<Vec<u8>> {
reader.seek(SeekFrom::Start(offset))?; reader.seek(SeekFrom::Start(offset))?;
// Read Section 0 to get total length
let mut section0_buf = [0u8; 8]; let mut section0_buf = [0u8; 8];
reader.read_exact(&mut section0_buf)?; reader.read_exact(&mut section0_buf)?;
// Parse total length (3 bytes starting at offset 4) let total_length = u32::from_be_bytes([0, section0_buf[4], section0_buf[5], section0_buf[6]]);
let total_length =
u32::from_be_bytes([0, section0_buf[4], section0_buf[5], section0_buf[6]]);
// Read entire message
let mut message_buf = vec![0u8; total_length as usize]; let mut message_buf = vec![0u8; total_length as usize];
reader.seek(SeekFrom::Start(offset))?; reader.seek(SeekFrom::Start(offset))?;
reader.read_exact(&mut message_buf)?; reader.read_exact(&mut message_buf)?;
@ -86,42 +87,15 @@ impl Parser {
Ok(message_buf) Ok(message_buf)
} }
/// Parse a file containing one or more BUFR messages using streaming approach fn parse_inner<R>(buf_reader: &mut R) -> Result<BUFRFile>
pub fn parse<P: AsRef<Path>>(&mut self, path: P) -> Result<BUFRFile> {
let file = File::open(path)?;
let mut reader = BufReader::new(file);
// Try to detect gzip compression
let mut magic_bytes = [0u8; 2];
reader.read_exact(&mut magic_bytes)?;
reader.seek(SeekFrom::Start(0))?;
if magic_bytes == [0x1F, 0x8B] {
// Gzip magic number detected
let mut gz_decoder = GzDecoder::new(reader);
let mut bytes = vec![];
gz_decoder.read_to_end(&mut bytes)?;
self.parse_inner(&mut Cursor::new(bytes))
} else {
// Not compressed
// Rewind reader
reader.seek(SeekFrom::Start(0))?;
self.parse_inner(&mut reader)
}
}
fn parse_inner<R>(&self, buf_reader: &mut R) -> Result<BUFRFile>
where where
R: Read + Seek, R: Read + Seek,
{ {
// Find all BUFR message offsets let offsets = find_bufr_offsets(buf_reader)?;
let offsets = Self::find_bufr_offsets(buf_reader)?;
let mut file_block = BUFRFile::new(); let mut file_block = BUFRFile::new();
// Parse each BUFR message
for offset in offsets { for offset in offsets {
match Self::read_message_at_offset(buf_reader, offset) { match read_message_at_offset(buf_reader, offset) {
Ok(message_data) => match BUFRMessage::parse(&message_data) { Ok(message_data) => match BUFRMessage::parse(&message_data) {
Ok(message) => { Ok(message) => {
file_block.push_message(message); file_block.push_message(message);
@ -138,4 +112,3 @@ impl Parser {
Ok(file_block) Ok(file_block)
} }
}

View File

@ -1,11 +1,9 @@
use std::ops::{AddAssign, Shl, Shr};
use nom::IResult; use nom::IResult;
use nom::bits::{bits, bytes, complete::take}; use nom::bits::complete::take;
use std::ops::{AddAssign, Shl, Shr};
pub(super) type BitInput<'a> = (&'a [u8], usize);
pub type BitInput<'a> = (&'a [u8], usize); pub(super) fn parse_arbitrary_bits<
pub fn parse_arbitrary_bits<
T: From<u8> + AddAssign + Shl<usize, Output = T> + Shr<usize, Output = T>, T: From<u8> + AddAssign + Shl<usize, Output = T> + Shr<usize, Output = T>,
>( >(
input: BitInput, input: BitInput,
@ -13,17 +11,3 @@ pub fn parse_arbitrary_bits<
) -> IResult<BitInput, T> { ) -> IResult<BitInput, T> {
take(count)(input) take(count)(input)
} }
#[cfg(test)]
mod test {
use crate::structs::bit::parse_arbitrary_bits;
#[test]
fn test() {
let data = [0xA0, 0xA0, 0x01, 0xA0];
let result = parse_arbitrary_bits::<u16>((&data, 0), 16).unwrap();
println!("{:?}", result);
}
}

File diff suppressed because it is too large Load Diff

View File

@ -4,10 +4,8 @@ use nom::{
number::complete::{be_u8, be_u16, be_u24}, number::complete::{be_u8, be_u16, be_u24},
}; };
pub mod bit; pub mod bit;
pub mod data_parser;
pub(super) mod tools; pub(super) mod tools;
pub mod versions; pub mod versions;
#[cfg(feature = "opera")] #[cfg(feature = "opera")]
pub(super) const GENCENTER: u16 = 247; pub(super) const GENCENTER: u16 = 247;

View File

@ -126,7 +126,7 @@ impl TableLoader {
T: TableTypeTrait, T: TableTypeTrait,
{ {
let path = table_type.file_path(T::TABLE_TYPE); let path = table_type.file_path(T::TABLE_TYPE);
println!("Loading table from {:?}", path); // println!("Loading table from {:?}", path);
BUFRTableMPH::<T>::load_from_disk(path).map_err(|e| e.into()) BUFRTableMPH::<T>::load_from_disk(path).map_err(|e| e.into())
} }
} }

View File

@ -1,14 +0,0 @@
use librbufr::parser::Parser;
fn test_rb() {
let mut parser = Parser::new();
let parsed_file = parser
.parse("/Users/xiang.li1/Downloads/36_2025-12-22T11_00_00.bufr")
.unwrap();
for msg in parsed_file.messages() {
println!("{}", msg);
msg.load_data().unwrap();
}
}

11
rbufr/tests/test_rc.rs Normal file
View File

@ -0,0 +1,11 @@
use librbufr::{decoder::Decoder, parser::parse};
fn test_dec() {
let file = parse("/Users/xiang.li1/Downloads/36_2025-12-22T11_00_00.bufr").unwrap();
for msg in file.messages() {
let mut decoder = Decoder::from_message(msg).unwrap();
let record = decoder.decode(msg).unwrap();
println!("{}", record);
}
}