This commit is contained in:
tsuki 2026-01-05 12:49:41 +08:00
parent 8c2ac4b300
commit 4fe0a42736
5 changed files with 192 additions and 257 deletions

3
.gitignore vendored
View File

@ -1,3 +1,4 @@
/target
/tmp-out
/tmp-out
.claude

View File

@ -21,3 +21,13 @@ rustc-hash = "2.1.1"
[features]
default = ["opera"]
opera = ["gentools/opera"]
[profile.bench]
debug = true
lto = false
opt-level = 3
[profile.release]
debug = true
lto = false

View File

@ -27,7 +27,7 @@ mod test {
let mut parser = Parser::new();
let parsed_file = parser
.parse("/Users/tsuki/Downloads/36_2025-12-17T09_00_00.bufr.nc")
.parse("/Users/xiang.li1/Downloads/36_2025-12-22T11_00_00.bufr")
.unwrap();
for msg in parsed_file.messages() {

View File

@ -56,44 +56,24 @@ impl<'a> Cache<'a> {
/// Get or cache B table entry
#[inline(always)]
fn get_b<K: BUFRKey>(&mut self, fxy: &K) -> Option<&'a ArchivedBTableEntry> {
let key = FXY::new(fxy.f(), fxy.x(), fxy.y());
// Check if already cached
if self.b_cache.contains_key(&key) {
return self.b_cache.get(&key).copied();
}
// Cache miss: lookup and cache
let entry = self.lookup_b_descriptor(fxy)?;
self.b_cache.insert(key, entry);
Some(entry)
fn get_b<K: BUFRKey>(&mut self, fxy: &K) -> Option<&ArchivedBTableEntry> {
self.lookup_b_descriptor(fxy)
}
/// Get or cache D table entry
#[inline(always)]
fn get_d<K: BUFRKey>(&mut self, fxy: &K) -> Option<&'a ArchivedDTableEntry> {
let key = FXY::new(fxy.f(), fxy.x(), fxy.y());
// Check if already cached
if self.d_cache.contains_key(&key) {
return self.d_cache.get(&key).copied();
}
// Cache miss: lookup and cache
let entry = self.lookup_d_descriptor(fxy)?;
self.d_cache.insert(key, entry);
Some(entry)
self.lookup_d_descriptor(fxy)
}
#[inline(always)]
fn lookup_b_descriptor<K: BUFRKey>(&self, fxy: &K) -> Option<&ArchivedBTableEntry> {
fn lookup_b_descriptor<K: BUFRKey>(&self, fxy: &K) -> Option<&'a ArchivedBTableEntry> {
self.lookup_local_b_descriptor(fxy)
.or_else(|| self.lookup_master_b_descriptor(fxy))
}
#[inline]
fn lookup_local_b_descriptor<K: BUFRKey>(&self, fxy: &K) -> Option<&ArchivedBTableEntry> {
fn lookup_local_b_descriptor<K: BUFRKey>(&self, fxy: &K) -> Option<&'a ArchivedBTableEntry> {
self.local_b
.as_ref()
.and_then(|t| t.lookup(fxy))
@ -101,17 +81,17 @@ impl<'a> Cache<'a> {
}
#[inline]
fn lookup_master_b_descriptor<K: BUFRKey>(&self, fxy: &K) -> Option<&ArchivedBTableEntry> {
fn lookup_master_b_descriptor<K: BUFRKey>(&self, fxy: &K) -> Option<&'a ArchivedBTableEntry> {
self.master_b.lookup(fxy).filter(|e| &e.fxy == fxy)
}
#[inline]
fn lookup_master_d_descriptor<K: BUFRKey>(&self, fxy: &K) -> Option<&ArchivedDTableEntry> {
fn lookup_master_d_descriptor<K: BUFRKey>(&self, fxy: &K) -> Option<&'a ArchivedDTableEntry> {
self.master_d.lookup(fxy).filter(|e| &e.fxy == fxy)
}
#[inline]
fn lookup_local_d_descriptor<K: BUFRKey>(&self, fxy: &K) -> Option<&ArchivedDTableEntry> {
fn lookup_local_d_descriptor<K: BUFRKey>(&self, fxy: &K) -> Option<&'a ArchivedDTableEntry> {
self.local_d
.as_ref()
.and_then(|t| t.lookup(fxy))
@ -119,7 +99,7 @@ impl<'a> Cache<'a> {
}
#[inline(always)]
fn lookup_d_descriptor<K: BUFRKey>(&self, fxy: &K) -> Option<&ArchivedDTableEntry> {
fn lookup_d_descriptor<K: BUFRKey>(&self, fxy: &K) -> Option<&'a ArchivedDTableEntry> {
self.lookup_local_d_descriptor(fxy)
.or_else(|| self.lookup_master_d_descriptor(fxy))
}
@ -257,63 +237,74 @@ impl DataParser {
idx: 0,
});
while let Some(Frame { descs, mut idx }) = stack.pop() {
if idx > descs.len() {
while let Some(Frame { descs, idx }) = stack.pop() {
if idx >= descs.len() {
continue;
}
self.parsed(descs, idx, &mut stack, &mut cache, &mut state, data_input)?;
idx += 1;
stack.push(Frame { descs, idx });
match descs {
Descs::Raw(raw) => {
let des = &raw[idx];
self.parse_d(
des,
idx,
&mut record,
descs,
&mut stack,
&mut cache,
&mut state,
&mut data_input,
)?;
}
Descs::Archived(archived) => {
let des = &archived[idx];
self.parse_d(
des,
idx,
&mut record,
descs,
&mut stack,
&mut cache,
&mut state,
&mut data_input,
)?;
}
}
}
Ok(record)
}
fn parsed<'k, 'c, 'i>(
fn parse_d<'k, 'c, 'i, 's, K: BUFRKey>(
&self,
descs: Descs<'k>,
des: &K,
idx: usize,
stack: &mut Vec<Frame<'k>>,
cache: &mut Cache<'c>,
state: &mut State,
data: BitInput<'i>,
) -> Result<()> {
match descs {
Descs::Raw(raw) => {
let des = &raw[0];
self.parse_d(des, idx, descs, stack, cache, state, data)?;
}
Descs::Archived(archived) => {
let des = &archived[0];
self.parse_d(des, idx, descs, stack, cache, state, data)?;
}
};
Ok(())
}
fn parse_d<'k, 'c, 'i, K: BUFRKey>(
&self,
des: &'k K,
mut idx: usize,
values: &mut BUFRParsed,
descs: Descs<'k>,
// Stack
stack: &mut Vec<Frame<'k>>,
cache: &mut Cache<'c>,
state: &mut State,
data: BitInput<'i>,
) -> Result<()> {
data: &mut BitInput<'i>,
) -> Result<()>
where
'c: 'k,
{
match des.f() {
0 => {
// Element descriptor - parse data
if let Some(e) = cache.get_b(des) {
let (value, remaining) = self.evalute(state, data, &e)?;
let value = self.evalute(state, data, &e)?;
// println!("Parsed Descriptor: {:?}, Value: {}", des, value);
// values.push(value, &e.element_name_en, &e.bufr_unit);
// return Ok((&descriptors[1..], remaining));
// println!(
// "Parsed Descriptor {}: Value = {}",
// &e.element_name_en, value
// );
// values.push(value, e.element_name_en.as_str(), e.bufr_unit.as_str());
return Ok(());
stack.push(Frame {
descs,
idx: idx + 1,
});
} else {
return Err(Error::ParseError(format!(
"Descriptor {:?} not found in Table B",
@ -322,197 +313,97 @@ impl DataParser {
}
}
1 => {
let mut x = des.x() as usize;
let x = des.x() as usize;
let mut y = des.y() as usize;
if y == 0 {
let (count, data) = match descs {
let delay_repeat = y == 0;
if delay_repeat {
let count = match descs {
Descs::Raw(raw) => {
let count_des = &raw[1];
let count_des = &raw[idx + 1];
self.parse_usize(state, cache, count_des, data)?
}
Descs::Archived(archived) => {
let count_des = &archived[1];
let count_des = &archived[idx + 1];
self.parse_usize(state, cache, count_des, data)?
}
};
y = count;
let _ = stack.pop();
stack.push(Frame { descs, idx: 2 });
};
// if x > descriptors.len() {
// return Err(Error::ParseError(format!(
// "Not enough descriptors to repeat: requested {}, available {}",
// x,
// descriptors.len()
// )));
// }
match descs {
Descs::Raw(raw) => {
let body = &raw[idx..idx + x];
let _ = stack.pop();
stack.push(Frame {
descs: Descs::Raw(body),
idx: 0,
});
}
Descs::Archived(archived) => {
let body = &archived[idx..idx + x];
let _ = stack.pop();
stack.push(Frame {
descs: Descs::Archived(body),
idx: 0,
});
}
}
return Ok(());
}
2 => {
let data = self.deal_with_operator(state, values, des, data)?;
return Ok(());
}
3 => {
#[cfg(feature = "opera")]
let opera_dw = self.parse_opera_bitmap(des).map(|e| e.depth);
// Calculate the start of the repeat body
let body_start = if delay_repeat { idx + 2 } else { idx + 1 };
let body_end = body_start + x;
if let Some(seq) = cache.get_d(des) {
let mut fxy_chain = seq.fxy_chain.as_slice();
if opera_dw.is_some() {
// let (_, data) =
// self.parse_opera_array(opera_dw.unwrap(), fxy_chain, data)?;
// TODO
unimplemented!("");
} else {
stack.push(Frame {
descs: Descs::Archived(fxy_chain),
idx: 0,
});
return Ok(());
}
} else {
return Err(Error::ParseError(format!(
"Sequence descriptor {:?} not found in Table D",
des
)));
}
}
_ => {
return Err(Error::ParseError(format!(
"Invalid descriptor F value: {}",
des.f()
)));
}
}
}
#[inline(always)]
fn parser_inner<'s, 'a, 'b, 'c, C: Container<'s>, K: BUFRKey>(
&'s self,
state: &mut State,
cache: &mut Cache<'c>,
values: &mut C,
descriptors: &'a [K],
mut data: BitInput<'b>,
) -> Result<(&'a [K], BitInput<'b>)> {
if descriptors.is_empty() {
return Ok((descriptors, data));
}
let des = &descriptors[0];
match des.f() {
0 => {
// Element descriptor - parse data
if let Some(e) = cache.get_b(des) {
let (value, remaining) = self.evalute(state, data, &e)?;
// println!("Parsed Descriptor: {:?}, Value: {}", des, value);
values.push(value, &e.element_name_en, &e.bufr_unit);
return Ok((&descriptors[1..], remaining));
} else {
return Err(Error::ParseError(format!(
"Descriptor {:?} not found in Table B",
des
)));
}
}
1 => {
let x = des.x();
let y = des.y();
let (descriptors, mut data, x, y) = if y == 0 {
let (count, updated_data) =
self.parse_usize(state, cache, &descriptors[1], data)?;
(&descriptors[2..], updated_data, x as usize, count)
} else {
(&descriptors[1..], data, x as usize, y as usize)
};
if x > descriptors.len() {
if body_end > descs.len() {
return Err(Error::ParseError(format!(
"Not enough descriptors to repeat: requested {}, available {}",
x,
descriptors.len()
descs.len() - body_start
)));
}
let seq = &descriptors[x..];
// Push continuation frame first (will be processed after repeats)
stack.push(Frame {
descs,
idx: body_end,
});
// Fast path: single descriptor repetition
if x == 1 && descriptors[0].f() == 0 {
let mut repeating = values.start_repeating(y);
for _ in 0..y {
let (_desc, cd) =
self.parser_inner(state, cache, &mut repeating, descriptors, data)?;
data = cd;
// Push repeat frames in reverse order (so first repeat executes first)
match descs {
Descs::Raw(raw) => {
let body = &raw[body_start..body_end];
for _ in 0..y {
stack.push(Frame {
descs: Descs::Raw(body),
idx: 0,
});
}
}
repeating.finish();
return Ok((seq, data));
}
// General path: multiple descriptors or complex patterns
let mut repeaing = values.start_repeating(y);
for _ in 0..y {
let mut repeating_descs = &descriptors[0..x];
while !repeating_descs.is_empty() {
let (_desc, cd) =
self.parser_inner(state, cache, &mut repeaing, repeating_descs, data)?;
repeating_descs = _desc;
data = cd;
Descs::Archived(archived) => {
let body = &archived[body_start..body_end];
for _ in 0..y {
stack.push(Frame {
descs: Descs::Archived(body),
idx: 0,
});
}
}
}
repeaing.finish();
return Ok((seq, data));
}
2 => {
let data = self.deal_with_operator(state, values, des, data)?;
return Ok((&descriptors[1..], data));
self.deal_with_operator(state, values, des, data)?;
stack.push(Frame {
descs,
idx: idx + 1,
});
}
3 => {
#[cfg(feature = "opera")]
let opera_dw = self.parse_opera_bitmap(des).map(|e| e.depth);
if let Some(seq) = cache.get_d(des) {
let mut fxy_chain = seq.fxy_chain.as_slice();
let fxy_chain = seq.fxy_chain.as_slice();
#[cfg(feature = "opera")]
if opera_dw.is_some() {
// let (_, data) =
// self.parse_opera_array(opera_dw.unwrap(), fxy_chain, data)?;
// TODO
unimplemented!("");
} else {
while !fxy_chain.is_empty() {
let (desc, cd) =
self.parser_inner(state, cache, values, fxy_chain, data)?;
fxy_chain = desc;
data = cd;
}
return Ok((&descriptors[1..], data));
}
// Push continuation frame (process next descriptor after sequence)
stack.push(Frame {
descs,
idx: idx + 1,
});
// Push sequence expansion frame (will be processed first)
stack.push(Frame {
descs: Descs::Archived(fxy_chain),
idx: 0,
});
} else {
return Err(Error::ParseError(format!(
"Sequence descriptor {:?} not found in Table D",
@ -527,6 +418,8 @@ impl DataParser {
)));
}
}
Ok(())
}
fn parse_usize<'a, 'b, 'c, K: BUFRKey>(
@ -534,15 +427,15 @@ impl DataParser {
state: &State,
cache: &mut Cache<'c>,
des: &'a K,
data: BitInput<'b>,
) -> Result<(usize, BitInput<'b>)> {
data: &mut BitInput<'b>,
) -> Result<usize> {
match des.f() {
0 => {
if let Some(e) = cache.get_b(des) {
let (value, remaining) = self.evalute(state, data, &e)?;
let value = self.evalute(state, data, &e)?;
if let Some(v) = value.as_f64() {
Ok((v.floor() as usize, remaining))
Ok(v.floor() as usize)
} else {
Err(Error::ParseError(format!("Format Error")))
}
@ -564,39 +457,39 @@ impl DataParser {
fn evalute<'a>(
&self,
state: &State,
data: BitInput<'a>,
data: &mut BitInput<'a>,
e: &ArchivedBTableEntry,
) -> Result<(Value, BitInput<'a>)> {
) -> Result<Value> {
match e.bufr_unit.as_str() {
"CCITT IA5" => {
let total_bytes = state
.common_str_width
.unwrap_or(((e.bufr_datawidth_bits.to_native() as usize) + 7) / 8);
let (s, data) = data.take_string(total_bytes as usize)?;
return Ok((Value::String(s), data));
let s = data.take_string(total_bytes as usize)?;
return Ok(Value::String(s));
}
_ => {
let datawidth = state.datawidth(e);
let scale = state.scale(e) as f64;
let reference_value = state.reference_value(e) as f64;
let (value, data) = data.get_arbitary_bits(datawidth as usize)?;
let value = data.get_arbitary_bits(datawidth as usize)?;
let mv = (1 << datawidth) - 1;
if value == mv && e.fxy.x != 31 {
return Ok((Value::Missing, data));
return Ok(Value::Missing);
}
let result = ((value as f64) + reference_value) * 10.0f64.powi(-scale as i32);
return Ok((Value::Number(result), data));
return Ok(Value::Number(result));
}
}
}
fn deal_with_operator<'s, 'a, C: Container<'s>, K: BUFRKey>(
&'s self,
&self,
state: &mut State,
values: &mut C,
operator: &K,
data: BitInput<'a>,
) -> Result<BitInput<'a>> {
data: &mut BitInput<'a>,
) -> Result<()> {
let x = operator.x();
let y = operator.y();
@ -626,7 +519,7 @@ impl DataParser {
}
},
5 => {
let (string, _data) = data.take_string(y as usize)?;
let string = data.take_string(y as usize)?;
values.push(Value::String(string), "", "CAITT IA5");
}
@ -648,7 +541,7 @@ impl DataParser {
_ => {}
}
Ok(data)
Ok(())
}
#[cfg(feature = "opera")]
@ -741,9 +634,9 @@ impl<'a> BitInput<'a> {
}
#[inline]
pub fn take_string(self, nbytes: usize) -> Result<(String, BitInput<'a>)> {
pub fn take_string(&mut self, nbytes: usize) -> Result<String> {
if nbytes == 0 {
return Ok((String::new(), self));
return Ok(String::new());
}
// Fast path: byte-aligned string reads
@ -753,28 +646,29 @@ impl<'a> BitInput<'a> {
}
let s = String::from_utf8(self.0[..nbytes].to_vec())
.map_err(|_| Error::ParseError("Invalid UTF-8 string".to_string()))?;
return Ok((s, BitInput(&self.0[nbytes..], 0)));
self.0 = &self.0[nbytes..];
self.1 = 0;
return Ok(s);
}
// Slow path: unaligned reads
let mut chars = Vec::with_capacity(nbytes);
let mut remaining_input = self;
// let mut remaining_input = self;
for _ in 0..nbytes {
let (byte_value, next_input) = remaining_input.get_arbitary_bits(8)?;
let byte_value = self.get_arbitary_bits(8)?;
chars.push(byte_value as u8);
remaining_input = next_input;
}
let s = String::from_utf8(chars)
.map_err(|_| Error::ParseError("Invalid UTF-8 string".to_string()))?;
Ok((s, remaining_input))
Ok(s)
}
#[inline]
pub fn get_arbitary_bits(self, nbits: usize) -> Result<(u64, BitInput<'a>)> {
pub fn get_arbitary_bits(&mut self, nbits: usize) -> Result<u64> {
if nbits == 0 {
return Ok((0, self));
return Ok(0);
}
// Fast path: byte-aligned reads for common bit widths
@ -788,7 +682,7 @@ impl<'a> BitInput<'a> {
/// Fast path for byte-aligned bit reads
#[inline]
fn get_arbitary_bits_aligned(self, nbits: usize) -> Result<(u64, BitInput<'a>)> {
fn get_arbitary_bits_aligned(&mut self, nbits: usize) -> Result<u64> {
let byte_data = self.0;
// Optimized paths for common bit widths
@ -797,14 +691,18 @@ impl<'a> BitInput<'a> {
if byte_data.is_empty() {
return Err(Error::ParseError("Not enough data".to_string()));
}
Ok((byte_data[0] as u64, BitInput(&byte_data[1..], 0)))
self.0 = &self.0[1..];
self.1 = 0;
Ok(byte_data[0] as u64)
}
16 => {
if byte_data.len() < 2 {
return Err(Error::ParseError("Not enough data".to_string()));
}
let value = u16::from_be_bytes([byte_data[0], byte_data[1]]) as u64;
Ok((value, BitInput(&byte_data[2..], 0)))
self.0 = &self.0[2..];
self.1 = 0;
Ok(value)
}
24 => {
if byte_data.len() < 3 {
@ -813,7 +711,9 @@ impl<'a> BitInput<'a> {
let value = ((byte_data[0] as u64) << 16)
| ((byte_data[1] as u64) << 8)
| (byte_data[2] as u64);
Ok((value, BitInput(&byte_data[3..], 0)))
self.0 = &self.0[3..];
self.1 = 0;
Ok(value)
}
32 => {
if byte_data.len() < 4 {
@ -822,7 +722,9 @@ impl<'a> BitInput<'a> {
let value =
u32::from_be_bytes([byte_data[0], byte_data[1], byte_data[2], byte_data[3]])
as u64;
Ok((value, BitInput(&byte_data[4..], 0)))
self.0 = &self.0[4..];
self.1 = 0;
Ok(value)
}
_ => {
// Generic byte-aligned path
@ -847,9 +749,13 @@ impl<'a> BitInput<'a> {
let mask = ((1u16 << remaining_bits) - 1) as u8;
let bits = (last_byte >> shift) & mask;
value = (value << remaining_bits) | (bits as u64);
Ok((value, BitInput(&byte_data[full_bytes..], remaining_bits)))
self.0 = &self.0[full_bytes..];
self.1 = remaining_bits;
Ok(value)
} else {
Ok((value, BitInput(&byte_data[full_bytes..], 0)))
self.0 = &self.0[full_bytes..];
self.1 = 0;
Ok(value)
}
}
}
@ -857,7 +763,7 @@ impl<'a> BitInput<'a> {
/// Slower path for unaligned bit reads
#[inline]
fn get_arbitary_bits_unaligned(self, nbits: usize) -> Result<(u64, BitInput<'a>)> {
fn get_arbitary_bits_unaligned(&mut self, nbits: usize) -> Result<u64> {
let mut value: u64 = 0;
let mut remaining_bits = nbits;
let mut bit_offset = self.1;
@ -912,7 +818,10 @@ impl<'a> BitInput<'a> {
bit_offset = remaining_bits;
}
Ok((value, BitInput(byte_data, bit_offset)))
self.0 = byte_data;
self.1 = bit_offset;
Ok(value)
}
}
@ -1078,6 +987,7 @@ impl<'v> Frame<'v> {
}
}
#[derive(Clone, Copy)]
enum Descs<'v> {
Raw(&'v [genlib::FXY]),
Archived(&'v [ArchivedFXY]),

14
rbufr/tests/test_rb.rs Normal file
View File

@ -0,0 +1,14 @@
use librbufr::parser::Parser;
/// Smoke test: parses a BUFR sample file and calls `load_data` on every message in it.
///
/// The input path is machine-specific, so the test is ignored by default.
/// Run it explicitly with `cargo test -- --ignored` once the sample file is present.
#[test]
#[ignore = "requires a local BUFR sample file"]
fn test_rb() {
    let mut parser = Parser::new();
    let parsed_file = parser
        .parse("/Users/xiang.li1/Downloads/36_2025-12-22T11_00_00.bufr")
        .expect("failed to parse the BUFR sample file");

    for msg in parsed_file.messages() {
        println!("{}", msg);
        msg.load_data().expect("failed to load message data");
    }
}