From 4fe0a427364cd5d6353b591319557fed284b62a6 Mon Sep 17 00:00:00 2001 From: tsuki Date: Mon, 5 Jan 2026 12:49:41 +0800 Subject: [PATCH] sync --- .gitignore | 3 +- rbufr/Cargo.toml | 10 + rbufr/src/lib.rs | 2 +- rbufr/src/structs/data_parser.rs | 420 ++++++++++++------------------- rbufr/tests/test_rb.rs | 14 ++ 5 files changed, 192 insertions(+), 257 deletions(-) create mode 100644 rbufr/tests/test_rb.rs diff --git a/.gitignore b/.gitignore index 5ced689..2e2579a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ /target -/tmp-out \ No newline at end of file +/tmp-out +.claude \ No newline at end of file diff --git a/rbufr/Cargo.toml b/rbufr/Cargo.toml index c1cc8ec..43b40bc 100644 --- a/rbufr/Cargo.toml +++ b/rbufr/Cargo.toml @@ -21,3 +21,13 @@ rustc-hash = "2.1.1" [features] default = ["opera"] opera = ["gentools/opera"] + + +[profile.bench] +debug = true +lto = false +opt-level = 3 + +[profile.release] +debug = true +lto = false diff --git a/rbufr/src/lib.rs b/rbufr/src/lib.rs index 8d07d76..0eaf77d 100644 --- a/rbufr/src/lib.rs +++ b/rbufr/src/lib.rs @@ -27,7 +27,7 @@ mod test { let mut parser = Parser::new(); let parsed_file = parser - .parse("/Users/tsuki/Downloads/36_2025-12-17T09_00_00.bufr.nc") + .parse("/Users/xiang.li1/Downloads/36_2025-12-22T11_00_00.bufr") .unwrap(); for msg in parsed_file.messages() { diff --git a/rbufr/src/structs/data_parser.rs b/rbufr/src/structs/data_parser.rs index 6e42f46..4a9b136 100644 --- a/rbufr/src/structs/data_parser.rs +++ b/rbufr/src/structs/data_parser.rs @@ -56,44 +56,24 @@ impl<'a> Cache<'a> { /// Get or cache B table entry #[inline(always)] - fn get_b(&mut self, fxy: &K) -> Option<&'a ArchivedBTableEntry> { - let key = FXY::new(fxy.f(), fxy.x(), fxy.y()); - - // Check if already cached - if self.b_cache.contains_key(&key) { - return self.b_cache.get(&key).copied(); - } - - // Cache miss: lookup and cache - let entry = self.lookup_b_descriptor(fxy)?; - self.b_cache.insert(key, entry); - Some(entry) + fn get_b(&mut self, fxy: &K) -> Option<&ArchivedBTableEntry> { + self.lookup_b_descriptor(fxy) } /// Get or cache D table entry #[inline(always)] fn get_d(&mut self, fxy: &K) -> Option<&'a ArchivedDTableEntry> { - let key = FXY::new(fxy.f(), fxy.x(), fxy.y()); - - // Check if already cached - if self.d_cache.contains_key(&key) { - return self.d_cache.get(&key).copied(); - } - - // Cache miss: lookup and cache - let entry = self.lookup_d_descriptor(fxy)?; - self.d_cache.insert(key, entry); - Some(entry) + self.lookup_d_descriptor(fxy) } #[inline(always)] - fn lookup_b_descriptor(&self, fxy: &K) -> Option<&ArchivedBTableEntry> { + fn lookup_b_descriptor(&self, fxy: &K) -> Option<&'a ArchivedBTableEntry> { self.lookup_local_b_descriptor(fxy) .or_else(|| self.lookup_master_b_descriptor(fxy)) } #[inline] - fn lookup_local_b_descriptor(&self, fxy: &K) -> Option<&ArchivedBTableEntry> { + fn lookup_local_b_descriptor(&self, fxy: &K) -> Option<&'a ArchivedBTableEntry> { self.local_b .as_ref() .and_then(|t| t.lookup(fxy)) @@ -101,17 +81,17 @@ impl<'a> Cache<'a> { } #[inline] - fn lookup_master_b_descriptor(&self, fxy: &K) -> Option<&ArchivedBTableEntry> { + fn lookup_master_b_descriptor(&self, fxy: &K) -> Option<&'a ArchivedBTableEntry> { self.master_b.lookup(fxy).filter(|e| &e.fxy == fxy) } #[inline] - fn lookup_master_d_descriptor(&self, fxy: &K) -> Option<&ArchivedDTableEntry> { + fn lookup_master_d_descriptor(&self, fxy: &K) -> Option<&'a ArchivedDTableEntry> { self.master_d.lookup(fxy).filter(|e| &e.fxy == fxy) } #[inline] - fn lookup_local_d_descriptor(&self, fxy: &K) -> Option<&ArchivedDTableEntry> { + fn lookup_local_d_descriptor(&self, fxy: &K) -> Option<&'a ArchivedDTableEntry> { self.local_d .as_ref() .and_then(|t| t.lookup(fxy)) @@ -119,7 +99,7 @@ impl<'a> Cache<'a> { } #[inline(always)] - fn lookup_d_descriptor(&self, fxy: &K) -> Option<&ArchivedDTableEntry> { + fn lookup_d_descriptor(&self, fxy: &K) -> Option<&'a ArchivedDTableEntry> { self.lookup_local_d_descriptor(fxy) .or_else(|| self.lookup_master_d_descriptor(fxy)) } @@ -257,63 +237,74 @@ impl DataParser { idx: 0, }); - while let Some(Frame { descs, mut idx }) = stack.pop() { - if idx > descs.len() { + while let Some(Frame { descs, idx }) = stack.pop() { + if idx >= descs.len() { continue; } - - self.parsed(descs, idx, &mut stack, &mut cache, &mut state, data_input)?; - - idx += 1; - stack.push(Frame { descs, idx }); + match descs { + Descs::Raw(raw) => { + let des = &raw[idx]; + self.parse_d( + des, + idx, + &mut record, + descs, + &mut stack, + &mut cache, + &mut state, + &mut data_input, + )?; + } + Descs::Archived(archived) => { + let des = &archived[idx]; + self.parse_d( + des, + idx, + &mut record, + descs, + &mut stack, + &mut cache, + &mut state, + &mut data_input, + )?; + } + } } Ok(record) } - fn parsed<'k, 'c, 'i>( + fn parse_d<'k, 'c, 'i, 's, K: BUFRKey>( &self, - descs: Descs<'k>, + des: &K, idx: usize, - stack: &mut Vec>, - cache: &mut Cache<'c>, - state: &mut State, - data: BitInput<'i>, - ) -> Result<()> { - match descs { - Descs::Raw(raw) => { - let des = &raw[0]; - self.parse_d(des, idx, descs, stack, cache, state, data)?; - } - Descs::Archived(archived) => { - let des = &archived[0]; - self.parse_d(des, idx, descs, stack, cache, state, data)?; - } - }; - Ok(()) - } - - fn parse_d<'k, 'c, 'i, K: BUFRKey>( - &self, - des: &'k K, - mut idx: usize, + values: &mut BUFRParsed, descs: Descs<'k>, + // Stack stack: &mut Vec>, cache: &mut Cache<'c>, state: &mut State, - data: BitInput<'i>, - ) -> Result<()> { + data: &mut BitInput<'i>, + ) -> Result<()> + where + 'c: 'k, + { match des.f() { 0 => { // Element descriptor - parse data if let Some(e) = cache.get_b(des) { - let (value, remaining) = self.evalute(state, data, &e)?; + let value = self.evalute(state, data, &e)?; - // println!("Parsed Descriptor: {:?}, Value: {}", des, value); - // values.push(value, &e.element_name_en, &e.bufr_unit); - // return Ok((&descriptors[1..], remaining)); + // println!( + // "Parsed Descriptor {}: Value = {}", + // &e.element_name_en, value + // ); + // values.push(value, e.element_name_en.as_str(), e.bufr_unit.as_str()); - return Ok(()); + stack.push(Frame { + descs, + idx: idx + 1, + }); } else { return Err(Error::ParseError(format!( "Descriptor {:?} not found in Table B", @@ -322,197 +313,97 @@ impl DataParser { } } 1 => { - let mut x = des.x() as usize; + let x = des.x() as usize; let mut y = des.y() as usize; - if y == 0 { - let (count, data) = match descs { + let delay_repeat = y == 0; + + if delay_repeat { + let count = match descs { Descs::Raw(raw) => { - let count_des = &raw[1]; + let count_des = &raw[idx + 1]; self.parse_usize(state, cache, count_des, data)? } Descs::Archived(archived) => { - let count_des = &archived[1]; + let count_des = &archived[idx + 1]; self.parse_usize(state, cache, count_des, data)? } }; y = count; - - let _ = stack.pop(); - stack.push(Frame { descs, idx: 2 }); - }; - - // if x > descriptors.len() { - // return Err(Error::ParseError(format!( - // "Not enough descriptors to repeat: requested {}, available {}", - // x, - // descriptors.len() - // ))); - // } - - match descs { - Descs::Raw(raw) => { - let body = &raw[idx..idx + x]; - let _ = stack.pop(); - stack.push(Frame { - descs: Descs::Raw(body), - idx: 0, - }); - } - Descs::Archived(archived) => { - let body = &archived[idx..idx + x]; - let _ = stack.pop(); - stack.push(Frame { - descs: Descs::Archived(body), - idx: 0, - }); - } } - return Ok(()); - } - 2 => { - let data = self.deal_with_operator(state, values, des, data)?; - return Ok(()); - } - 3 => { - #[cfg(feature = "opera")] - let opera_dw = self.parse_opera_bitmap(des).map(|e| e.depth); + // Calculate the start of the repeat body + let body_start = if delay_repeat { idx + 2 } else { idx + 1 }; + let body_end = body_start + x; - if let Some(seq) = cache.get_d(des) { - let mut fxy_chain = seq.fxy_chain.as_slice(); - if opera_dw.is_some() { - // let (_, data) = - // self.parse_opera_array(opera_dw.unwrap(), fxy_chain, data)?; - // TODO - unimplemented!(""); - } else { - stack.push(Frame { - descs: Descs::Archived(fxy_chain), - idx: 0, - }); - - return Ok(()); - } - } else { - return Err(Error::ParseError(format!( - "Sequence descriptor {:?} not found in Table D", - des - ))); - } - } - _ => { - return Err(Error::ParseError(format!( - "Invalid descriptor F value: {}", - des.f() - ))); - } - } - } - - #[inline(always)] - fn parser_inner<'s, 'a, 'b, 'c, C: Container<'s>, K: BUFRKey>( - &'s self, - state: &mut State, - cache: &mut Cache<'c>, - values: &mut C, - descriptors: &'a [K], - mut data: BitInput<'b>, - ) -> Result<(&'a [K], BitInput<'b>)> { - if descriptors.is_empty() { - return Ok((descriptors, data)); - } - - let des = &descriptors[0]; - - match des.f() { - 0 => { - // Element descriptor - parse data - if let Some(e) = cache.get_b(des) { - let (value, remaining) = self.evalute(state, data, &e)?; - - // println!("Parsed Descriptor: {:?}, Value: {}", des, value); - values.push(value, &e.element_name_en, &e.bufr_unit); - return Ok((&descriptors[1..], remaining)); - } else { - return Err(Error::ParseError(format!( - "Descriptor {:?} not found in Table B", - des - ))); - } - } - 1 => { - let x = des.x(); - let y = des.y(); - let (descriptors, mut data, x, y) = if y == 0 { - let (count, updated_data) = - self.parse_usize(state, cache, &descriptors[1], data)?; - (&descriptors[2..], updated_data, x as usize, count) - } else { - (&descriptors[1..], data, x as usize, y as usize) - }; - - if x > descriptors.len() { + if body_end > descs.len() { return Err(Error::ParseError(format!( "Not enough descriptors to repeat: requested {}, available {}", x, - descriptors.len() + descs.len() - body_start ))); } - let seq = &descriptors[x..]; + // Push continuation frame first (will be processed after repeats) + stack.push(Frame { + descs, + idx: body_end, + }); - // Fast path: single descriptor repetition - if x == 1 && descriptors[0].f() == 0 { - let mut repeating = values.start_repeating(y); - for _ in 0..y { - let (_desc, cd) = - self.parser_inner(state, cache, &mut repeating, descriptors, data)?; - data = cd; + // Push repeat frames in reverse order (so first repeat executes first) + match descs { + Descs::Raw(raw) => { + let body = &raw[body_start..body_end]; + for _ in 0..y { + stack.push(Frame { + descs: Descs::Raw(body), + idx: 0, + }); + } } - repeating.finish(); - return Ok((seq, data)); - } - - // General path: multiple descriptors or complex patterns - let mut repeaing = values.start_repeating(y); - - for _ in 0..y { - let mut repeating_descs = &descriptors[0..x]; - while !repeating_descs.is_empty() { - let (_desc, cd) = - self.parser_inner(state, cache, &mut repeaing, repeating_descs, data)?; - repeating_descs = _desc; - data = cd; + Descs::Archived(archived) => { + let body = &archived[body_start..body_end]; + for _ in 0..y { + stack.push(Frame { + descs: Descs::Archived(body), + idx: 0, + }); + } } } - repeaing.finish(); - return Ok((seq, data)); } 2 => { - let data = self.deal_with_operator(state, values, des, data)?; - return Ok((&descriptors[1..], data)); + self.deal_with_operator(state, values, des, data)?; + stack.push(Frame { + descs, + idx: idx + 1, + }); } 3 => { #[cfg(feature = "opera")] let opera_dw = self.parse_opera_bitmap(des).map(|e| e.depth); if let Some(seq) = cache.get_d(des) { - let mut fxy_chain = seq.fxy_chain.as_slice(); + let fxy_chain = seq.fxy_chain.as_slice(); + #[cfg(feature = "opera")] if opera_dw.is_some() { // let (_, data) = // self.parse_opera_array(opera_dw.unwrap(), fxy_chain, data)?; // TODO unimplemented!(""); - } else { - while !fxy_chain.is_empty() { - let (desc, cd) = - self.parser_inner(state, cache, values, fxy_chain, data)?; - fxy_chain = desc; - data = cd; - } - return Ok((&descriptors[1..], data)); } + + // Push continuation frame (process next descriptor after sequence) + stack.push(Frame { + descs, + idx: idx + 1, + }); + + // Push sequence expansion frame (will be processed first) + stack.push(Frame { + descs: Descs::Archived(fxy_chain), + idx: 0, + }); } else { return Err(Error::ParseError(format!( "Sequence descriptor {:?} not found in Table D", @@ -527,6 +418,8 @@ impl DataParser { ))); } } + + Ok(()) } fn parse_usize<'a, 'b, 'c, K: BUFRKey>( @@ -534,15 +427,15 @@ impl DataParser { state: &State, cache: &mut Cache<'c>, des: &'a K, - data: BitInput<'b>, - ) -> Result<(usize, BitInput<'b>)> { + data: &mut BitInput<'b>, + ) -> Result { match des.f() { 0 => { if let Some(e) = cache.get_b(des) { - let (value, remaining) = self.evalute(state, data, &e)?; + let value = self.evalute(state, data, &e)?; if let Some(v) = value.as_f64() { - Ok((v.floor() as usize, remaining)) + Ok(v.floor() as usize) } else { Err(Error::ParseError(format!("Format Error"))) } @@ -564,39 +457,39 @@ impl DataParser { fn evalute<'a>( &self, state: &State, - data: BitInput<'a>, + data: &mut BitInput<'a>, e: &ArchivedBTableEntry, - ) -> Result<(Value, BitInput<'a>)> { + ) -> Result { match e.bufr_unit.as_str() { "CCITT IA5" => { let total_bytes = state .common_str_width .unwrap_or(((e.bufr_datawidth_bits.to_native() as usize) + 7) / 8); - let (s, data) = data.take_string(total_bytes as usize)?; - return Ok((Value::String(s), data)); + let s = data.take_string(total_bytes as usize)?; + return Ok(Value::String(s)); } _ => { let datawidth = state.datawidth(e); let scale = state.scale(e) as f64; let reference_value = state.reference_value(e) as f64; - let (value, data) = data.get_arbitary_bits(datawidth as usize)?; + let value = data.get_arbitary_bits(datawidth as usize)?; let mv = (1 << datawidth) - 1; if value == mv && e.fxy.x != 31 { - return Ok((Value::Missing, data)); + return Ok(Value::Missing); } let result = ((value as f64) + reference_value) * 10.0f64.powi(-scale as i32); - return Ok((Value::Number(result), data)); + return Ok(Value::Number(result)); } } } fn deal_with_operator<'s, 'a, C: Container<'s>, K: BUFRKey>( - &'s self, + &self, state: &mut State, values: &mut C, operator: &K, - data: BitInput<'a>, - ) -> Result> { + data: &mut BitInput<'a>, + ) -> Result<()> { let x = operator.x(); let y = operator.y(); @@ -626,7 +519,7 @@ impl DataParser { } }, 5 => { - let (string, _data) = data.take_string(y as usize)?; + let string = data.take_string(y as usize)?; values.push(Value::String(string), "", "CAITT IA5"); } @@ -648,7 +541,7 @@ impl DataParser { _ => {} } - Ok(data) + Ok(()) } #[cfg(feature = "opera")] @@ -741,9 +634,9 @@ impl<'a> BitInput<'a> { } #[inline] - pub fn take_string(self, nbytes: usize) -> Result<(String, BitInput<'a>)> { + pub fn take_string(&mut self, nbytes: usize) -> Result { if nbytes == 0 { - return Ok((String::new(), self)); + return Ok(String::new()); } // Fast path: byte-aligned string reads @@ -753,28 +646,29 @@ impl<'a> BitInput<'a> { } let s = String::from_utf8(self.0[..nbytes].to_vec()) .map_err(|_| Error::ParseError("Invalid UTF-8 string".to_string()))?; - return Ok((s, BitInput(&self.0[nbytes..], 0))); + self.0 = &self.0[nbytes..]; + self.1 = 0; + return Ok(s); } // Slow path: unaligned reads let mut chars = Vec::with_capacity(nbytes); - let mut remaining_input = self; + // let mut remaining_input = self; for _ in 0..nbytes { - let (byte_value, next_input) = remaining_input.get_arbitary_bits(8)?; + let byte_value = self.get_arbitary_bits(8)?; chars.push(byte_value as u8); - remaining_input = next_input; } let s = String::from_utf8(chars) .map_err(|_| Error::ParseError("Invalid UTF-8 string".to_string()))?; - Ok((s, remaining_input)) + Ok(s) } #[inline] - pub fn get_arbitary_bits(self, nbits: usize) -> Result<(u64, BitInput<'a>)> { + pub fn get_arbitary_bits(&mut self, nbits: usize) -> Result { if nbits == 0 { - return Ok((0, self)); + return Ok(0); } // Fast path: byte-aligned reads for common bit widths @@ -788,7 +682,7 @@ impl<'a> BitInput<'a> { /// Fast path for byte-aligned bit reads #[inline] - fn get_arbitary_bits_aligned(self, nbits: usize) -> Result<(u64, BitInput<'a>)> { + fn get_arbitary_bits_aligned(&mut self, nbits: usize) -> Result { let byte_data = self.0; // Optimized paths for common bit widths @@ -797,14 +691,18 @@ impl<'a> BitInput<'a> { if byte_data.is_empty() { return Err(Error::ParseError("Not enough data".to_string())); } - Ok((byte_data[0] as u64, BitInput(&byte_data[1..], 0))) + self.0 = &self.0[1..]; + self.1 = 0; + Ok(byte_data[0] as u64) } 16 => { if byte_data.len() < 2 { return Err(Error::ParseError("Not enough data".to_string())); } let value = u16::from_be_bytes([byte_data[0], byte_data[1]]) as u64; - Ok((value, BitInput(&byte_data[2..], 0))) + self.0 = &self.0[2..]; + self.1 = 0; + Ok(value) } 24 => { if byte_data.len() < 3 { @@ -813,7 +711,9 @@ impl<'a> BitInput<'a> { let value = ((byte_data[0] as u64) << 16) | ((byte_data[1] as u64) << 8) | (byte_data[2] as u64); - Ok((value, BitInput(&byte_data[3..], 0))) + self.0 = &self.0[3..]; + self.1 = 0; + Ok(value) } 32 => { if byte_data.len() < 4 { @@ -822,7 +722,9 @@ impl<'a> BitInput<'a> { let value = u32::from_be_bytes([byte_data[0], byte_data[1], byte_data[2], byte_data[3]]) as u64; - Ok((value, BitInput(&byte_data[4..], 0))) + self.0 = &self.0[4..]; + self.1 = 0; + Ok(value) } _ => { // Generic byte-aligned path @@ -847,9 +749,13 @@ impl<'a> BitInput<'a> { let mask = ((1u16 << remaining_bits) - 1) as u8; let bits = (last_byte >> shift) & mask; value = (value << remaining_bits) | (bits as u64); - Ok((value, BitInput(&byte_data[full_bytes..], remaining_bits))) + self.0 = &self.0[full_bytes..]; + self.1 = remaining_bits; + Ok(value) } else { - Ok((value, BitInput(&byte_data[full_bytes..], 0))) + self.0 = &self.0[full_bytes..]; + self.1 = 0; + Ok(value) } } } @@ -857,7 +763,7 @@ impl<'a> BitInput<'a> { /// Slower path for unaligned bit reads #[inline] - fn get_arbitary_bits_unaligned(self, nbits: usize) -> Result<(u64, BitInput<'a>)> { + fn get_arbitary_bits_unaligned(&mut self, nbits: usize) -> Result { let mut value: u64 = 0; let mut remaining_bits = nbits; let mut bit_offset = self.1; @@ -912,7 +818,10 @@ impl<'a> BitInput<'a> { bit_offset = remaining_bits; } - Ok((value, BitInput(byte_data, bit_offset))) + self.0 = byte_data; + self.1 = bit_offset; + + Ok(value) } } @@ -1078,6 +987,7 @@ impl<'v> Frame<'v> { } } +#[derive(Clone, Copy)] enum Descs<'v> { Raw(&'v [genlib::FXY]), Archived(&'v [ArchivedFXY]), diff --git a/rbufr/tests/test_rb.rs b/rbufr/tests/test_rb.rs new file mode 100644 index 0000000..d72c58a --- /dev/null +++ b/rbufr/tests/test_rb.rs @@ -0,0 +1,14 @@ +use librbufr::parser::Parser; + +fn test_rb() { + let mut parser = Parser::new(); + let parsed_file = parser + .parse("/Users/xiang.li1/Downloads/36_2025-12-22T11_00_00.bufr") + .unwrap(); + + for msg in parsed_file.messages() { + println!("{}", msg); + + msg.load_data().unwrap(); + } +}