sync
This commit is contained in:
parent
4fe0a42736
commit
c6c15c95da
@ -5,20 +5,11 @@ use std::path::Path;
|
||||
|
||||
use crate::pattern::{TableFilePattern, TableKind, TableMetadata};
|
||||
|
||||
/// Configuration for custom table patterns
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct PatternConfig {
|
||||
/// Name/description of this pattern
|
||||
pub name: String,
|
||||
|
||||
/// Regular expression pattern to match filenames
|
||||
/// Capture groups should be in order: kind, version, subcenter, center, language
|
||||
pub regex: String,
|
||||
|
||||
/// Glob pattern for scanning directories
|
||||
pub glob: String,
|
||||
|
||||
/// Mapping of regex capture groups to metadata fields
|
||||
pub mapping: FieldMapping,
|
||||
}
|
||||
|
||||
@ -182,8 +173,7 @@ impl ScanConfig {
|
||||
|
||||
/// Save configuration to a TOML file
|
||||
pub fn save_to_file<P: AsRef<Path>>(&self, path: P) -> Result<()> {
|
||||
let content = toml::to_string_pretty(self)
|
||||
.context("Failed to serialize config")?;
|
||||
let content = toml::to_string_pretty(self).context("Failed to serialize config")?;
|
||||
|
||||
std::fs::write(path.as_ref(), content)
|
||||
.with_context(|| format!("Failed to write config file: {}", path.as_ref().display()))?;
|
||||
|
||||
@ -1,12 +1,14 @@
|
||||
use std::ops::Deref;
|
||||
|
||||
use genlib::BUFRTableMPH;
|
||||
#[cfg(feature = "opera")]
|
||||
use genlib::prelude::BUFRTableBitMap;
|
||||
use genlib::tables::TableTypeTrait;
|
||||
|
||||
use crate::decoder::*;
|
||||
use crate::errors::Result;
|
||||
#[cfg(feature = "opera")]
|
||||
use crate::structs::GENCENTER;
|
||||
use crate::structs::data_parser::DataParser;
|
||||
use crate::structs::versions::{BUFRMessage, MessageVersion};
|
||||
use crate::tables::*;
|
||||
|
||||
@ -20,68 +22,20 @@ impl std::fmt::Display for MessageBlock {
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for MessageBlock {
|
||||
type Target = BUFRMessage;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.message
|
||||
}
|
||||
}
|
||||
|
||||
impl MessageBlock {
|
||||
pub fn new(message: BUFRMessage) -> Self {
|
||||
MessageBlock { message }
|
||||
}
|
||||
|
||||
pub fn load_data(&self) -> Result<()> {
|
||||
let table_info = self.message.table_info();
|
||||
let master_table_version = table_info.master_table_version;
|
||||
|
||||
let master_b: BUFRTableB = self.load_first_validable_table(master_table_version)?;
|
||||
let master_d: BUFRTableD = self.load_first_validable_table(master_table_version)?;
|
||||
|
||||
let local_table_version = table_info.local_table_version as u32;
|
||||
|
||||
let local_tables = if local_table_version > 0 {
|
||||
let local_b: BUFRTableB = TableLoader.load_table(LocalTable::new(
|
||||
Some(table_info.subcenter_id * 256 + table_info.center_id),
|
||||
table_info.local_table_version,
|
||||
))?;
|
||||
|
||||
let local_d: BUFRTableD = TableLoader.load_table(LocalTable::new(
|
||||
Some(table_info.subcenter_id * 256 + table_info.center_id),
|
||||
table_info.local_table_version,
|
||||
))?;
|
||||
|
||||
Some((local_b, local_d))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let (local_b, local_d) = if let Some((b, d)) = local_tables {
|
||||
(Some(b), Some(d))
|
||||
} else {
|
||||
(None, None)
|
||||
};
|
||||
|
||||
#[cfg(feature = "opera")]
|
||||
let opera_bitmap_table = self
|
||||
.load_opera_bitmap_table(
|
||||
table_info.center_id,
|
||||
table_info.subcenter_id,
|
||||
table_info.local_table_version,
|
||||
master_table_version,
|
||||
)
|
||||
.ok();
|
||||
|
||||
let mut parser = DataParser::new(
|
||||
self.message.version(),
|
||||
master_b,
|
||||
master_d,
|
||||
local_b,
|
||||
local_d,
|
||||
#[cfg(feature = "opera")]
|
||||
opera_bitmap_table,
|
||||
);
|
||||
|
||||
let record = parser.parse(&self.message)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn load_first_validable_table<E: TableTypeTrait>(
|
||||
pub(crate) fn load_first_validable_table<E: TableTypeTrait>(
|
||||
&self,
|
||||
table_version: u8,
|
||||
) -> Result<BUFRTableMPH<E>> {
|
||||
@ -101,7 +55,7 @@ impl MessageBlock {
|
||||
}
|
||||
|
||||
#[cfg(feature = "opera")]
|
||||
fn load_opera_bitmap_table(
|
||||
pub(crate) fn load_opera_bitmap_table(
|
||||
&self,
|
||||
subcenter: u16,
|
||||
center: u16,
|
||||
@ -132,7 +86,6 @@ impl BUFRFile {
|
||||
self.messages.push(MessageBlock::new(message));
|
||||
}
|
||||
|
||||
/// Get the number of successfully parsed messages
|
||||
pub fn message_count(&self) -> usize {
|
||||
self.messages.len()
|
||||
}
|
||||
@ -141,7 +94,6 @@ impl BUFRFile {
|
||||
self.messages.get(index)
|
||||
}
|
||||
|
||||
/// Get a reference to all parsed messages
|
||||
pub fn messages(&self) -> &[MessageBlock] {
|
||||
&self.messages
|
||||
}
|
||||
|
||||
1628
rbufr/src/decoder.rs
Normal file
1628
rbufr/src/decoder.rs
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,39 +1,11 @@
|
||||
mod block;
|
||||
mod errors;
|
||||
mod opera;
|
||||
pub mod block;
|
||||
pub mod decoder;
|
||||
pub mod errors;
|
||||
#[cfg(feature = "opera")]
|
||||
pub mod opera;
|
||||
pub mod parser;
|
||||
pub mod structs;
|
||||
mod tables;
|
||||
pub mod tables;
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
|
||||
#[test]
|
||||
fn test() {
|
||||
use genlib::prelude::*;
|
||||
let bufr = BUFRTableB::load_from_disk(
|
||||
"/Users/xiang.li1/projects/rbufr/tables/BUFRCREX_TableB_en_00.bufrtbl",
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let entry = bufr.lookup(&FXY::new(0, 0, 1)).unwrap();
|
||||
|
||||
println!("{:#?}", entry);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_rb() {
|
||||
use crate::parser::Parser;
|
||||
|
||||
let mut parser = Parser::new();
|
||||
let parsed_file = parser
|
||||
.parse("/Users/xiang.li1/Downloads/36_2025-12-22T11_00_00.bufr")
|
||||
.unwrap();
|
||||
|
||||
for msg in parsed_file.messages() {
|
||||
println!("{}", msg);
|
||||
|
||||
msg.load_data().unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
pub use crate::decoder::Decoder;
|
||||
pub use crate::parser::*;
|
||||
|
||||
@ -1,6 +1,4 @@
|
||||
use crate::structs::data_parser::Value;
|
||||
|
||||
/// This Module contains functions specific to handling BUFR Opera files.
|
||||
use crate::decoder::Value;
|
||||
|
||||
pub struct OperaBitmapParser {
|
||||
values: Vec<Value>,
|
||||
|
||||
@ -9,133 +9,106 @@ use std::{
|
||||
};
|
||||
|
||||
const BUFR_PATTERN: &[u8] = b"BUFR";
|
||||
const BUFFER_SIZE: usize = 8192; // 8KB buffer for scanning
|
||||
// const MAX_MESSAGE_SIZE: usize = 500_000; // 500KB max message size
|
||||
const BUFFER_SIZE: usize = 8192;
|
||||
|
||||
pub struct Parser;
|
||||
pub fn parse<P: AsRef<Path>>(path: P) -> Result<BUFRFile> {
|
||||
let file = File::open(path)?;
|
||||
let mut reader = BufReader::new(file);
|
||||
|
||||
impl Parser {
|
||||
pub fn new() -> Self {
|
||||
Self
|
||||
}
|
||||
|
||||
/// Find all offsets in the file where "BUFR" appears using streaming approach
|
||||
fn find_bufr_offsets<R: Read + Seek>(reader: &mut R) -> Result<Vec<u64>> {
|
||||
let mut offsets = Vec::new();
|
||||
let mut buffer = vec![0u8; BUFFER_SIZE];
|
||||
let mut file_offset = 0u64;
|
||||
let mut overlap = vec![0u8; BUFR_PATTERN.len() - 1];
|
||||
let mut overlap_len = 0;
|
||||
let mut magic_bytes = [0u8; 2];
|
||||
reader.read_exact(&mut magic_bytes)?;
|
||||
reader.seek(SeekFrom::Start(0))?;
|
||||
if magic_bytes == [0x1F, 0x8B] {
|
||||
let mut gz_decoder = GzDecoder::new(reader);
|
||||
let mut bytes = vec![];
|
||||
gz_decoder.read_to_end(&mut bytes)?;
|
||||
|
||||
parse_inner(&mut Cursor::new(bytes))
|
||||
} else {
|
||||
reader.seek(SeekFrom::Start(0))?;
|
||||
|
||||
loop {
|
||||
let bytes_read = reader.read(&mut buffer)?;
|
||||
if bytes_read == 0 {
|
||||
break;
|
||||
}
|
||||
|
||||
// Create a combined view of overlap + new data
|
||||
let mut search_buffer = Vec::with_capacity(overlap_len + bytes_read);
|
||||
search_buffer.extend_from_slice(&overlap[..overlap_len]);
|
||||
search_buffer.extend_from_slice(&buffer[..bytes_read]);
|
||||
|
||||
// Search for BUFR pattern
|
||||
for i in 0..search_buffer.len().saturating_sub(BUFR_PATTERN.len() - 1) {
|
||||
if search_buffer.len() >= i + BUFR_PATTERN.len()
|
||||
&& &search_buffer[i..i + BUFR_PATTERN.len()] == BUFR_PATTERN
|
||||
{
|
||||
let actual_offset = file_offset - overlap_len as u64 + i as u64;
|
||||
offsets.push(actual_offset);
|
||||
}
|
||||
}
|
||||
|
||||
// Save overlap for next iteration
|
||||
if bytes_read >= BUFR_PATTERN.len() - 1 {
|
||||
overlap_len = BUFR_PATTERN.len() - 1;
|
||||
overlap[..overlap_len]
|
||||
.copy_from_slice(&buffer[bytes_read - overlap_len..bytes_read]);
|
||||
} else {
|
||||
overlap_len = bytes_read;
|
||||
overlap[..overlap_len].copy_from_slice(&buffer[..bytes_read]);
|
||||
}
|
||||
|
||||
file_offset += bytes_read as u64;
|
||||
}
|
||||
|
||||
Ok(offsets)
|
||||
}
|
||||
|
||||
/// Read a BUFR message from file at specific offset
|
||||
fn read_message_at_offset<R: Read + Seek>(reader: &mut R, offset: u64) -> Result<Vec<u8>> {
|
||||
reader.seek(SeekFrom::Start(offset))?;
|
||||
|
||||
// Read Section 0 to get total length
|
||||
let mut section0_buf = [0u8; 8];
|
||||
reader.read_exact(&mut section0_buf)?;
|
||||
|
||||
// Parse total length (3 bytes starting at offset 4)
|
||||
let total_length =
|
||||
u32::from_be_bytes([0, section0_buf[4], section0_buf[5], section0_buf[6]]);
|
||||
|
||||
// Read entire message
|
||||
let mut message_buf = vec![0u8; total_length as usize];
|
||||
reader.seek(SeekFrom::Start(offset))?;
|
||||
reader.read_exact(&mut message_buf)?;
|
||||
|
||||
Ok(message_buf)
|
||||
}
|
||||
|
||||
/// Parse a file containing one or more BUFR messages using streaming approach
|
||||
pub fn parse<P: AsRef<Path>>(&mut self, path: P) -> Result<BUFRFile> {
|
||||
let file = File::open(path)?;
|
||||
let mut reader = BufReader::new(file);
|
||||
|
||||
// Try to detect gzip compression
|
||||
let mut magic_bytes = [0u8; 2];
|
||||
reader.read_exact(&mut magic_bytes)?;
|
||||
reader.seek(SeekFrom::Start(0))?;
|
||||
if magic_bytes == [0x1F, 0x8B] {
|
||||
// Gzip magic number detected
|
||||
let mut gz_decoder = GzDecoder::new(reader);
|
||||
let mut bytes = vec![];
|
||||
gz_decoder.read_to_end(&mut bytes)?;
|
||||
|
||||
self.parse_inner(&mut Cursor::new(bytes))
|
||||
} else {
|
||||
// Not compressed
|
||||
// Rewind reader
|
||||
reader.seek(SeekFrom::Start(0))?;
|
||||
self.parse_inner(&mut reader)
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_inner<R>(&self, buf_reader: &mut R) -> Result<BUFRFile>
|
||||
where
|
||||
R: Read + Seek,
|
||||
{
|
||||
// Find all BUFR message offsets
|
||||
let offsets = Self::find_bufr_offsets(buf_reader)?;
|
||||
|
||||
let mut file_block = BUFRFile::new();
|
||||
|
||||
// Parse each BUFR message
|
||||
for offset in offsets {
|
||||
match Self::read_message_at_offset(buf_reader, offset) {
|
||||
Ok(message_data) => match BUFRMessage::parse(&message_data) {
|
||||
Ok(message) => {
|
||||
file_block.push_message(message);
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("Failed to parse BUFR message at offset {}: {:?}", offset, e);
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
eprintln!("Failed to read BUFR message at offset {}: {:?}", offset, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(file_block)
|
||||
parse_inner(&mut reader)
|
||||
}
|
||||
}
|
||||
|
||||
fn find_bufr_offsets<R: Read + Seek>(reader: &mut R) -> Result<Vec<u64>> {
|
||||
let mut offsets = Vec::new();
|
||||
let mut buffer = vec![0u8; BUFFER_SIZE];
|
||||
let mut file_offset = 0u64;
|
||||
let mut overlap = vec![0u8; BUFR_PATTERN.len() - 1];
|
||||
let mut overlap_len = 0;
|
||||
|
||||
reader.seek(SeekFrom::Start(0))?;
|
||||
|
||||
loop {
|
||||
let bytes_read = reader.read(&mut buffer)?;
|
||||
if bytes_read == 0 {
|
||||
break;
|
||||
}
|
||||
|
||||
let mut search_buffer = Vec::with_capacity(overlap_len + bytes_read);
|
||||
search_buffer.extend_from_slice(&overlap[..overlap_len]);
|
||||
search_buffer.extend_from_slice(&buffer[..bytes_read]);
|
||||
|
||||
for i in 0..search_buffer.len().saturating_sub(BUFR_PATTERN.len() - 1) {
|
||||
if search_buffer.len() >= i + BUFR_PATTERN.len()
|
||||
&& &search_buffer[i..i + BUFR_PATTERN.len()] == BUFR_PATTERN
|
||||
{
|
||||
let actual_offset = file_offset - overlap_len as u64 + i as u64;
|
||||
offsets.push(actual_offset);
|
||||
}
|
||||
}
|
||||
|
||||
if bytes_read >= BUFR_PATTERN.len() - 1 {
|
||||
overlap_len = BUFR_PATTERN.len() - 1;
|
||||
overlap[..overlap_len].copy_from_slice(&buffer[bytes_read - overlap_len..bytes_read]);
|
||||
} else {
|
||||
overlap_len = bytes_read;
|
||||
overlap[..overlap_len].copy_from_slice(&buffer[..bytes_read]);
|
||||
}
|
||||
|
||||
file_offset += bytes_read as u64;
|
||||
}
|
||||
|
||||
Ok(offsets)
|
||||
}
|
||||
|
||||
fn read_message_at_offset<R: Read + Seek>(reader: &mut R, offset: u64) -> Result<Vec<u8>> {
|
||||
reader.seek(SeekFrom::Start(offset))?;
|
||||
|
||||
let mut section0_buf = [0u8; 8];
|
||||
reader.read_exact(&mut section0_buf)?;
|
||||
|
||||
let total_length = u32::from_be_bytes([0, section0_buf[4], section0_buf[5], section0_buf[6]]);
|
||||
|
||||
let mut message_buf = vec![0u8; total_length as usize];
|
||||
reader.seek(SeekFrom::Start(offset))?;
|
||||
reader.read_exact(&mut message_buf)?;
|
||||
|
||||
Ok(message_buf)
|
||||
}
|
||||
|
||||
fn parse_inner<R>(buf_reader: &mut R) -> Result<BUFRFile>
|
||||
where
|
||||
R: Read + Seek,
|
||||
{
|
||||
let offsets = find_bufr_offsets(buf_reader)?;
|
||||
let mut file_block = BUFRFile::new();
|
||||
|
||||
for offset in offsets {
|
||||
match read_message_at_offset(buf_reader, offset) {
|
||||
Ok(message_data) => match BUFRMessage::parse(&message_data) {
|
||||
Ok(message) => {
|
||||
file_block.push_message(message);
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("Failed to parse BUFR message at offset {}: {:?}", offset, e);
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
eprintln!("Failed to read BUFR message at offset {}: {:?}", offset, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(file_block)
|
||||
}
|
||||
|
||||
@ -1,11 +1,9 @@
|
||||
use std::ops::{AddAssign, Shl, Shr};
|
||||
|
||||
use nom::IResult;
|
||||
use nom::bits::{bits, bytes, complete::take};
|
||||
use nom::bits::complete::take;
|
||||
use std::ops::{AddAssign, Shl, Shr};
|
||||
pub(super) type BitInput<'a> = (&'a [u8], usize);
|
||||
|
||||
pub type BitInput<'a> = (&'a [u8], usize);
|
||||
|
||||
pub fn parse_arbitrary_bits<
|
||||
pub(super) fn parse_arbitrary_bits<
|
||||
T: From<u8> + AddAssign + Shl<usize, Output = T> + Shr<usize, Output = T>,
|
||||
>(
|
||||
input: BitInput,
|
||||
@ -13,17 +11,3 @@ pub fn parse_arbitrary_bits<
|
||||
) -> IResult<BitInput, T> {
|
||||
take(count)(input)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::structs::bit::parse_arbitrary_bits;
|
||||
|
||||
#[test]
|
||||
fn test() {
|
||||
let data = [0xA0, 0xA0, 0x01, 0xA0];
|
||||
|
||||
let result = parse_arbitrary_bits::<u16>((&data, 0), 16).unwrap();
|
||||
|
||||
println!("{:?}", result);
|
||||
}
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -4,10 +4,8 @@ use nom::{
|
||||
number::complete::{be_u8, be_u16, be_u24},
|
||||
};
|
||||
pub mod bit;
|
||||
pub mod data_parser;
|
||||
pub(super) mod tools;
|
||||
pub mod versions;
|
||||
|
||||
#[cfg(feature = "opera")]
|
||||
pub(super) const GENCENTER: u16 = 247;
|
||||
|
||||
|
||||
@ -126,7 +126,7 @@ impl TableLoader {
|
||||
T: TableTypeTrait,
|
||||
{
|
||||
let path = table_type.file_path(T::TABLE_TYPE);
|
||||
println!("Loading table from {:?}", path);
|
||||
// println!("Loading table from {:?}", path);
|
||||
BUFRTableMPH::<T>::load_from_disk(path).map_err(|e| e.into())
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,14 +0,0 @@
|
||||
use librbufr::parser::Parser;
|
||||
|
||||
fn test_rb() {
|
||||
let mut parser = Parser::new();
|
||||
let parsed_file = parser
|
||||
.parse("/Users/xiang.li1/Downloads/36_2025-12-22T11_00_00.bufr")
|
||||
.unwrap();
|
||||
|
||||
for msg in parsed_file.messages() {
|
||||
println!("{}", msg);
|
||||
|
||||
msg.load_data().unwrap();
|
||||
}
|
||||
}
|
||||
11
rbufr/tests/test_rc.rs
Normal file
11
rbufr/tests/test_rc.rs
Normal file
@ -0,0 +1,11 @@
|
||||
use librbufr::{decoder::Decoder, parser::parse};
|
||||
|
||||
/// End-to-end check: parse a sample BUFR file and decode every message in it.
///
/// Ignored by default because the sample lives at a machine-specific absolute
/// path; run it explicitly with `cargo test -- --ignored` once the file is
/// available locally.
#[test]
#[ignore = "requires a local sample BUFR file"]
fn test_dec() {
    let file = parse("/Users/xiang.li1/Downloads/36_2025-12-22T11_00_00.bufr").unwrap();

    for msg in file.messages() {
        let mut decoder = Decoder::from_message(msg).unwrap();
        let record = decoder.decode(msg).unwrap();

        println!("{}", record);
    }
}
|
||||
Loading…
Reference in New Issue
Block a user