From 6c61aed7a1b0069c6b539a77da2e2296c3eb157f Mon Sep 17 00:00:00 2001 From: Alexandr Mansurov Date: Wed, 21 Jan 2026 22:49:04 +0100 Subject: [PATCH] Parse error breakdown, polish warnings --- src/files.rs | 3 --- src/main.rs | 24 +++++++++++++++++------- src/parser.rs | 7 ------- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/files.rs b/src/files.rs index f8557a2..8f447ec 100644 --- a/src/files.rs +++ b/src/files.rs @@ -47,7 +47,6 @@ impl LogFileDiscovery { return Ok(Some(LogFile { path: gz_path, compressed: true, - date, })); } @@ -57,7 +56,6 @@ impl LogFileDiscovery { return Ok(Some(LogFile { path: plain_path, compressed: false, - date, })); } @@ -70,7 +68,6 @@ impl LogFileDiscovery { pub struct LogFile { pub path: PathBuf, pub compressed: bool, - pub date: NaiveDate, } impl LogFile { diff --git a/src/main.rs b/src/main.rs index cb5b2b9..0082851 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,7 @@ use anyhow::{anyhow, Result}; use chrono::NaiveDate; use clap::Parser; +use std::collections::HashMap; use std::io::BufRead; use std::path::PathBuf; @@ -119,7 +120,7 @@ fn process_reader( let mut signature_batch: Vec = Vec::with_capacity(batch_size); let mut total_lines = 0u64; let mut parsed_lines = 0u64; - let mut error_lines = 0u64; + let mut error_counts: HashMap = HashMap::new(); for line_result in reader.lines() { let line = line_result?; @@ -136,18 +137,16 @@ fn process_reader( } } Err(e) => { - error_lines += 1; - if error_lines <= 10 { - eprintln!("Parse error: {}", e); - } + *error_counts.entry(e.to_string()).or_insert(0) += 1; } } } if total_lines % 100_000 == 0 { + let total_errors: u64 = error_counts.values().sum(); eprintln!( "Progress: {} lines read, {} parsed, {} errors", - total_lines, parsed_lines, error_lines + total_lines, parsed_lines, total_errors ); } } @@ -157,11 +156,22 @@ fn process_reader( flush_signature_batch(db, &mut signature_batch)?; } + let total_errors: u64 = error_counts.values().sum(); eprintln!( "File complete: {} lines read, {} parsed, {} errors", - total_lines, parsed_lines, error_lines + total_lines, parsed_lines, total_errors ); + // Print error summary + if !error_counts.is_empty() { + eprintln!("\nParse errors breakdown:"); + let mut errors: Vec<_> = error_counts.into_iter().collect(); + errors.sort_by(|a, b| b.1.cmp(&a.1)); // Sort by count descending + for (error, count) in errors { + eprintln!(" {} ({}x)", error, count); + } + } + Ok(()) } diff --git a/src/parser.rs b/src/parser.rs index 5fdb0f8..ad15d01 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -25,9 +25,6 @@ pub struct SignatureEntry { /// Trait for parsing different message types from logs. /// Implement this trait to add support for new message formats. pub trait MessageParser: Send + Sync { - /// Returns the message type identifier (e.g., "signature") - fn message_type(&self) -> &'static str; - /// Attempts to parse a log line. Returns None if this parser doesn't handle this message type. fn parse(&self, line: &str) -> Option>; } @@ -49,10 +46,6 @@ static SIGNATURE_RE: LazyLock = pub struct SignatureParser; impl MessageParser for SignatureParser { - fn message_type(&self) -> &'static str { - "signature" - } - fn parse(&self, line: &str) -> Option> { // Check if this line contains a signature message if !line.contains("msg=\"signature:") {