Parse error breakdown, polish warnings

This commit is contained in:
2026-01-21 22:49:04 +01:00
parent 5966e6cee6
commit 6c61aed7a1
3 changed files with 17 additions and 17 deletions

View File

@@ -47,7 +47,6 @@ impl LogFileDiscovery {
return Ok(Some(LogFile { return Ok(Some(LogFile {
path: gz_path, path: gz_path,
compressed: true, compressed: true,
date,
})); }));
} }
@@ -57,7 +56,6 @@ impl LogFileDiscovery {
return Ok(Some(LogFile { return Ok(Some(LogFile {
path: plain_path, path: plain_path,
compressed: false, compressed: false,
date,
})); }));
} }
@@ -70,7 +68,6 @@ impl LogFileDiscovery {
pub struct LogFile { pub struct LogFile {
pub path: PathBuf, pub path: PathBuf,
pub compressed: bool, pub compressed: bool,
pub date: NaiveDate,
} }
impl LogFile { impl LogFile {

View File

@@ -1,6 +1,7 @@
use anyhow::{anyhow, Result}; use anyhow::{anyhow, Result};
use chrono::NaiveDate; use chrono::NaiveDate;
use clap::Parser; use clap::Parser;
use std::collections::HashMap;
use std::io::BufRead; use std::io::BufRead;
use std::path::PathBuf; use std::path::PathBuf;
@@ -119,7 +120,7 @@ fn process_reader(
let mut signature_batch: Vec<SignatureEntry> = Vec::with_capacity(batch_size); let mut signature_batch: Vec<SignatureEntry> = Vec::with_capacity(batch_size);
let mut total_lines = 0u64; let mut total_lines = 0u64;
let mut parsed_lines = 0u64; let mut parsed_lines = 0u64;
let mut error_lines = 0u64; let mut error_counts: HashMap<String, u64> = HashMap::new();
for line_result in reader.lines() { for line_result in reader.lines() {
let line = line_result?; let line = line_result?;
@@ -136,18 +137,16 @@ fn process_reader(
} }
} }
Err(e) => { Err(e) => {
error_lines += 1; *error_counts.entry(e.to_string()).or_insert(0) += 1;
if error_lines <= 10 {
eprintln!("Parse error: {}", e);
}
} }
} }
} }
if total_lines % 100_000 == 0 { if total_lines % 100_000 == 0 {
let total_errors: u64 = error_counts.values().sum();
eprintln!( eprintln!(
"Progress: {} lines read, {} parsed, {} errors", "Progress: {} lines read, {} parsed, {} errors",
total_lines, parsed_lines, error_lines total_lines, parsed_lines, total_errors
); );
} }
} }
@@ -157,11 +156,22 @@ fn process_reader(
flush_signature_batch(db, &mut signature_batch)?; flush_signature_batch(db, &mut signature_batch)?;
} }
let total_errors: u64 = error_counts.values().sum();
eprintln!( eprintln!(
"File complete: {} lines read, {} parsed, {} errors", "File complete: {} lines read, {} parsed, {} errors",
total_lines, parsed_lines, error_lines total_lines, parsed_lines, total_errors
); );
// Print error summary
if !error_counts.is_empty() {
eprintln!("\nParse errors breakdown:");
let mut errors: Vec<_> = error_counts.into_iter().collect();
errors.sort_by(|a, b| b.1.cmp(&a.1)); // Sort by count descending
for (error, count) in errors {
eprintln!(" {} ({}x)", error, count);
}
}
Ok(()) Ok(())
} }

View File

@@ -25,9 +25,6 @@ pub struct SignatureEntry {
/// Trait for parsing different message types from logs. /// Trait for parsing different message types from logs.
/// Implement this trait to add support for new message formats. /// Implement this trait to add support for new message formats.
pub trait MessageParser: Send + Sync { pub trait MessageParser: Send + Sync {
/// Returns the message type identifier (e.g., "signature")
fn message_type(&self) -> &'static str;
/// Attempts to parse a log line. Returns None if this parser doesn't handle this message type. /// Attempts to parse a log line. Returns None if this parser doesn't handle this message type.
fn parse(&self, line: &str) -> Option<Result<ParsedMessage>>; fn parse(&self, line: &str) -> Option<Result<ParsedMessage>>;
} }
@@ -49,10 +46,6 @@ static SIGNATURE_RE: LazyLock<Regex> =
pub struct SignatureParser; pub struct SignatureParser;
impl MessageParser for SignatureParser { impl MessageParser for SignatureParser {
fn message_type(&self) -> &'static str {
"signature"
}
fn parse(&self, line: &str) -> Option<Result<ParsedMessage>> { fn parse(&self, line: &str) -> Option<Result<ParsedMessage>> {
// Check if this line contains a signature message // Check if this line contains a signature message
if !line.contains("msg=\"signature:") { if !line.contains("msg=\"signature:") {