2026-01-22 10:09:52 +01:00
|
|
|
use anyhow::{Result, anyhow};
|
2026-01-21 22:34:48 +01:00
|
|
|
use chrono::NaiveDate;
|
|
|
|
|
use flate2::read::GzDecoder;
|
|
|
|
|
use std::fs::File;
|
2026-01-22 10:09:52 +01:00
|
|
|
use std::io::{BufRead, BufReader, Read};
|
2026-01-21 22:34:48 +01:00
|
|
|
use std::path::PathBuf;
|
|
|
|
|
|
2026-01-22 10:09:52 +01:00
|
|
|
/// Enum-based reader to avoid Box<dyn BufRead> heap allocation and dynamic dispatch
|
|
|
|
|
pub enum LogReader {
|
|
|
|
|
Plain(BufReader<File>),
|
|
|
|
|
Gzip(BufReader<GzDecoder<File>>),
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl Read for LogReader {
|
|
|
|
|
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
|
|
|
|
|
match self {
|
|
|
|
|
LogReader::Plain(r) => r.read(buf),
|
|
|
|
|
LogReader::Gzip(r) => r.read(buf),
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl BufRead for LogReader {
|
|
|
|
|
fn fill_buf(&mut self) -> std::io::Result<&[u8]> {
|
|
|
|
|
match self {
|
|
|
|
|
LogReader::Plain(r) => r.fill_buf(),
|
|
|
|
|
LogReader::Gzip(r) => r.fill_buf(),
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn consume(&mut self, amt: usize) {
|
|
|
|
|
match self {
|
|
|
|
|
LogReader::Plain(r) => r.consume(amt),
|
|
|
|
|
LogReader::Gzip(r) => r.consume(amt),
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2026-01-21 22:34:48 +01:00
|
|
|
/// Locates per-day log files for a range of dates.
///
/// Files are looked up under `<base_dir>/yyyy/mm/dd/`, either as
/// `<filename>.gz` or as `<filename>`.
pub struct LogFileDiscovery {
    /// Root directory that contains the `yyyy/mm/dd` directory tree.
    base_dir: PathBuf,
    /// Name of the log file inside each day's directory, without the `.gz` suffix.
    filename: String,
}
|
|
|
|
|
|
|
|
|
|
impl LogFileDiscovery {
|
|
|
|
|
pub fn new(base_dir: PathBuf, filename: String) -> Self {
|
|
|
|
|
Self { base_dir, filename }
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Returns an iterator over all log files in the date range
|
|
|
|
|
pub fn discover(&self, from: NaiveDate, to: NaiveDate) -> Result<Vec<LogFile>> {
|
|
|
|
|
let mut files = Vec::new();
|
|
|
|
|
|
|
|
|
|
let mut current = from;
|
|
|
|
|
while current <= to {
|
|
|
|
|
if let Some(log_file) = self.find_log_for_date(current)? {
|
|
|
|
|
files.push(log_file);
|
|
|
|
|
}
|
2026-01-22 10:09:52 +01:00
|
|
|
current = current.succ_opt().ok_or_else(|| anyhow!("Date overflow"))?;
|
2026-01-21 22:34:48 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Ok(files)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn find_log_for_date(&self, date: NaiveDate) -> Result<Option<LogFile>> {
|
|
|
|
|
// Build path: <base_dir>/yyyy/mm/dd/<filename>.gz or <filename>
|
|
|
|
|
let date_path = self
|
|
|
|
|
.base_dir
|
|
|
|
|
.join(date.format("%Y").to_string())
|
|
|
|
|
.join(date.format("%m").to_string())
|
|
|
|
|
.join(date.format("%d").to_string());
|
|
|
|
|
|
|
|
|
|
// Try gzipped first
|
|
|
|
|
let gz_path = date_path.join(format!("{}.gz", self.filename));
|
|
|
|
|
if gz_path.exists() {
|
|
|
|
|
return Ok(Some(LogFile {
|
|
|
|
|
path: gz_path,
|
|
|
|
|
compressed: true,
|
|
|
|
|
}));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Try uncompressed
|
|
|
|
|
let plain_path = date_path.join(&self.filename);
|
|
|
|
|
if plain_path.exists() {
|
|
|
|
|
return Ok(Some(LogFile {
|
|
|
|
|
path: plain_path,
|
|
|
|
|
compressed: false,
|
|
|
|
|
}));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// No file found for this date
|
|
|
|
|
Ok(None)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// A log file on disk together with its compression state.
#[derive(Debug)]
pub struct LogFile {
    /// Location of the file.
    pub path: PathBuf,
    /// `true` when the file is gzip-compressed and must be decoded while reading.
    pub compressed: bool,
}
|
|
|
|
|
|
|
|
|
|
impl LogFile {
|
|
|
|
|
/// Returns a buffered reader for this log file, handling compression transparently
|
2026-01-22 10:09:52 +01:00
|
|
|
pub fn reader(&self) -> Result<LogReader> {
|
2026-01-21 22:34:48 +01:00
|
|
|
let file = File::open(&self.path)?;
|
|
|
|
|
|
|
|
|
|
if self.compressed {
|
|
|
|
|
let decoder = GzDecoder::new(file);
|
2026-01-22 10:09:52 +01:00
|
|
|
Ok(LogReader::Gzip(BufReader::new(decoder)))
|
2026-01-21 22:34:48 +01:00
|
|
|
} else {
|
2026-01-22 10:09:52 +01:00
|
|
|
Ok(LogReader::Plain(BufReader::new(file)))
|
2026-01-21 22:34:48 +01:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// For reading a single file directly (e.g., for testing)
|
2026-01-22 10:09:52 +01:00
|
|
|
pub fn read_log_file(path: &str) -> Result<LogReader> {
|
2026-01-21 22:34:48 +01:00
|
|
|
let file = File::open(path)?;
|
|
|
|
|
|
|
|
|
|
if path.ends_with(".gz") {
|
|
|
|
|
let decoder = GzDecoder::new(file);
|
2026-01-22 10:09:52 +01:00
|
|
|
Ok(LogReader::Gzip(BufReader::new(decoder)))
|
2026-01-21 22:34:48 +01:00
|
|
|
} else {
|
2026-01-22 10:09:52 +01:00
|
|
|
Ok(LogReader::Plain(BufReader::new(file)))
|
2026-01-21 22:34:48 +01:00
|
|
|
}
|
|
|
|
|
}
|