Add log ingestion tool for loading signature logs into SQLite
- Parse signature messages from log files extracting app info, device details, and feature flags (autofill, touchID, offline login, etc.) - Support both plain .log and gzip compressed .log.gz files - File discovery by date range (YYYY/mm/dd directory structure) - Batch inserts for performance with large files (10GB+ per day) - Index on session_id and version for efficient queries - Extensible parser architecture via MessageParser trait - Parallel file processing for multi-day ingestion Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
97
src/files.rs
Normal file
97
src/files.rs
Normal file
@@ -0,0 +1,97 @@
|
||||
use anyhow::{anyhow, Result};
|
||||
use chrono::NaiveDate;
|
||||
use flate2::read::GzDecoder;
|
||||
use std::fs::File;
|
||||
use std::io::{BufRead, BufReader};
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// Locates per-day log files beneath a date-partitioned directory tree.
///
/// Each day is looked up at `<base_dir>/YYYY/mm/dd/<filename>`; a
/// `<filename>.gz` sibling takes precedence when it exists.
#[derive(Debug, Clone)]
pub struct LogFileDiscovery {
    /// Root directory that contains the `YYYY/mm/dd` subdirectories.
    base_dir: PathBuf,
    /// Name of the log file expected inside each day's directory, without
    /// the `.gz` suffix.
    filename: String,
}
|
||||
|
||||
impl LogFileDiscovery {
|
||||
pub fn new(base_dir: PathBuf, filename: String) -> Self {
|
||||
Self { base_dir, filename }
|
||||
}
|
||||
|
||||
/// Returns an iterator over all log files in the date range
|
||||
pub fn discover(&self, from: NaiveDate, to: NaiveDate) -> Result<Vec<LogFile>> {
|
||||
let mut files = Vec::new();
|
||||
|
||||
let mut current = from;
|
||||
while current <= to {
|
||||
if let Some(log_file) = self.find_log_for_date(current)? {
|
||||
files.push(log_file);
|
||||
}
|
||||
current = current
|
||||
.succ_opt()
|
||||
.ok_or_else(|| anyhow!("Date overflow"))?;
|
||||
}
|
||||
|
||||
Ok(files)
|
||||
}
|
||||
|
||||
fn find_log_for_date(&self, date: NaiveDate) -> Result<Option<LogFile>> {
|
||||
// Build path: <base_dir>/yyyy/mm/dd/<filename>.gz or <filename>
|
||||
let date_path = self
|
||||
.base_dir
|
||||
.join(date.format("%Y").to_string())
|
||||
.join(date.format("%m").to_string())
|
||||
.join(date.format("%d").to_string());
|
||||
|
||||
// Try gzipped first
|
||||
let gz_path = date_path.join(format!("{}.gz", self.filename));
|
||||
if gz_path.exists() {
|
||||
return Ok(Some(LogFile {
|
||||
path: gz_path,
|
||||
compressed: true,
|
||||
}));
|
||||
}
|
||||
|
||||
// Try uncompressed
|
||||
let plain_path = date_path.join(&self.filename);
|
||||
if plain_path.exists() {
|
||||
return Ok(Some(LogFile {
|
||||
path: plain_path,
|
||||
compressed: false,
|
||||
}));
|
||||
}
|
||||
|
||||
// No file found for this date
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
/// A single log file on disk together with its compression state.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LogFile {
    /// Location of the file on disk.
    pub path: PathBuf,
    /// Whether the file is gzip-compressed; selects gzip decoding when the
    /// file is read.
    pub compressed: bool,
}
|
||||
|
||||
impl LogFile {
|
||||
/// Returns a buffered reader for this log file, handling compression transparently
|
||||
pub fn reader(&self) -> Result<Box<dyn BufRead>> {
|
||||
let file = File::open(&self.path)?;
|
||||
|
||||
if self.compressed {
|
||||
let decoder = GzDecoder::new(file);
|
||||
Ok(Box::new(BufReader::new(decoder)))
|
||||
} else {
|
||||
Ok(Box::new(BufReader::new(file)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// For reading a single file directly (e.g., for testing)
|
||||
pub fn read_log_file(path: &str) -> Result<Box<dyn BufRead>> {
|
||||
let file = File::open(path)?;
|
||||
|
||||
if path.ends_with(".gz") {
|
||||
let decoder = GzDecoder::new(file);
|
||||
Ok(Box::new(BufReader::new(decoder)))
|
||||
} else {
|
||||
Ok(Box::new(BufReader::new(file)))
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user