diff --git a/src/main.rs b/src/main.rs
index 9eb1411..15bad38 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,6 +1,6 @@
 use anyhow::{Result, anyhow};
 use chrono::NaiveDate;
-use clap::Parser;
+use clap::{Parser, Subcommand};
 use crossbeam_channel::{Sender, bounded};
 use rayon::prelude::*;
 use std::collections::HashMap;
@@ -13,14 +13,29 @@ use std::thread;
 mod db;
 mod files;
 mod parser;
+mod search;
 
 use db::Database;
 use files::{LogFile, LogFileDiscovery, LogReader, read_log_file};
 use parser::{ParsedMessage, ParserRegistry, SignatureEntry};
 
 #[derive(Parser, Debug)]
-#[command(author, version, about = "Load log files into SQLite database")]
+#[command(author, version, about = "Log file analysis tool")]
 struct Args {
+    #[command(subcommand)]
+    command: Command,
+}
+
+#[derive(Subcommand, Debug)]
+enum Command {
+    /// Load signature log entries into SQLite database
+    Signature(SignatureArgs),
+    /// Search log file for lines matching a query and print timestamp + message
+    Search(SearchArgs),
+}
+
+#[derive(Parser, Debug)]
+struct SignatureArgs {
     /// Start date (YYYY/mm/dd)
     #[arg(long)]
     from: Option,
@@ -54,6 +69,17 @@ struct Args {
     threads: usize,
 }
 
+#[derive(Parser, Debug)]
+struct SearchArgs {
+    /// Log file to search
+    #[arg(long)]
+    file: PathBuf,
+
+    /// Text to search for in log lines
+    #[arg(long)]
+    query: String,
+}
+
 fn parse_date(s: &str) -> Result<NaiveDate> {
     NaiveDate::parse_from_str(s, "%Y/%m/%d")
         .map_err(|e| anyhow!("Invalid date format '{}': {}. Expected YYYY/mm/dd", s, e))
@@ -62,6 +88,20 @@ fn parse_date(s: &str) -> Result<NaiveDate> {
 fn main() -> Result<()> {
     let args = Args::parse();
 
+    match args.command {
+        Command::Signature(sig_args) => run_signature(sig_args),
+        Command::Search(search_args) => {
+            // A path is not guaranteed to be UTF-8; report that as an error rather
+            // than panicking on user-supplied input.
+            let path = search_args.file.to_str().ok_or_else(|| {
+                anyhow!("Log file path is not valid UTF-8: {}", search_args.file.display())
+            })?;
+            search::run_search(path, &search_args.query)
+        }
+    }
+}
+
+fn run_signature(args: SignatureArgs) -> Result<()> {
     // Configure rayon thread pool if threads specified
     if args.threads > 0
         && let Err(e) = rayon::ThreadPoolBuilder::new()
diff --git a/src/search.rs b/src/search.rs
new file mode 100644
index 0000000..68a51b2
--- /dev/null
+++ b/src/search.rs
@@ -0,0 +1,125 @@
+//! Line-oriented search over a single log file.
+//!
+//! Every line containing the query substring is reported as
+//! `[timestamp] message`, using the syslog-style timestamp at the start of the
+//! line and the quoted `msg="..."` field when they are present.
+
+use anyhow::Result;
+use regex::Regex;
+use std::io::{self, BufRead, Write};
+use std::sync::LazyLock;
+
+use crate::files::read_log_file;
+
+/// Matches a leading syslog-style timestamp such as `Jan 27 17:21:17`.
+static SYSLOG_TIMESTAMP_RE: LazyLock<Regex> =
+    LazyLock::new(|| Regex::new(r"^(\w{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2})").unwrap());
+
+/// Captures the non-empty contents of the first `msg="..."` field on a line.
+static MSG_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"msg="([^"]+)""#).unwrap());
+
+/// Prints every line of `file_path` that contains `query` as a plain substring.
+///
+/// Matching lines are written to stdout as `[timestamp] message`; a missing
+/// timestamp is shown as `?` and a missing message is left blank. The number of
+/// matching lines is reported on stderr once the whole file has been read.
+///
+/// # Errors
+///
+/// Returns an error if `read_log_file` fails, if a line cannot be read (which
+/// includes lines that are not valid UTF-8), or if stdout cannot be written to.
+pub fn run_search(file_path: &str, query: &str) -> Result<()> {
+    let mut reader = read_log_file(file_path)?;
+    // Lock stdout once rather than per line, and use `writeln!` so a closed pipe
+    // (e.g. `| head`) is returned as an error instead of the panic `println!` raises.
+    let mut out = io::stdout().lock();
+    let mut line = String::new();
+    let mut match_count = 0u64;
+
+    loop {
+        // One buffer is reused for every line; `read_line` appends, so clear first.
+        line.clear();
+        if reader.read_line(&mut line)? == 0 {
+            break;
+        }
+
+        let trimmed = line.trim_end();
+        if !trimmed.contains(query) {
+            continue;
+        }
+
+        // Group 1 is mandatory in both patterns, so it is present on every match.
+        let timestamp = SYSLOG_TIMESTAMP_RE
+            .captures(trimmed)
+            .map(|c| c.get(1).unwrap().as_str());
+        let msg = MSG_RE
+            .captures(trimmed)
+            .map(|c| c.get(1).unwrap().as_str());
+
+        match (timestamp, msg) {
+            (Some(ts), Some(m)) => writeln!(out, "[{}] {}", ts, m)?,
+            (Some(ts), None) => writeln!(out, "[{}] ", ts)?,
+            (None, Some(m)) => writeln!(out, "[?] {}", m)?,
+            (None, None) => writeln!(out, "[?] ")?,
+        }
+
+        match_count += 1;
+    }
+
+    eprintln!("{} matching lines found", match_count);
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_syslog_timestamp_extraction() {
+        let line = r#"Jan 27 17:21:17 tom003.testintg.dbank.loc m1s-kv dt="2026-01-27 17:21:17,524", ll=INFO, msg="hello""#;
+        let caps = SYSLOG_TIMESTAMP_RE.captures(line).unwrap();
+        assert_eq!(caps.get(1).unwrap().as_str(), "Jan 27 17:21:17");
+    }
+
+    #[test]
+    fn test_msg_extraction() {
+        let line = r#"some prefix msg="getUnreadFilesCount(externalUserId=abc123)", ex=""#;
+        let caps = MSG_RE.captures(line).unwrap();
+        assert_eq!(
+            caps.get(1).unwrap().as_str(),
+            "getUnreadFilesCount(externalUserId=abc123)"
+        );
+    }
+
+    #[test]
+    fn test_full_line_extraction() {
+        let line = r#"Jan 27 17:21:17 tom003.testintg.dbank.loc m1s-kv dt="2026-01-27 17:21:17,524", ll=INFO, lc=com.m1.m1.server.api.enterprise.v5.endpoint.TeamSafeEndpointV5, threadId=183, externalUserId=null, clientIp=160.83.36.132, xsrfToken=null, correlationId=aXjl_RwRs-3BWsshhut44wAABKY, sessionId=noSession, securityContext=CA_LOGGED_IN, userId=238, request_id=[(null)]snoSessio.r44wAABKY, msg="getUnreadFilesCount(externalUserId=102c1271eddd4e62832db4b1e70b8cb4,externalTeamSafeIds=053fac9da79543d5b90612ed7d5d0ca2)", ex=""#;
+
+        let ts = SYSLOG_TIMESTAMP_RE
+            .captures(line)
+            .map(|c| c.get(1).unwrap().as_str())
+            .unwrap();
+        assert_eq!(ts, "Jan 27 17:21:17");
+
+        let msg = MSG_RE
+            .captures(line)
+            .map(|c| c.get(1).unwrap().as_str())
+            .unwrap();
+        assert_eq!(
+            msg,
+            "getUnreadFilesCount(externalUserId=102c1271eddd4e62832db4b1e70b8cb4,externalTeamSafeIds=053fac9da79543d5b90612ed7d5d0ca2)"
+        );
+    }
+
+    #[test]
+    fn test_no_timestamp() {
+        let line = r#"some garbage line without proper timestamp msg="hello""#;
+        assert!(SYSLOG_TIMESTAMP_RE.captures(line).is_none());
+    }
+
+    #[test]
+    fn test_no_msg() {
+        let line = "Jan 27 17:21:17 some line without msg field";
+        assert!(MSG_RE.captures(line).is_none());
+    }
+}