Add search

This commit is contained in:
Alexandr Mansurov
2026-02-20 15:31:14 +01:00
parent 7e03af23de
commit bbf8102959
2 changed files with 142 additions and 2 deletions

View File

@@ -1,6 +1,6 @@
use anyhow::{Result, anyhow};
use chrono::NaiveDate;
use clap::Parser;
use clap::{Parser, Subcommand};
use crossbeam_channel::{Sender, bounded};
use rayon::prelude::*;
use std::collections::HashMap;
@@ -13,14 +13,29 @@ use std::thread;
mod db;
mod files;
mod parser;
mod search;
use db::Database;
use files::{LogFile, LogFileDiscovery, LogReader, read_log_file};
use parser::{ParsedMessage, ParserRegistry, SignatureEntry};
// Top-level command-line interface; the parser is derived by clap from this struct.
// Its only field is the selected subcommand, which `main` dispatches on.
// Plain `//` comments are used here on purpose: `///` doc comments on clap-derived
// items are picked up as help text.
// NOTE(review): the two `#[command(...)]` attributes below both set `about`; this looks
// like the pre-change and post-change line of the diff shown together — confirm that
// only one of them is present in the committed source.
#[derive(Parser, Debug)]
#[command(author, version, about = "Load log files into SQLite database")]
#[command(author, version, about = "Log file analysis tool")]
struct Args {
// The subcommand chosen on the command line (see `Command`).
#[command(subcommand)]
command: Command,
}
// Subcommands of the tool. `main` matches on the variant and forwards the wrapped
// argument struct: `Signature` goes to `run_signature`, `Search` to `search::run_search`.
// The `///` comments on the variants double as the per-subcommand help text, so they
// are left exactly as written.
#[derive(Subcommand, Debug)]
enum Command {
/// Load signature log entries into SQLite database
Signature(SignatureArgs),
/// Search log file for lines matching a query and print timestamp + message
Search(SearchArgs),
}
#[derive(Parser, Debug)]
struct SignatureArgs {
/// Start date (YYYY/mm/dd)
#[arg(long)]
from: Option<String>,
@@ -54,6 +69,17 @@ struct Args {
threads: usize,
}
// Arguments of the `search` subcommand. Both fields are declared with `#[arg(long)]`,
// i.e. they are given as `--file` and `--query`; `main` hands them to
// `search::run_search`.
// The `///` comments on the fields are consumed by clap as help text, so explanatory
// notes are kept in plain `//` comments.
#[derive(Parser, Debug)]
struct SearchArgs {
/// Log file to search
#[arg(long)]
file: PathBuf,
// The query is used as a literal substring by `run_search` (`str::contains`), not as a regex.
/// Text to search for in log lines
#[arg(long)]
query: String,
}
fn parse_date(s: &str) -> Result<NaiveDate> {
NaiveDate::parse_from_str(s, "%Y/%m/%d")
.map_err(|e| anyhow!("Invalid date format '{}': {}. Expected YYYY/mm/dd", s, e))
@@ -62,6 +88,15 @@ fn parse_date(s: &str) -> Result<NaiveDate> {
fn main() -> Result<()> {
let args = Args::parse();
match args.command {
Command::Signature(sig_args) => run_signature(sig_args),
Command::Search(search_args) => {
search::run_search(search_args.file.to_str().unwrap(), &search_args.query)
}
}
}
fn run_signature(args: SignatureArgs) -> Result<()> {
// Configure rayon thread pool if threads specified
if args.threads > 0
&& let Err(e) = rayon::ThreadPoolBuilder::new()

105
src/search.rs Normal file
View File

@@ -0,0 +1,105 @@
use anyhow::Result;
use regex::Regex;
use std::io::BufRead;
use std::io::Write;
use std::sync::LazyLock;
use crate::files::read_log_file;
/// Matches a syslog-style timestamp at the very start of a line: three word
/// characters, whitespace, a one- or two-digit number, whitespace, then `HH:MM:SS`
/// (for example `Jan 27 17:21:17`). Capture group 1 is the whole timestamp.
/// Compiled lazily on first use; the pattern is a constant, so the `unwrap()` can
/// only fail if the pattern itself is edited into something invalid.
static SYSLOG_TIMESTAMP_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"^(\w{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2})").unwrap());
/// Matches `msg="..."` anywhere in a line; capture group 1 is the text between the
/// quotes. The content must be at least one character and cannot contain `"`, so an
/// empty `msg=""` does not match and a value stops at its first double quote.
/// The pattern is not anchored on a word boundary, so it also matches inside a longer
/// key that merely ends in `msg`.
static MSG_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"msg="([^"]+)""#).unwrap());
pub fn run_search(file_path: &str, query: &str) -> Result<()> {
let mut reader = read_log_file(file_path)?;
let mut line = String::new();
let mut match_count = 0u64;
loop {
line.clear();
let bytes_read = reader.read_line(&mut line)?;
if bytes_read == 0 {
break;
}
let line_trimmed = line.trim_end();
if !line_trimmed.contains(query) {
continue;
}
let timestamp = SYSLOG_TIMESTAMP_RE
.captures(line_trimmed)
.map(|c| c.get(1).unwrap().as_str());
let msg = MSG_RE
.captures(line_trimmed)
.map(|c| c.get(1).unwrap().as_str());
match (timestamp, msg) {
(Some(ts), Some(m)) => println!("[{}] {}", ts, m),
(Some(ts), None) => println!("[{}] <no msg field>", ts),
(None, Some(m)) => println!("[?] {}", m),
(None, None) => println!("[?] <no msg field>"),
}
match_count += 1;
}
eprintln!("{} matching lines found", match_count);
Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Leading syslog timestamp captured from `line`, or `None` when the line has none.
    fn timestamp_of(line: &str) -> Option<&str> {
        SYSLOG_TIMESTAMP_RE
            .captures(line)
            .and_then(|caps| caps.get(1))
            .map(|m| m.as_str())
    }

    /// Content of the first `msg="..."` field in `line`, or `None` when there is none.
    fn msg_of(line: &str) -> Option<&str> {
        MSG_RE
            .captures(line)
            .and_then(|caps| caps.get(1))
            .map(|m| m.as_str())
    }

    // The timestamp is taken from the start of a typical syslog line.
    #[test]
    fn test_syslog_timestamp_extraction() {
        let line = r#"Jan 27 17:21:17 tom003.testintg.dbank.loc m1s-kv dt="2026-01-27 17:21:17,524", ll=INFO, msg="hello""#;
        assert_eq!(timestamp_of(line), Some("Jan 27 17:21:17"));
    }

    // The message is the quoted value of the `msg` field, wherever it sits in the line.
    #[test]
    fn test_msg_extraction() {
        let line = r#"some prefix msg="getUnreadFilesCount(externalUserId=abc123)", ex=""#;
        assert_eq!(
            msg_of(line),
            Some("getUnreadFilesCount(externalUserId=abc123)")
        );
    }

    // Both fields are extracted from one realistic, fully populated log line.
    #[test]
    fn test_full_line_extraction() {
        let line = r#"Jan 27 17:21:17 tom003.testintg.dbank.loc m1s-kv dt="2026-01-27 17:21:17,524", ll=INFO, lc=com.m1.m1.server.api.enterprise.v5.endpoint.TeamSafeEndpointV5, threadId=183, externalUserId=null, clientIp=160.83.36.132, xsrfToken=null, correlationId=aXjl_RwRs-3BWsshhut44wAABKY, sessionId=noSession, securityContext=CA_LOGGED_IN, userId=238, request_id=[(null)]snoSessio.r44wAABKY, msg="getUnreadFilesCount(externalUserId=102c1271eddd4e62832db4b1e70b8cb4,externalTeamSafeIds=053fac9da79543d5b90612ed7d5d0ca2)", ex=""#;

        assert_eq!(timestamp_of(line), Some("Jan 27 17:21:17"));
        assert_eq!(
            msg_of(line),
            Some(
                "getUnreadFilesCount(externalUserId=102c1271eddd4e62832db4b1e70b8cb4,externalTeamSafeIds=053fac9da79543d5b90612ed7d5d0ca2)"
            )
        );
    }

    // A line that does not start with a syslog timestamp yields no timestamp.
    #[test]
    fn test_no_timestamp() {
        let line = r#"some garbage line without proper timestamp msg="hello""#;
        assert_eq!(timestamp_of(line), None);
    }

    // A line without a `msg="..."` field yields no message.
    #[test]
    fn test_no_msg() {
        let line = "Jan 27 17:21:17 some line without msg field";
        assert_eq!(msg_of(line), None);
    }
}