1264 lines
43 KiB
Rust
1264 lines
43 KiB
Rust
|
|
use anyhow::Result;
|
||
|
|
use rayon::prelude::*;
|
||
|
|
use regex::Regex;
|
||
|
|
use std::collections::{HashMap, HashSet};
|
||
|
|
use std::io::{BufRead, BufReader, Read as _, Seek, SeekFrom};
|
||
|
|
use std::sync::LazyLock;
|
||
|
|
|
||
|
|
use crate::files::read_log_file;
|
||
|
|
|
||
|
|
/// Leading syslog-style timestamp, e.g. "Jan 27 17:21:17" (anchored at line start).
static SYSLOG_TIMESTAMP_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^(\w{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2})").unwrap());
|
||
|
|
|
||
|
|
/// Captures the quoted payload of a `msg="..."` field.
static MSG_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"msg="([^"]+)""#).unwrap());
|
||
|
|
|
||
|
|
/// Captures the value of a `correlationId=...` field (up to the next comma or whitespace).
static CORRELATION_ID_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"correlationId=([^,\s]+)").unwrap());
|
||
|
|
|
||
|
|
/// Captures the value of a `sessionId=...` field (up to the next comma or whitespace).
/// The captured ID may still carry a `.nodeXXX` suffix; callers normalize it.
static SESSION_ID_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"sessionId=([^,\s]+)").unwrap());
|
||
|
|
|
||
|
|
/// Matches sessionDestroyed lines and captures the sid value.
/// Example: sessionDestroyed #s=8 sid=2010F74...node003 isnew=false
/// The captured sid may still carry a `.nodeXXX` suffix; callers normalize it.
static SESSION_DESTROYED_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"sessionDestroyed\b.*?\bsid=([^,\s]+)").unwrap());
|
||
|
|
|
||
|
|
/// Extracts the app name from any signature line format.
/// Matches both `msg="signature:APP/..."` and `msg="MOBILE_CLIENT_LOG: signature:APP/..."`.
/// Group 1 is the app name, i.e. everything between `signature:` and the first `/`.
static SIGNATURE_APP_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"signature:([^/\s]+)/").unwrap());
|
||
|
|
|
||
|
|
/// Matches changeSessionId messages and captures the long-form new and old session IDs.
/// Example: changeSessionId: newSessionId: sDF080BBD / DF080BBD...node011 replaces oldSessionId: sF9EE9D52 / F9EE9D52...node011
/// Group 1 = long-form NEW session ID, group 2 = long-form OLD session ID
/// (the short `sXXXXXXXX` aliases before each `/` are skipped, not captured).
static CHANGE_SESSION_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"changeSessionId:.*?newSessionId:\s*\S+\s*/\s*([^,\s"]+).*?replaces\s+oldSessionId:\s*\S+\s*/\s*([^,\s"]+)"#,
    )
    .unwrap()
});
|
||
|
|
|
||
|
|
/// Strips a trailing `.nodeXXX` suffix from a session ID for comparison purposes.
///
/// "DF080BBD8D5E954C642F6C3B5639D6EE.node011" -> "DF080BBD8D5E954C642F6C3B5639D6EE"
/// IDs whose last dot-segment does not start with "node" (including
/// "noSession", which has no dot at all) are returned unchanged.
fn normalize_session_id(sid: &str) -> &str {
    match sid.rsplit_once('.') {
        // Only split off the tail when it looks like a cluster-node marker.
        Some((base, tail)) if tail.starts_with("node") => base,
        _ => sid,
    }
}
|
||
|
|
|
||
|
|
fn build_thread_pool(threads: usize) -> Result<rayon::ThreadPool> {
|
||
|
|
let mut builder = rayon::ThreadPoolBuilder::new();
|
||
|
|
if threads > 0 {
|
||
|
|
builder = builder.num_threads(threads);
|
||
|
|
}
|
||
|
|
builder
|
||
|
|
.build()
|
||
|
|
.map_err(|e| anyhow::anyhow!("Failed to build thread pool: {}", e))
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Split a file into byte-offset (start, end) chunks for parallel processing.
///
/// The chunk count is capped at the file size (never more chunks than bytes)
/// and the final chunk absorbs the division remainder so the ranges cover the
/// whole file. Threads adjust to line boundaries at read time. Returns an
/// empty vec for an empty file or a zero chunk request.
fn compute_chunks(file_size: u64, num_chunks: usize) -> Vec<(u64, u64)> {
    if file_size == 0 || num_chunks == 0 {
        return vec![];
    }
    let count = num_chunks.min(file_size as usize) as u64;
    let step = file_size / count;
    let mut ranges = Vec::with_capacity(count as usize);
    for i in 0..count {
        let start = i * step;
        // Last chunk runs to EOF; the others are exactly `step` bytes.
        let end = if i + 1 == count { file_size } else { (i + 1) * step };
        ranges.push((start, end));
    }
    ranges
}
|
||
|
|
|
||
|
|
/// Returns true if parallel chunk processing can be used.
///
/// Requires more than one worker thread and a plain (non-`.gz`) file:
/// gzip streams cannot be partitioned by byte offset.
fn can_parallelize(file_path: &str, num_threads: usize) -> bool {
    if num_threads <= 1 {
        return false;
    }
    !file_path.ends_with(".gz")
}
|
||
|
|
|
||
|
|
/// Stream all lines from a file (works with gzip and plain). Calls `process` for each line.
|
||
|
|
fn for_each_line_streaming<F>(file_path: &str, mut process: F) -> Result<()>
|
||
|
|
where
|
||
|
|
F: FnMut(&str),
|
||
|
|
{
|
||
|
|
let mut reader = read_log_file(file_path)?;
|
||
|
|
let mut line = String::new();
|
||
|
|
loop {
|
||
|
|
line.clear();
|
||
|
|
if reader.read_line(&mut line)? == 0 {
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
process(line.trim_end());
|
||
|
|
}
|
||
|
|
Ok(())
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Process lines in a byte-offset chunk of a plain file. Calls `process` for each complete line.
///
/// Chunk boundaries may fall mid-line. Convention:
/// - The chunk that started reading a line owns it (reads past `end` to finish it).
/// - The next chunk checks byte `start-1`: if it's `\n`, we're at a line start; otherwise
///   skip the partial first line (the previous chunk already handled it).
fn for_each_line_in_chunk<F>(file_path: &str, start: u64, end: u64, mut process: F) -> Result<()>
where
    F: FnMut(&str),
{
    let file = std::fs::File::open(file_path)?;
    let mut reader = BufReader::with_capacity(256 * 1024, file);
    // `pos` tracks the byte offset where the NEXT line begins.
    let mut pos = start;

    if start > 0 {
        // Check if we're at a line boundary or mid-line
        reader.seek(SeekFrom::Start(start - 1))?;
        let mut byte = [0u8; 1];
        reader.read_exact(&mut byte)?;
        // Now positioned at `start`
        if byte[0] != b'\n' {
            // Mid-line: skip remainder (previous chunk owns this line)
            let mut skip = String::new();
            let n = reader.read_line(&mut skip)?;
            pos += n as u64;
        }
    }

    let mut line = String::new();
    // A line whose start offset is < `end` is read to completion even if it
    // extends past `end` — that is the "owning chunk finishes the line" rule.
    while pos < end {
        line.clear();
        let n = reader.read_line(&mut line)?;
        if n == 0 {
            // EOF reached before `end`; stop cleanly.
            break;
        }
        pos += n as u64;
        process(line.trim_end());
    }
    Ok(())
}
|
||
|
|
|
||
|
|
pub fn run_search(
|
||
|
|
file_path: &str,
|
||
|
|
query: &str,
|
||
|
|
show_correlation_id: bool,
|
||
|
|
expand: bool,
|
||
|
|
threads: usize,
|
||
|
|
) -> Result<()> {
|
||
|
|
let pool = build_thread_pool(threads)?;
|
||
|
|
if expand {
|
||
|
|
run_search_expanded(file_path, query, &pool)
|
||
|
|
} else {
|
||
|
|
run_search_simple(file_path, query, show_correlation_id, &pool)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
fn format_simple_match(line: &str, show_correlation_id: bool) -> String {
|
||
|
|
let ts = SYSLOG_TIMESTAMP_RE
|
||
|
|
.captures(line)
|
||
|
|
.map(|c| c.get(1).unwrap().as_str())
|
||
|
|
.unwrap_or("?");
|
||
|
|
let msg = MSG_RE
|
||
|
|
.captures(line)
|
||
|
|
.map(|c| c.get(1).unwrap().as_str())
|
||
|
|
.unwrap_or("<no msg field>");
|
||
|
|
if show_correlation_id {
|
||
|
|
let cid = CORRELATION_ID_RE
|
||
|
|
.captures(line)
|
||
|
|
.map(|c| c.get(1).unwrap().as_str());
|
||
|
|
if let Some(cid) = cid {
|
||
|
|
return format!("[{}] [{}] {}", ts, cid, msg);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
format!("[{}] {}", ts, msg)
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Simple (non-expanded) search: print every line containing `query`.
///
/// Gzip input or a single thread falls back to sequential streaming;
/// otherwise the file is split into byte chunks processed in parallel and
/// buffered matches are printed afterwards in chunk (i.e. file) order.
/// The match count goes to stderr so stdout stays pipeable.
fn run_search_simple(
    file_path: &str,
    query: &str,
    show_correlation_id: bool,
    pool: &rayon::ThreadPool,
) -> Result<()> {
    let num_threads = pool.current_num_threads();

    if !can_parallelize(file_path, num_threads) {
        // Sequential: stream directly (works for gzip and plain)
        let mut match_count = 0u64;
        for_each_line_streaming(file_path, |trimmed| {
            if trimmed.contains(query) {
                println!("{}", format_simple_match(trimmed, show_correlation_id));
                match_count += 1;
            }
        })?;
        eprintln!("{} matching lines found", match_count);
        return Ok(());
    }

    // Parallel: split plain file into chunks
    let file_size = std::fs::metadata(file_path)?.len();
    let chunks = compute_chunks(file_size, num_threads);

    eprintln!(
        "Searching with {} threads across {} chunks",
        num_threads,
        chunks.len()
    );

    // Workers buffer formatted matches instead of printing, so output can be
    // emitted in file order on the main thread afterwards.
    let results: Vec<Result<(Vec<String>, u64)>> = pool.install(|| {
        chunks
            .par_iter()
            .map(|&(start, end)| {
                let mut lines = Vec::new();
                let mut count = 0u64;
                for_each_line_in_chunk(file_path, start, end, |trimmed| {
                    if trimmed.contains(query) {
                        lines.push(format_simple_match(trimmed, show_correlation_id));
                        count += 1;
                    }
                })?;
                Ok((lines, count))
            })
            .collect()
    });

    // Print chunk results in order; `?` propagates the first chunk error.
    let mut total = 0u64;
    for result in results {
        let (lines, count) = result?;
        for line in lines {
            println!("{}", line);
        }
        total += count;
    }

    eprintln!("{} matching lines found", total);
    Ok(())
}
|
||
|
|
|
||
|
|
// --- Expand mode ---
|
||
|
|
|
||
|
|
/// Metadata aggregated during the first scan over the file (pass 1).
#[derive(Default)]
struct Pass1Result {
    /// Normalized session IDs seen on lines containing the query.
    seed_session_ids: HashSet<String>,
    /// Correlation IDs seen on lines containing the query.
    seed_correlation_ids: HashSet<String>,
    /// changeSessionId links: new session ID -> old session ID it replaced.
    change_session_map: HashMap<String, String>,
    /// Sessions that produced a signature line (treated as session starts).
    sessions_with_signature: HashSet<String>,
    /// Maps normalized session ID to app name from its signature line
    session_app_map: HashMap<String, String>,
    /// Maps seed correlation ID to the session ID from the same line (for app filtering)
    seed_cid_sessions: HashMap<String, String>,
    /// Total number of lines scanned in this (partial) result.
    line_count: u64,
}
|
||
|
|
|
||
|
|
impl Pass1Result {
|
||
|
|
fn merge(mut self, other: Pass1Result) -> Self {
|
||
|
|
self.seed_session_ids.extend(other.seed_session_ids);
|
||
|
|
self.seed_correlation_ids.extend(other.seed_correlation_ids);
|
||
|
|
self.change_session_map.extend(other.change_session_map);
|
||
|
|
self.sessions_with_signature
|
||
|
|
.extend(other.sessions_with_signature);
|
||
|
|
self.session_app_map.extend(other.session_app_map);
|
||
|
|
self.seed_cid_sessions.extend(other.seed_cid_sessions);
|
||
|
|
self.line_count += other.line_count;
|
||
|
|
self
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Pass-1 per-line handler: records session/correlation metadata into `result`.
///
/// Regardless of whether the line contains `query`, this records signature
/// lines (session -> app) and changeSessionId links (new -> old). For lines
/// that DO contain `query`, it additionally records the line's session ID and
/// correlation ID as expansion seeds. Increments the scanned-line counter.
fn process_line_pass1(trimmed: &str, query: &str, result: &mut Pass1Result) {
    // Session ID on this line (normalized: `.nodeXXX` suffix stripped), if any.
    let sid = SESSION_ID_RE
        .captures(trimmed)
        .and_then(|c| c.get(1))
        .map(|m| normalize_session_id(m.as_str()));

    // Detect signature lines using broad regex (matches both msg="signature:APP/..."
    // and msg="MOBILE_CLIENT_LOG: signature:APP/...")
    if let Some(sig_caps) = SIGNATURE_APP_RE.captures(trimmed) {
        let app = sig_caps.get(1).unwrap().as_str();
        if let Some(s) = sid {
            result.sessions_with_signature.insert(s.to_string());
            result
                .session_app_map
                .insert(s.to_string(), app.to_string());
        }
    }

    // Cheap substring check first so the multi-group regex only runs on candidates.
    if trimmed.contains("changeSessionId:")
        && let Some(caps) = CHANGE_SESSION_RE.captures(trimmed)
    {
        let new_sid = normalize_session_id(caps.get(1).unwrap().as_str()).to_string();
        let old_sid = normalize_session_id(caps.get(2).unwrap().as_str()).to_string();
        result.change_session_map.insert(new_sid, old_sid);
    }

    if trimmed.contains(query) {
        // "noSession" is a placeholder, not a real session — never seed it.
        if let Some(s) = sid
            && s != "noSession"
        {
            result.seed_session_ids.insert(s.to_string());
            // Track which correlation IDs belong to which sessions (for app filtering)
            if let Some(cid) = CORRELATION_ID_RE
                .captures(trimmed)
                .and_then(|c| c.get(1))
            {
                result
                    .seed_cid_sessions
                    .insert(cid.as_str().to_string(), s.to_string());
            }
        }
        // Correlation IDs are seeded even when the line has no (real) session.
        if let Some(cid) = CORRELATION_ID_RE
            .captures(trimmed)
            .and_then(|c| c.get(1))
        {
            result
                .seed_correlation_ids
                .insert(cid.as_str().to_string());
        }
    }

    result.line_count += 1;
}
|
||
|
|
|
||
|
|
/// Expanded search: find lines matching `query`, expand their session IDs
/// backward through changeSessionId chains, then print every line belonging
/// to any expanded session or seeded correlation ID (two-pass over the file).
fn run_search_expanded(
    file_path: &str,
    query: &str,
    pool: &rayon::ThreadPool,
) -> Result<()> {
    let num_threads = pool.current_num_threads();
    let use_parallel = can_parallelize(file_path, num_threads);

    // Pass 1: collect metadata
    let pass1 = run_pass1(file_path, query, use_parallel, pool)?;

    // Expansion (in-memory graph traversal — inherently sequential)
    let (expanded_sids, expanded_cids) = expand_seeds(
        &pass1.seed_session_ids,
        &pass1.seed_correlation_ids,
        &pass1.change_session_map,
        &pass1.sessions_with_signature,
    );

    // Nothing to expand means nothing can match in pass 2 — bail early.
    if expanded_sids.is_empty() && expanded_cids.is_empty() {
        eprintln!("0 matching lines found (no sessions or correlations to expand)");
        return Ok(());
    }

    eprintln!(
        "Expanding: {} session IDs, {} correlation IDs",
        expanded_sids.len(),
        expanded_cids.len()
    );

    // Pass 2: filter and print (re-reads file; for gzip this re-decompresses from stream)
    // strict_app_isolation=false: plain expanded search does not filter by app.
    let match_count = run_pass2(file_path, query, &expanded_sids, &expanded_cids, use_parallel, pool, false)?;

    eprintln!("{} lines output", match_count);
    Ok(())
}
|
||
|
|
|
||
|
|
/// Pass 1: scan the whole file and collect session/correlation metadata.
///
/// Sequential mode streams line by line (required for gzip); parallel mode
/// splits the file into byte chunks and merges the per-chunk `Pass1Result`s.
/// Metadata collection is order-independent, so merge order does not affect
/// the outcome. Progress/summary messages go to stderr.
fn run_pass1(
    file_path: &str,
    query: &str,
    use_parallel: bool,
    pool: &rayon::ThreadPool,
) -> Result<Pass1Result> {
    if !use_parallel {
        eprintln!("Pass 1: scanning sequentially...");
        let mut result = Pass1Result::default();
        for_each_line_streaming(file_path, |trimmed| {
            process_line_pass1(trimmed, query, &mut result);
        })?;
        eprintln!(
            "Pass 1 complete: {} lines, {} seed sessions, {} seed correlations, {} session changes",
            result.line_count,
            result.seed_session_ids.len(),
            result.seed_correlation_ids.len(),
            result.change_session_map.len()
        );
        return Ok(result);
    }

    let file_size = std::fs::metadata(file_path)?.len();
    let num_threads = pool.current_num_threads();
    let chunks = compute_chunks(file_size, num_threads);

    // NOTE(review): chunks.len() can be smaller than num_threads for tiny
    // files; the message reports the effective worker count, not pool size.
    eprintln!("Pass 1: scanning with {} threads...", chunks.len());

    let results: Vec<Result<Pass1Result>> = pool.install(|| {
        chunks
            .par_iter()
            .map(|&(start, end)| {
                let mut chunk_result = Pass1Result::default();
                for_each_line_in_chunk(file_path, start, end, |trimmed| {
                    process_line_pass1(trimmed, query, &mut chunk_result);
                })?;
                Ok(chunk_result)
            })
            .collect()
    });

    // Merge chunk results; `?` propagates the first chunk error.
    let mut merged = Pass1Result::default();
    for r in results {
        merged = merged.merge(r?);
    }

    eprintln!(
        "Pass 1 complete: {} lines, {} seed sessions, {} seed correlations, {} session changes",
        merged.line_count,
        merged.seed_session_ids.len(),
        merged.seed_correlation_ids.len(),
        merged.change_session_map.len()
    );

    Ok(merged)
}
|
||
|
|
|
||
|
|
/// Expand seed session IDs by following changeSessionId chains backward.
|
||
|
|
/// Stops recursion when an old session has a signature line (session start).
|
||
|
|
fn expand_seeds(
|
||
|
|
seed_sids: &HashSet<String>,
|
||
|
|
seed_cids: &HashSet<String>,
|
||
|
|
change_map: &HashMap<String, String>,
|
||
|
|
sig_sessions: &HashSet<String>,
|
||
|
|
) -> (HashSet<String>, HashSet<String>) {
|
||
|
|
let mut expanded_sids = seed_sids.clone();
|
||
|
|
let mut work_queue: Vec<String> = seed_sids.iter().cloned().collect();
|
||
|
|
|
||
|
|
while let Some(current) = work_queue.pop() {
|
||
|
|
if let Some(old_sid) = change_map.get(¤t)
|
||
|
|
&& expanded_sids.insert(old_sid.clone())
|
||
|
|
{
|
||
|
|
// Newly added — include its lines.
|
||
|
|
// If it has a signature, stop recursing from it.
|
||
|
|
if !sig_sessions.contains(old_sid) {
|
||
|
|
work_queue.push(old_sid.clone());
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
(expanded_sids, seed_cids.clone())
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Decide whether a line belongs in the expanded output and, if so, format it.
///
/// A line qualifies when it contains `query` directly, its (normalized)
/// session ID is in `expanded_sids`, or its correlation ID is in
/// `expanded_cids` (the CID path subject to `strict_app_isolation`, below).
/// Returns `None` for non-matching lines. Direct query hits get a `*` prefix;
/// missing cid/sid fields display as "-".
fn format_pass2_match(
    trimmed: &str,
    query: &str,
    expanded_sids: &HashSet<String>,
    expanded_cids: &HashSet<String>,
    strict_app_isolation: bool,
) -> Option<String> {
    let is_direct_match = trimmed.contains(query);

    let sid = SESSION_ID_RE
        .captures(trimmed)
        .and_then(|c| c.get(1))
        .map(|m| normalize_session_id(m.as_str()));

    let cid = CORRELATION_ID_RE
        .captures(trimmed)
        .and_then(|c| c.get(1))
        .map(|m| m.as_str());

    let sid_match = sid.is_some_and(|s| expanded_sids.contains(s));
    let cid_match = cid.is_some_and(|c| expanded_cids.contains(c));

    // When strict_app_isolation is enabled (search-exceptions), a CID match
    // alone is not enough — the line's session must also be in the filtered set
    // (or absent). This prevents leaking lines from non-matching apps that
    // happen to share a correlation ID.
    let effective_cid_match =
        cid_match && !(strict_app_isolation && sid.is_some() && !sid_match);

    if !is_direct_match && !sid_match && !effective_cid_match {
        return None;
    }

    let timestamp = SYSLOG_TIMESTAMP_RE
        .captures(trimmed)
        .map(|c| c.get(1).unwrap().as_str())
        .unwrap_or("?");

    let msg = MSG_RE
        .captures(trimmed)
        .map(|c| c.get(1).unwrap().as_str())
        .unwrap_or("<no msg field>");

    // "-" marks a missing field in the output columns.
    let sid_display = sid.unwrap_or("-");
    let cid_display = cid.unwrap_or("-");
    let prefix = if is_direct_match { "*" } else { " " };

    Some(format!(
        "{} [{}] [cid:{}] [sid:{}] {}",
        prefix, timestamp, cid_display, sid_display, msg
    ))
}
|
||
|
|
|
||
|
|
/// Events collected per-line during pass2 parallel chunk processing.
/// Preserves file order so the assembly phase can stop at sessionDestroyed.
enum Pass2Event {
    /// Line matched and should be output.
    Match(String),
    /// An expanded session was destroyed (no output for this line).
    /// Payload is the normalized destroyed session ID.
    Destroy(String),
    /// Line matched AND an expanded session was destroyed on this line
    /// (e.g. the sessionDestroyed line itself qualifies for output).
    MatchAndDestroy(String, String),
}
|
||
|
|
|
||
|
|
/// Check whether a line destroys one of the expanded sessions.
/// Returns the normalized destroyed session ID, or `None` when the line is
/// not a sessionDestroyed event for a tracked session. The cheap substring
/// check gates the regex so most lines skip the expensive match.
fn check_session_destroyed(trimmed: &str, expanded_sids: &HashSet<String>) -> Option<String> {
    if trimmed.contains("sessionDestroyed")
        && let Some(caps) = SESSION_DESTROYED_RE.captures(trimmed)
    {
        let sid = normalize_session_id(caps.get(1).unwrap().as_str());
        if expanded_sids.contains(sid) {
            return Some(sid.to_string());
        }
    }
    None
}
|
||
|
|
|
||
|
|
/// Pass 2: re-read the file, print every line accepted by
/// `format_pass2_match`, and stop early once ALL expanded sessions have been
/// destroyed (sessionDestroyed lines). Returns the number of lines printed.
///
/// Sequential mode streams and stops mid-file; parallel mode has each chunk
/// record ordered `Pass2Event`s, then the main thread replays them in file
/// order and applies the same early-stop rule during assembly.
fn run_pass2(
    file_path: &str,
    query: &str,
    expanded_sids: &HashSet<String>,
    expanded_cids: &HashSet<String>,
    use_parallel: bool,
    pool: &rayon::ThreadPool,
    strict_app_isolation: bool,
) -> Result<u64> {
    if !use_parallel {
        eprintln!("Pass 2: filtering sequentially...");
        let mut count = 0u64;
        let mut reader = read_log_file(file_path)?;
        let mut line = String::new();
        // Sessions not yet seen to be destroyed; drained as destroys occur.
        let mut remaining_sids: HashSet<&str> =
            expanded_sids.iter().map(|s| s.as_str()).collect();

        loop {
            line.clear();
            if reader.read_line(&mut line)? == 0 {
                break;
            }
            let trimmed = line.trim_end();

            // Check for sessionDestroyed
            if trimmed.contains("sessionDestroyed")
                && let Some(caps) = SESSION_DESTROYED_RE.captures(trimmed)
            {
                let destroyed_sid = normalize_session_id(caps.get(1).unwrap().as_str());
                if remaining_sids.remove(destroyed_sid) {
                    // Still output the sessionDestroyed line itself if it matches
                    if let Some(formatted) =
                        format_pass2_match(trimmed, query, expanded_sids, expanded_cids, strict_app_isolation)
                    {
                        println!("{}", formatted);
                        count += 1;
                    }
                    if remaining_sids.is_empty() {
                        eprintln!(
                            "All {} expanded sessions destroyed, stopping early",
                            expanded_sids.len()
                        );
                        break;
                    }
                    // Line fully handled above; skip the normal match path.
                    continue;
                }
            }

            if let Some(formatted) =
                format_pass2_match(trimmed, query, expanded_sids, expanded_cids, strict_app_isolation)
            {
                println!("{}", formatted);
                count += 1;
            }
        }
        return Ok(count);
    }

    let file_size = std::fs::metadata(file_path)?.len();
    let num_threads = pool.current_num_threads();
    let chunks = compute_chunks(file_size, num_threads);

    eprintln!("Pass 2: filtering with {} threads...", chunks.len());

    // Each chunk collects events preserving file order so the assembly
    // phase can apply the same sessionDestroyed stop logic as sequential.
    let results: Vec<Result<Vec<Pass2Event>>> = pool.install(|| {
        chunks
            .par_iter()
            .map(|&(start, end)| {
                let mut events = Vec::new();
                for_each_line_in_chunk(file_path, start, end, |trimmed| {
                    let formatted = format_pass2_match(
                        trimmed, query, expanded_sids, expanded_cids, strict_app_isolation,
                    );
                    let destroyed = check_session_destroyed(trimmed, expanded_sids);
                    match (formatted, destroyed) {
                        (Some(f), Some(d)) => events.push(Pass2Event::MatchAndDestroy(f, d)),
                        (Some(f), None) => events.push(Pass2Event::Match(f)),
                        (None, Some(d)) => events.push(Pass2Event::Destroy(d)),
                        (None, None) => {}
                    }
                })?;
                Ok(events)
            })
            .collect()
    });

    // Assemble in chunk order, stopping when all expanded sessions are destroyed.
    let mut remaining_sids: HashSet<&str> =
        expanded_sids.iter().map(|s| s.as_str()).collect();
    let mut total = 0u64;
    'outer: for result in results {
        let events = result?;
        for event in events {
            match event {
                Pass2Event::Match(line) => {
                    println!("{}", line);
                    total += 1;
                }
                Pass2Event::Destroy(sid) => {
                    remaining_sids.remove(sid.as_str());
                    if remaining_sids.is_empty() {
                        break 'outer;
                    }
                }
                Pass2Event::MatchAndDestroy(line, sid) => {
                    // The destroying line itself matched — print before stopping.
                    println!("{}", line);
                    total += 1;
                    remaining_sids.remove(sid.as_str());
                    if remaining_sids.is_empty() {
                        break 'outer;
                    }
                }
            }
        }
    }

    Ok(total)
}
|
||
|
|
|
||
|
|
// --- search_exceptions ---
|
||
|
|
|
||
|
|
/// Filter expanded session IDs to only those in chains containing a
/// matching-app signature.
///
/// Inverts `change_map` (old -> [new...]) and flood-fills forward from every
/// session whose signature app is in `app_filters`; the result is the
/// intersection of reached sessions with `expanded_sids`.
fn filter_expanded_by_app(
    expanded_sids: &HashSet<String>,
    change_map: &HashMap<String, String>,
    session_app_map: &HashMap<String, String>,
    app_filters: &[String],
) -> HashSet<String> {
    // Invert the change map so chains can be walked forward: old -> [new...].
    let mut successors: HashMap<&str, Vec<&str>> = HashMap::new();
    for (new_sid, old_sid) in change_map {
        successors
            .entry(old_sid.as_str())
            .or_default()
            .push(new_sid.as_str());
    }

    // Flood-fill from every session whose signature names a requested app.
    let mut reached: HashSet<String> = HashSet::new();
    let mut pending: Vec<&str> = session_app_map
        .iter()
        .filter(|(_, app)| app_filters.contains(*app))
        .map(|(sid, _)| sid.as_str())
        .collect();
    while let Some(sid) = pending.pop() {
        // insert() returning false means we already visited this node.
        if reached.insert(sid.to_string()) {
            if let Some(nexts) = successors.get(sid) {
                pending.extend(nexts.iter().copied());
            }
        }
    }

    // Keep only expanded sessions that the flood-fill reached.
    expanded_sids
        .iter()
        .filter(|sid| reached.contains(sid.as_str()))
        .cloned()
        .collect()
}
|
||
|
|
|
||
|
|
/// Entry point for "search-exceptions": expanded search for the fixed query
/// "Exception", restricted to sessions whose signature app is in `app_filters`.
///
/// Runs pass 1, expands session chains, filters sessions (and their seed
/// correlation IDs) by app, then runs pass 2 with strict app isolation so a
/// shared correlation ID cannot pull in lines from other apps.
pub fn run_search_exceptions(file_path: &str, app_filters: &[String], threads: usize) -> Result<()> {
    let pool = build_thread_pool(threads)?;
    let query = "Exception";
    let num_threads = pool.current_num_threads();
    let use_parallel = can_parallelize(file_path, num_threads);

    // Pass 1: collect metadata
    let pass1 = run_pass1(file_path, query, use_parallel, &pool)?;

    // Expand seeds
    let (expanded_sids, expanded_cids) = expand_seeds(
        &pass1.seed_session_ids,
        &pass1.seed_correlation_ids,
        &pass1.change_session_map,
        &pass1.sessions_with_signature,
    );

    // Filter by app
    let filtered_sids = filter_expanded_by_app(
        &expanded_sids,
        &pass1.change_session_map,
        &pass1.session_app_map,
        app_filters,
    );

    // Filter correlation IDs: keep only those whose seed line's session is in the filtered set
    let filtered_cids: HashSet<String> = expanded_cids
        .iter()
        .filter(|cid| {
            pass1
                .seed_cid_sessions
                .get(cid.as_str())
                .is_some_and(|sid| filtered_sids.contains(sid))
        })
        .cloned()
        .collect();

    // Nothing survived the app filter — nothing can match in pass 2.
    if filtered_sids.is_empty() && filtered_cids.is_empty() {
        eprintln!(
            "0 matching lines found (no sessions matching apps {:?})",
            app_filters
        );
        return Ok(());
    }

    eprintln!(
        "Expanding: {} session IDs (filtered from {}), {} correlation IDs (filtered from {})",
        filtered_sids.len(),
        expanded_sids.len(),
        filtered_cids.len(),
        expanded_cids.len()
    );

    // Pass 2: filter and print (strict isolation prevents CID leaking across apps)
    let match_count = run_pass2(
        file_path,
        query,
        &filtered_sids,
        &filtered_cids,
        use_parallel,
        &pool,
        true,
    )?;

    eprintln!("{} lines output", match_count);
    Ok(())
}
|
||
|
|
|
||
|
|
#[cfg(test)]
|
||
|
|
mod tests {
|
||
|
|
use super::*;
|
||
|
|
use std::io::Write;
|
||
|
|
|
||
|
|
#[test]
|
||
|
|
fn test_syslog_timestamp_extraction() {
|
||
|
|
let line = r#"Jan 27 17:21:17 a.b.c.d m1s-kv dt="2026-01-27 17:21:17,524", ll=INFO, msg="hello""#;
|
||
|
|
let caps = SYSLOG_TIMESTAMP_RE.captures(line).unwrap();
|
||
|
|
assert_eq!(caps.get(1).unwrap().as_str(), "Jan 27 17:21:17");
|
||
|
|
}
|
||
|
|
|
||
|
|
#[test]
|
||
|
|
fn test_msg_extraction() {
|
||
|
|
let line = r#"some prefix msg="getUnreadFilesCount(externalUserId=abc123)", ex=""#;
|
||
|
|
let caps = MSG_RE.captures(line).unwrap();
|
||
|
|
assert_eq!(
|
||
|
|
caps.get(1).unwrap().as_str(),
|
||
|
|
"getUnreadFilesCount(externalUserId=abc123)"
|
||
|
|
);
|
||
|
|
}
|
||
|
|
|
||
|
|
#[test]
|
||
|
|
fn test_full_line_extraction() {
|
||
|
|
let line = r#"Jan 27 17:21:17 a.b.c.d m1s-kv dt="2026-01-27 17:21:17,524", ll=INFO, lc=com.a.b.c.d.e.v5.endpoint.f, threadId=183, externalUserId=null, clientIp=1.1.1.1, xsrfToken=null, correlationId=abcd, sessionId=noSession, securityContext=CA_LOGGED_IN, userId=123, request_id=[(null)]snoSessio.abc, msg="getUnreadFilesCount(externalUserId=aaaaa,externalTeamSafeIds=bbbbb)", ex=""#;
|
||
|
|
|
||
|
|
let ts = SYSLOG_TIMESTAMP_RE
|
||
|
|
.captures(line)
|
||
|
|
.map(|c| c.get(1).unwrap().as_str())
|
||
|
|
.unwrap();
|
||
|
|
assert_eq!(ts, "Jan 27 17:21:17");
|
||
|
|
|
||
|
|
let msg = MSG_RE
|
||
|
|
.captures(line)
|
||
|
|
.map(|c| c.get(1).unwrap().as_str())
|
||
|
|
.unwrap();
|
||
|
|
assert_eq!(
|
||
|
|
msg,
|
||
|
|
"getUnreadFilesCount(externalUserId=aaaaa,externalTeamSafeIds=bbbbb)"
|
||
|
|
);
|
||
|
|
}
|
||
|
|
|
||
|
|
#[test]
|
||
|
|
fn test_no_timestamp() {
|
||
|
|
let line = r#"some garbage line without proper timestamp msg="hello""#;
|
||
|
|
assert!(SYSLOG_TIMESTAMP_RE.captures(line).is_none());
|
||
|
|
}
|
||
|
|
|
||
|
|
#[test]
|
||
|
|
fn test_no_msg() {
|
||
|
|
let line = "Jan 27 17:21:17 some line without msg field";
|
||
|
|
assert!(MSG_RE.captures(line).is_none());
|
||
|
|
}
|
||
|
|
|
||
|
|
// --- normalize_session_id tests ---
|
||
|
|
|
||
|
|
#[test]
|
||
|
|
fn test_normalize_session_id_with_node_suffix() {
|
||
|
|
assert_eq!(
|
||
|
|
normalize_session_id("DF080BBD8D5E954C642F6C3B5639D6EE.node011"),
|
||
|
|
"DF080BBD8D5E954C642F6C3B5639D6EE"
|
||
|
|
);
|
||
|
|
}
|
||
|
|
|
||
|
|
#[test]
|
||
|
|
fn test_normalize_session_id_without_suffix() {
|
||
|
|
assert_eq!(
|
||
|
|
normalize_session_id("DF080BBD8D5E954C642F6C3B5639D6EE"),
|
||
|
|
"DF080BBD8D5E954C642F6C3B5639D6EE"
|
||
|
|
);
|
||
|
|
}
|
||
|
|
|
||
|
|
#[test]
|
||
|
|
fn test_normalize_session_id_no_session() {
|
||
|
|
assert_eq!(normalize_session_id("noSession"), "noSession");
|
||
|
|
}
|
||
|
|
|
||
|
|
#[test]
|
||
|
|
fn test_normalize_session_id_non_node_dot() {
|
||
|
|
assert_eq!(normalize_session_id("some.session.id"), "some.session.id");
|
||
|
|
}
|
||
|
|
|
||
|
|
// --- CHANGE_SESSION_RE tests ---
|
||
|
|
|
||
|
|
#[test]
|
||
|
|
fn test_change_session_id_regex() {
|
||
|
|
let line = r#"msg="changeSessionId: newSessionId: sDF080BBD / DF080BBD8D5E954C642F6C3B5639D6EE.node011 replaces oldSessionId: sF9EE9D52 / F9EE9D52FDB4502EB5CE6FFA24194AFD.node011""#;
|
||
|
|
let caps = CHANGE_SESSION_RE.captures(line).unwrap();
|
||
|
|
assert_eq!(
|
||
|
|
caps.get(1).unwrap().as_str(),
|
||
|
|
"DF080BBD8D5E954C642F6C3B5639D6EE.node011"
|
||
|
|
);
|
||
|
|
assert_eq!(
|
||
|
|
caps.get(2).unwrap().as_str(),
|
||
|
|
"F9EE9D52FDB4502EB5CE6FFA24194AFD.node011"
|
||
|
|
);
|
||
|
|
}
|
||
|
|
|
||
|
|
// --- SESSION_ID_RE tests ---
|
||
|
|
|
||
|
|
#[test]
|
||
|
|
fn test_session_id_extraction() {
|
||
|
|
let line = "sessionId=ABC123DEF456.node005, something else";
|
||
|
|
let caps = SESSION_ID_RE.captures(line).unwrap();
|
||
|
|
assert_eq!(caps.get(1).unwrap().as_str(), "ABC123DEF456.node005");
|
||
|
|
}
|
||
|
|
|
||
|
|
// --- expand_seeds tests ---
|
||
|
|
|
||
|
|
#[test]
|
||
|
|
fn test_expand_seeds_no_chain() {
|
||
|
|
let seed_sids: HashSet<String> = ["A".to_string()].into();
|
||
|
|
let seed_cids: HashSet<String> = ["c1".to_string()].into();
|
||
|
|
let change_map = HashMap::new();
|
||
|
|
let sig_sessions = HashSet::new();
|
||
|
|
|
||
|
|
let (sids, cids) = expand_seeds(&seed_sids, &seed_cids, &change_map, &sig_sessions);
|
||
|
|
assert_eq!(sids, seed_sids);
|
||
|
|
assert_eq!(cids, seed_cids);
|
||
|
|
}
|
||
|
|
|
||
|
|
#[test]
fn test_expand_seeds_single_chain() {
    // B replaced A (A is old, B is new). Seeding with B must pull in A too.
    let seed_sids: HashSet<String> = HashSet::from(["B".to_string()]);
    let seed_cids: HashSet<String> = HashSet::new();
    let change_map: HashMap<String, String> =
        HashMap::from([("B".to_string(), "A".to_string())]);
    let sig_sessions: HashSet<String> = HashSet::from(["A".to_string()]);

    let (sids, _) = expand_seeds(&seed_sids, &seed_cids, &change_map, &sig_sessions);
    assert_eq!(sids.len(), 2);
    assert!(sids.contains("A"));
    assert!(sids.contains("B"));
}
#[test]
fn test_expand_seeds_multi_hop_chain() {
    // C replaced B, which replaced A. Seeding with C must walk the whole
    // chain back to A.
    let seed_sids: HashSet<String> = HashSet::from(["C".to_string()]);
    let seed_cids: HashSet<String> = HashSet::new();
    let change_map: HashMap<String, String> = HashMap::from([
        ("C".to_string(), "B".to_string()),
        ("B".to_string(), "A".to_string()),
    ]);
    let sig_sessions: HashSet<String> = HashSet::from(["A".to_string()]);

    let (sids, _) = expand_seeds(&seed_sids, &seed_cids, &change_map, &sig_sessions);
    assert_eq!(sids.len(), 3);
    assert!(sids.contains("A"));
    assert!(sids.contains("B"));
    assert!(sids.contains("C"));
}
#[test]
fn test_expand_seeds_stops_at_signature() {
    // Chain D <- C <- B <- A, with a signature on B. Seeding with D must
    // include B (the signed session) but stop there and never reach A.
    let seed_sids: HashSet<String> = HashSet::from(["D".to_string()]);
    let seed_cids: HashSet<String> = HashSet::new();
    let change_map: HashMap<String, String> = HashMap::from([
        ("D".to_string(), "C".to_string()),
        ("C".to_string(), "B".to_string()),
        ("B".to_string(), "A".to_string()),
    ]);
    let sig_sessions: HashSet<String> = HashSet::from(["B".to_string()]);

    let (sids, _) = expand_seeds(&seed_sids, &seed_cids, &change_map, &sig_sessions);
    assert_eq!(sids.len(), 3);
    assert!(sids.contains("D"));
    assert!(sids.contains("C"));
    assert!(sids.contains("B"));
    assert!(!sids.contains("A"));
}
#[test]
fn test_expand_seeds_cycle_protection() {
    // A -> B -> A forms a cycle; expansion must terminate with exactly {A, B}.
    let seed_sids: HashSet<String> = HashSet::from(["A".to_string()]);
    let seed_cids: HashSet<String> = HashSet::new();
    let change_map: HashMap<String, String> = HashMap::from([
        ("A".to_string(), "B".to_string()),
        ("B".to_string(), "A".to_string()),
    ]);
    let sig_sessions: HashSet<String> = HashSet::new();

    let (sids, _) = expand_seeds(&seed_sids, &seed_cids, &change_map, &sig_sessions);
    assert_eq!(sids.len(), 2);
    assert!(sids.contains("A"));
    assert!(sids.contains("B"));
}
// --- Integration test ---
#[test]
fn test_expand_integration() -> Result<()> {
    let dir = tempfile::tempdir()?;
    let log_path = dir.path().join("test.log");
    let mut file = std::fs::File::create(&log_path)?;

    // 1: signature for session A
    // 2: normal line for session A, matches the query
    // 3: changeSessionId: B replaces A
    // 4: normal line for session B
    // 5: unrelated session X
    let log_lines = [
        r#"Jan 01 00:00:01 host app dt="2026-01-01 00:00:01,000", sessionId=AAAA.node001, correlationId=c1, msg="signature:APP/1.0/ details:OS:1""#,
        r#"Jan 01 00:00:02 host app dt="2026-01-01 00:00:02,000", sessionId=AAAA.node001, correlationId=c1, msg="findme something""#,
        r#"Jan 01 00:00:03 host app dt="2026-01-01 00:00:03,000", sessionId=BBBB.node001, correlationId=c2, msg="changeSessionId: newSessionId: sBBBB / BBBB.node001 replaces oldSessionId: sAAAA / AAAA.node001""#,
        r#"Jan 01 00:00:04 host app dt="2026-01-01 00:00:04,000", sessionId=BBBB.node001, correlationId=c2, msg="some other action""#,
        r#"Jan 01 00:00:05 host app dt="2026-01-01 00:00:05,000", sessionId=XXXX.node002, correlationId=c9, msg="unrelated""#,
    ];
    for line in log_lines {
        writeln!(file, "{}", line)?;
    }

    // "findme" hits line 2 (session AAAA, correlation c1), so AAAA and c1
    // become seeds. No changeSessionId lists AAAA as its *new* id, so there
    // is no backward expansion: only lines 1-2 qualify. Lines 3-4 (BBBB —
    // AAAA is the old id there, not the new one) and line 5 (XXXX) must be
    // excluded.
    run_search(log_path.to_str().unwrap(), "findme", false, true, 1)?;
    Ok(())
}
#[test]
fn test_expand_follows_change_session_backward() -> Result<()> {
    let dir = tempfile::tempdir()?;
    let log_path = dir.path().join("test.log");
    let mut file = std::fs::File::create(&log_path)?;

    // 1: signature for session OLD
    // 2: normal line for session OLD
    // 3: changeSessionId: NEW replaces OLD
    // 4: normal line for session NEW, matches the query
    let log_lines = [
        r#"Jan 01 00:00:01 host app dt="2026-01-01 00:00:01,000", sessionId=OLD.node001, correlationId=c0, msg="signature:APP/1.0/ details:OS:1""#,
        r#"Jan 01 00:00:02 host app dt="2026-01-01 00:00:02,000", sessionId=OLD.node001, correlationId=c1, msg="doing stuff""#,
        r#"Jan 01 00:00:03 host app dt="2026-01-01 00:00:03,000", sessionId=NEW.node001, correlationId=c2, msg="changeSessionId: newSessionId: sNEW / NEW.node001 replaces oldSessionId: sOLD / OLD.node001""#,
        r#"Jan 01 00:00:04 host app dt="2026-01-01 00:00:04,000", sessionId=NEW.node001, correlationId=c3, msg="findme in new session""#,
    ];
    for line in log_lines {
        writeln!(file, "{}", line)?;
    }

    // "findme" hits line 4 (session NEW). The changeSessionId map points
    // NEW -> OLD, and OLD carries a signature, so OLD is included and the
    // walk stops there. All four lines (sessions OLD and NEW) are expected.
    run_search(log_path.to_str().unwrap(), "findme", false, true, 1)?;
    Ok(())
}
// --- Chunk boundary tests ---
#[test]
fn test_chunk_boundary_no_lost_lines() -> Result<()> {
    // Three 6-byte lines; splitting exactly on a line boundary must yield
    // every line exactly once across the two chunks.
    let dir = tempfile::tempdir()?;
    let path = dir.path().join("test.log");
    std::fs::write(&path, "line1\nline2\nline3\n")?;

    // Byte 6 is precisely the start of "line2".
    let mut seen = Vec::new();
    for (start, end) in [(0, 6), (6, 18)] {
        for_each_line_in_chunk(path.to_str().unwrap(), start, end, |l| {
            seen.push(l.to_string());
        })?;
    }
    assert_eq!(seen, vec!["line1", "line2", "line3"]);
    Ok(())
}
#[test]
fn test_chunk_boundary_mid_line() -> Result<()> {
    // A split in the middle of a line (byte 3 is inside "line1") must not
    // drop or duplicate any line.
    let dir = tempfile::tempdir()?;
    let path = dir.path().join("test.log");
    std::fs::write(&path, "line1\nline2\nline3\n")?;

    let mut seen = Vec::new();
    for (start, end) in [(0, 3), (3, 18)] {
        for_each_line_in_chunk(path.to_str().unwrap(), start, end, |l| {
            seen.push(l.to_string());
        })?;
    }
    assert_eq!(seen, vec!["line1", "line2", "line3"]);
    Ok(())
}
#[test]
fn test_chunk_four_way_split() -> Result<()> {
    // Splitting 20 lines into 4 computed chunks must reproduce the file
    // content in order with nothing lost or duplicated.
    let dir = tempfile::tempdir()?;
    let path = dir.path().join("test.log");
    let expected: Vec<String> = (0..20).map(|i| format!("line {:02}", i)).collect();
    std::fs::write(&path, expected.join("\n") + "\n")?;

    let file_size = std::fs::metadata(&path)?.len();
    let mut seen = Vec::new();
    for (start, end) in compute_chunks(file_size, 4) {
        for_each_line_in_chunk(path.to_str().unwrap(), start, end, |l| {
            seen.push(l.to_string());
        })?;
    }
    assert_eq!(seen, expected);
    Ok(())
}
// --- Regression: thread-consistency (P1) ---
// Both sequential and parallel pass2 must stop at sessionDestroyed
// and produce identical results regardless of thread count.
#[test]
fn test_pass2_stops_at_session_destroyed() -> Result<()> {
    let dir = tempfile::tempdir()?;
    let log_path = dir.path().join("test.log");
    let mut file = std::fs::File::create(&log_path)?;

    // 1: session AAAA with signature
    // 2: query match, CID c1
    // 3: sessionDestroyed for AAAA
    // 4: post-destroy line reusing CID c1 — must NOT be included
    let log_lines = [
        r#"Jan 01 00:00:01 host app sessionId=AAAA.node001, correlationId=c1, msg="signature:APP_A/1.0/ details:OS:1""#,
        r#"Jan 01 00:00:02 host app sessionId=AAAA.node001, correlationId=c1, msg="findme error""#,
        r#"Jan 01 00:00:03 host app msg="sessionDestroyed #s=1 sid=AAAA.node001 isnew=false""#,
        r#"Jan 01 00:00:04 host app sessionId=noSession, correlationId=c1, msg="async callback after destroy""#,
    ];
    for line in log_lines {
        writeln!(file, "{}", line)?;
    }

    // Pass 1 + seed expansion.
    let pool = build_thread_pool(1)?;
    let pass1 = run_pass1(log_path.to_str().unwrap(), "findme", false, &pool)?;
    let (expanded_sids, expanded_cids) = expand_seeds(
        &pass1.seed_session_ids,
        &pass1.seed_correlation_ids,
        &pass1.change_session_map,
        &pass1.sessions_with_signature,
    );
    assert!(expanded_cids.contains("c1"));

    // Sequential pass 2: lines 1 (sid match) and 2 (query+sid+cid) count,
    // then line 3 destroys AAAA — the only expanded session — so scanning
    // stops and line 4 is excluded despite sharing c1.
    let seq_count = run_pass2(
        log_path.to_str().unwrap(),
        "findme",
        &expanded_sids,
        &expanded_cids,
        false,
        &pool,
        false,
    )?;
    assert_eq!(seq_count, 2, "sequential pass2 must stop at sessionDestroyed");

    // Parallel pass 2 must agree with the sequential result.
    let par_pool = build_thread_pool(2)?;
    let par_count = run_pass2(
        log_path.to_str().unwrap(),
        "findme",
        &expanded_sids,
        &expanded_cids,
        true,
        &par_pool,
        false,
    )?;
    assert_eq!(par_count, seq_count, "parallel pass2 must match sequential");

    Ok(())
}
#[test]
fn test_session_destroyed_regex() {
    // SESSION_DESTROYED_RE must capture the full sid (including node suffix)
    // from a realistic sessionDestroyed line with trailing attributes.
    let input = "sessionDestroyed #s=8 sid=2010F74498079D00A5647F3777545A64.node003 isnew=false age=693s last=644s attrs=loginState,lastRequest,userSessionData";
    let sid = SESSION_DESTROYED_RE
        .captures(input)
        .unwrap()
        .get(1)
        .unwrap()
        .as_str();
    assert_eq!(sid, "2010F74498079D00A5647F3777545A64.node003");
}
// --- Regression: strict app isolation (P1) ---
// search-exceptions --app APP_A must not include lines from APP_B even when
// they share a correlation ID with an APP_A session.
#[test]
fn test_search_exceptions_strict_app_isolation() -> Result<()> {
    let dir = tempfile::tempdir()?;
    let log_path = dir.path().join("test.log");
    let mut file = std::fs::File::create(&log_path)?;

    // 1: APP_A session signature
    // 2: APP_A Exception line carrying the shared CID
    // 3: APP_B session signature
    // 4: APP_B line reusing the shared CID
    let log_lines = [
        r#"Jan 01 00:00:01 host app sessionId=AAAA.node001, correlationId=c1, msg="signature:APP_A/1.0/ details:OS:1""#,
        r#"Jan 01 00:00:02 host app sessionId=AAAA.node001, correlationId=shared_cid, msg="Exception in APP_A""#,
        r#"Jan 01 00:00:03 host app sessionId=BBBB.node001, correlationId=c2, msg="signature:APP_B/2.0/ details:OS:1""#,
        r#"Jan 01 00:00:04 host app sessionId=BBBB.node001, correlationId=shared_cid, msg="handling request from APP_B""#,
    ];
    for line in log_lines {
        writeln!(file, "{}", line)?;
    }

    // Drive the same pipeline search-exceptions uses.
    let pool = build_thread_pool(1)?;
    let query = "Exception";
    let pass1 = run_pass1(log_path.to_str().unwrap(), query, false, &pool)?;

    let (expanded_sids, expanded_cids) = expand_seeds(
        &pass1.seed_session_ids,
        &pass1.seed_correlation_ids,
        &pass1.change_session_map,
        &pass1.sessions_with_signature,
    );

    // Restrict expanded sessions to APP_A only.
    let app_filters = vec!["APP_A".to_string()];
    let filtered_sids = filter_expanded_by_app(
        &expanded_sids,
        &pass1.change_session_map,
        &pass1.session_app_map,
        &app_filters,
    );

    // Keep only CIDs whose seeding session survived the app filter.
    let filtered_cids: HashSet<String> = expanded_cids
        .iter()
        .filter_map(|cid| {
            let sid = pass1.seed_cid_sessions.get(cid.as_str())?;
            filtered_sids.contains(sid).then(|| cid.clone())
        })
        .collect();

    // shared_cid was seeded from APP_A's session, so it stays; BBBB must
    // not survive the app filter.
    assert!(filtered_cids.contains("shared_cid"));
    assert!(!filtered_sids.contains("BBBB"));

    // Pass 2 with strict_app_isolation=true: lines 1-2 (APP_A sid/cid
    // matches) count; line 3 (APP_B sid) and line 4 (shared CID but
    // unfiltered sid BBBB) are excluded.
    let count = run_pass2(
        log_path.to_str().unwrap(),
        query,
        &filtered_sids,
        &filtered_cids,
        false,
        &pool,
        true,
    )?;
    assert_eq!(count, 2, "APP_B lines must not leak through shared CID");
    Ok(())
}
}