Improved memory and disk usage, best practices. Review done by GPT 5.2, code written by Claude Opus 4.5

This commit is contained in:
2026-01-22 10:09:52 +01:00
parent 169409738f
commit 7e03af23de
4 changed files with 445 additions and 193 deletions

109
src/db.rs
View File

@@ -1,5 +1,5 @@
use anyhow::Result;
use rusqlite::{params, Connection, Transaction};
use rusqlite::{Connection, Transaction, params};
use std::collections::HashMap;
use crate::parser::SignatureEntry;
@@ -12,8 +12,17 @@ impl Database {
pub fn new(path: &str) -> Result<Self> {
let conn = Connection::open(path)?;
// Enable WAL mode for better concurrent read/write performance
// Production-optimized pragmas for bulk ingestion
// WAL mode for better concurrent read/write performance
conn.pragma_update(None, "journal_mode", "WAL")?;
// NORMAL synchronous is acceptable for ingestion pipelines (fsync on checkpoint only)
conn.pragma_update(None, "synchronous", "NORMAL")?;
// Bound WAL growth: checkpoint every 1000 pages (~4MB with default page size)
conn.pragma_update(None, "wal_autocheckpoint", 1000)?;
// Small bounded cache to limit memory usage (negative = KB)
conn.pragma_update(None, "cache_size", -8000)?; // 8MB cache
// Store temp tables in memory to reduce disk I/O
conn.pragma_update(None, "temp_store", "MEMORY")?;
let db = Self { conn };
db.init_schema()?;
@@ -21,6 +30,7 @@ impl Database {
}
fn init_schema(&self) -> Result<()> {
// Create tables without indexes - indexes will be created after ingestion
self.conn.execute_batch(
r#"
-- Lookup tables for low-cardinality text columns
@@ -55,6 +65,7 @@ impl Database {
);
-- Main table with normalized foreign keys and integer timestamp
-- Indexes are created AFTER ingestion for better bulk insert performance
CREATE TABLE IF NOT EXISTS signature_entries (
id INTEGER PRIMARY KEY,
session_id TEXT NOT NULL,
@@ -72,12 +83,32 @@ impl Database {
device_id INTEGER REFERENCES devices(id),
password_autofill_usage INTEGER
);
"#,
)?;
Ok(())
}
/// Build the secondary indexes and run post-ingestion maintenance.
///
/// Intended to be called once all rows are inserted. It runs two SQL batches
/// in order, announcing each on stderr first:
/// 1. create the `session_id`, `timestamp_ms` and `version_id` indexes on
///    `signature_entries` (each is skipped if it already exists);
/// 2. `PRAGMA optimize` followed by a truncating WAL checkpoint.
///
/// # Errors
/// Returns the first error reported by either batch; the second batch does
/// not run if the first fails.
pub fn finalize(&self) -> Result<()> {
    const CREATE_INDEXES: &str = r#"
CREATE INDEX IF NOT EXISTS idx_session_id ON signature_entries(session_id);
CREATE INDEX IF NOT EXISTS idx_timestamp ON signature_entries(timestamp_ms);
CREATE INDEX IF NOT EXISTS idx_version ON signature_entries(version_id);
"#;
    // Optimizer pass first, then checkpoint and truncate the WAL to reduce disk usage.
    const OPTIMIZE_AND_CHECKPOINT: &str = r#"
PRAGMA optimize;
PRAGMA wal_checkpoint(TRUNCATE);
"#;

    let phases = [
        ("Creating indexes...", CREATE_INDEXES),
        (
            "Optimizing database and Checkpointing WAL...",
            OPTIMIZE_AND_CHECKPOINT,
        ),
    ];

    for (banner, sql) in phases {
        eprintln!("{}", banner);
        self.conn.execute_batch(sql)?;
    }

    Ok(())
}
@@ -94,6 +125,26 @@ impl Database {
let mut os_cache: HashMap<String, i64> = HashMap::new();
let mut app_name_cache: HashMap<String, i64> = HashMap::new();
// Prepare all lookup statements once (using INSERT ... ON CONFLICT ... RETURNING)
let mut apps_stmt = tx.prepare_cached(
"INSERT INTO apps (name) VALUES (?) ON CONFLICT(name) DO UPDATE SET name=excluded.name RETURNING id",
)?;
let mut versions_stmt = tx.prepare_cached(
"INSERT INTO versions (name) VALUES (?) ON CONFLICT(name) DO UPDATE SET name=excluded.name RETURNING id",
)?;
let mut models_stmt = tx.prepare_cached(
"INSERT INTO models (name) VALUES (?) ON CONFLICT(name) DO UPDATE SET name=excluded.name RETURNING id",
)?;
let mut devices_stmt = tx.prepare_cached(
"INSERT INTO devices (name) VALUES (?) ON CONFLICT(name) DO UPDATE SET name=excluded.name RETURNING id",
)?;
let mut os_stmt = tx.prepare_cached(
"INSERT INTO os_versions (name) VALUES (?) ON CONFLICT(name) DO UPDATE SET name=excluded.name RETURNING id",
)?;
let mut app_names_stmt = tx.prepare_cached(
"INSERT INTO app_names (name) VALUES (?) ON CONFLICT(name) DO UPDATE SET name=excluded.name RETURNING id",
)?;
let mut insert_stmt = tx.prepare_cached(
r#"
INSERT INTO signature_entries (
@@ -106,12 +157,29 @@ impl Database {
)?;
for entry in entries {
let app_id = get_or_insert_lookup(tx, &mut app_cache, "apps", &entry.app)?;
let version_id = get_or_insert_lookup(tx, &mut version_cache, "versions", &entry.version)?;
let model_id = entry.model.as_ref().map(|v| get_or_insert_lookup(tx, &mut model_cache, "models", v)).transpose()?;
let device_id = entry.device.as_ref().map(|v| get_or_insert_lookup(tx, &mut device_cache, "devices", v)).transpose()?;
let os_id = entry.os.as_ref().map(|v| get_or_insert_lookup(tx, &mut os_cache, "os_versions", v)).transpose()?;
let app_name_id = entry.app_name.as_ref().map(|v| get_or_insert_lookup(tx, &mut app_name_cache, "app_names", v)).transpose()?;
let app_id = get_or_insert_cached(&mut apps_stmt, &mut app_cache, &entry.app)?;
let version_id =
get_or_insert_cached(&mut versions_stmt, &mut version_cache, &entry.version)?;
let model_id = entry
.model
.as_ref()
.map(|v| get_or_insert_cached(&mut models_stmt, &mut model_cache, v))
.transpose()?;
let device_id = entry
.device
.as_ref()
.map(|v| get_or_insert_cached(&mut devices_stmt, &mut device_cache, v))
.transpose()?;
let os_id = entry
.os
.as_ref()
.map(|v| get_or_insert_cached(&mut os_stmt, &mut os_cache, v))
.transpose()?;
let app_name_id = entry
.app_name
.as_ref()
.map(|v| get_or_insert_cached(&mut app_names_stmt, &mut app_name_cache, v))
.transpose()?;
insert_stmt.execute(params![
entry.session_id,
@@ -135,32 +203,19 @@ impl Database {
}
}
/// Get or insert a value into a lookup table, using a cache to minimize DB queries
fn get_or_insert_lookup(
tx: &Transaction<'_>,
/// Get or insert a value using a prepared statement with RETURNING, with in-memory cache
fn get_or_insert_cached(
stmt: &mut rusqlite::CachedStatement<'_>,
cache: &mut HashMap<String, i64>,
table: &str,
value: &str,
) -> Result<i64> {
// Check cache first
if let Some(&id) = cache.get(value) {
return Ok(id);
}
// Try to find existing entry
let query = format!("SELECT id FROM {} WHERE name = ?", table);
let existing: Option<i64> = tx
.query_row(&query, params![value], |row| row.get(0))
.ok();
if let Some(id) = existing {
cache.insert(value.to_string(), id);
return Ok(id);
}
// Insert new entry
let insert = format!("INSERT INTO {} (name) VALUES (?)", table);
tx.execute(&insert, params![value])?;
let id = tx.last_insert_rowid();
// Use INSERT ... ON CONFLICT ... RETURNING to get id in one round-trip
let id: i64 = stmt.query_row(params![value], |row| row.get(0))?;
cache.insert(value.to_string(), id);
Ok(id)
}

View File

@@ -1,10 +1,41 @@
use anyhow::{anyhow, Result};
use anyhow::{Result, anyhow};
use chrono::NaiveDate;
use flate2::read::GzDecoder;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::io::{BufRead, BufReader, Read};
use std::path::PathBuf;
/// Enum-based reader to avoid Box<dyn BufRead> heap allocation and dynamic dispatch.
///
/// This is the concrete reader type returned by `LogFile::reader` and
/// `read_log_file`; it implements both `Read` and `BufRead` by forwarding to
/// whichever variant is active.
pub enum LogReader {
    /// Buffered reader over an uncompressed file.
    Plain(BufReader<File>),
    /// Buffered reader over a gzip decoder that wraps the file.
    Gzip(BufReader<GzDecoder<File>>),
}
impl Read for LogReader {
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
match self {
LogReader::Plain(r) => r.read(buf),
LogReader::Gzip(r) => r.read(buf),
}
}
}
impl BufRead for LogReader {
fn fill_buf(&mut self) -> std::io::Result<&[u8]> {
match self {
LogReader::Plain(r) => r.fill_buf(),
LogReader::Gzip(r) => r.fill_buf(),
}
}
fn consume(&mut self, amt: usize) {
match self {
LogReader::Plain(r) => r.consume(amt),
LogReader::Gzip(r) => r.consume(amt),
}
}
}
/// Discovers log files for a given date range
pub struct LogFileDiscovery {
base_dir: PathBuf,
@@ -25,9 +56,7 @@ impl LogFileDiscovery {
if let Some(log_file) = self.find_log_for_date(current)? {
files.push(log_file);
}
current = current
.succ_opt()
.ok_or_else(|| anyhow!("Date overflow"))?;
current = current.succ_opt().ok_or_else(|| anyhow!("Date overflow"))?;
}
Ok(files)
@@ -72,26 +101,26 @@ pub struct LogFile {
impl LogFile {
/// Returns a buffered reader for this log file, handling compression transparently
pub fn reader(&self) -> Result<Box<dyn BufRead>> {
pub fn reader(&self) -> Result<LogReader> {
let file = File::open(&self.path)?;
if self.compressed {
let decoder = GzDecoder::new(file);
Ok(Box::new(BufReader::new(decoder)))
Ok(LogReader::Gzip(BufReader::new(decoder)))
} else {
Ok(Box::new(BufReader::new(file)))
Ok(LogReader::Plain(BufReader::new(file)))
}
}
}
/// For reading a single file directly (e.g., for testing)
pub fn read_log_file(path: &str) -> Result<Box<dyn BufRead>> {
pub fn read_log_file(path: &str) -> Result<LogReader> {
let file = File::open(path)?;
if path.ends_with(".gz") {
let decoder = GzDecoder::new(file);
Ok(Box::new(BufReader::new(decoder)))
Ok(LogReader::Gzip(BufReader::new(decoder)))
} else {
Ok(Box::new(BufReader::new(file)))
Ok(LogReader::Plain(BufReader::new(file)))
}
}

View File

@@ -1,13 +1,13 @@
use anyhow::{anyhow, Result};
use anyhow::{Result, anyhow};
use chrono::NaiveDate;
use clap::Parser;
use crossbeam_channel::{bounded, Sender};
use crossbeam_channel::{Sender, bounded};
use rayon::prelude::*;
use std::collections::HashMap;
use std::io::BufRead;
use std::path::PathBuf;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};
use std::thread;
mod db;
@@ -15,7 +15,7 @@ mod files;
mod parser;
use db::Database;
use files::{read_log_file, LogFile, LogFileDiscovery};
use files::{LogFile, LogFileDiscovery, LogReader, read_log_file};
use parser::{ParsedMessage, ParserRegistry, SignatureEntry};
#[derive(Parser, Debug)]
@@ -63,11 +63,15 @@ fn main() -> Result<()> {
let args = Args::parse();
// Configure rayon thread pool if threads specified
if args.threads > 0 {
rayon::ThreadPoolBuilder::new()
if args.threads > 0
&& let Err(e) = rayon::ThreadPoolBuilder::new()
.num_threads(args.threads)
.build_global()
.ok(); // Ignore error if pool already initialized
{
eprintln!(
"Warning: Could not configure thread pool ({}), using default",
e
);
}
let use_parallel = args.threads != 1;
@@ -79,6 +83,7 @@ fn main() -> Result<()> {
eprintln!("Processing single file: {}", file_path.display());
let reader = read_log_file(file_path.to_str().unwrap())?;
process_reader(reader, &registry, &mut db, args.batch_size)?;
db.finalize()?;
} else {
// Process date range
let from = parse_date(
@@ -142,12 +147,18 @@ fn process_files_sequential(
let reader = log_file.reader()?;
process_reader(reader, &registry, &mut db, batch_size)?;
}
db.finalize()?;
Ok(())
}
fn process_files_parallel(log_files: Vec<LogFile>, output: &str, batch_size: usize) -> Result<()> {
let num_threads = rayon::current_num_threads();
eprintln!("Processing {} files with {} threads", log_files.len(), num_threads);
eprintln!(
"Processing {} files with {} threads",
log_files.len(),
num_threads
);
// Channel for sending parsed entries to the DB writer
// Buffer size: enough batches to keep workers busy without excessive memory
@@ -158,6 +169,9 @@ fn process_files_parallel(log_files: Vec<LogFile>, output: &str, batch_size: usi
let parsed_lines = Arc::new(AtomicU64::new(0));
let error_count = Arc::new(AtomicU64::new(0));
// Shared parser registry - parsers are stateless, so we can share one instance
let registry = Arc::new(ParserRegistry::new());
// Spawn DB writer thread
let output_path = output.to_string();
let db_handle = thread::spawn(move || -> Result<()> {
@@ -169,19 +183,23 @@ fn process_files_parallel(log_files: Vec<LogFile>, output: &str, batch_size: usi
tx.commit()?;
}
db.finalize()?;
Ok(())
});
// Process files in parallel
let result: Result<()> = log_files
.into_par_iter()
.try_for_each(|log_file| {
let result: Result<()> = log_files.into_par_iter().try_for_each(|log_file| {
let file_path = log_file.path.display().to_string();
let compressed = if log_file.compressed { "compressed" } else { "plain" };
let compressed = if log_file.compressed {
"compressed"
} else {
"plain"
};
eprintln!("Starting: {} ({})", file_path, compressed);
process_file_parallel(
log_file,
&registry,
&sender,
batch_size,
&total_lines,
@@ -214,25 +232,35 @@ fn process_files_parallel(log_files: Vec<LogFile>, output: &str, batch_size: usi
fn process_file_parallel(
log_file: LogFile,
registry: &Arc<ParserRegistry>,
sender: &Sender<Vec<SignatureEntry>>,
batch_size: usize,
total_lines: &AtomicU64,
parsed_lines: &AtomicU64,
error_count: &AtomicU64,
) -> Result<()> {
let registry = ParserRegistry::new();
let reader = log_file.reader()?;
let mut reader = log_file.reader()?;
let mut batch: Vec<SignatureEntry> = Vec::with_capacity(batch_size);
let mut file_lines = 0u64;
let mut file_parsed = 0u64;
let mut file_errors = 0u64;
for line_result in reader.lines() {
let line = line_result?;
// Reuse line buffer to avoid per-line allocations
let mut line = String::new();
loop {
line.clear();
let bytes_read = reader.read_line(&mut line)?;
if bytes_read == 0 {
break; // EOF
}
file_lines += 1;
if let Some(parse_result) = registry.parse(&line) {
// Trim newline for parsing (without reallocating)
let line_trimmed = line.trim_end();
if let Some(parse_result) = registry.parse(line_trimmed) {
match parse_result {
Ok(ParsedMessage::Signature(entry)) => {
batch.push(entry);
@@ -266,7 +294,7 @@ fn process_file_parallel(
}
fn process_reader(
reader: Box<dyn BufRead>,
mut reader: LogReader,
registry: &ParserRegistry,
db: &mut Database,
batch_size: usize,
@@ -276,11 +304,21 @@ fn process_reader(
let mut parsed_lines = 0u64;
let mut error_counts: HashMap<String, u64> = HashMap::new();
for line_result in reader.lines() {
let line = line_result?;
// Reuse line buffer to avoid per-line allocations
let mut line = String::new();
loop {
line.clear();
let bytes_read = reader.read_line(&mut line)?;
if bytes_read == 0 {
break; // EOF
}
total_lines += 1;
if let Some(parse_result) = registry.parse(&line) {
// Trim newline for parsing (without reallocating)
let line_trimmed = line.trim_end();
if let Some(parse_result) = registry.parse(line_trimmed) {
match parse_result {
Ok(ParsedMessage::Signature(entry)) => {
signature_batch.push(entry);

View File

@@ -1,4 +1,4 @@
use anyhow::{anyhow, Result};
use anyhow::{Result, anyhow};
use chrono::NaiveDateTime;
use regex::Regex;
use std::sync::LazyLock;
@@ -23,6 +23,45 @@ pub struct SignatureEntry {
pub password_autofill_usage: Option<i64>,
}
/// Parsed details without HashMap allocation - holds string slices into the original details string.
///
/// Every field is `None` until the corresponding key is found in the details
/// string. Values are stored as raw text; conversion to numbers or booleans
/// is left to the consumer.
#[derive(Debug, Default)]
struct ParsedDetails<'a> {
    /// Raw value of the `offlineLoginUsage` key.
    offline_login_usage: Option<&'a str>,
    /// Raw value of the `isPasswordAutofillEnabled` key.
    is_password_autofill_enabled: Option<&'a str>,
    /// Raw value of the `cameraRollUsage` key.
    camera_roll_usage: Option<&'a str>,
    /// Raw value of the `OS` key.
    os: Option<&'a str>,
    /// Raw value of the `appName` key.
    app_name: Option<&'a str>,
    /// Raw value of the `touchID` key.
    touch_id: Option<&'a str>,
    /// Raw value of the `isOfflineLoginEnabled` key.
    is_offline_login_enabled: Option<&'a str>,
    /// Raw value of the `model` key.
    model: Option<&'a str>,
    /// Raw value of the `device` key.
    device: Option<&'a str>,
    /// Raw value of the `passwordAutofillUsage` key.
    password_autofill_usage: Option<&'a str>,
}

impl<'a> ParsedDetails<'a> {
    /// Create an empty record with every field unset.
    ///
    /// Delegates to the derived `Default`, so a newly added field cannot be
    /// forgotten here.
    fn new() -> Self {
        Self::default()
    }
}
/// Parsed mobile details without HashMap allocation.
///
/// Each field borrows its text from the details string it was parsed from and
/// stays `None` when the corresponding key was not found.
struct ParsedMobileDetails<'a> {
    /// Value of the `os` key.
    os: Option<&'a str>,
    /// Value of the `app-name` key.
    app_name: Option<&'a str>,
    /// Value of the `model` key.
    model: Option<&'a str>,
    /// Value of the `device` key.
    device: Option<&'a str>,
}
/// Trait for parsing different message types from logs.
/// Implement this trait to add support for new message formats.
pub trait MessageParser: Send + Sync {
@@ -39,8 +78,9 @@ pub enum ParsedMessage {
static SESSION_ID_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"sessionId=([^,\s]+)").unwrap());
static DATETIME_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r#"dt="(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})(?:,(\d{3}))?"#).unwrap());
static DATETIME_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r#"dt="(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})(?:,(\d{3}))?"#).unwrap()
});
static CORRELATION_ID_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"correlationId=([^,\s]+)").unwrap());
static SIGNATURE_RE: LazyLock<Regex> =
@@ -100,25 +140,26 @@ impl SignatureParser {
let version = caps.get(2).map(|m| m.as_str().to_string()).unwrap();
let details_str = caps.get(3).map(|m| m.as_str()).unwrap();
// Parse details key-value pairs
// Handle the tricky "device:iOS, Apple" case by parsing carefully
let details = parse_details(details_str)?;
// Parse details key-value pairs directly into struct (no HashMap)
let details = parse_details_direct(details_str);
let entry = SignatureEntry {
session_id,
timestamp_ms,
app,
version,
offline_login_usage: parse_number(&details, "offlineLoginUsage"),
is_password_autofill_enabled: parse_bool(&details, "isPasswordAutofillEnabled"),
camera_roll_usage: parse_number(&details, "cameraRollUsage"),
os: get_string(&details, "OS"),
app_name: get_string(&details, "appName"),
touch_id: parse_bool(&details, "touchID"),
is_offline_login_enabled: parse_bool(&details, "isOfflineLoginEnabled"),
model: get_string(&details, "model"),
device: get_string(&details, "device"),
password_autofill_usage: parse_number(&details, "passwordAutofillUsage"),
offline_login_usage: details.offline_login_usage.and_then(parse_number_str),
is_password_autofill_enabled: details
.is_password_autofill_enabled
.and_then(parse_bool_str),
camera_roll_usage: details.camera_roll_usage.and_then(parse_number_str),
os: details.os.map(|s| s.to_string()),
app_name: details.app_name.map(|s| s.to_string()),
touch_id: details.touch_id.and_then(parse_bool_str),
is_offline_login_enabled: details.is_offline_login_enabled.and_then(parse_bool_str),
model: details.model.map(|s| s.to_string()),
device: details.device.map(|s| s.to_string()),
password_autofill_usage: details.password_autofill_usage.and_then(parse_number_str),
};
Ok(ParsedMessage::Signature(entry))
@@ -151,7 +192,7 @@ impl MobileClientIosParser {
let version = caps.get(2).map(|m| m.as_str().to_string()).unwrap();
let details_str = caps.get(4).map(|m| m.as_str()).unwrap();
let details = parse_mobile_details(details_str);
let details = parse_mobile_details_direct(details_str);
let entry = SignatureEntry {
session_id,
@@ -161,11 +202,11 @@ impl MobileClientIosParser {
offline_login_usage: None,
is_password_autofill_enabled: None,
camera_roll_usage: None,
os: get_string(&details, "os"),
app_name: get_string(&details, "app-name"),
os: details.os.map(|s| s.to_string()),
app_name: details.app_name.map(|s| s.to_string()),
touch_id: None,
is_offline_login_enabled: None,
model: get_string(&details, "model"),
model: details.model.map(|s| s.to_string()),
device: Some("iOS".to_string()),
password_autofill_usage: None,
};
@@ -200,7 +241,7 @@ impl MobileClientAndroidParser {
let version = caps.get(2).map(|m| m.as_str().to_string()).unwrap();
let details_str = caps.get(4).map(|m| m.as_str()).unwrap();
let details = parse_mobile_details_android(details_str);
let details = parse_mobile_details_android_direct(details_str);
let entry = SignatureEntry {
session_id,
@@ -210,12 +251,12 @@ impl MobileClientAndroidParser {
offline_login_usage: None,
is_password_autofill_enabled: None,
camera_roll_usage: None,
os: get_string(&details, "os"),
os: details.os.map(|s| s.to_string()),
app_name: Some("native Android".to_string()),
touch_id: None,
is_offline_login_enabled: None,
model: get_string(&details, "model"),
device: get_string(&details, "device"),
model: details.model.map(|s| s.to_string()),
device: details.device.map(|s| s.to_string()),
password_autofill_usage: None,
};
@@ -250,93 +291,95 @@ fn extract_correlation_id(line: &str) -> Result<String> {
.ok_or_else(|| anyhow!("Missing correlationId"))
}
/// Parse mobile client details for iOS (simple comma-separated key:value)
fn parse_mobile_details(details: &str) -> std::collections::HashMap<String, String> {
let mut map = std::collections::HashMap::new();
/// Parse mobile client details for iOS directly into struct (no HashMap allocation)
fn parse_mobile_details_direct(details: &str) -> ParsedMobileDetails<'_> {
// Keys for iOS mobile client
let known_keys = ["sdk-client", "sdk-version", "app-name", "device", "model", "os"];
const KNOWN_KEYS: [&str; 6] = [
"sdk-client",
"sdk-version",
"app-name",
"device",
"model",
"os",
];
let mut key_positions: Vec<(usize, &str)> = known_keys
let mut key_positions: Vec<(usize, &str)> = KNOWN_KEYS
.iter()
.filter_map(|&key| {
let pattern = format!("{}:", key);
details.find(&pattern).map(|pos| (pos, key))
})
.filter_map(|&key| find_key_position(details, key))
.collect();
key_positions.sort_by_key(|&(pos, _)| pos);
for i in 0..key_positions.len() {
let (pos, key) = key_positions[i];
let value_start = pos + key.len() + 1;
let value_end = if i + 1 < key_positions.len() {
let next_pos = key_positions[i + 1].0;
if next_pos > 0 && details.as_bytes().get(next_pos - 1) == Some(&b',') {
next_pos - 1
} else {
next_pos
}
} else {
details.find(" user-agent").unwrap_or(details.len())
let mut result = ParsedMobileDetails {
os: None,
app_name: None,
model: None,
device: None,
};
let value = details[value_start..value_end].trim().to_string();
map.insert(key.to_string(), value);
for i in 0..key_positions.len() {
let (pos, key) = key_positions[i];
let value = extract_value(details, pos, key, i, &key_positions);
match key {
"os" => result.os = Some(value),
"app-name" => result.app_name = Some(value),
"model" => result.model = Some(value),
"device" => result.device = Some(value),
_ => {}
}
}
map
result
}
/// Parse mobile client details for Android (handles device with commas)
fn parse_mobile_details_android(details: &str) -> std::collections::HashMap<String, String> {
let mut map = std::collections::HashMap::new();
/// Parse mobile client details for Android directly into struct (no HashMap allocation)
fn parse_mobile_details_android_direct(details: &str) -> ParsedMobileDetails<'_> {
// For Android, device can contain commas like "Android, samsung"
// Keys in order: sdk-client, sdk-version, app-name, device, model, os
let known_keys = ["sdk-client", "sdk-version", "app-name", "device", "model", "os"];
const KNOWN_KEYS: [&str; 6] = [
"sdk-client",
"sdk-version",
"app-name",
"device",
"model",
"os",
];
let mut key_positions: Vec<(usize, &str)> = known_keys
let mut key_positions: Vec<(usize, &str)> = KNOWN_KEYS
.iter()
.filter_map(|&key| {
let pattern = format!("{}:", key);
details.find(&pattern).map(|pos| (pos, key))
})
.filter_map(|&key| find_key_position(details, key))
.collect();
key_positions.sort_by_key(|&(pos, _)| pos);
for i in 0..key_positions.len() {
let (pos, key) = key_positions[i];
let value_start = pos + key.len() + 1;
let value_end = if i + 1 < key_positions.len() {
let next_pos = key_positions[i + 1].0;
// Find the comma before the next key
if next_pos > 0 && details.as_bytes().get(next_pos - 1) == Some(&b',') {
next_pos - 1
} else {
next_pos
}
} else {
details.find(" user-agent").unwrap_or(details.len())
let mut result = ParsedMobileDetails {
os: None,
app_name: None,
model: None,
device: None,
};
let value = details[value_start..value_end].trim().to_string();
map.insert(key.to_string(), value);
for i in 0..key_positions.len() {
let (pos, key) = key_positions[i];
let value = extract_value(details, pos, key, i, &key_positions);
match key {
"os" => result.os = Some(value),
"app-name" => result.app_name = Some(value),
"model" => result.model = Some(value),
"device" => result.device = Some(value),
_ => {}
}
}
map
result
}
/// Parse the details string which has format like:
/// offlineLoginUsage:0,isPasswordAutofillEnabled:no,...,device:iOS, Apple,passwordAutofillUsage:0
fn parse_details(details: &str) -> Result<std::collections::HashMap<String, String>> {
let mut map = std::collections::HashMap::new();
/// Parse the details string directly into ParsedDetails (no HashMap allocation)
/// Format: offlineLoginUsage:0,isPasswordAutofillEnabled:no,...,device:iOS, Apple,passwordAutofillUsage:0
fn parse_details_direct(details: &str) -> ParsedDetails<'_> {
// Known keys in order they appear
let known_keys = [
const KNOWN_KEYS: [&str; 10] = [
"offlineLoginUsage",
"isPasswordAutofillEnabled",
"cameraRollUsage",
@@ -350,25 +393,71 @@ fn parse_details(details: &str) -> Result<std::collections::HashMap<String, Stri
];
// Find positions of each key
let mut key_positions: Vec<(usize, &str)> = known_keys
let mut key_positions: Vec<(usize, &str)> = KNOWN_KEYS
.iter()
.filter_map(|&key| {
let pattern = format!("{}:", key);
details.find(&pattern).map(|pos| (pos, key))
})
.filter_map(|&key| find_key_position(details, key))
.collect();
// Sort by position
key_positions.sort_by_key(|&(pos, _)| pos);
let mut result = ParsedDetails::new();
// Extract values between keys
for i in 0..key_positions.len() {
let (pos, key) = key_positions[i];
let value = extract_value(details, pos, key, i, &key_positions);
match key {
"offlineLoginUsage" => result.offline_login_usage = Some(value),
"isPasswordAutofillEnabled" => result.is_password_autofill_enabled = Some(value),
"cameraRollUsage" => result.camera_roll_usage = Some(value),
"OS" => result.os = Some(value),
"appName" => result.app_name = Some(value),
"touchID" => result.touch_id = Some(value),
"isOfflineLoginEnabled" => result.is_offline_login_enabled = Some(value),
"model" => result.model = Some(value),
"device" => result.device = Some(value),
"passwordAutofillUsage" => result.password_autofill_usage = Some(value),
_ => {}
}
}
result
}
/// Find the position of a key in the details string without allocating.
///
/// An occurrence of `key` only counts when it is used as a field name:
/// it must be immediately followed by `:` and must sit either at the very
/// start of `details` or directly after a `,`. This stops a short key from
/// matching inside a longer word (for example `os` inside `macos:`).
///
/// Returns the byte offset of the first qualifying occurrence together with
/// `key`, or `None` when there is none. Never panics, including for an empty
/// `key` or for keys and details containing multi-byte UTF-8 characters.
#[inline]
fn find_key_position<'a>(details: &str, key: &'a str) -> Option<(usize, &'a str)> {
    let bytes = details.as_bytes();
    let mut search_start = 0;

    // Checked slicing ends the scan once the cursor has moved past the end of
    // `details` instead of panicking.
    while let Some(offset) = details
        .get(search_start..)
        .and_then(|rest| rest.find(key))
    {
        let start = search_start + offset;

        let followed_by_colon = bytes.get(start + key.len()) == Some(&b':');
        let at_field_start = start == 0 || bytes[start - 1] == b',';
        if followed_by_colon && at_field_start {
            return Some((start, key));
        }

        // Resume one whole character after the rejected candidate: overlapping
        // candidates are still considered, and the next slice is guaranteed
        // to begin on a UTF-8 character boundary.
        let step = details[start..].chars().next().map_or(1, char::len_utf8);
        search_start = start + step;
    }

    None
}
/// Extract a value from the details string without allocating
#[inline]
fn extract_value<'a>(
details: &'a str,
pos: usize,
key: &str,
index: usize,
key_positions: &[(usize, &str)],
) -> &'a str {
let value_start = pos + key.len() + 1; // +1 for ':'
let value_end = if i + 1 < key_positions.len() {
// Value ends at the comma before the next key
let next_pos = key_positions[i + 1].0;
let value_end = if index + 1 < key_positions.len() {
let next_pos = key_positions[index + 1].0;
// Find the comma before the next key
if next_pos > 0 && details.as_bytes().get(next_pos - 1) == Some(&b',') {
next_pos - 1
@@ -377,34 +466,74 @@ fn parse_details(details: &str) -> Result<std::collections::HashMap<String, Stri
}
} else {
// Last key - value goes until " user-agent" or end
details
.find(" user-agent")
.unwrap_or(details.len())
details.find(" user-agent").unwrap_or(details.len())
};
let value = details[value_start..value_end].trim().to_string();
map.insert(key.to_string(), value);
details[value_start..value_end].trim()
}
Ok(map)
/// Interpret a string slice as an `i64` without allocating.
///
/// Returns `Some(n)` when the whole slice parses as an `i64`, and `None` for
/// anything the standard integer parser rejects (empty input, surrounding
/// whitespace, non-digits, out-of-range values).
#[inline]
fn parse_number_str(s: &str) -> Option<i64> {
    match s.parse::<i64>() {
        Ok(number) => Some(number),
        Err(_) => None,
    }
}
fn parse_number(map: &std::collections::HashMap<String, String>, key: &str) -> Option<i64> {
map.get(key).and_then(|v| v.parse().ok())
/// Parse a boolean from a string slice using ASCII-insensitive matching (no allocation)
#[inline]
fn parse_bool_str(value: &str) -> Option<bool> {
let bytes = value.as_bytes();
match bytes.len() {
1 => match bytes[0] {
b'1' => Some(true),
b'0' => Some(false),
_ => None,
},
2 => {
// "no"
if (bytes[0] == b'n' || bytes[0] == b'N') && (bytes[1] == b'o' || bytes[1] == b'O') {
Some(false)
} else {
None
}
}
3 => {
// "yes"
if (bytes[0] == b'y' || bytes[0] == b'Y')
&& (bytes[1] == b'e' || bytes[1] == b'E')
&& (bytes[2] == b's' || bytes[2] == b'S')
{
Some(true)
} else {
None
}
}
4 => {
// "true"
if (bytes[0] == b't' || bytes[0] == b'T')
&& (bytes[1] == b'r' || bytes[1] == b'R')
&& (bytes[2] == b'u' || bytes[2] == b'U')
&& (bytes[3] == b'e' || bytes[3] == b'E')
{
Some(true)
} else {
None
}
}
5 => {
// "false"
if (bytes[0] == b'f' || bytes[0] == b'F')
&& (bytes[1] == b'a' || bytes[1] == b'A')
&& (bytes[2] == b'l' || bytes[2] == b'L')
&& (bytes[3] == b's' || bytes[3] == b'S')
&& (bytes[4] == b'e' || bytes[4] == b'E')
{
Some(false)
} else {
None
}
}
fn parse_bool(map: &std::collections::HashMap<String, String>, key: &str) -> Option<bool> {
map.get(key).and_then(|value| {
match value.to_lowercase().as_str() {
"yes" | "true" | "1" => Some(true),
"no" | "false" | "0" => Some(false),
_ => None,
}
})
}
fn get_string(map: &std::collections::HashMap<String, String>, key: &str) -> Option<String> {
map.get(key).map(|s| s.to_string())
}
/// Registry of all available message parsers
@@ -477,7 +606,8 @@ mod tests {
#[test]
fn test_parse_non_signature_line() {
let line = r#"Jan 21 00:00:06 tom013 m1s-kv dt="2026-01-21 00:00:06", msg="some other message""#;
let line =
r#"Jan 21 00:00:06 tom013 m1s-kv dt="2026-01-21 00:00:06", msg="some other message""#;
let registry = ParserRegistry::new();
assert!(registry.parse(line).is_none());
}