From 8fd9ff91409798d2402dcf07e155af8173b39f1f Mon Sep 17 00:00:00 2001
From: Alexandr Mansurov
Date: Thu, 22 Jan 2026 00:34:32 +0100
Subject: [PATCH] Optimize disk usage

---
 src/db.rs | 152 +++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 129 insertions(+), 23 deletions(-)

diff --git a/src/db.rs b/src/db.rs
index c37ffab..f65969e 100644
--- a/src/db.rs
+++ b/src/db.rs
@@ -1,5 +1,6 @@
 use anyhow::Result;
 use rusqlite::{params, Connection, Transaction};
+use std::collections::HashMap;
 
 use crate::parser::SignatureEntry;
 
@@ -22,26 +23,58 @@ impl Database {
     fn init_schema(&self) -> Result<()> {
         self.conn.execute_batch(
             r#"
+            -- Lookup tables for low-cardinality text columns
+            CREATE TABLE IF NOT EXISTS apps (
+                id INTEGER PRIMARY KEY,
+                name TEXT NOT NULL UNIQUE
+            );
+
+            CREATE TABLE IF NOT EXISTS versions (
+                id INTEGER PRIMARY KEY,
+                name TEXT NOT NULL UNIQUE
+            );
+
+            CREATE TABLE IF NOT EXISTS models (
+                id INTEGER PRIMARY KEY,
+                name TEXT NOT NULL UNIQUE
+            );
+
+            CREATE TABLE IF NOT EXISTS devices (
+                id INTEGER PRIMARY KEY,
+                name TEXT NOT NULL UNIQUE
+            );
+
+            CREATE TABLE IF NOT EXISTS os_versions (
+                id INTEGER PRIMARY KEY,
+                name TEXT NOT NULL UNIQUE
+            );
+
+            CREATE TABLE IF NOT EXISTS app_names (
+                id INTEGER PRIMARY KEY,
+                name TEXT NOT NULL UNIQUE
+            );
+
+            -- Main table with normalized foreign keys and integer timestamp
             CREATE TABLE IF NOT EXISTS signature_entries (
-                id INTEGER PRIMARY KEY AUTOINCREMENT,
                 session_id TEXT NOT NULL,
-                timestamp TEXT NOT NULL,
-                app TEXT NOT NULL,
-                version TEXT NOT NULL,
+                timestamp INTEGER NOT NULL,
+                app_id INTEGER NOT NULL REFERENCES apps(id),
+                version_id INTEGER NOT NULL REFERENCES versions(id),
                 offline_login_usage INTEGER,
                 is_password_autofill_enabled INTEGER,
                 camera_roll_usage INTEGER,
-                os TEXT,
-                app_name TEXT,
+                os_id INTEGER REFERENCES os_versions(id),
+                app_name_id INTEGER REFERENCES app_names(id),
                 touch_id INTEGER,
                 is_offline_login_enabled INTEGER,
-                model TEXT,
-                device TEXT,
-                password_autofill_usage INTEGER
-            );
+                model_id INTEGER REFERENCES models(id),
+                device_id INTEGER REFERENCES devices(id),
+                password_autofill_usage INTEGER,
+                PRIMARY KEY (session_id, timestamp)
+            ) WITHOUT ROWID;
 
             CREATE INDEX IF NOT EXISTS idx_session_id ON signature_entries(session_id);
-            CREATE INDEX IF NOT EXISTS idx_version ON signature_entries(version);
+            CREATE INDEX IF NOT EXISTS idx_version ON signature_entries(version_id);
             "#,
         )?;
         Ok(())
@@ -52,32 +85,60 @@ impl Database {
     }
 
+    /// Inserts `entries` into `signature_entries` inside the transaction `tx`.
+    ///
+    /// The app, version, model, device, OS and app-name strings are stored as ids
+    /// of rows in their lookup tables, and the timestamp is stored as an integer
+    /// (`and_utc().timestamp()`, presumably Unix seconds; confirm against chrono).
+    ///
+    /// # Errors
+    ///
+    /// Returns the first error raised by a lookup or insert statement.
+    ///
+    /// NOTE(review): the schema's primary key is `(session_id, timestamp)`, so two
+    /// entries with the same session and timestamp value fail with a constraint
+    /// violation. Confirm the input can never contain such a pair.
     pub fn insert_signature_batch(tx: &Transaction<'_>, entries: &[SignatureEntry]) -> Result<()> {
-        let mut stmt = tx.prepare_cached(
+        // Build lookup caches for this batch
+        let mut app_cache: HashMap<String, i64> = HashMap::new();
+        let mut version_cache: HashMap<String, i64> = HashMap::new();
+        let mut model_cache: HashMap<String, i64> = HashMap::new();
+        let mut device_cache: HashMap<String, i64> = HashMap::new();
+        let mut os_cache: HashMap<String, i64> = HashMap::new();
+        let mut app_name_cache: HashMap<String, i64> = HashMap::new();
+
+        let mut insert_stmt = tx.prepare_cached(
             r#"
             INSERT INTO signature_entries (
-                session_id, timestamp, app, version,
+                session_id, timestamp, app_id, version_id,
                 offline_login_usage, is_password_autofill_enabled, camera_roll_usage,
-                os, app_name, touch_id, is_offline_login_enabled,
-                model, device, password_autofill_usage
+                os_id, app_name_id, touch_id, is_offline_login_enabled,
+                model_id, device_id, password_autofill_usage
             ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
             "#,
         )?;
 
         for entry in entries {
-            stmt.execute(params![
+            let app_id = get_or_insert_lookup(tx, &mut app_cache, "apps", &entry.app)?;
+            let version_id = get_or_insert_lookup(tx, &mut version_cache, "versions", &entry.version)?;
+            let model_id = entry.model.as_ref().map(|v| get_or_insert_lookup(tx, &mut model_cache, "models", v)).transpose()?;
+            let device_id = entry.device.as_ref().map(|v| get_or_insert_lookup(tx, &mut device_cache, "devices", v)).transpose()?;
+            let os_id = entry.os.as_ref().map(|v| get_or_insert_lookup(tx, &mut os_cache, "os_versions", v)).transpose()?;
+            let app_name_id = entry.app_name.as_ref().map(|v| get_or_insert_lookup(tx, &mut app_name_cache, "app_names", v)).transpose()?;
+
+            insert_stmt.execute(params![
                 entry.session_id,
-                entry.timestamp.format("%Y-%m-%d %H:%M:%S").to_string(),
-                entry.app,
-                entry.version,
+                entry.timestamp.and_utc().timestamp(),
+                app_id,
+                version_id,
                 entry.offline_login_usage,
                 entry.is_password_autofill_enabled.map(|b| b as i32),
                 entry.camera_roll_usage,
-                entry.os,
-                entry.app_name,
+                os_id,
+                app_name_id,
                 entry.touch_id.map(|b| b as i32),
                 entry.is_offline_login_enabled.map(|b| b as i32),
-                entry.model,
-                entry.device,
+                model_id,
+                device_id,
                 entry.password_autofill_usage,
             ])?;
         }
@@ -85,3 +146,48 @@ impl Database {
         Ok(())
     }
 }
+
+/// Returns the id of `value` in the lookup table `table`, inserting it first if needed.
+///
+/// `cache` maps already-resolved names to ids so that repeated values cost no
+/// database round trip.
+///
+/// `table` is interpolated into the SQL text, so it must be a trusted identifier
+/// (the calls in `insert_signature_batch` pass string literals); `value` is
+/// always bound as a parameter.
+///
+/// # Errors
+///
+/// Returns any database error other than "no rows" from the lookup query, and
+/// any error from the insert.
+fn get_or_insert_lookup(
+    tx: &Transaction<'_>,
+    cache: &mut HashMap<String, i64>,
+    table: &str,
+    value: &str,
+) -> Result<i64> {
+    if let Some(&id) = cache.get(value) {
+        return Ok(id);
+    }
+
+    // Try to find existing entry. Only "no rows" means the value is new; every
+    // other failure is propagated instead of being mistaken for a missing row.
+    let query = format!("SELECT id FROM {} WHERE name = ?", table);
+    let existing = match tx.query_row(&query, params![value], |row| row.get::<_, i64>(0)) {
+        Ok(id) => Some(id),
+        Err(rusqlite::Error::QueryReturnedNoRows) => None,
+        Err(err) => return Err(err.into()),
+    };
+
+    if let Some(id) = existing {
+        cache.insert(value.to_string(), id);
+        return Ok(id);
+    }
+
+    // Insert new entry
+    let insert = format!("INSERT INTO {} (name) VALUES (?)", table);
+    tx.execute(&insert, params![value])?;
+    let id = tx.last_insert_rowid();
+    cache.insert(value.to_string(), id);
+    Ok(id)
+}