Optimize disk usage

This commit is contained in:
2026-01-22 00:34:32 +01:00
parent 946d0184a1
commit 8fd9ff9140

124
src/db.rs
View File

@@ -1,5 +1,6 @@
use anyhow::Result; use anyhow::Result;
use rusqlite::{params, Connection, Transaction}; use rusqlite::{params, Connection, Transaction};
use std::collections::HashMap;
use crate::parser::SignatureEntry; use crate::parser::SignatureEntry;
@@ -22,26 +23,58 @@ impl Database {
fn init_schema(&self) -> Result<()> { fn init_schema(&self) -> Result<()> {
self.conn.execute_batch( self.conn.execute_batch(
r#" r#"
-- Lookup tables for low-cardinality text columns
CREATE TABLE IF NOT EXISTS apps (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL UNIQUE
);
CREATE TABLE IF NOT EXISTS versions (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL UNIQUE
);
CREATE TABLE IF NOT EXISTS models (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL UNIQUE
);
CREATE TABLE IF NOT EXISTS devices (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL UNIQUE
);
CREATE TABLE IF NOT EXISTS os_versions (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL UNIQUE
);
CREATE TABLE IF NOT EXISTS app_names (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL UNIQUE
);
-- Main table with normalized foreign keys and integer timestamp
CREATE TABLE IF NOT EXISTS signature_entries ( CREATE TABLE IF NOT EXISTS signature_entries (
id INTEGER PRIMARY KEY AUTOINCREMENT,
session_id TEXT NOT NULL, session_id TEXT NOT NULL,
timestamp TEXT NOT NULL, timestamp INTEGER NOT NULL,
app TEXT NOT NULL, app_id INTEGER NOT NULL REFERENCES apps(id),
version TEXT NOT NULL, version_id INTEGER NOT NULL REFERENCES versions(id),
offline_login_usage INTEGER, offline_login_usage INTEGER,
is_password_autofill_enabled INTEGER, is_password_autofill_enabled INTEGER,
camera_roll_usage INTEGER, camera_roll_usage INTEGER,
os TEXT, os_id INTEGER REFERENCES os_versions(id),
app_name TEXT, app_name_id INTEGER REFERENCES app_names(id),
touch_id INTEGER, touch_id INTEGER,
is_offline_login_enabled INTEGER, is_offline_login_enabled INTEGER,
model TEXT, model_id INTEGER REFERENCES models(id),
device TEXT, device_id INTEGER REFERENCES devices(id),
password_autofill_usage INTEGER password_autofill_usage INTEGER,
); PRIMARY KEY (session_id, timestamp)
) WITHOUT ROWID;
CREATE INDEX IF NOT EXISTS idx_session_id ON signature_entries(session_id); CREATE INDEX IF NOT EXISTS idx_session_id ON signature_entries(session_id);
CREATE INDEX IF NOT EXISTS idx_version ON signature_entries(version); CREATE INDEX IF NOT EXISTS idx_version ON signature_entries(version_id);
"#, "#,
)?; )?;
Ok(()) Ok(())
@@ -52,32 +85,47 @@ impl Database {
} }
pub fn insert_signature_batch(tx: &Transaction<'_>, entries: &[SignatureEntry]) -> Result<()> { pub fn insert_signature_batch(tx: &Transaction<'_>, entries: &[SignatureEntry]) -> Result<()> {
let mut stmt = tx.prepare_cached( // Build lookup caches for this batch
let mut app_cache: HashMap<String, i64> = HashMap::new();
let mut version_cache: HashMap<String, i64> = HashMap::new();
let mut model_cache: HashMap<String, i64> = HashMap::new();
let mut device_cache: HashMap<String, i64> = HashMap::new();
let mut os_cache: HashMap<String, i64> = HashMap::new();
let mut app_name_cache: HashMap<String, i64> = HashMap::new();
let mut insert_stmt = tx.prepare_cached(
r#" r#"
INSERT INTO signature_entries ( INSERT INTO signature_entries (
session_id, timestamp, app, version, session_id, timestamp, app_id, version_id,
offline_login_usage, is_password_autofill_enabled, camera_roll_usage, offline_login_usage, is_password_autofill_enabled, camera_roll_usage,
os, app_name, touch_id, is_offline_login_enabled, os_id, app_name_id, touch_id, is_offline_login_enabled,
model, device, password_autofill_usage model_id, device_id, password_autofill_usage
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
"#, "#,
)?; )?;
for entry in entries { for entry in entries {
stmt.execute(params![ let app_id = get_or_insert_lookup(tx, &mut app_cache, "apps", &entry.app)?;
let version_id = get_or_insert_lookup(tx, &mut version_cache, "versions", &entry.version)?;
let model_id = entry.model.as_ref().map(|v| get_or_insert_lookup(tx, &mut model_cache, "models", v)).transpose()?;
let device_id = entry.device.as_ref().map(|v| get_or_insert_lookup(tx, &mut device_cache, "devices", v)).transpose()?;
let os_id = entry.os.as_ref().map(|v| get_or_insert_lookup(tx, &mut os_cache, "os_versions", v)).transpose()?;
let app_name_id = entry.app_name.as_ref().map(|v| get_or_insert_lookup(tx, &mut app_name_cache, "app_names", v)).transpose()?;
insert_stmt.execute(params![
entry.session_id, entry.session_id,
entry.timestamp.format("%Y-%m-%d %H:%M:%S").to_string(), entry.timestamp.and_utc().timestamp(),
entry.app, app_id,
entry.version, version_id,
entry.offline_login_usage, entry.offline_login_usage,
entry.is_password_autofill_enabled.map(|b| b as i32), entry.is_password_autofill_enabled.map(|b| b as i32),
entry.camera_roll_usage, entry.camera_roll_usage,
entry.os, os_id,
entry.app_name, app_name_id,
entry.touch_id.map(|b| b as i32), entry.touch_id.map(|b| b as i32),
entry.is_offline_login_enabled.map(|b| b as i32), entry.is_offline_login_enabled.map(|b| b as i32),
entry.model, model_id,
entry.device, device_id,
entry.password_autofill_usage, entry.password_autofill_usage,
])?; ])?;
} }
@@ -85,3 +133,33 @@ impl Database {
Ok(()) Ok(())
} }
} }
/// Get or insert a value into a lookup table, using a cache to minimize DB queries
fn get_or_insert_lookup(
tx: &Transaction<'_>,
cache: &mut HashMap<String, i64>,
table: &str,
value: &str,
) -> Result<i64> {
if let Some(&id) = cache.get(value) {
return Ok(id);
}
// Try to find existing entry
let query = format!("SELECT id FROM {} WHERE name = ?", table);
let existing: Option<i64> = tx
.query_row(&query, params![value], |row| row.get(0))
.ok();
if let Some(id) = existing {
cache.insert(value.to_string(), id);
return Ok(id);
}
// Insert new entry
let insert = format!("INSERT INTO {} (name) VALUES (?)", table);
tx.execute(&insert, params![value])?;
let id = tx.last_insert_rowid();
cache.insert(value.to_string(), id);
Ok(id)
}