Add log ingestion tool for loading signature logs into SQLite

- Parse signature messages from log files extracting app info, device
  details, and feature flags (autofill, touchID, offline login, etc.)
- Support both plain .log and gzip compressed .log.gz files
- File discovery by date range (YYYY/mm/dd directory structure)
- Batch inserts for performance with large files (10GB+ per day)
- Index on session_id and version for efficient queries
- Extensible parser architecture via MessageParser trait
- Parallel file processing for multi-day ingestion

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-21 22:34:48 +01:00
commit 169409738f
18 changed files with 2626 additions and 0 deletions

14
.github/dependabot.yaml vendored Normal file
View File

@@ -0,0 +1,14 @@
version: 2
updates:
- package-ecosystem: github-actions
directory: /
schedule:
interval: weekly
time: "07:00"
timezone: "Asia/Tokyo"
- package-ecosystem: cargo
directory: /
schedule:
interval: weekly
time: "07:00"
timezone: "Asia/Tokyo"

31
.github/workflows/audit.yaml vendored Normal file
View File

@@ -0,0 +1,31 @@
name: Security audit
on:
schedule:
- cron: "0 0 */3 * *"
push:
branches: [main]
paths:
- "**/Cargo.toml"
- "**/Cargo.lock"
pull_request:
paths:
- "**/Cargo.toml"
- "**/Cargo.lock"
jobs:
audit:
name: Audit
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
- name: Install cargo-audit
uses: taiki-e/install-action@30eab0fabba9ea3f522099957e668b21876aa39e # v2.66.6
with:
tool: cargo-audit
- name: Run audit
run: cargo audit

39
.github/workflows/benchmark.yaml vendored Normal file
View File

@@ -0,0 +1,39 @@
name: Benchmark
on:
push:
branches:
- main
paths:
- '**/*.rs'
- '**/Cargo.toml'
- '**/Cargo.lock'
- '.github/workflows/benchmark.yaml'
permissions:
contents: write
deployments: write
jobs:
benchmark:
name: Run Rust benchmark example
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
- name: Toolchain setup
run: rustup toolchain update nightly && rustup default nightly
- name: Run benchmark
run: cargo +nightly bench | tee output.txt
- name: Store benchmark result
uses: benchmark-action/github-action-benchmark@4bdcce38c94cec68da58d012ac24b7b1155efe8b # v1.20.7
with:
name: Rust Benchmark
tool: 'cargo'
output-file-path: output.txt
github-token: ${{ secrets.GITHUB_TOKEN }}
auto-push: true
# Show alert with commit comment on detecting possible performance regression
alert-threshold: '200%'
comment-on-alert: true
fail-on-alert: true
benchmark-data-dir-path: docs

97
.github/workflows/ci.yaml vendored Normal file
View File

@@ -0,0 +1,97 @@
name: Rust CI
on:
push:
branches: [main]
paths:
- '**/*.rs'
- '**/Cargo.toml'
- '**/Cargo.lock'
- '.github/workflows/ci.yaml'
pull_request:
paths:
- '**/*.rs'
- '**/Cargo.toml'
- '**/Cargo.lock'
- '.github/workflows/ci.yaml'
env:
CARGO_TERM_COLOR: always
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
check:
name: Check
strategy:
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
with:
fetch-depth: 0
- uses: actions-rust-lang/setup-rust-toolchain@1780873c7b576612439a134613cc4cc74ce5538c # v1.15.2
with:
components: rustfmt, clippy
cache-shared-key: setup-rust-${{ runner.os }}-${{ runner.arch }}-${{ hashFiles('**/Cargo.lock') }}
- name: Install reviewdog
uses: reviewdog/action-setup@d8a7baabd7f3e8544ee4dbde3ee41d0011c3a93f # v1.5.0
- name: Check format
run: |
cargo fmt --all -- --check
- uses: giraffate/clippy-action@13b9d32482f25d29ead141b79e7e04e7900281e0 # v1.0.1
with:
reporter: 'github-pr-review'
github_token: ${{ secrets.GITHUB_TOKEN }}
fail_on_error: true
filter_mode: nofilter
- name: Build
run: cargo build
test:
name: Test
strategy:
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
runs-on: ${{ matrix.os }}
permissions:
contents: write
pull-requests: write
steps:
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
with:
fetch-depth: 0
- uses: actions-rust-lang/setup-rust-toolchain@1780873c7b576612439a134613cc4cc74ce5538c # v1.15.2
with:
components: llvm-tools-preview
cache-shared-key: setup-rust-${{ runner.os }}-${{ runner.arch }}-${{ hashFiles('**/Cargo.lock') }}
- name: Install tools
uses: taiki-e/install-action@30eab0fabba9ea3f522099957e668b21876aa39e # v2.66.6
with:
tool: cargo-llvm-cov, cargo-nextest
- name: Run test
if: runner.os != 'Linux'
run: |
cargo nextest run
- name: Generate coverage
if: runner.os == 'Linux'
run: cargo llvm-cov nextest --lcov --output-path lcov.info
- name: Upload coverage
if: runner.os == 'Linux'
uses: k1LoW/octocov-action@73d561f65d59e66899ed5c87e4621a913b5d5c20 # v1.5.0
with:
github-token: ${{ secrets.GITHUB_TOKEN }}

View File

@@ -0,0 +1,34 @@
name: Dependabot Auto-merge
on:
pull_request:
types:
- opened
- synchronize
- reopened
permissions:
contents: write
pull-requests: write
jobs:
dependabot-automation:
runs-on: ubuntu-latest
if: ${{ github.actor == 'dependabot[bot]' }}
timeout-minutes: 13
steps:
- name: Dependabot metadata
id: metadata
uses: dependabot/fetch-metadata@21025c705c08248db411dc16f3619e6b5f9ea21a # v2.5.0
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
- name: Approve & enable auto-merge for Dependabot PR
if: |
steps.metadata.outputs.update-type == 'version-update:semver-patch' ||
steps.metadata.outputs.update-type == 'version-update:semver-minor'
run: |
gh pr merge --auto -s "$PR_URL"
env:
PR_URL: ${{ github.event.pull_request.html_url }}
PR_TITLE: ${{ github.event.pull_request.title }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

134
.github/workflows/release.yaml vendored Normal file
View File

@@ -0,0 +1,134 @@
name: Release
on:
push:
tags:
- 'v*'
workflow_dispatch:
permissions:
contents: write
jobs:
build:
name: Build - ${{ matrix.target }}
strategy:
matrix:
include:
- target: x86_64-unknown-linux-gnu
runner: ubuntu-latest
os: Linux
arch: x86_64
ext: tar.gz
- target: aarch64-unknown-linux-gnu
runner: ubuntu-24.04-arm
os: Linux
arch: arm64
ext: tar.gz
- target: x86_64-apple-darwin
runner: macos-15-intel
os: Darwin
arch: x86_64
ext: tar.gz
- target: aarch64-apple-darwin
runner: macos-latest
os: Darwin
arch: arm64
ext: tar.gz
- target: x86_64-pc-windows-msvc
runner: windows-latest
os: Windows
arch: x86_64
ext: zip
runs-on: ${{ matrix.runner }}
steps:
- name: Checkout
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
- name: Setup sccache
uses: mozilla-actions/sccache-action@7d986dd989559c6ecdb630a3fd2557667be217ad # v0.0.9
- name: Setup environment variables for sccache
shell: bash
run: |
echo "SCCACHE_GHA_ENABLED=true" >> "$GITHUB_ENV"
echo "RUSTC_WRAPPER=sccache" >> "$GITHUB_ENV"
- name: Setup Rust
uses: actions-rust-lang/setup-rust-toolchain@1780873c7b576612439a134613cc4cc74ce5538c # v1.15.2
with:
rustflags: ""
- name: Get project name
id: project
shell: bash
run: |
name=$(cargo metadata --format-version 1 --no-deps | jq -r '.packages[0].name')
echo "name=$name" >> "$GITHUB_OUTPUT"
- name: Build
run: cargo build --release
- name: Create archive (Unix)
if: matrix.os != 'Windows'
shell: bash
run: |
name="${{ steps.project.outputs.name }}"
archive_name="${name}_${{ matrix.os }}_${{ matrix.arch }}.tar.gz"
tar -czvf "$archive_name" -C target/release "$name"
echo "archive_name=$archive_name" >> "$GITHUB_ENV"
- name: Create archive (Windows)
if: matrix.os == 'Windows'
shell: pwsh
run: |
$name = "${{ steps.project.outputs.name }}"
$archiveName = "${name}_${{ matrix.os }}_${{ matrix.arch }}.zip"
Compress-Archive -Path "target/release/${name}.exe" -DestinationPath $archiveName
echo "archive_name=$archiveName" >> $env:GITHUB_ENV
- name: Upload artifact
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: ${{ steps.project.outputs.name }}_${{ matrix.os }}_${{ matrix.arch }}
path: ${{ env.archive_name }}
if-no-files-found: error
release:
name: Release
needs: build
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
with:
fetch-depth: 0
- name: Download all artifacts
uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
with:
path: artifacts
merge-multiple: true
- name: Generate changelog
id: changelog
run: |
# Get the previous tag
prev_tag=$(git describe --tags --abbrev=0 HEAD^ 2>/dev/null || echo "")
if [ -n "$prev_tag" ]; then
echo "## Changes since $prev_tag" > changelog.md
echo "" >> changelog.md
git log --pretty=format:"- %s" "$prev_tag"..HEAD >> changelog.md
else
echo "## Initial Release" > changelog.md
fi
- name: Create release
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
gh release create "${{ github.ref_name }}" \
--title "${{ github.ref_name }}" \
--notes-file changelog.md \
artifacts/*

3
.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
/target
.DS_Store
*.db

88
CLAUDE.md Normal file
View File

@@ -0,0 +1,88 @@
# CLAUDE.md
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
## Project Summary
Log ingestion tool that parses application log files and loads them into SQLite for analysis. Primary use case is analyzing mobile app telemetry - tracking feature adoption (autofill, touchID, offline login), app versions, device models, etc.
### What it does
- Parses log lines containing `signature:` messages with app/device telemetry
- Extracts: sessionId, timestamp, app name, version, OS, model, device, and feature flags
- Converts yes/no to booleans, parses numeric usage counters
- Loads into SQLite with indexes on session_id and version for efficient queries
### Log format
Logs are stored in `<base_dir>/YYYY/MM/DD/<filename>` structure, either as `.log` (plain) or `.log.gz` (gzip compressed). A single day can exceed 10GB.
Example signature message:
```
msg="signature:XAMARIN_APP/5.23.0/ details:offlineLoginUsage:0,isPasswordAutofillEnabled:no,cameraRollUsage:0,OS:26.2.0,appName:App,touchID:yes,isOfflineLoginEnabled:yes,model:iPhone15,3,device:iOS, Apple,passwordAutofillUsage:0 user-agent:..."
```
### Extensibility
The parser uses a `MessageParser` trait allowing new message types to be added. Currently only `signature:` messages are parsed; other message types are skipped.
## Commands
### Build & Run
```bash
# Build
cargo build
# Release build
cargo build --release
# Run with single file
cargo run -- --file /path/to/logs.log --output output.db
# Run with date range
cargo run -- --from 2026/01/20 --to 2026/01/21 --base-dir /var/log/app --filename app.log --output output.db
```
### Testing
```bash
# Run all tests
cargo test
# Fast test execution with cargo-nextest (recommended)
cargo nextest run
# Run a single test
cargo test test_name
```
### Quality Checks
```bash
# Format check
cargo fmt -- --check
# Apply formatting
cargo fmt
# Static analysis with Clippy
cargo clippy
```
## Architecture
### Source Files
- **src/main.rs**: CLI argument parsing (clap), orchestrates file discovery and processing
- **src/parser.rs**: Log line parsing, `MessageParser` trait, `SignatureParser` implementation
- **src/db.rs**: SQLite schema creation, batched inserts
- **src/files.rs**: File discovery by date range, handles .gz and plain files
### Database Schema
Table `signature_entries` with columns: session_id, timestamp, app, version, offline_login_usage, is_password_autofill_enabled, camera_roll_usage, os, app_name, touch_id, is_offline_login_enabled, model, device, password_autofill_usage.
Indexes on `session_id` and `version`.
### Key Design Decisions
- Batched inserts (default 10k rows per transaction) for performance
- Regex-based parsing with lazy static compilation
- Extensible via `MessageParser` trait + `ParsedMessage` enum
## CI/CD Configuration
- **ci.yaml**: Formatting, Clippy, build, and tests
- **audit.yaml**: Security audit for dependencies
- **release.yaml**: Automated release on tag push (cross-platform builds via GoReleaser)

720
Cargo.lock generated Normal file
View File

@@ -0,0 +1,720 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "adler2"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
[[package]]
name = "aho-corasick"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
dependencies = [
"memchr",
]
[[package]]
name = "android_system_properties"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
dependencies = [
"libc",
]
[[package]]
name = "anstream"
version = "0.6.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b"
dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
"is_terminal_polyfill",
"utf8parse",
]
[[package]]
name = "anstyle"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9"
[[package]]
name = "anstyle-parse"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9"
dependencies = [
"utf8parse",
]
[[package]]
name = "anstyle-query"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c"
dependencies = [
"windows-sys",
]
[[package]]
name = "anstyle-wincon"
version = "3.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e"
dependencies = [
"anstyle",
"once_cell",
"windows-sys",
]
[[package]]
name = "anyhow"
version = "1.0.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
[[package]]
name = "autocfg"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
[[package]]
name = "bitflags"
version = "2.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3"
[[package]]
name = "bumpalo"
version = "3.19.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510"
[[package]]
name = "cc"
version = "1.2.53"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "755d2fce177175ffca841e9a06afdb2c4ab0f593d53b4dee48147dfaade85932"
dependencies = [
"find-msvc-tools",
"shlex",
]
[[package]]
name = "cfg-if"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
[[package]]
name = "chrono"
version = "0.4.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118"
dependencies = [
"iana-time-zone",
"js-sys",
"num-traits",
"wasm-bindgen",
"windows-link",
]
[[package]]
name = "clap"
version = "4.5.54"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c6e6ff9dcd79cff5cd969a17a545d79e84ab086e444102a591e288a8aa3ce394"
dependencies = [
"clap_builder",
"clap_derive",
]
[[package]]
name = "clap_builder"
version = "4.5.54"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa42cf4d2b7a41bc8f663a7cab4031ebafa1bf3875705bfaf8466dc60ab52c00"
dependencies = [
"anstream",
"anstyle",
"clap_lex",
"strsim",
]
[[package]]
name = "clap_derive"
version = "4.5.49"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "clap_lex"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6"
[[package]]
name = "colorchoice"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990"
[[package]]
name = "core-foundation-sys"
version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
[[package]]
name = "crc32fast"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
dependencies = [
"cfg-if",
]
[[package]]
name = "crossbeam-channel"
version = "0.5.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
dependencies = [
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
[[package]]
name = "either"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
[[package]]
name = "fallible-iterator"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649"
[[package]]
name = "fallible-streaming-iterator"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
[[package]]
name = "find-msvc-tools"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8591b0bcc8a98a64310a2fae1bb3e9b8564dd10e381e6e28010fde8e8e8568db"
[[package]]
name = "flate2"
version = "1.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b375d6465b98090a5f25b1c7703f3859783755aa9a80433b36e0379a3ec2f369"
dependencies = [
"crc32fast",
"miniz_oxide",
]
[[package]]
name = "foldhash"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
[[package]]
name = "hashbrown"
version = "0.15.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
dependencies = [
"foldhash",
]
[[package]]
name = "hashlink"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1"
dependencies = [
"hashbrown",
]
[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "iana-time-zone"
version = "0.1.64"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb"
dependencies = [
"android_system_properties",
"core-foundation-sys",
"iana-time-zone-haiku",
"js-sys",
"log",
"wasm-bindgen",
"windows-core",
]
[[package]]
name = "iana-time-zone-haiku"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
dependencies = [
"cc",
]
[[package]]
name = "is_terminal_polyfill"
version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
[[package]]
name = "js-sys"
version = "0.3.85"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3"
dependencies = [
"once_cell",
"wasm-bindgen",
]
[[package]]
name = "libc"
version = "0.2.180"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc"
[[package]]
name = "libsqlite3-sys"
version = "0.33.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "947e6816f7825b2b45027c2c32e7085da9934defa535de4a6a46b10a4d5257fa"
dependencies = [
"cc",
"pkg-config",
"vcpkg",
]
[[package]]
name = "log"
version = "0.4.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
[[package]]
name = "log_ingest"
version = "0.0.1"
dependencies = [
"anyhow",
"chrono",
"clap",
"crossbeam-channel",
"flate2",
"rayon",
"regex",
"rusqlite",
]
[[package]]
name = "memchr"
version = "2.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
[[package]]
name = "miniz_oxide"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
dependencies = [
"adler2",
"simd-adler32",
]
[[package]]
name = "num-traits"
version = "0.2.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
dependencies = [
"autocfg",
]
[[package]]
name = "once_cell"
version = "1.20.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e"
[[package]]
name = "pkg-config"
version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
[[package]]
name = "proc-macro2"
version = "1.0.93"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rayon"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
dependencies = [
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
dependencies = [
"crossbeam-deque",
"crossbeam-utils",
]
[[package]]
name = "regex"
version = "1.12.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"
[[package]]
name = "rusqlite"
version = "0.35.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a22715a5d6deef63c637207afbe68d0c72c3f8d0022d7cf9714c442d6157606b"
dependencies = [
"bitflags",
"fallible-iterator",
"fallible-streaming-iterator",
"hashlink",
"libsqlite3-sys",
"smallvec",
]
[[package]]
name = "rustversion"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
[[package]]
name = "shlex"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "simd-adler32"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2"
[[package]]
name = "smallvec"
version = "1.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
[[package]]
name = "strsim"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "syn"
version = "2.0.98"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "00e2473a93778eb0bad35909dff6a10d28e63f792f16ed15e404fca9d5eeedbe"
[[package]]
name = "utf8parse"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "vcpkg"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
[[package]]
name = "wasm-bindgen"
version = "0.2.108"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566"
dependencies = [
"cfg-if",
"once_cell",
"rustversion",
"wasm-bindgen-macro",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.108"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.108"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55"
dependencies = [
"bumpalo",
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.108"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12"
dependencies = [
"unicode-ident",
]
[[package]]
name = "windows-core"
version = "0.62.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb"
dependencies = [
"windows-implement",
"windows-interface",
"windows-link",
"windows-result",
"windows-strings",
]
[[package]]
name = "windows-implement"
version = "0.60.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "windows-interface"
version = "0.59.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "windows-link"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
[[package]]
name = "windows-result"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5"
dependencies = [
"windows-link",
]
[[package]]
name = "windows-strings"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091"
dependencies = [
"windows-link",
]
[[package]]
name = "windows-sys"
version = "0.59.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_gnullvm",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
[[package]]
name = "windows_i686_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
[[package]]
name = "windows_i686_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"

15
Cargo.toml Normal file
View File

@@ -0,0 +1,15 @@
[package]
name = "log_ingest"
version = "0.0.1"
authors = ["Alexandr Mansurov"]
edition = "2024"
[dependencies]
clap = { version = "4.5.42", features = ["derive"] }
rusqlite = { version = "0.35", features = ["bundled"] }
chrono = "0.4"
regex = "1"
flate2 = "1"
anyhow = "1"
rayon = "1"
crossbeam-channel = "0.5"

2
Cross.toml Normal file
View File

@@ -0,0 +1,2 @@
[target.x86_64-unknown-linux-gnu]
image = "ghcr.io/cross-rs/x86_64-unknown-linux-gnu:main"

8
LICENSE Normal file
View File

@@ -0,0 +1,8 @@
The MIT License (MIT)
Copyright (c) 2026, Alexandr Mansurov
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

180
README.md Normal file
View File

@@ -0,0 +1,180 @@
# log_ingest
A Rust CLI tool for loading log files into a SQLite database for analysis.
## Overview
Parses application logs containing signature messages and loads them into SQLite for querying. Designed to handle large log volumes (10GB+ per day) with batched inserts and efficient parsing.
## Features
- Parse `signature:` messages extracting app info, device details, and feature flags
- Support for both plain `.log` and gzip compressed `.log.gz` files
- File discovery by date range using `YYYY/mm/dd` directory structure
- Batched inserts for performance with large files
- Parallel file processing for multi-day ingestion
- Indexed columns (`session_id`, `version`) for efficient queries
- Extensible parser architecture for adding new message types
## Installation
```bash
cargo build --release
```
## Usage
### Process a single file
```bash
log_ingest --file /path/to/logs.log --output output.db
```
### Process a date range
```bash
log_ingest \
--from 2026/01/20 \
--to 2026/01/21 \
--base-dir /var/log/myapp \
--filename app.log \
--output output.db
```
The tool will look for files at `<base-dir>/YYYY/MM/DD/<filename>.gz` or `<base-dir>/YYYY/MM/DD/<filename>` for each day in the range.
### Parallel processing
When processing multiple files, parsing runs in parallel by default using all available CPU cores. A single writer thread handles database inserts to avoid SQLite contention.
```bash
# Use all CPU cores (default)
log_ingest --from 2026/01/01 --to 2026/01/31 ...
# Limit to 4 threads
log_ingest --threads 4 --from 2026/01/01 --to 2026/01/31 ...
# Sequential processing (disable parallelism)
log_ingest --threads 1 --from 2026/01/01 --to 2026/01/31 ...
```
### Options
| Option | Description |
|--------|-------------|
| `--file <PATH>` | Single log file to process |
| `--from <DATE>` | Start date (YYYY/mm/dd) |
| `--to <DATE>` | End date (YYYY/mm/dd) |
| `--base-dir <PATH>` | Base directory containing log files |
| `--filename <NAME>` | Log filename (e.g., `app.log`) |
| `-o, --output <PATH>` | Output SQLite database path |
| `--batch-size <N>` | Batch size for inserts (default: 10000) |
| `--threads <N>` | Number of parallel threads (0 = all cores, 1 = sequential) |
## Database Schema
The schema uses normalized lookup tables to minimize disk usage for large datasets.
```sql
-- Lookup tables for low-cardinality text columns
CREATE TABLE apps (id INTEGER PRIMARY KEY, name TEXT NOT NULL UNIQUE);
CREATE TABLE versions (id INTEGER PRIMARY KEY, name TEXT NOT NULL UNIQUE);
CREATE TABLE models (id INTEGER PRIMARY KEY, name TEXT NOT NULL UNIQUE);
CREATE TABLE devices (id INTEGER PRIMARY KEY, name TEXT NOT NULL UNIQUE);
CREATE TABLE os_versions (id INTEGER PRIMARY KEY, name TEXT NOT NULL UNIQUE);
CREATE TABLE app_names (id INTEGER PRIMARY KEY, name TEXT NOT NULL UNIQUE);
-- Main table with foreign keys and millisecond timestamp
CREATE TABLE signature_entries (
id INTEGER PRIMARY KEY,
session_id TEXT NOT NULL,
timestamp_ms INTEGER NOT NULL, -- Unix epoch milliseconds
app_id INTEGER NOT NULL REFERENCES apps(id),
version_id INTEGER NOT NULL REFERENCES versions(id),
offline_login_usage INTEGER,
is_password_autofill_enabled INTEGER,
camera_roll_usage INTEGER,
os_id INTEGER REFERENCES os_versions(id),
app_name_id INTEGER REFERENCES app_names(id),
touch_id INTEGER,
is_offline_login_enabled INTEGER,
model_id INTEGER REFERENCES models(id),
device_id INTEGER REFERENCES devices(id),
password_autofill_usage INTEGER
);
CREATE INDEX idx_session_id ON signature_entries(session_id);
CREATE INDEX idx_timestamp ON signature_entries(timestamp_ms);
CREATE INDEX idx_version ON signature_entries(version_id);
```
## Example Queries
```sql
-- Percentage of users with password autofill enabled
SELECT
ROUND(100.0 * SUM(is_password_autofill_enabled) / COUNT(*), 2) as pct
FROM signature_entries;
-- Count by app version
SELECT v.name as version, COUNT(*) as cnt
FROM signature_entries se
JOIN versions v ON se.version_id = v.id
GROUP BY v.name
ORDER BY cnt DESC;
-- Device breakdown
SELECT d.name as device, COUNT(*) as cnt
FROM signature_entries se
JOIN devices d ON se.device_id = d.id
GROUP BY d.name;
-- Convert timestamp_ms to readable datetime
SELECT
datetime(timestamp_ms / 1000, 'unixepoch') as timestamp,
session_id
FROM signature_entries
LIMIT 10;
```
## Development
```bash
# Build
cargo build
# Run tests
cargo test
# Format
cargo fmt
# Lint
cargo clippy
```
## Cross-Compilation
To build a Linux x86_64 binary from macOS:
1. Install [cargo-zigbuild](https://github.com/rust-cross/cargo-zigbuild) and [Zig](https://ziglang.org/):
```bash
cargo install cargo-zigbuild
brew install zig
```
2. Add the Linux target:
```bash
rustup target add x86_64-unknown-linux-gnu
```
3. Build:
```bash
cargo zigbuild --release --target x86_64-unknown-linux-gnu
```
The binary will be at `target/x86_64-unknown-linux-gnu/release/log_ingest`.
## License
MIT

2
rust-toolchain.toml Normal file
View File

@@ -0,0 +1,2 @@
[toolchain]
channel = "1.90"

166
src/db.rs Normal file
View File

@@ -0,0 +1,166 @@
use anyhow::Result;
use rusqlite::{params, Connection, Transaction};
use std::collections::HashMap;
use crate::parser::SignatureEntry;
pub struct Database {
conn: Connection,
}
impl Database {
pub fn new(path: &str) -> Result<Self> {
let conn = Connection::open(path)?;
// Enable WAL mode for better concurrent read/write performance
conn.pragma_update(None, "journal_mode", "WAL")?;
let db = Self { conn };
db.init_schema()?;
Ok(db)
}
fn init_schema(&self) -> Result<()> {
self.conn.execute_batch(
r#"
-- Lookup tables for low-cardinality text columns
CREATE TABLE IF NOT EXISTS apps (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL UNIQUE
);
CREATE TABLE IF NOT EXISTS versions (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL UNIQUE
);
CREATE TABLE IF NOT EXISTS models (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL UNIQUE
);
CREATE TABLE IF NOT EXISTS devices (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL UNIQUE
);
CREATE TABLE IF NOT EXISTS os_versions (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL UNIQUE
);
CREATE TABLE IF NOT EXISTS app_names (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL UNIQUE
);
-- Main table with normalized foreign keys and integer timestamp
CREATE TABLE IF NOT EXISTS signature_entries (
id INTEGER PRIMARY KEY,
session_id TEXT NOT NULL,
timestamp_ms INTEGER NOT NULL,
app_id INTEGER NOT NULL REFERENCES apps(id),
version_id INTEGER NOT NULL REFERENCES versions(id),
offline_login_usage INTEGER,
is_password_autofill_enabled INTEGER,
camera_roll_usage INTEGER,
os_id INTEGER REFERENCES os_versions(id),
app_name_id INTEGER REFERENCES app_names(id),
touch_id INTEGER,
is_offline_login_enabled INTEGER,
model_id INTEGER REFERENCES models(id),
device_id INTEGER REFERENCES devices(id),
password_autofill_usage INTEGER
);
CREATE INDEX IF NOT EXISTS idx_session_id ON signature_entries(session_id);
CREATE INDEX IF NOT EXISTS idx_timestamp ON signature_entries(timestamp_ms);
CREATE INDEX IF NOT EXISTS idx_version ON signature_entries(version_id);
"#,
)?;
Ok(())
}
pub fn begin_transaction(&mut self) -> Result<Transaction<'_>> {
Ok(self.conn.transaction()?)
}
pub fn insert_signature_batch(tx: &Transaction<'_>, entries: &[SignatureEntry]) -> Result<()> {
// Build lookup caches for this batch
let mut app_cache: HashMap<String, i64> = HashMap::new();
let mut version_cache: HashMap<String, i64> = HashMap::new();
let mut model_cache: HashMap<String, i64> = HashMap::new();
let mut device_cache: HashMap<String, i64> = HashMap::new();
let mut os_cache: HashMap<String, i64> = HashMap::new();
let mut app_name_cache: HashMap<String, i64> = HashMap::new();
let mut insert_stmt = tx.prepare_cached(
r#"
INSERT INTO signature_entries (
id, session_id, timestamp_ms, app_id, version_id,
offline_login_usage, is_password_autofill_enabled, camera_roll_usage,
os_id, app_name_id, touch_id, is_offline_login_enabled,
model_id, device_id, password_autofill_usage
) VALUES (NULL, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
"#,
)?;
for entry in entries {
let app_id = get_or_insert_lookup(tx, &mut app_cache, "apps", &entry.app)?;
let version_id = get_or_insert_lookup(tx, &mut version_cache, "versions", &entry.version)?;
let model_id = entry.model.as_ref().map(|v| get_or_insert_lookup(tx, &mut model_cache, "models", v)).transpose()?;
let device_id = entry.device.as_ref().map(|v| get_or_insert_lookup(tx, &mut device_cache, "devices", v)).transpose()?;
let os_id = entry.os.as_ref().map(|v| get_or_insert_lookup(tx, &mut os_cache, "os_versions", v)).transpose()?;
let app_name_id = entry.app_name.as_ref().map(|v| get_or_insert_lookup(tx, &mut app_name_cache, "app_names", v)).transpose()?;
insert_stmt.execute(params![
entry.session_id,
entry.timestamp_ms,
app_id,
version_id,
entry.offline_login_usage,
entry.is_password_autofill_enabled.map(|b| b as i32),
entry.camera_roll_usage,
os_id,
app_name_id,
entry.touch_id.map(|b| b as i32),
entry.is_offline_login_enabled.map(|b| b as i32),
model_id,
device_id,
entry.password_autofill_usage,
])?;
}
Ok(())
}
}
/// Get or insert a value into a lookup table, using a cache to minimize DB queries
fn get_or_insert_lookup(
tx: &Transaction<'_>,
cache: &mut HashMap<String, i64>,
table: &str,
value: &str,
) -> Result<i64> {
if let Some(&id) = cache.get(value) {
return Ok(id);
}
// Try to find existing entry
let query = format!("SELECT id FROM {} WHERE name = ?", table);
let existing: Option<i64> = tx
.query_row(&query, params![value], |row| row.get(0))
.ok();
if let Some(id) = existing {
cache.insert(value.to_string(), id);
return Ok(id);
}
// Insert new entry
let insert = format!("INSERT INTO {} (name) VALUES (?)", table);
tx.execute(&insert, params![value])?;
let id = tx.last_insert_rowid();
cache.insert(value.to_string(), id);
Ok(id)
}

97
src/files.rs Normal file
View File

@@ -0,0 +1,97 @@
use anyhow::{anyhow, Result};
use chrono::NaiveDate;
use flate2::read::GzDecoder;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::PathBuf;
/// Discovers log files for a given date range
pub struct LogFileDiscovery {
base_dir: PathBuf,
filename: String,
}
impl LogFileDiscovery {
pub fn new(base_dir: PathBuf, filename: String) -> Self {
Self { base_dir, filename }
}
/// Returns an iterator over all log files in the date range
pub fn discover(&self, from: NaiveDate, to: NaiveDate) -> Result<Vec<LogFile>> {
let mut files = Vec::new();
let mut current = from;
while current <= to {
if let Some(log_file) = self.find_log_for_date(current)? {
files.push(log_file);
}
current = current
.succ_opt()
.ok_or_else(|| anyhow!("Date overflow"))?;
}
Ok(files)
}
fn find_log_for_date(&self, date: NaiveDate) -> Result<Option<LogFile>> {
// Build path: <base_dir>/yyyy/mm/dd/<filename>.gz or <filename>
let date_path = self
.base_dir
.join(date.format("%Y").to_string())
.join(date.format("%m").to_string())
.join(date.format("%d").to_string());
// Try gzipped first
let gz_path = date_path.join(format!("{}.gz", self.filename));
if gz_path.exists() {
return Ok(Some(LogFile {
path: gz_path,
compressed: true,
}));
}
// Try uncompressed
let plain_path = date_path.join(&self.filename);
if plain_path.exists() {
return Ok(Some(LogFile {
path: plain_path,
compressed: false,
}));
}
// No file found for this date
Ok(None)
}
}
#[derive(Debug)]
pub struct LogFile {
pub path: PathBuf,
pub compressed: bool,
}
impl LogFile {
/// Returns a buffered reader for this log file, handling compression transparently
pub fn reader(&self) -> Result<Box<dyn BufRead>> {
let file = File::open(&self.path)?;
if self.compressed {
let decoder = GzDecoder::new(file);
Ok(Box::new(BufReader::new(decoder)))
} else {
Ok(Box::new(BufReader::new(file)))
}
}
}
/// For reading a single file directly (e.g., for testing)
pub fn read_log_file(path: &str) -> Result<Box<dyn BufRead>> {
let file = File::open(path)?;
if path.ends_with(".gz") {
let decoder = GzDecoder::new(file);
Ok(Box::new(BufReader::new(decoder)))
} else {
Ok(Box::new(BufReader::new(file)))
}
}

338
src/main.rs Normal file
View File

@@ -0,0 +1,338 @@
use anyhow::{anyhow, Result};
use chrono::NaiveDate;
use clap::Parser;
use crossbeam_channel::{bounded, Sender};
use rayon::prelude::*;
use std::collections::HashMap;
use std::io::BufRead;
use std::path::PathBuf;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;
use std::thread;
mod db;
mod files;
mod parser;
use db::Database;
use files::{read_log_file, LogFile, LogFileDiscovery};
use parser::{ParsedMessage, ParserRegistry, SignatureEntry};
#[derive(Parser, Debug)]
#[command(author, version, about = "Load log files into SQLite database")]
struct Args {
/// Start date (YYYY/mm/dd)
#[arg(long)]
from: Option<String>,
/// End date (YYYY/mm/dd)
#[arg(long)]
to: Option<String>,
/// Base directory containing log files
#[arg(long)]
base_dir: Option<PathBuf>,
/// Log filename (without date path, e.g., "app.log")
#[arg(long)]
filename: Option<String>,
/// Single log file to process (alternative to date range)
#[arg(long)]
file: Option<PathBuf>,
/// Output SQLite database path
#[arg(long, short)]
output: String,
/// Batch size for database inserts
#[arg(long, default_value = "10000")]
batch_size: usize,
/// Number of parallel threads for file processing (0 = use all available cores)
#[arg(long, default_value = "0")]
threads: usize,
}
fn parse_date(s: &str) -> Result<NaiveDate> {
NaiveDate::parse_from_str(s, "%Y/%m/%d")
.map_err(|e| anyhow!("Invalid date format '{}': {}. Expected YYYY/mm/dd", s, e))
}
fn main() -> Result<()> {
let args = Args::parse();
// Configure rayon thread pool if threads specified
if args.threads > 0 {
rayon::ThreadPoolBuilder::new()
.num_threads(args.threads)
.build_global()
.ok(); // Ignore error if pool already initialized
}
let use_parallel = args.threads != 1;
if let Some(file_path) = &args.file {
// Process single file (no parallelism needed)
let mut db = Database::new(&args.output)?;
let registry = ParserRegistry::new();
eprintln!("Processing single file: {}", file_path.display());
let reader = read_log_file(file_path.to_str().unwrap())?;
process_reader(reader, &registry, &mut db, args.batch_size)?;
} else {
// Process date range
let from = parse_date(
args.from
.as_ref()
.ok_or_else(|| anyhow!("--from is required when not using --file"))?,
)?;
let to = parse_date(
args.to
.as_ref()
.ok_or_else(|| anyhow!("--to is required when not using --file"))?,
)?;
let base_dir = args
.base_dir
.as_ref()
.ok_or_else(|| anyhow!("--base-dir is required when not using --file"))?;
let filename = args
.filename
.as_ref()
.ok_or_else(|| anyhow!("--filename is required when not using --file"))?;
let discovery = LogFileDiscovery::new(base_dir.clone(), filename.clone());
let log_files = discovery.discover(from, to)?;
if log_files.is_empty() {
eprintln!("No log files found in the specified date range");
return Ok(());
}
eprintln!("Found {} log files to process", log_files.len());
if use_parallel && log_files.len() > 1 {
process_files_parallel(log_files, &args.output, args.batch_size)?;
} else {
process_files_sequential(log_files, &args.output, args.batch_size)?;
}
}
eprintln!("Done!");
Ok(())
}
fn process_files_sequential(
log_files: Vec<LogFile>,
output: &str,
batch_size: usize,
) -> Result<()> {
let mut db = Database::new(output)?;
let registry = ParserRegistry::new();
for log_file in log_files {
eprintln!(
"Processing: {} ({})",
log_file.path.display(),
if log_file.compressed {
"compressed"
} else {
"plain"
}
);
let reader = log_file.reader()?;
process_reader(reader, &registry, &mut db, batch_size)?;
}
Ok(())
}
fn process_files_parallel(log_files: Vec<LogFile>, output: &str, batch_size: usize) -> Result<()> {
let num_threads = rayon::current_num_threads();
eprintln!("Processing {} files with {} threads", log_files.len(), num_threads);
// Channel for sending parsed entries to the DB writer
// Buffer size: enough batches to keep workers busy without excessive memory
let (sender, receiver) = bounded::<Vec<SignatureEntry>>(num_threads * 2);
// Shared counters for progress reporting
let total_lines = Arc::new(AtomicU64::new(0));
let parsed_lines = Arc::new(AtomicU64::new(0));
let error_count = Arc::new(AtomicU64::new(0));
// Spawn DB writer thread
let output_path = output.to_string();
let db_handle = thread::spawn(move || -> Result<()> {
let mut db = Database::new(&output_path)?;
for batch in receiver {
let tx = db.begin_transaction()?;
Database::insert_signature_batch(&tx, &batch)?;
tx.commit()?;
}
Ok(())
});
// Process files in parallel
let result: Result<()> = log_files
.into_par_iter()
.try_for_each(|log_file| {
let file_path = log_file.path.display().to_string();
let compressed = if log_file.compressed { "compressed" } else { "plain" };
eprintln!("Starting: {} ({})", file_path, compressed);
process_file_parallel(
log_file,
&sender,
batch_size,
&total_lines,
&parsed_lines,
&error_count,
)?;
eprintln!("Finished: {}", file_path);
Ok(())
});
// Close the channel so DB writer knows to stop
drop(sender);
// Wait for DB writer to finish
db_handle
.join()
.map_err(|_| anyhow!("DB writer thread panicked"))??;
// Print final stats
eprintln!(
"Total: {} lines read, {} parsed, {} errors",
total_lines.load(Ordering::Relaxed),
parsed_lines.load(Ordering::Relaxed),
error_count.load(Ordering::Relaxed)
);
result
}
fn process_file_parallel(
log_file: LogFile,
sender: &Sender<Vec<SignatureEntry>>,
batch_size: usize,
total_lines: &AtomicU64,
parsed_lines: &AtomicU64,
error_count: &AtomicU64,
) -> Result<()> {
let registry = ParserRegistry::new();
let reader = log_file.reader()?;
let mut batch: Vec<SignatureEntry> = Vec::with_capacity(batch_size);
let mut file_lines = 0u64;
let mut file_parsed = 0u64;
let mut file_errors = 0u64;
for line_result in reader.lines() {
let line = line_result?;
file_lines += 1;
if let Some(parse_result) = registry.parse(&line) {
match parse_result {
Ok(ParsedMessage::Signature(entry)) => {
batch.push(entry);
file_parsed += 1;
if batch.len() >= batch_size {
sender.send(std::mem::replace(
&mut batch,
Vec::with_capacity(batch_size),
))?;
}
}
Err(_) => {
file_errors += 1;
}
}
}
}
// Send remaining entries
if !batch.is_empty() {
sender.send(batch)?;
}
// Update shared counters
total_lines.fetch_add(file_lines, Ordering::Relaxed);
parsed_lines.fetch_add(file_parsed, Ordering::Relaxed);
error_count.fetch_add(file_errors, Ordering::Relaxed);
Ok(())
}
fn process_reader(
reader: Box<dyn BufRead>,
registry: &ParserRegistry,
db: &mut Database,
batch_size: usize,
) -> Result<()> {
let mut signature_batch: Vec<SignatureEntry> = Vec::with_capacity(batch_size);
let mut total_lines = 0u64;
let mut parsed_lines = 0u64;
let mut error_counts: HashMap<String, u64> = HashMap::new();
for line_result in reader.lines() {
let line = line_result?;
total_lines += 1;
if let Some(parse_result) = registry.parse(&line) {
match parse_result {
Ok(ParsedMessage::Signature(entry)) => {
signature_batch.push(entry);
parsed_lines += 1;
if signature_batch.len() >= batch_size {
flush_signature_batch(db, &mut signature_batch)?;
}
}
Err(e) => {
*error_counts.entry(e.to_string()).or_insert(0) += 1;
}
}
}
if total_lines.is_multiple_of(100_000) {
let total_errors: u64 = error_counts.values().sum();
eprintln!(
"Progress: {} lines read, {} parsed, {} errors",
total_lines, parsed_lines, total_errors
);
}
}
// Flush remaining entries
if !signature_batch.is_empty() {
flush_signature_batch(db, &mut signature_batch)?;
}
let total_errors: u64 = error_counts.values().sum();
eprintln!(
"File complete: {} lines read, {} parsed, {} errors",
total_lines, parsed_lines, total_errors
);
// Print error summary
if !error_counts.is_empty() {
eprintln!("\nParse errors breakdown:");
let mut errors: Vec<_> = error_counts.into_iter().collect();
errors.sort_by(|a, b| b.1.cmp(&a.1)); // Sort by count descending
for (error, count) in errors {
eprintln!(" {} ({}x)", error, count);
}
}
Ok(())
}
fn flush_signature_batch(db: &mut Database, batch: &mut Vec<SignatureEntry>) -> Result<()> {
let tx = db.begin_transaction()?;
Database::insert_signature_batch(&tx, batch)?;
tx.commit()?;
batch.clear();
Ok(())
}

658
src/parser.rs Normal file
View File

@@ -0,0 +1,658 @@
use anyhow::{anyhow, Result};
use chrono::NaiveDateTime;
use regex::Regex;
use std::sync::LazyLock;
/// Represents a parsed signature log entry
#[derive(Debug, Clone, PartialEq)]
pub struct SignatureEntry {
pub session_id: String,
/// Timestamp as milliseconds since Unix epoch (UTC)
pub timestamp_ms: i64,
pub app: String,
pub version: String,
pub offline_login_usage: Option<i64>,
pub is_password_autofill_enabled: Option<bool>,
pub camera_roll_usage: Option<i64>,
pub os: Option<String>,
pub app_name: Option<String>,
pub touch_id: Option<bool>,
pub is_offline_login_enabled: Option<bool>,
pub model: Option<String>,
pub device: Option<String>,
pub password_autofill_usage: Option<i64>,
}
/// Trait for parsing different message types from logs.
/// Implement this trait to add support for new message formats.
pub trait MessageParser: Send + Sync {
/// Attempts to parse a log line. Returns None if this parser doesn't handle this message type.
fn parse(&self, line: &str) -> Option<Result<ParsedMessage>>;
}
/// Enum of all possible parsed message types.
/// Extend this when adding new message parsers.
#[derive(Debug, Clone)]
pub enum ParsedMessage {
Signature(SignatureEntry),
}
static SESSION_ID_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"sessionId=([^,\s]+)").unwrap());
static DATETIME_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r#"dt="(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})(?:,(\d{3}))?"#).unwrap());
static CORRELATION_ID_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"correlationId=([^,\s]+)").unwrap());
static SIGNATURE_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r#"msg="signature:([^/]+)/([^/]*)/\s*details:([^"]+)"#).unwrap());
// iOS mobile client: signature:appId/version/deviceId details:...
static MOBILE_IOS_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r#"msg="MOBILE_CLIENT_LOG: signature:([^/]+)/([^/]+)/([^\s]+)\s+details:([^"]+)"#)
.unwrap()
});
// Android mobile client: signature:appId/version/{json} details:...
static MOBILE_ANDROID_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r#"msg="MOBILE_CLIENT_LOG: signature:([^/]+)/([^/]+)/(\{[^}]+\})\s+details:([^"]+)"#)
.unwrap()
});
pub struct SignatureParser;
impl MessageParser for SignatureParser {
fn parse(&self, line: &str) -> Option<Result<ParsedMessage>> {
// Check if this line contains a signature message (but not MOBILE_CLIENT_LOG)
if !line.contains("msg=\"signature:") {
return None;
}
// Skip non-mobile client messages
if line.contains("msg=\"signature:WEB_CLIENT")
|| line.contains("msg=\"signature:SYNC_CLIENT")
|| line.contains("msg=\"signature:BROWSER_EXTENSION")
{
return None;
}
Some(self.parse_signature_line(line))
}
}
impl SignatureParser {
fn parse_signature_line(&self, line: &str) -> Result<ParsedMessage> {
// Extract session ID
let session_id = SESSION_ID_RE
.captures(line)
.and_then(|c| c.get(1))
.map(|m| m.as_str().to_string())
.ok_or_else(|| anyhow!("Missing sessionId"))?;
// Extract timestamp as milliseconds
let timestamp_ms = extract_timestamp_ms(line)?;
// Extract signature details
let caps = SIGNATURE_RE
.captures(line)
.ok_or_else(|| anyhow!("Invalid signature format"))?;
let app = caps.get(1).map(|m| m.as_str().to_string()).unwrap();
let version = caps.get(2).map(|m| m.as_str().to_string()).unwrap();
let details_str = caps.get(3).map(|m| m.as_str()).unwrap();
// Parse details key-value pairs
// Handle the tricky "device:iOS, Apple" case by parsing carefully
let details = parse_details(details_str)?;
let entry = SignatureEntry {
session_id,
timestamp_ms,
app,
version,
offline_login_usage: parse_number(&details, "offlineLoginUsage"),
is_password_autofill_enabled: parse_bool(&details, "isPasswordAutofillEnabled"),
camera_roll_usage: parse_number(&details, "cameraRollUsage"),
os: get_string(&details, "OS"),
app_name: get_string(&details, "appName"),
touch_id: parse_bool(&details, "touchID"),
is_offline_login_enabled: parse_bool(&details, "isOfflineLoginEnabled"),
model: get_string(&details, "model"),
device: get_string(&details, "device"),
password_autofill_usage: parse_number(&details, "passwordAutofillUsage"),
};
Ok(ParsedMessage::Signature(entry))
}
}
/// Parser for iOS MOBILE_CLIENT_LOG messages
pub struct MobileClientIosParser;
impl MessageParser for MobileClientIosParser {
fn parse(&self, line: &str) -> Option<Result<ParsedMessage>> {
if !line.contains("MOBILE_CLIENT_LOG:") || !line.contains("sdk-client:IOS") {
return None;
}
Some(self.parse_mobile_ios_line(line))
}
}
impl MobileClientIosParser {
fn parse_mobile_ios_line(&self, line: &str) -> Result<ParsedMessage> {
let timestamp_ms = extract_timestamp_ms(line)?;
let session_id = extract_correlation_id(line)?;
let caps = MOBILE_IOS_RE
.captures(line)
.ok_or_else(|| anyhow!("Invalid iOS mobile client format"))?;
let app = caps.get(1).map(|m| m.as_str().to_string()).unwrap();
let version = caps.get(2).map(|m| m.as_str().to_string()).unwrap();
let details_str = caps.get(4).map(|m| m.as_str()).unwrap();
let details = parse_mobile_details(details_str);
let entry = SignatureEntry {
session_id,
timestamp_ms,
app,
version,
offline_login_usage: None,
is_password_autofill_enabled: None,
camera_roll_usage: None,
os: get_string(&details, "os"),
app_name: get_string(&details, "app-name"),
touch_id: None,
is_offline_login_enabled: None,
model: get_string(&details, "model"),
device: Some("iOS".to_string()),
password_autofill_usage: None,
};
Ok(ParsedMessage::Signature(entry))
}
}
/// Parser for Android MOBILE_CLIENT_LOG messages
pub struct MobileClientAndroidParser;
impl MessageParser for MobileClientAndroidParser {
fn parse(&self, line: &str) -> Option<Result<ParsedMessage>> {
if !line.contains("MOBILE_CLIENT_LOG:") || !line.contains("sdk-client:ANDROID") {
return None;
}
Some(self.parse_mobile_android_line(line))
}
}
impl MobileClientAndroidParser {
fn parse_mobile_android_line(&self, line: &str) -> Result<ParsedMessage> {
let timestamp_ms = extract_timestamp_ms(line)?;
let session_id = extract_correlation_id(line)?;
let caps = MOBILE_ANDROID_RE
.captures(line)
.ok_or_else(|| anyhow!("Invalid Android mobile client format"))?;
let app = caps.get(1).map(|m| m.as_str().to_string()).unwrap();
let version = caps.get(2).map(|m| m.as_str().to_string()).unwrap();
let details_str = caps.get(4).map(|m| m.as_str()).unwrap();
let details = parse_mobile_details_android(details_str);
let entry = SignatureEntry {
session_id,
timestamp_ms,
app,
version,
offline_login_usage: None,
is_password_autofill_enabled: None,
camera_roll_usage: None,
os: get_string(&details, "os"),
app_name: Some("native Android".to_string()),
touch_id: None,
is_offline_login_enabled: None,
model: get_string(&details, "model"),
device: get_string(&details, "device"),
password_autofill_usage: None,
};
Ok(ParsedMessage::Signature(entry))
}
}
/// Extract timestamp from log line as milliseconds since Unix epoch
fn extract_timestamp_ms(line: &str) -> Result<i64> {
let caps = DATETIME_RE
.captures(line)
.ok_or_else(|| anyhow!("Missing datetime"))?;
let datetime_str = caps.get(1).map(|m| m.as_str()).unwrap();
let millis: i64 = caps
.get(2)
.map(|m| m.as_str().parse().unwrap_or(0))
.unwrap_or(0);
let dt = NaiveDateTime::parse_from_str(datetime_str, "%Y-%m-%d %H:%M:%S")
.map_err(|e| anyhow!("Invalid datetime format: {}", e))?;
Ok(dt.and_utc().timestamp_millis() + millis)
}
/// Extract correlation ID as session ID for mobile client logs
fn extract_correlation_id(line: &str) -> Result<String> {
CORRELATION_ID_RE
.captures(line)
.and_then(|c| c.get(1))
.map(|m| m.as_str().to_string())
.ok_or_else(|| anyhow!("Missing correlationId"))
}
/// Parse mobile client details for iOS (simple comma-separated key:value)
fn parse_mobile_details(details: &str) -> std::collections::HashMap<String, String> {
let mut map = std::collections::HashMap::new();
// Keys for iOS mobile client
let known_keys = ["sdk-client", "sdk-version", "app-name", "device", "model", "os"];
let mut key_positions: Vec<(usize, &str)> = known_keys
.iter()
.filter_map(|&key| {
let pattern = format!("{}:", key);
details.find(&pattern).map(|pos| (pos, key))
})
.collect();
key_positions.sort_by_key(|&(pos, _)| pos);
for i in 0..key_positions.len() {
let (pos, key) = key_positions[i];
let value_start = pos + key.len() + 1;
let value_end = if i + 1 < key_positions.len() {
let next_pos = key_positions[i + 1].0;
if next_pos > 0 && details.as_bytes().get(next_pos - 1) == Some(&b',') {
next_pos - 1
} else {
next_pos
}
} else {
details.find(" user-agent").unwrap_or(details.len())
};
let value = details[value_start..value_end].trim().to_string();
map.insert(key.to_string(), value);
}
map
}
/// Parse mobile client details for Android (handles device with commas)
fn parse_mobile_details_android(details: &str) -> std::collections::HashMap<String, String> {
let mut map = std::collections::HashMap::new();
// For Android, device can contain commas like "Android, samsung"
// Keys in order: sdk-client, sdk-version, app-name, device, model, os
let known_keys = ["sdk-client", "sdk-version", "app-name", "device", "model", "os"];
let mut key_positions: Vec<(usize, &str)> = known_keys
.iter()
.filter_map(|&key| {
let pattern = format!("{}:", key);
details.find(&pattern).map(|pos| (pos, key))
})
.collect();
key_positions.sort_by_key(|&(pos, _)| pos);
for i in 0..key_positions.len() {
let (pos, key) = key_positions[i];
let value_start = pos + key.len() + 1;
let value_end = if i + 1 < key_positions.len() {
let next_pos = key_positions[i + 1].0;
// Find the comma before the next key
if next_pos > 0 && details.as_bytes().get(next_pos - 1) == Some(&b',') {
next_pos - 1
} else {
next_pos
}
} else {
details.find(" user-agent").unwrap_or(details.len())
};
let value = details[value_start..value_end].trim().to_string();
map.insert(key.to_string(), value);
}
map
}
/// Parse the details string which has format like:
/// offlineLoginUsage:0,isPasswordAutofillEnabled:no,...,device:iOS, Apple,passwordAutofillUsage:0
fn parse_details(details: &str) -> Result<std::collections::HashMap<String, String>> {
let mut map = std::collections::HashMap::new();
// Known keys in order they appear
let known_keys = [
"offlineLoginUsage",
"isPasswordAutofillEnabled",
"cameraRollUsage",
"OS",
"appName",
"touchID",
"isOfflineLoginEnabled",
"model",
"device",
"passwordAutofillUsage",
];
// Find positions of each key
let mut key_positions: Vec<(usize, &str)> = known_keys
.iter()
.filter_map(|&key| {
let pattern = format!("{}:", key);
details.find(&pattern).map(|pos| (pos, key))
})
.collect();
// Sort by position
key_positions.sort_by_key(|&(pos, _)| pos);
// Extract values between keys
for i in 0..key_positions.len() {
let (pos, key) = key_positions[i];
let value_start = pos + key.len() + 1; // +1 for ':'
let value_end = if i + 1 < key_positions.len() {
// Value ends at the comma before the next key
let next_pos = key_positions[i + 1].0;
// Find the comma before the next key
if next_pos > 0 && details.as_bytes().get(next_pos - 1) == Some(&b',') {
next_pos - 1
} else {
next_pos
}
} else {
// Last key - value goes until " user-agent" or end
details
.find(" user-agent")
.unwrap_or(details.len())
};
let value = details[value_start..value_end].trim().to_string();
map.insert(key.to_string(), value);
}
Ok(map)
}
fn parse_number(map: &std::collections::HashMap<String, String>, key: &str) -> Option<i64> {
map.get(key).and_then(|v| v.parse().ok())
}
fn parse_bool(map: &std::collections::HashMap<String, String>, key: &str) -> Option<bool> {
map.get(key).and_then(|value| {
match value.to_lowercase().as_str() {
"yes" | "true" | "1" => Some(true),
"no" | "false" | "0" => Some(false),
_ => None,
}
})
}
fn get_string(map: &std::collections::HashMap<String, String>, key: &str) -> Option<String> {
map.get(key).map(|s| s.to_string())
}
/// Registry of all available message parsers
pub struct ParserRegistry {
parsers: Vec<Box<dyn MessageParser>>,
}
impl ParserRegistry {
pub fn new() -> Self {
let mut registry = Self {
parsers: Vec::new(),
};
// Register default parsers (order matters - more specific first)
registry.register(Box::new(MobileClientIosParser));
registry.register(Box::new(MobileClientAndroidParser));
registry.register(Box::new(SignatureParser));
registry
}
pub fn register(&mut self, parser: Box<dyn MessageParser>) {
self.parsers.push(parser);
}
/// Try to parse a line with all registered parsers
pub fn parse(&self, line: &str) -> Option<Result<ParsedMessage>> {
for parser in &self.parsers {
if let Some(result) = parser.parse(line) {
return Some(result);
}
}
None
}
}
impl Default for ParserRegistry {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_parse_signature_line() {
let line = r#"Jan 21 00:00:06 tom013 m1s-kv dt="2026-01-21 00:00:06,154", ll=INFO, lc=CreateSessionStep, threadId=188, externalUserId=null, clientIp=***, correlationId=***, sessionId=test-session-123, request_id=[***]***.*** t@tid@.pnull_uid_X_2631582, userId=X, msg="signature:XAMARIN_APP/5.23.0/ details:offlineLoginUsage:0,isPasswordAutofillEnabled:no,cameraRollUsage:0,OS:26.2.0,appName:App,touchID:no,isOfflineLoginEnabled:yes,model:iPhone18,1,device:iOS, Apple,passwordAutofillUsage:0 user-agent:mobileApp/5.23.0", ex=""#;
let registry = ParserRegistry::new();
let result = registry.parse(line).unwrap().unwrap();
match result {
ParsedMessage::Signature(entry) => {
assert_eq!(entry.session_id, "test-session-123");
assert_eq!(entry.app, "XAMARIN_APP");
assert_eq!(entry.version, "5.23.0");
assert_eq!(entry.offline_login_usage, Some(0));
assert_eq!(entry.is_password_autofill_enabled, Some(false));
assert_eq!(entry.camera_roll_usage, Some(0));
assert_eq!(entry.os, Some("26.2.0".to_string()));
assert_eq!(entry.app_name, Some("App".to_string()));
assert_eq!(entry.touch_id, Some(false));
assert_eq!(entry.is_offline_login_enabled, Some(true));
assert_eq!(entry.model, Some("iPhone18,1".to_string()));
assert_eq!(entry.device, Some("iOS, Apple".to_string()));
assert_eq!(entry.password_autofill_usage, Some(0));
}
}
}
#[test]
fn test_parse_non_signature_line() {
let line = r#"Jan 21 00:00:06 tom013 m1s-kv dt="2026-01-21 00:00:06", msg="some other message""#;
let registry = ParserRegistry::new();
assert!(registry.parse(line).is_none());
}
#[test]
fn test_parse_signature_with_missing_offline_login_usage() {
// Line missing offlineLoginUsage field
let line = r#"Jan 21 00:00:06 tom013 m1s-kv dt="2026-01-21 00:00:06,154", sessionId=test-123, msg="signature:XAMARIN_APP/5.23.0/ details:isPasswordAutofillEnabled:yes,cameraRollUsage:1,OS:26.2.0,appName:App,touchID:yes,isOfflineLoginEnabled:no,model:iPhone15,3,device:iOS, Apple,passwordAutofillUsage:2 user-agent:test", ex=""#;
let registry = ParserRegistry::new();
let result = registry.parse(line).unwrap().unwrap();
match result {
ParsedMessage::Signature(entry) => {
assert_eq!(entry.session_id, "test-123");
assert_eq!(entry.app, "XAMARIN_APP");
assert_eq!(entry.version, "5.23.0");
// Missing field should be None
assert_eq!(entry.offline_login_usage, None);
// Other fields should be present
assert_eq!(entry.is_password_autofill_enabled, Some(true));
assert_eq!(entry.camera_roll_usage, Some(1));
assert_eq!(entry.os, Some("26.2.0".to_string()));
assert_eq!(entry.app_name, Some("App".to_string()));
assert_eq!(entry.touch_id, Some(true));
assert_eq!(entry.is_offline_login_enabled, Some(false));
assert_eq!(entry.model, Some("iPhone15,3".to_string()));
assert_eq!(entry.device, Some("iOS, Apple".to_string()));
assert_eq!(entry.password_autofill_usage, Some(2));
}
}
}
#[test]
fn test_parse_signature_with_missing_password_autofill_usage() {
// Line missing passwordAutofillUsage (truncated log line scenario)
let line = r#"Jan 21 00:00:06 tom013 m1s-kv dt="2026-01-21 00:00:06,154", sessionId=test-456, msg="signature:XAMARIN_APP/5.23.0/ details:offlineLoginUsage:0,isPasswordAutofillEnabled:no,cameraRollUsage:0,OS:16.0.0,appName:App,touchID:yes,isOfflineLoginEnabled:yes,model:SM-S938B,device:Android", ex=""#;
let registry = ParserRegistry::new();
let result = registry.parse(line).unwrap().unwrap();
match result {
ParsedMessage::Signature(entry) => {
assert_eq!(entry.session_id, "test-456");
assert_eq!(entry.app, "XAMARIN_APP");
// passwordAutofillUsage missing
assert_eq!(entry.password_autofill_usage, None);
// Other fields present
assert_eq!(entry.offline_login_usage, Some(0));
assert_eq!(entry.device, Some("Android".to_string()));
}
}
}
#[test]
fn test_parse_signature_with_multiple_missing_fields() {
// Line missing multiple fields
let line = r#"Jan 21 00:00:06 tom013 m1s-kv dt="2026-01-21 00:00:06,154", sessionId=test-789, msg="signature:XAMARIN_APP/5.23.0/ details:OS:26.2.0,appName:App,model:iPhone18,1 user-agent:test", ex=""#;
let registry = ParserRegistry::new();
let result = registry.parse(line).unwrap().unwrap();
match result {
ParsedMessage::Signature(entry) => {
assert_eq!(entry.session_id, "test-789");
assert_eq!(entry.app, "XAMARIN_APP");
assert_eq!(entry.version, "5.23.0");
// Missing fields should be None
assert_eq!(entry.offline_login_usage, None);
assert_eq!(entry.is_password_autofill_enabled, None);
assert_eq!(entry.camera_roll_usage, None);
assert_eq!(entry.touch_id, None);
assert_eq!(entry.is_offline_login_enabled, None);
assert_eq!(entry.device, None);
assert_eq!(entry.password_autofill_usage, None);
// Present fields should have values
assert_eq!(entry.os, Some("26.2.0".to_string()));
assert_eq!(entry.app_name, Some("App".to_string()));
assert_eq!(entry.model, Some("iPhone18,1".to_string()));
}
}
}
#[test]
fn test_web_client_signature_is_skipped() {
// WEB_CLIENT signature messages should be skipped (return None)
let line = r#"Jan 21 00:00:06 tom013 m1s-kv dt="2026-01-21 00:00:06,154", sessionId=test-123, msg="signature:WEB_CLIENT/1.0.0/ details:browser:Chrome,OS:Windows user-agent:test", ex=""#;
let registry = ParserRegistry::new();
assert!(registry.parse(line).is_none());
}
#[test]
fn test_web_client_with_version_is_skipped() {
let line = r#"Jan 21 00:00:06 tom013 m1s-kv dt="2026-01-21 00:00:06,154", sessionId=test-456, msg="signature:WEB_CLIENT/2.5.0/ details:something:value user-agent:test", ex=""#;
let registry = ParserRegistry::new();
assert!(registry.parse(line).is_none());
}
#[test]
fn test_sync_client_signature_is_skipped() {
let line = r#"Jan 21 00:00:06 tom013 m1s-kv dt="2026-01-21 00:00:06,154", sessionId=test-789, msg="signature:SYNC_CLIENT/1.2.0/ details:platform:Windows,OS:10.0 user-agent:test", ex=""#;
let registry = ParserRegistry::new();
assert!(registry.parse(line).is_none());
}
#[test]
fn test_browser_extension_signature_is_skipped() {
let line = r#"Jan 21 00:00:06 tom013 m1s-kv dt="2026-01-21 00:00:06,154", sessionId=test-abc, msg="signature:BROWSER_EXTENSION/3.0.1/ details:browser:Firefox,OS:MacOS user-agent:test", ex=""#;
let registry = ParserRegistry::new();
assert!(registry.parse(line).is_none());
}
#[test]
fn test_parse_mobile_client_ios() {
let line = r#"Jan 21 00:01:55 tom012 m1s-kv dt="2026-01-21 00:01:55,573", ll=INFO, lc=com.xyz.domains.user.controllers.UserControllerV1, correlationId=aXAJY9JuqalC5uxAaB6EsgAAALo, applicationName=ApiGateway, msg="MOBILE_CLIENT_LOG: signature:com.xyz.app/2.1.0/738D8CD5-28BC-490C-AB02-2C309FA64875 details:sdk-client:IOS,sdk-version:1.4.0,app-name:App,device:iOS,model:iPhone14,4,os:26.2 user-agent:26.2", ex=""#;
let registry = ParserRegistry::new();
let result = registry.parse(line).unwrap().unwrap();
match result {
ParsedMessage::Signature(entry) => {
assert_eq!(entry.session_id, "aXAJY9JuqalC5uxAaB6EsgAAALo");
assert_eq!(entry.app, "com.xyz.app");
assert_eq!(entry.version, "2.1.0");
assert_eq!(entry.model, Some("iPhone14,4".to_string()));
assert_eq!(entry.os, Some("26.2".to_string()));
assert_eq!(entry.app_name, Some("App".to_string()));
assert_eq!(entry.device, Some("iOS".to_string()));
// These fields are not present in mobile client logs
assert_eq!(entry.offline_login_usage, None);
assert_eq!(entry.is_password_autofill_enabled, None);
}
}
}
#[test]
fn test_parse_mobile_client_android() {
let line = r#"Jan 21 01:01:59 tom011 m1s-kv dt="2026-01-21 01:01:59,647", ll=INFO, lc=com.xyz.domains.user.controllers.UserControllerV1, correlationId=aXAXdwzcGYAFt6MF_m4tKAAAAg4, applicationName=ApiGateway, msg="MOBILE_CLIENT_LOG: signature:com.xyz.mobile.app/2.1.0/{"client":"2a4f12c97b64948d","version":"1.4.0"} details:sdk-client:ANDROID,sdk-version:1.4.0,app-name:2.1.0,device:Android, samsung,model:SM-A536B,os:16 user-agent:16", ex=""#;
let registry = ParserRegistry::new();
let result = registry.parse(line).unwrap().unwrap();
match result {
ParsedMessage::Signature(entry) => {
assert_eq!(entry.session_id, "aXAXdwzcGYAFt6MF_m4tKAAAAg4");
assert_eq!(entry.app, "com.xyz.mobile.app");
assert_eq!(entry.version, "2.1.0");
assert_eq!(entry.model, Some("SM-A536B".to_string()));
assert_eq!(entry.os, Some("16".to_string()));
assert_eq!(entry.app_name, Some("native Android".to_string()));
assert_eq!(entry.device, Some("Android, samsung".to_string()));
// These fields are not present in mobile client logs
assert_eq!(entry.offline_login_usage, None);
assert_eq!(entry.is_password_autofill_enabled, None);
}
}
}
#[test]
fn test_xamarin_app_still_parsed() {
// Ensure XAMARIN_APP messages are still parsed after adding WEB_CLIENT filter
let line = r#"Jan 21 00:00:06 tom013 m1s-kv dt="2026-01-21 00:00:06,154", sessionId=test-session-123, msg="signature:XAMARIN_APP/5.23.0/ details:offlineLoginUsage:0,isPasswordAutofillEnabled:yes,cameraRollUsage:0,OS:26.2.0,appName:App,touchID:yes,isOfflineLoginEnabled:yes,model:iPhone18,1,device:iOS, Apple,passwordAutofillUsage:0 user-agent:test", ex=""#;
let registry = ParserRegistry::new();
let result = registry.parse(line).unwrap().unwrap();
match result {
ParsedMessage::Signature(entry) => {
assert_eq!(entry.app, "XAMARIN_APP");
assert_eq!(entry.version, "5.23.0");
}
}
}
}