Add log ingestion tool for loading signature logs into SQLite

- Parse signature messages from log files extracting app info, device
  details, and feature flags (autofill, touchID, offline login, etc.)
- Support both plain .log and gzip compressed .log.gz files
- File discovery by date range (YYYY/mm/dd directory structure)
- Batch inserts for performance with large files (10GB+ per day)
- Index on session_id and version for efficient queries
- Extensible parser architecture via MessageParser trait

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-21 22:34:48 +01:00
commit 30cc4fae01
17 changed files with 1868 additions and 0 deletions

14
.github/dependabot.yaml vendored Normal file
View File

@@ -0,0 +1,14 @@
version: 2
updates:
- package-ecosystem: github-actions
directory: /
schedule:
interval: weekly
time: "07:00"
timezone: "Asia/Tokyo"
- package-ecosystem: cargo
directory: /
schedule:
interval: weekly
time: "07:00"
timezone: "Asia/Tokyo"

31
.github/workflows/audit.yaml vendored Normal file
View File

@@ -0,0 +1,31 @@
name: Security audit
on:
schedule:
- cron: "0 0 */3 * *"
push:
branches: [main]
paths:
- "**/Cargo.toml"
- "**/Cargo.lock"
pull_request:
paths:
- "**/Cargo.toml"
- "**/Cargo.lock"
jobs:
audit:
name: Audit
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
- name: Install cargo-audit
uses: taiki-e/install-action@30eab0fabba9ea3f522099957e668b21876aa39e # v2.66.6
with:
tool: cargo-audit
- name: Run audit
run: cargo audit

39
.github/workflows/benchmark.yaml vendored Normal file
View File

@@ -0,0 +1,39 @@
name: Benchmark
on:
push:
branches:
- main
paths:
- '**/*.rs'
- '**/Cargo.toml'
- '**/Cargo.lock'
- '.github/workflows/benchmark.yaml'
permissions:
contents: write
deployments: write
jobs:
benchmark:
name: Run Rust benchmark example
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
- name: Toolchain setup
run: rustup toolchain update nightly && rustup default nightly
- name: Run benchmark
run: cargo +nightly bench | tee output.txt
- name: Store benchmark result
uses: benchmark-action/github-action-benchmark@4bdcce38c94cec68da58d012ac24b7b1155efe8b # v1.20.7
with:
name: Rust Benchmark
tool: 'cargo'
output-file-path: output.txt
github-token: ${{ secrets.GITHUB_TOKEN }}
auto-push: true
# Show alert with commit comment on detecting possible performance regression
alert-threshold: '200%'
comment-on-alert: true
fail-on-alert: true
benchmark-data-dir-path: docs

97
.github/workflows/ci.yaml vendored Normal file
View File

@@ -0,0 +1,97 @@
name: Rust CI
on:
push:
branches: [main]
paths:
- '**/*.rs'
- '**/Cargo.toml'
- '**/Cargo.lock'
- '.github/workflows/ci.yaml'
pull_request:
paths:
- '**/*.rs'
- '**/Cargo.toml'
- '**/Cargo.lock'
- '.github/workflows/ci.yaml'
env:
CARGO_TERM_COLOR: always
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
check:
name: Check
strategy:
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
with:
fetch-depth: 0
- uses: actions-rust-lang/setup-rust-toolchain@1780873c7b576612439a134613cc4cc74ce5538c # v1.15.2
with:
components: rustfmt, clippy
cache-shared-key: setup-rust-${{ runner.os }}-${{ runner.arch }}-${{ hashFiles('**/Cargo.lock') }}
- name: Install reviewdog
uses: reviewdog/action-setup@d8a7baabd7f3e8544ee4dbde3ee41d0011c3a93f # v1.5.0
- name: Check format
run: |
cargo fmt --all -- --check
- uses: giraffate/clippy-action@13b9d32482f25d29ead141b79e7e04e7900281e0 # v1.0.1
with:
reporter: 'github-pr-review'
github_token: ${{ secrets.GITHUB_TOKEN }}
fail_on_error: true
filter_mode: nofilter
- name: Build
run: cargo build
test:
name: Test
strategy:
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
runs-on: ${{ matrix.os }}
permissions:
contents: write
pull-requests: write
steps:
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
with:
fetch-depth: 0
- uses: actions-rust-lang/setup-rust-toolchain@1780873c7b576612439a134613cc4cc74ce5538c # v1.15.2
with:
components: llvm-tools-preview
cache-shared-key: setup-rust-${{ runner.os }}-${{ runner.arch }}-${{ hashFiles('**/Cargo.lock') }}
- name: Install tools
uses: taiki-e/install-action@30eab0fabba9ea3f522099957e668b21876aa39e # v2.66.6
with:
tool: cargo-llvm-cov, cargo-nextest
- name: Run test
if: runner.os != 'Linux'
run: |
cargo nextest run
- name: Generate coverage
if: runner.os == 'Linux'
run: cargo llvm-cov nextest --lcov --output-path lcov.info
- name: Upload coverage
if: runner.os == 'Linux'
uses: k1LoW/octocov-action@73d561f65d59e66899ed5c87e4621a913b5d5c20 # v1.5.0
with:
github-token: ${{ secrets.GITHUB_TOKEN }}

View File

@@ -0,0 +1,34 @@
name: Dependabot Auto-merge
on:
pull_request:
types:
- opened
- synchronize
- reopened
permissions:
contents: write
pull-requests: write
jobs:
dependabot-automation:
runs-on: ubuntu-latest
if: ${{ github.actor == 'dependabot[bot]' }}
timeout-minutes: 13
steps:
- name: Dependabot metadata
id: metadata
uses: dependabot/fetch-metadata@21025c705c08248db411dc16f3619e6b5f9ea21a # v2.5.0
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
- name: Approve & enable auto-merge for Dependabot PR
if: |
steps.metadata.outputs.update-type == 'version-update:semver-patch' ||
steps.metadata.outputs.update-type == 'version-update:semver-minor'
run: |
gh pr merge --auto -s "$PR_URL"
env:
PR_URL: ${{ github.event.pull_request.html_url }}
PR_TITLE: ${{ github.event.pull_request.title }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

134
.github/workflows/release.yaml vendored Normal file
View File

@@ -0,0 +1,134 @@
name: Release
on:
push:
tags:
- 'v*'
workflow_dispatch:
permissions:
contents: write
jobs:
build:
name: Build - ${{ matrix.target }}
strategy:
matrix:
include:
- target: x86_64-unknown-linux-gnu
runner: ubuntu-latest
os: Linux
arch: x86_64
ext: tar.gz
- target: aarch64-unknown-linux-gnu
runner: ubuntu-24.04-arm
os: Linux
arch: arm64
ext: tar.gz
- target: x86_64-apple-darwin
runner: macos-15-intel
os: Darwin
arch: x86_64
ext: tar.gz
- target: aarch64-apple-darwin
runner: macos-latest
os: Darwin
arch: arm64
ext: tar.gz
- target: x86_64-pc-windows-msvc
runner: windows-latest
os: Windows
arch: x86_64
ext: zip
runs-on: ${{ matrix.runner }}
steps:
- name: Checkout
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
- name: Setup sccache
uses: mozilla-actions/sccache-action@7d986dd989559c6ecdb630a3fd2557667be217ad # v0.0.9
- name: Setup environment variables for sccache
shell: bash
run: |
echo "SCCACHE_GHA_ENABLED=true" >> "$GITHUB_ENV"
echo "RUSTC_WRAPPER=sccache" >> "$GITHUB_ENV"
- name: Setup Rust
uses: actions-rust-lang/setup-rust-toolchain@1780873c7b576612439a134613cc4cc74ce5538c # v1.15.2
with:
rustflags: ""
- name: Get project name
id: project
shell: bash
run: |
name=$(cargo metadata --format-version 1 --no-deps | jq -r '.packages[0].name')
echo "name=$name" >> "$GITHUB_OUTPUT"
- name: Build
run: cargo build --release
- name: Create archive (Unix)
if: matrix.os != 'Windows'
shell: bash
run: |
name="${{ steps.project.outputs.name }}"
archive_name="${name}_${{ matrix.os }}_${{ matrix.arch }}.tar.gz"
tar -czvf "$archive_name" -C target/release "$name"
echo "archive_name=$archive_name" >> "$GITHUB_ENV"
- name: Create archive (Windows)
if: matrix.os == 'Windows'
shell: pwsh
run: |
$name = "${{ steps.project.outputs.name }}"
$archiveName = "${name}_${{ matrix.os }}_${{ matrix.arch }}.zip"
Compress-Archive -Path "target/release/${name}.exe" -DestinationPath $archiveName
echo "archive_name=$archiveName" >> $env:GITHUB_ENV
- name: Upload artifact
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: ${{ steps.project.outputs.name }}_${{ matrix.os }}_${{ matrix.arch }}
path: ${{ env.archive_name }}
if-no-files-found: error
release:
name: Release
needs: build
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
with:
fetch-depth: 0
- name: Download all artifacts
uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
with:
path: artifacts
merge-multiple: true
- name: Generate changelog
id: changelog
run: |
# Get the previous tag
prev_tag=$(git describe --tags --abbrev=0 HEAD^ 2>/dev/null || echo "")
if [ -n "$prev_tag" ]; then
echo "## Changes since $prev_tag" > changelog.md
echo "" >> changelog.md
git log --pretty=format:"- %s" "$prev_tag"..HEAD >> changelog.md
else
echo "## Initial Release" > changelog.md
fi
- name: Create release
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
gh release create "${{ github.ref_name }}" \
--title "${{ github.ref_name }}" \
--notes-file changelog.md \
artifacts/*

3
.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
/target
.DS_Store
*.db

71
CLAUDE.md Normal file
View File

@@ -0,0 +1,71 @@
# CLAUDE.md
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
## Commands
### Build & Run
```bash
# Build
cargo build
# Release build
cargo build --release
# Run (example)
cargo run -- --name "World"
```
### Testing
```bash
# Run all tests
cargo test
# Fast test execution with cargo-nextest (recommended)
cargo nextest run
# Run a single test
cargo test test_name
# Generate coverage (requires cargo-llvm-cov)
cargo llvm-cov nextest --lcov --output-path lcov.info
```
### Quality Checks
```bash
# Format check
cargo fmt -- --check
# Apply formatting
cargo fmt
# Static analysis with Clippy
cargo clippy
```
### Benchmarks
```bash
# Requires nightly toolchain
cargo +nightly bench
```
## Architecture
### Project Structure
- **cargo-generate template**: This repository is a template for generating new Rust CLI projects
- **CLI parser**: Uses clap v4 derive macros for command-line argument processing
- **Benchmarks**: Located in `benches/` directory, uses nightly compiler's test crate
### CI/CD Configuration
- **ci.yaml**: Main CI workflow
- Runs formatting, Clippy, build, and tests
- Generates coverage on Linux with octocov reporting
- Automatic PR feedback via reviewdog
- **benchmark.yaml**: Auto-deploys benchmark results to GitHub Pages
- **audit.yaml**: Security audit for dependencies
- **release.yaml**: Automated release on tag push (cross-platform builds via GoReleaser)
### Key Settings
- **Rust version**: Fixed to 1.87 in `rust-toolchain.toml`
- **Edition**: Uses Rust 2024 edition
- **Test tools**: cargo-nextest and cargo-llvm-cov recommended

658
Cargo.lock generated Normal file
View File

@@ -0,0 +1,658 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "adler2"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
[[package]]
name = "aho-corasick"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
dependencies = [
"memchr",
]
[[package]]
name = "android_system_properties"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
dependencies = [
"libc",
]
[[package]]
name = "anstream"
version = "0.6.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b"
dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
"is_terminal_polyfill",
"utf8parse",
]
[[package]]
name = "anstyle"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9"
[[package]]
name = "anstyle-parse"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9"
dependencies = [
"utf8parse",
]
[[package]]
name = "anstyle-query"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c"
dependencies = [
"windows-sys",
]
[[package]]
name = "anstyle-wincon"
version = "3.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e"
dependencies = [
"anstyle",
"once_cell",
"windows-sys",
]
[[package]]
name = "anyhow"
version = "1.0.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
[[package]]
name = "autocfg"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
[[package]]
name = "bitflags"
version = "2.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3"
[[package]]
name = "bumpalo"
version = "3.19.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510"
[[package]]
name = "cc"
version = "1.2.53"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "755d2fce177175ffca841e9a06afdb2c4ab0f593d53b4dee48147dfaade85932"
dependencies = [
"find-msvc-tools",
"shlex",
]
[[package]]
name = "cfg-if"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
[[package]]
name = "chrono"
version = "0.4.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118"
dependencies = [
"iana-time-zone",
"js-sys",
"num-traits",
"wasm-bindgen",
"windows-link",
]
[[package]]
name = "clap"
version = "4.5.54"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c6e6ff9dcd79cff5cd969a17a545d79e84ab086e444102a591e288a8aa3ce394"
dependencies = [
"clap_builder",
"clap_derive",
]
[[package]]
name = "clap_builder"
version = "4.5.54"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa42cf4d2b7a41bc8f663a7cab4031ebafa1bf3875705bfaf8466dc60ab52c00"
dependencies = [
"anstream",
"anstyle",
"clap_lex",
"strsim",
]
[[package]]
name = "clap_derive"
version = "4.5.49"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "clap_lex"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6"
[[package]]
name = "colorchoice"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990"
[[package]]
name = "core-foundation-sys"
version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
[[package]]
name = "crc32fast"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
dependencies = [
"cfg-if",
]
[[package]]
name = "fallible-iterator"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649"
[[package]]
name = "fallible-streaming-iterator"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
[[package]]
name = "find-msvc-tools"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8591b0bcc8a98a64310a2fae1bb3e9b8564dd10e381e6e28010fde8e8e8568db"
[[package]]
name = "flate2"
version = "1.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b375d6465b98090a5f25b1c7703f3859783755aa9a80433b36e0379a3ec2f369"
dependencies = [
"crc32fast",
"miniz_oxide",
]
[[package]]
name = "foldhash"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
[[package]]
name = "hashbrown"
version = "0.15.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
dependencies = [
"foldhash",
]
[[package]]
name = "hashlink"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1"
dependencies = [
"hashbrown",
]
[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "iana-time-zone"
version = "0.1.64"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb"
dependencies = [
"android_system_properties",
"core-foundation-sys",
"iana-time-zone-haiku",
"js-sys",
"log",
"wasm-bindgen",
"windows-core",
]
[[package]]
name = "iana-time-zone-haiku"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
dependencies = [
"cc",
]
[[package]]
name = "is_terminal_polyfill"
version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
[[package]]
name = "js-sys"
version = "0.3.85"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3"
dependencies = [
"once_cell",
"wasm-bindgen",
]
[[package]]
name = "libc"
version = "0.2.180"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc"
[[package]]
name = "libsqlite3-sys"
version = "0.33.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "947e6816f7825b2b45027c2c32e7085da9934defa535de4a6a46b10a4d5257fa"
dependencies = [
"cc",
"pkg-config",
"vcpkg",
]
[[package]]
name = "log"
version = "0.4.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
[[package]]
name = "log_ingest"
version = "0.0.1"
dependencies = [
"anyhow",
"chrono",
"clap",
"flate2",
"regex",
"rusqlite",
]
[[package]]
name = "memchr"
version = "2.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
[[package]]
name = "miniz_oxide"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
dependencies = [
"adler2",
"simd-adler32",
]
[[package]]
name = "num-traits"
version = "0.2.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
dependencies = [
"autocfg",
]
[[package]]
name = "once_cell"
version = "1.20.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e"
[[package]]
name = "pkg-config"
version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
[[package]]
name = "proc-macro2"
version = "1.0.93"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex"
version = "1.12.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"
[[package]]
name = "rusqlite"
version = "0.35.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a22715a5d6deef63c637207afbe68d0c72c3f8d0022d7cf9714c442d6157606b"
dependencies = [
"bitflags",
"fallible-iterator",
"fallible-streaming-iterator",
"hashlink",
"libsqlite3-sys",
"smallvec",
]
[[package]]
name = "rustversion"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
[[package]]
name = "shlex"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "simd-adler32"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2"
[[package]]
name = "smallvec"
version = "1.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
[[package]]
name = "strsim"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "syn"
version = "2.0.98"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "00e2473a93778eb0bad35909dff6a10d28e63f792f16ed15e404fca9d5eeedbe"
[[package]]
name = "utf8parse"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "vcpkg"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
[[package]]
name = "wasm-bindgen"
version = "0.2.108"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566"
dependencies = [
"cfg-if",
"once_cell",
"rustversion",
"wasm-bindgen-macro",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.108"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.108"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55"
dependencies = [
"bumpalo",
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.108"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12"
dependencies = [
"unicode-ident",
]
[[package]]
name = "windows-core"
version = "0.62.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb"
dependencies = [
"windows-implement",
"windows-interface",
"windows-link",
"windows-result",
"windows-strings",
]
[[package]]
name = "windows-implement"
version = "0.60.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "windows-interface"
version = "0.59.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "windows-link"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
[[package]]
name = "windows-result"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5"
dependencies = [
"windows-link",
]
[[package]]
name = "windows-strings"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091"
dependencies = [
"windows-link",
]
[[package]]
name = "windows-sys"
version = "0.59.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_gnullvm",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
[[package]]
name = "windows_i686_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
[[package]]
name = "windows_i686_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"

13
Cargo.toml Normal file
View File

@@ -0,0 +1,13 @@
[package]
name = "log_ingest"
version = "0.0.1"
authors = ["Alexandr Mansurov"]
edition = "2024"
[dependencies]
clap = { version = "4.5.42", features = ["derive"] }
rusqlite = { version = "0.35", features = ["bundled"] }
chrono = "0.4"
regex = "1"
flate2 = "1"
anyhow = "1"

21
LICENSE Normal file
View File

@@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2025 skanehira
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

120
README.md Normal file
View File

@@ -0,0 +1,120 @@
# Log ingest
This repo is created with cargo generate --git https://github.com/skanehira/rust-cli-template (start with cargo install cargo-generate )
## Overview
This repository serves as a template for quickly bootstrapping Rust command-line
interface (CLI) applications using `cargo-generate`. It provides a minimal yet
comprehensive foundation with the following features:
- CLI argument parsing using [clap](https://github.com/clap-rs/clap) with derive
macros
- GitHub Actions workflow for CI/CD
- Code coverage reporting with [octocov](https://github.com/k1LoW/octocov)
- Automatic benchmark result visualization and deployment with
[github-action-benchmark](https://github.com/benchmark-action/github-action-benchmark)
- Security audit checks for dependencies
- Automated release workflow for publishing
- Automated dependency updates with Dependabot
## Project Structure
Generated projects will have the following structure:
```
.
├── .github/ # GitHub Actions workflows
│ ├── workflows/ # CI/CD workflows for testing, benchmarking, and releasing
│ │ ├── ci.yml # Main CI workflow (tests, linting, coverage)
│ │ ├── audit.yml # Security audit workflow
│ │ └── release.yml # Release automation workflow
│ └── dependabot.yaml # Automated dependency update configuration file
├── benches/ # Benchmark code (requires nightly Rust)
├── src/ # Source code
├── .gitignore # Git ignore file
├── .octocov.yml # Code coverage configuration
├── goreleasser.yaml # GoReleaser configuration file for cross-platform builds and distribution
├── Cargo.toml # Project manifest
├── Cargo.lock # Dependency lock file
└── rust-toolchain.toml # Rust toolchain configuration
```
## Benchmark visualization
The benchmark results are automatically deployed to GitHub Pages for easy
visualization and performance tracking. You need to create a `gh-pages` branch
in your repository before first push.
<img width="1165" alt="image" src="https://github.com/user-attachments/assets/333631e2-dee0-48f9-bc8e-d72c583857de" />
<img width="874" alt="image" src="https://github.com/user-attachments/assets/6a07ea77-1294-422f-abd6-cb3e4281c26e" />
## Coverage
This project uses [octocov](https://github.com/k1LoW/octocov) to measure code
coverage. During CI execution, coverage reports are automatically generated and
displayed as comments on PRs or commits. The coverage history is also tracked,
allowing you to see changes over time.
The coverage reports are deployed to GitHub Pages for easy visualization.
Coverage information can also be displayed in the README as a badge.
<img width="936" alt="image" src="https://github.com/user-attachments/assets/8471d58a-06b3-4fd5-85e6-916959704c69" />
The detailed configuration for octocov is managed in the `.octocov.yml` file.
## Usage
### Prerequisites
- [cargo-generate](https://github.com/cargo-generate/cargo-generate)
- [gh](https://github.com/cli/cli)
### Creating a New Project
Create a new project using this template:
```bash
cargo generate --git https://github.com/skanehira/rust-cli-template.git
```
Follow the prompts to customize your project.
### Running Tests
```bash
cargo test
```
### Running Benchmarks
Benchmarks require the nightly Rust channel:
```bash
cargo +nightly bench
```
### Release Process
This template includes an automated release workflow. Follow these steps to
create a release:
1. Push a tag with your changes:
```bash
git tag v0.1.0 # Replace with the appropriate version number
git push origin v0.1.0
```
2. When the tag is pushed, the GitHub Actions `release.yml` workflow will
automatically execute. This workflow:
- Builds cross-platform binaries (Linux, macOS, Windows)
- Creates a GitHub Release
- Uploads binaries and changelog
The release configuration is managed in the `.github/workflows/release.yml` and
`goreleasser.yaml` files.
---
Feel free to customize this template to fit your specific needs!

2
rust-toolchain.toml Normal file
View File

@@ -0,0 +1,2 @@
[toolchain]
channel = "1.90"

83
src/db.rs Normal file
View File

@@ -0,0 +1,83 @@
use anyhow::Result;
use rusqlite::{params, Connection, Transaction};
use crate::parser::SignatureEntry;
pub struct Database {
conn: Connection,
}
impl Database {
pub fn new(path: &str) -> Result<Self> {
let conn = Connection::open(path)?;
let db = Self { conn };
db.init_schema()?;
Ok(db)
}
fn init_schema(&self) -> Result<()> {
self.conn.execute_batch(
r#"
CREATE TABLE IF NOT EXISTS signature_entries (
id INTEGER PRIMARY KEY AUTOINCREMENT,
session_id TEXT NOT NULL,
timestamp TEXT NOT NULL,
app TEXT NOT NULL,
version TEXT NOT NULL,
offline_login_usage INTEGER NOT NULL,
is_password_autofill_enabled INTEGER NOT NULL,
camera_roll_usage INTEGER NOT NULL,
os TEXT NOT NULL,
app_name TEXT NOT NULL,
touch_id INTEGER NOT NULL,
is_offline_login_enabled INTEGER NOT NULL,
model TEXT NOT NULL,
device TEXT NOT NULL,
password_autofill_usage INTEGER NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_session_id ON signature_entries(session_id);
CREATE INDEX IF NOT EXISTS idx_version ON signature_entries(version);
"#,
)?;
Ok(())
}
pub fn begin_transaction(&mut self) -> Result<Transaction<'_>> {
Ok(self.conn.transaction()?)
}
pub fn insert_signature_batch(tx: &Transaction<'_>, entries: &[SignatureEntry]) -> Result<()> {
let mut stmt = tx.prepare_cached(
r#"
INSERT INTO signature_entries (
session_id, timestamp, app, version,
offline_login_usage, is_password_autofill_enabled, camera_roll_usage,
os, app_name, touch_id, is_offline_login_enabled,
model, device, password_autofill_usage
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
"#,
)?;
for entry in entries {
stmt.execute(params![
entry.session_id,
entry.timestamp.format("%Y-%m-%d %H:%M:%S").to_string(),
entry.app,
entry.version,
entry.offline_login_usage,
entry.is_password_autofill_enabled as i32,
entry.camera_roll_usage,
entry.os,
entry.app_name,
entry.touch_id as i32,
entry.is_offline_login_enabled as i32,
entry.model,
entry.device,
entry.password_autofill_usage,
])?;
}
Ok(())
}
}

100
src/files.rs Normal file
View File

@@ -0,0 +1,100 @@
use anyhow::{anyhow, Result};
use chrono::NaiveDate;
use flate2::read::GzDecoder;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::PathBuf;
/// Discovers log files for a given date range
pub struct LogFileDiscovery {
base_dir: PathBuf,
filename: String,
}
impl LogFileDiscovery {
pub fn new(base_dir: PathBuf, filename: String) -> Self {
Self { base_dir, filename }
}
/// Returns an iterator over all log files in the date range
pub fn discover(&self, from: NaiveDate, to: NaiveDate) -> Result<Vec<LogFile>> {
let mut files = Vec::new();
let mut current = from;
while current <= to {
if let Some(log_file) = self.find_log_for_date(current)? {
files.push(log_file);
}
current = current
.succ_opt()
.ok_or_else(|| anyhow!("Date overflow"))?;
}
Ok(files)
}
fn find_log_for_date(&self, date: NaiveDate) -> Result<Option<LogFile>> {
// Build path: <base_dir>/yyyy/mm/dd/<filename>.gz or <filename>
let date_path = self
.base_dir
.join(date.format("%Y").to_string())
.join(date.format("%m").to_string())
.join(date.format("%d").to_string());
// Try gzipped first
let gz_path = date_path.join(format!("{}.gz", self.filename));
if gz_path.exists() {
return Ok(Some(LogFile {
path: gz_path,
compressed: true,
date,
}));
}
// Try uncompressed
let plain_path = date_path.join(&self.filename);
if plain_path.exists() {
return Ok(Some(LogFile {
path: plain_path,
compressed: false,
date,
}));
}
// No file found for this date
Ok(None)
}
}
#[derive(Debug)]
pub struct LogFile {
pub path: PathBuf,
pub compressed: bool,
pub date: NaiveDate,
}
impl LogFile {
/// Returns a buffered reader for this log file, handling compression transparently
pub fn reader(&self) -> Result<Box<dyn BufRead>> {
let file = File::open(&self.path)?;
if self.compressed {
let decoder = GzDecoder::new(file);
Ok(Box::new(BufReader::new(decoder)))
} else {
Ok(Box::new(BufReader::new(file)))
}
}
}
/// For reading a single file directly (e.g., for testing)
pub fn read_log_file(path: &str) -> Result<Box<dyn BufRead>> {
let file = File::open(path)?;
if path.ends_with(".gz") {
let decoder = GzDecoder::new(file);
Ok(Box::new(BufReader::new(decoder)))
} else {
Ok(Box::new(BufReader::new(file)))
}
}

174
src/main.rs Normal file
View File

@@ -0,0 +1,174 @@
use anyhow::{anyhow, Result};
use chrono::NaiveDate;
use clap::Parser;
use std::io::BufRead;
use std::path::PathBuf;
mod db;
mod files;
mod parser;
use db::Database;
use files::{read_log_file, LogFileDiscovery};
use parser::{ParsedMessage, ParserRegistry, SignatureEntry};
#[derive(Parser, Debug)]
#[command(author, version, about = "Load log files into SQLite database")]
struct Args {
/// Start date (YYYY/mm/dd)
#[arg(long)]
from: Option<String>,
/// End date (YYYY/mm/dd)
#[arg(long)]
to: Option<String>,
/// Base directory containing log files
#[arg(long)]
base_dir: Option<PathBuf>,
/// Log filename (without date path, e.g., "app.log")
#[arg(long)]
filename: Option<String>,
/// Single log file to process (alternative to date range)
#[arg(long)]
file: Option<PathBuf>,
/// Output SQLite database path
#[arg(long, short)]
output: String,
/// Batch size for database inserts
#[arg(long, default_value = "10000")]
batch_size: usize,
}
fn parse_date(s: &str) -> Result<NaiveDate> {
NaiveDate::parse_from_str(s, "%Y/%m/%d")
.map_err(|e| anyhow!("Invalid date format '{}': {}. Expected YYYY/mm/dd", s, e))
}
fn main() -> Result<()> {
let args = Args::parse();
let mut db = Database::new(&args.output)?;
let registry = ParserRegistry::new();
if let Some(file_path) = &args.file {
// Process single file
eprintln!("Processing single file: {}", file_path.display());
let reader = read_log_file(file_path.to_str().unwrap())?;
process_reader(reader, &registry, &mut db, args.batch_size)?;
} else {
// Process date range
let from = parse_date(
args.from
.as_ref()
.ok_or_else(|| anyhow!("--from is required when not using --file"))?,
)?;
let to = parse_date(
args.to
.as_ref()
.ok_or_else(|| anyhow!("--to is required when not using --file"))?,
)?;
let base_dir = args
.base_dir
.as_ref()
.ok_or_else(|| anyhow!("--base-dir is required when not using --file"))?;
let filename = args
.filename
.as_ref()
.ok_or_else(|| anyhow!("--filename is required when not using --file"))?;
let discovery = LogFileDiscovery::new(base_dir.clone(), filename.clone());
let log_files = discovery.discover(from, to)?;
if log_files.is_empty() {
eprintln!("No log files found in the specified date range");
return Ok(());
}
eprintln!("Found {} log files to process", log_files.len());
for log_file in log_files {
eprintln!(
"Processing: {} ({})",
log_file.path.display(),
if log_file.compressed {
"compressed"
} else {
"plain"
}
);
let reader = log_file.reader()?;
process_reader(reader, &registry, &mut db, args.batch_size)?;
}
}
eprintln!("Done!");
Ok(())
}
fn process_reader(
reader: Box<dyn BufRead>,
registry: &ParserRegistry,
db: &mut Database,
batch_size: usize,
) -> Result<()> {
let mut signature_batch: Vec<SignatureEntry> = Vec::with_capacity(batch_size);
let mut total_lines = 0u64;
let mut parsed_lines = 0u64;
let mut error_lines = 0u64;
for line_result in reader.lines() {
let line = line_result?;
total_lines += 1;
if let Some(parse_result) = registry.parse(&line) {
match parse_result {
Ok(ParsedMessage::Signature(entry)) => {
signature_batch.push(entry);
parsed_lines += 1;
if signature_batch.len() >= batch_size {
flush_signature_batch(db, &mut signature_batch)?;
}
}
Err(e) => {
error_lines += 1;
if error_lines <= 10 {
eprintln!("Parse error: {}", e);
}
}
}
}
if total_lines % 100_000 == 0 {
eprintln!(
"Progress: {} lines read, {} parsed, {} errors",
total_lines, parsed_lines, error_lines
);
}
}
// Flush remaining entries
if !signature_batch.is_empty() {
flush_signature_batch(db, &mut signature_batch)?;
}
eprintln!(
"File complete: {} lines read, {} parsed, {} errors",
total_lines, parsed_lines, error_lines
);
Ok(())
}
fn flush_signature_batch(db: &mut Database, batch: &mut Vec<SignatureEntry>) -> Result<()> {
let tx = db.begin_transaction()?;
Database::insert_signature_batch(&tx, batch)?;
tx.commit()?;
batch.clear();
Ok(())
}

274
src/parser.rs Normal file
View File

@@ -0,0 +1,274 @@
use anyhow::{anyhow, Result};
use chrono::NaiveDateTime;
use regex::Regex;
use std::sync::LazyLock;
/// Represents a parsed signature log entry
#[derive(Debug, Clone)]
pub struct SignatureEntry {
pub session_id: String,
pub timestamp: NaiveDateTime,
pub app: String,
pub version: String,
pub offline_login_usage: i64,
pub is_password_autofill_enabled: bool,
pub camera_roll_usage: i64,
pub os: String,
pub app_name: String,
pub touch_id: bool,
pub is_offline_login_enabled: bool,
pub model: String,
pub device: String,
pub password_autofill_usage: i64,
}
/// Trait for parsing different message types from logs.
/// Implement this trait to add support for new message formats.
pub trait MessageParser: Send + Sync {
/// Returns the message type identifier (e.g., "signature")
fn message_type(&self) -> &'static str;
/// Attempts to parse a log line. Returns None if this parser doesn't handle this message type.
fn parse(&self, line: &str) -> Option<Result<ParsedMessage>>;
}
/// Enum of all possible parsed message types.
/// Extend this when adding new message parsers.
#[derive(Debug, Clone)]
pub enum ParsedMessage {
Signature(SignatureEntry),
}
static SESSION_ID_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"sessionId=([^,\s]+)").unwrap());
static DATETIME_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r#"dt="(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})"#).unwrap());
static SIGNATURE_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r#"msg="signature:([^/]+)/([^/]*)/\s*details:([^"]+)"#).unwrap());
pub struct SignatureParser;
impl MessageParser for SignatureParser {
fn message_type(&self) -> &'static str {
"signature"
}
fn parse(&self, line: &str) -> Option<Result<ParsedMessage>> {
// Check if this line contains a signature message
if !line.contains("msg=\"signature:") {
return None;
}
Some(self.parse_signature_line(line))
}
}
impl SignatureParser {
fn parse_signature_line(&self, line: &str) -> Result<ParsedMessage> {
// Extract session ID
let session_id = SESSION_ID_RE
.captures(line)
.and_then(|c| c.get(1))
.map(|m| m.as_str().to_string())
.ok_or_else(|| anyhow!("Missing sessionId"))?;
// Extract timestamp
let datetime_str = DATETIME_RE
.captures(line)
.and_then(|c| c.get(1))
.map(|m| m.as_str())
.ok_or_else(|| anyhow!("Missing datetime"))?;
let timestamp = NaiveDateTime::parse_from_str(datetime_str, "%Y-%m-%d %H:%M:%S")
.map_err(|e| anyhow!("Invalid datetime format: {}", e))?;
// Extract signature details
let caps = SIGNATURE_RE
.captures(line)
.ok_or_else(|| anyhow!("Invalid signature format"))?;
let app = caps.get(1).map(|m| m.as_str().to_string()).unwrap();
let version = caps.get(2).map(|m| m.as_str().to_string()).unwrap();
let details_str = caps.get(3).map(|m| m.as_str()).unwrap();
// Parse details key-value pairs
// Handle the tricky "device:iOS, Apple" case by parsing carefully
let details = parse_details(details_str)?;
let entry = SignatureEntry {
session_id,
timestamp,
app,
version,
offline_login_usage: parse_number(&details, "offlineLoginUsage")?,
is_password_autofill_enabled: parse_bool(&details, "isPasswordAutofillEnabled")?,
camera_roll_usage: parse_number(&details, "cameraRollUsage")?,
os: get_string(&details, "OS")?,
app_name: get_string(&details, "appName")?,
touch_id: parse_bool(&details, "touchID")?,
is_offline_login_enabled: parse_bool(&details, "isOfflineLoginEnabled")?,
model: get_string(&details, "model")?,
device: get_string(&details, "device")?,
password_autofill_usage: parse_number(&details, "passwordAutofillUsage")?,
};
Ok(ParsedMessage::Signature(entry))
}
}
/// Parse the details string which has format like:
/// offlineLoginUsage:0,isPasswordAutofillEnabled:no,...,device:iOS, Apple,passwordAutofillUsage:0
fn parse_details(details: &str) -> Result<std::collections::HashMap<String, String>> {
let mut map = std::collections::HashMap::new();
// Known keys in order they appear
let known_keys = [
"offlineLoginUsage",
"isPasswordAutofillEnabled",
"cameraRollUsage",
"OS",
"appName",
"touchID",
"isOfflineLoginEnabled",
"model",
"device",
"passwordAutofillUsage",
];
// Find positions of each key
let mut key_positions: Vec<(usize, &str)> = known_keys
.iter()
.filter_map(|&key| {
let pattern = format!("{}:", key);
details.find(&pattern).map(|pos| (pos, key))
})
.collect();
// Sort by position
key_positions.sort_by_key(|&(pos, _)| pos);
// Extract values between keys
for i in 0..key_positions.len() {
let (pos, key) = key_positions[i];
let value_start = pos + key.len() + 1; // +1 for ':'
let value_end = if i + 1 < key_positions.len() {
// Value ends at the comma before the next key
let next_pos = key_positions[i + 1].0;
// Find the comma before the next key
if next_pos > 0 && details.as_bytes().get(next_pos - 1) == Some(&b',') {
next_pos - 1
} else {
next_pos
}
} else {
// Last key - value goes until " user-agent" or end
details
.find(" user-agent")
.unwrap_or(details.len())
};
let value = details[value_start..value_end].trim().to_string();
map.insert(key.to_string(), value);
}
Ok(map)
}
fn parse_number(map: &std::collections::HashMap<String, String>, key: &str) -> Result<i64> {
map.get(key)
.ok_or_else(|| anyhow!("Missing key: {}", key))?
.parse()
.map_err(|e| anyhow!("Invalid number for {}: {}", key, e))
}
fn parse_bool(map: &std::collections::HashMap<String, String>, key: &str) -> Result<bool> {
let value = map
.get(key)
.ok_or_else(|| anyhow!("Missing key: {}", key))?;
match value.to_lowercase().as_str() {
"yes" | "true" | "1" => Ok(true),
"no" | "false" | "0" => Ok(false),
_ => Err(anyhow!("Invalid boolean for {}: {}", key, value)),
}
}
fn get_string(map: &std::collections::HashMap<String, String>, key: &str) -> Result<String> {
map.get(key)
.ok_or_else(|| anyhow!("Missing key: {}", key))
.map(|s| s.to_string())
}
/// Registry of all available message parsers
pub struct ParserRegistry {
parsers: Vec<Box<dyn MessageParser>>,
}
impl ParserRegistry {
pub fn new() -> Self {
let mut registry = Self {
parsers: Vec::new(),
};
// Register default parsers
registry.register(Box::new(SignatureParser));
registry
}
pub fn register(&mut self, parser: Box<dyn MessageParser>) {
self.parsers.push(parser);
}
/// Try to parse a line with all registered parsers
pub fn parse(&self, line: &str) -> Option<Result<ParsedMessage>> {
for parser in &self.parsers {
if let Some(result) = parser.parse(line) {
return Some(result);
}
}
None
}
}
impl Default for ParserRegistry {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_parse_signature_line() {
let line = r#"Jan 21 00:00:06 tom013 m1s-kv dt="2026-01-21 00:00:06,154", ll=INFO, lc=CreateSessionStep, threadId=188, externalUserId=null, clientIp=***, correlationId=***, sessionId=test-session-123, request_id=[***]***.*** t@tid@.pnull_uid_X_2631582, userId=X, msg="signature:XAMARIN_APP/5.23.0/ details:offlineLoginUsage:0,isPasswordAutofillEnabled:no,cameraRollUsage:0,OS:26.2.0,appName:App,touchID:no,isOfflineLoginEnabled:yes,model:iPhone18,1,device:iOS, Apple,passwordAutofillUsage:0 user-agent:mobileApp/5.23.0", ex=""#;
let registry = ParserRegistry::new();
let result = registry.parse(line).unwrap().unwrap();
match result {
ParsedMessage::Signature(entry) => {
assert_eq!(entry.session_id, "test-session-123");
assert_eq!(entry.app, "XAMARIN_APP");
assert_eq!(entry.version, "5.23.0");
assert_eq!(entry.offline_login_usage, 0);
assert!(!entry.is_password_autofill_enabled);
assert_eq!(entry.camera_roll_usage, 0);
assert_eq!(entry.os, "26.2.0");
assert_eq!(entry.app_name, "App");
assert!(!entry.touch_id);
assert!(entry.is_offline_login_enabled);
assert_eq!(entry.model, "iPhone18,1");
assert_eq!(entry.device, "iOS, Apple");
assert_eq!(entry.password_autofill_usage, 0);
}
}
}
#[test]
fn test_parse_non_signature_line() {
let line = r#"Jan 21 00:00:06 tom013 m1s-kv dt="2026-01-21 00:00:06", msg="some other message""#;
let registry = ParserRegistry::new();
assert!(registry.parse(line).is_none());
}
}