From 30cc4fae0192426a070a87a891c664697b0cac1c Mon Sep 17 00:00:00 2001 From: Alexandr Mansurov Date: Wed, 21 Jan 2026 22:34:48 +0100 Subject: [PATCH] Add log ingestion tool for loading signature logs into SQLite - Parse signature messages from log files extracting app info, device details, and feature flags (autofill, touchID, offline login, etc.) - Support both plain .log and gzip compressed .log.gz files - File discovery by date range (YYYY/mm/dd directory structure) - Batch inserts for performance with large files (10GB+ per day) - Index on session_id and version for efficient queries - Extensible parser architecture via MessageParser trait Co-Authored-By: Claude Opus 4.5 --- .github/dependabot.yaml | 14 + .github/workflows/audit.yaml | 31 + .github/workflows/benchmark.yaml | 39 ++ .github/workflows/ci.yaml | 97 +++ .github/workflows/dependabot-auto-merge.yaml | 34 + .github/workflows/release.yaml | 134 ++++ .gitignore | 3 + CLAUDE.md | 71 ++ Cargo.lock | 658 +++++++++++++++++++ Cargo.toml | 13 + LICENSE | 21 + README.md | 120 ++++ rust-toolchain.toml | 2 + src/db.rs | 83 +++ src/files.rs | 100 +++ src/main.rs | 174 +++++ src/parser.rs | 274 ++++++++ 17 files changed, 1868 insertions(+) create mode 100644 .github/dependabot.yaml create mode 100644 .github/workflows/audit.yaml create mode 100644 .github/workflows/benchmark.yaml create mode 100644 .github/workflows/ci.yaml create mode 100644 .github/workflows/dependabot-auto-merge.yaml create mode 100644 .github/workflows/release.yaml create mode 100644 .gitignore create mode 100644 CLAUDE.md create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 LICENSE create mode 100644 README.md create mode 100644 rust-toolchain.toml create mode 100644 src/db.rs create mode 100644 src/files.rs create mode 100644 src/main.rs create mode 100644 src/parser.rs diff --git a/.github/dependabot.yaml b/.github/dependabot.yaml new file mode 100644 index 0000000..903f8af --- /dev/null +++ 
b/.github/dependabot.yaml @@ -0,0 +1,14 @@ +version: 2 +updates: + - package-ecosystem: github-actions + directory: / + schedule: + interval: weekly + time: "07:00" + timezone: "Asia/Tokyo" + - package-ecosystem: cargo + directory: / + schedule: + interval: weekly + time: "07:00" + timezone: "Asia/Tokyo" diff --git a/.github/workflows/audit.yaml b/.github/workflows/audit.yaml new file mode 100644 index 0000000..58597e6 --- /dev/null +++ b/.github/workflows/audit.yaml @@ -0,0 +1,31 @@ +name: Security audit + +on: + schedule: + - cron: "0 0 */3 * *" + + push: + branches: [main] + paths: + - "**/Cargo.toml" + - "**/Cargo.lock" + + pull_request: + paths: + - "**/Cargo.toml" + - "**/Cargo.lock" + +jobs: + audit: + name: Audit + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + + - name: Install cargo-audit + uses: taiki-e/install-action@30eab0fabba9ea3f522099957e668b21876aa39e # v2.66.6 + with: + tool: cargo-audit + + - name: Run audit + run: cargo audit diff --git a/.github/workflows/benchmark.yaml b/.github/workflows/benchmark.yaml new file mode 100644 index 0000000..a794c2c --- /dev/null +++ b/.github/workflows/benchmark.yaml @@ -0,0 +1,39 @@ +name: Benchmark +on: + push: + branches: + - main + paths: + - '**/*.rs' + - '**/Cargo.toml' + - '**/Cargo.lock' + - '.github/workflows/benchmark.yaml' + +permissions: + contents: write + deployments: write + +jobs: + benchmark: + name: Run Rust benchmark example + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + - name: Toolchain setup + run: rustup toolchain update nightly && rustup default nightly + - name: Run benchmark + run: cargo +nightly bench | tee output.txt + + - name: Store benchmark result + uses: benchmark-action/github-action-benchmark@4bdcce38c94cec68da58d012ac24b7b1155efe8b # v1.20.7 + with: + name: Rust Benchmark + tool: 'cargo' + output-file-path: output.txt + github-token: ${{ 
secrets.GITHUB_TOKEN }} + auto-push: true + # Show alert with commit comment on detecting possible performance regression + alert-threshold: '200%' + comment-on-alert: true + fail-on-alert: true + benchmark-data-dir-path: docs diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000..019aa36 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,97 @@ +name: Rust CI + +on: + push: + branches: [main] + paths: + - '**/*.rs' + - '**/Cargo.toml' + - '**/Cargo.lock' + - '.github/workflows/ci.yaml' + + pull_request: + paths: + - '**/*.rs' + - '**/Cargo.toml' + - '**/Cargo.lock' + - '.github/workflows/ci.yaml' + +env: + CARGO_TERM_COLOR: always + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + check: + name: Check + strategy: + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + fetch-depth: 0 + + - uses: actions-rust-lang/setup-rust-toolchain@1780873c7b576612439a134613cc4cc74ce5538c # v1.15.2 + with: + components: rustfmt, clippy + cache-shared-key: setup-rust-${{ runner.os }}-${{ runner.arch }}-${{ hashFiles('**/Cargo.lock') }} + + - name: Install reviewdog + uses: reviewdog/action-setup@d8a7baabd7f3e8544ee4dbde3ee41d0011c3a93f # v1.5.0 + + - name: Check format + run: | + cargo fmt --all -- --check + + - uses: giraffate/clippy-action@13b9d32482f25d29ead141b79e7e04e7900281e0 # v1.0.1 + with: + reporter: 'github-pr-review' + github_token: ${{ secrets.GITHUB_TOKEN }} + fail_on_error: true + filter_mode: nofilter + + - name: Build + run: cargo build + + test: + name: Test + strategy: + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + runs-on: ${{ matrix.os }} + permissions: + contents: write + pull-requests: write + steps: + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + fetch-depth: 0 + + - 
uses: actions-rust-lang/setup-rust-toolchain@1780873c7b576612439a134613cc4cc74ce5538c # v1.15.2 + with: + components: llvm-tools-preview + cache-shared-key: setup-rust-${{ runner.os }}-${{ runner.arch }}-${{ hashFiles('**/Cargo.lock') }} + + - name: Install tools + uses: taiki-e/install-action@30eab0fabba9ea3f522099957e668b21876aa39e # v2.66.6 + with: + tool: cargo-llvm-cov, cargo-nextest + + - name: Run test + if: runner.os != 'Linux' + run: | + cargo nextest run + + - name: Generate coverage + if: runner.os == 'Linux' + run: cargo llvm-cov nextest --lcov --output-path lcov.info + + - name: Upload coverage + if: runner.os == 'Linux' + uses: k1LoW/octocov-action@73d561f65d59e66899ed5c87e4621a913b5d5c20 # v1.5.0 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/dependabot-auto-merge.yaml b/.github/workflows/dependabot-auto-merge.yaml new file mode 100644 index 0000000..1bd95e6 --- /dev/null +++ b/.github/workflows/dependabot-auto-merge.yaml @@ -0,0 +1,34 @@ +name: Dependabot Auto-merge + +on: + pull_request: + types: + - opened + - synchronize + - reopened + +permissions: + contents: write + pull-requests: write + +jobs: + dependabot-automation: + runs-on: ubuntu-latest + if: ${{ github.actor == 'dependabot[bot]' }} + timeout-minutes: 13 + steps: + - name: Dependabot metadata + id: metadata + uses: dependabot/fetch-metadata@21025c705c08248db411dc16f3619e6b5f9ea21a # v2.5.0 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + - name: Approve & enable auto-merge for Dependabot PR + if: | + steps.metadata.outputs.update-type == 'version-update:semver-patch' || + steps.metadata.outputs.update-type == 'version-update:semver-minor' + run: | + gh pr merge --auto -s "$PR_URL" + env: + PR_URL: ${{ github.event.pull_request.html_url }} + PR_TITLE: ${{ github.event.pull_request.title }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml new file mode 100644 index 
0000000..d290d21 --- /dev/null +++ b/.github/workflows/release.yaml @@ -0,0 +1,134 @@ +name: Release + +on: + push: + tags: + - 'v*' + workflow_dispatch: + +permissions: + contents: write + +jobs: + build: + name: Build - ${{ matrix.target }} + strategy: + matrix: + include: + - target: x86_64-unknown-linux-gnu + runner: ubuntu-latest + os: Linux + arch: x86_64 + ext: tar.gz + - target: aarch64-unknown-linux-gnu + runner: ubuntu-24.04-arm + os: Linux + arch: arm64 + ext: tar.gz + - target: x86_64-apple-darwin + runner: macos-15-intel + os: Darwin + arch: x86_64 + ext: tar.gz + - target: aarch64-apple-darwin + runner: macos-latest + os: Darwin + arch: arm64 + ext: tar.gz + - target: x86_64-pc-windows-msvc + runner: windows-latest + os: Windows + arch: x86_64 + ext: zip + runs-on: ${{ matrix.runner }} + steps: + - name: Checkout + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + + - name: Setup sccache + uses: mozilla-actions/sccache-action@7d986dd989559c6ecdb630a3fd2557667be217ad # v0.0.9 + + - name: Setup environment variables for sccache + shell: bash + run: | + echo "SCCACHE_GHA_ENABLED=true" >> "$GITHUB_ENV" + echo "RUSTC_WRAPPER=sccache" >> "$GITHUB_ENV" + + - name: Setup Rust + uses: actions-rust-lang/setup-rust-toolchain@1780873c7b576612439a134613cc4cc74ce5538c # v1.15.2 + with: + rustflags: "" + + - name: Get project name + id: project + shell: bash + run: | + name=$(cargo metadata --format-version 1 --no-deps | jq -r '.packages[0].name') + echo "name=$name" >> "$GITHUB_OUTPUT" + + - name: Build + run: cargo build --release + + - name: Create archive (Unix) + if: matrix.os != 'Windows' + shell: bash + run: | + name="${{ steps.project.outputs.name }}" + archive_name="${name}_${{ matrix.os }}_${{ matrix.arch }}.tar.gz" + tar -czvf "$archive_name" -C target/release "$name" + echo "archive_name=$archive_name" >> "$GITHUB_ENV" + + - name: Create archive (Windows) + if: matrix.os == 'Windows' + shell: pwsh + run: | + $name = "${{ 
steps.project.outputs.name }}" + $archiveName = "${name}_${{ matrix.os }}_${{ matrix.arch }}.zip" + Compress-Archive -Path "target/release/${name}.exe" -DestinationPath $archiveName + echo "archive_name=$archiveName" >> $env:GITHUB_ENV + + - name: Upload artifact + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + with: + name: ${{ steps.project.outputs.name }}_${{ matrix.os }}_${{ matrix.arch }} + path: ${{ env.archive_name }} + if-no-files-found: error + + release: + name: Release + needs: build + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + fetch-depth: 0 + + - name: Download all artifacts + uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 + with: + path: artifacts + merge-multiple: true + + - name: Generate changelog + id: changelog + run: | + # Get the previous tag + prev_tag=$(git describe --tags --abbrev=0 HEAD^ 2>/dev/null || echo "") + + if [ -n "$prev_tag" ]; then + echo "## Changes since $prev_tag" > changelog.md + echo "" >> changelog.md + git log --pretty=format:"- %s" "$prev_tag"..HEAD >> changelog.md + else + echo "## Initial Release" > changelog.md + fi + + - name: Create release + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + gh release create "${{ github.ref_name }}" \ + --title "${{ github.ref_name }}" \ + --notes-file changelog.md \ + artifacts/* diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..63ab76c --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/target +.DS_Store +*.db diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..6aa9d63 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,71 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. 
+ +## Commands + +### Build & Run +```bash +# Build +cargo build + +# Release build +cargo build --release + +# Run (example) +cargo run -- --name "World" +``` + +### Testing +```bash +# Run all tests +cargo test + +# Fast test execution with cargo-nextest (recommended) +cargo nextest run + +# Run a single test +cargo test test_name + +# Generate coverage (requires cargo-llvm-cov) +cargo llvm-cov nextest --lcov --output-path lcov.info +``` + +### Quality Checks +```bash +# Format check +cargo fmt -- --check + +# Apply formatting +cargo fmt + +# Static analysis with Clippy +cargo clippy +``` + +### Benchmarks +```bash +# Requires nightly toolchain +cargo +nightly bench +``` + +## Architecture + +### Project Structure +- **cargo-generate template**: This repository was generated from the `skanehira/rust-cli-template` cargo-generate template +- **CLI parser**: Uses clap v4 derive macros for command-line argument processing +- **Benchmarks**: Located in `benches/` directory, uses nightly compiler's test crate + +### CI/CD Configuration +- **ci.yaml**: Main CI workflow + - Runs formatting, Clippy, build, and tests + - Generates coverage on Linux with octocov reporting + - Automatic PR feedback via reviewdog +- **benchmark.yaml**: Auto-deploys benchmark results to GitHub Pages +- **audit.yaml**: Security audit for dependencies +- **release.yaml**: Automated release on tag push (cross-platform builds via a GitHub Actions build matrix, published with `gh release create`) + +### Key Settings +- **Rust version**: Fixed to 1.87 in `rust-toolchain.toml` +- **Edition**: Uses Rust 2024 edition +- **Test tools**: cargo-nextest and cargo-llvm-cov recommended \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..33db092 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,658 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anstream" +version = "0.6.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" + +[[package]] +name = "anstyle-parse" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e" +dependencies = [ + "anstyle", + "once_cell", + "windows-sys", +] + 
+[[package]] +name = "anyhow" +version = "1.0.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "bitflags" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" + +[[package]] +name = "bumpalo" +version = "3.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" + +[[package]] +name = "cc" +version = "1.2.53" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "755d2fce177175ffca841e9a06afdb2c4ab0f593d53b4dee48147dfaade85932" +dependencies = [ + "find-msvc-tools", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "chrono" +version = "0.4.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-traits", + "wasm-bindgen", + "windows-link", +] + +[[package]] +name = "clap" +version = "4.5.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6e6ff9dcd79cff5cd969a17a545d79e84ab086e444102a591e288a8aa3ce394" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa42cf4d2b7a41bc8f663a7cab4031ebafa1bf3875705bfaf8466dc60ab52c00" 
+dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.49" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" + +[[package]] +name = "colorchoice" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "fallible-iterator" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" + +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + +[[package]] +name = "find-msvc-tools" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8591b0bcc8a98a64310a2fae1bb3e9b8564dd10e381e6e28010fde8e8e8568db" + +[[package]] +name = "flate2" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b375d6465b98090a5f25b1c7703f3859783755aa9a80433b36e0379a3ec2f369" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] + +[[package]] +name = "hashlink" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1" +dependencies = [ + "hashbrown", +] + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "iana-time-zone" +version = "0.1.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + +[[package]] +name = "js-sys" +version = "0.3.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3" 
+dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "libc" +version = "0.2.180" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" + +[[package]] +name = "libsqlite3-sys" +version = "0.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "947e6816f7825b2b45027c2c32e7085da9934defa535de4a6a46b10a4d5257fa" +dependencies = [ + "cc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "log_ingest" +version = "0.0.1" +dependencies = [ + "anyhow", + "chrono", + "clap", + "flate2", + "regex", + "rusqlite", +] + +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.20.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e" + +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + +[[package]] +name = "proc-macro2" +version = "1.0.93" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + +[[package]] +name = "rusqlite" +version = "0.35.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a22715a5d6deef63c637207afbe68d0c72c3f8d0022d7cf9714c442d6157606b" +dependencies = [ + "bitflags", + "fallible-iterator", + "fallible-streaming-iterator", + "hashlink", + "libsqlite3-sys", + "smallvec", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "simd-adler32" +version = "0.3.8" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00e2473a93778eb0bad35909dff6a10d28e63f792f16ed15e404fca9d5eeedbe" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "wasm-bindgen" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] 
+name = "wasm-bindgen-macro-support" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..760f8f8 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "log_ingest" +version = "0.0.1" +authors = ["Alexandr Mansurov"] +edition = "2024" + +[dependencies] +clap = { version = "4.5.42", features = ["derive"] } +rusqlite = { version = "0.35", features = ["bundled"] } +chrono = "0.4" +regex = "1" +flate2 = "1" +anyhow = "1" diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..468c4a1 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2025 skanehira + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..013aa3c --- /dev/null +++ b/README.md @@ -0,0 +1,120 @@ +# Log ingest + +This repository was created with `cargo generate --git https://github.com/skanehira/rust-cli-template` (install the tool first with `cargo install cargo-generate`). + +## Overview + +This repository serves as a template for quickly bootstrapping Rust command-line +interface (CLI) applications using `cargo-generate`. It provides a minimal yet +comprehensive foundation with the following features: + +- CLI argument parsing using [clap](https://github.com/clap-rs/clap) with derive + macros +- GitHub Actions workflow for CI/CD + - Code coverage reporting with [octocov](https://github.com/k1LoW/octocov) + - Automatic benchmark result visualization and deployment with + [github-action-benchmark](https://github.com/benchmark-action/github-action-benchmark) + - Security audit checks for dependencies + - Automated release workflow for publishing + - Automated dependency updates with Dependabot + +## Project Structure + +Generated projects will have the following structure: + +``` +. 
+├── .github/ # GitHub Actions workflows +│ ├── workflows/ # CI/CD workflows for testing, benchmarking, and releasing +│ │ ├── ci.yaml # Main CI workflow (tests, linting, coverage) +│ │ ├── audit.yaml # Security audit workflow +│ │ └── release.yaml # Release automation workflow +│ └── dependabot.yaml # Automated dependency update configuration file +├── benches/ # Benchmark code (requires nightly Rust) +├── src/ # Source code +├── .gitignore # Git ignore file +├── .octocov.yml # Code coverage configuration +├── goreleasser.yaml # GoReleaser configuration file for cross-platform builds and distribution +├── Cargo.toml # Project manifest +├── Cargo.lock # Dependency lock file +└── rust-toolchain.toml # Rust toolchain configuration +``` + +## Benchmark visualization + +The benchmark results are automatically deployed to GitHub Pages for easy +visualization and performance tracking. You need to create a `gh-pages` branch +in your repository before the first push. + +image + +image + +## Coverage + +This project uses [octocov](https://github.com/k1LoW/octocov) to measure code +coverage. During CI execution, coverage reports are automatically generated and +displayed as comments on PRs or commits. The coverage history is also tracked, +allowing you to see changes over time. + +The coverage reports are deployed to GitHub Pages for easy visualization. +Coverage information can also be displayed in the README as a badge. + +image + +The detailed configuration for octocov is managed in the `.octocov.yml` file. + +## Usage + +### Prerequisites + +- [cargo-generate](https://github.com/cargo-generate/cargo-generate) +- [gh](https://github.com/cli/cli) + +### Creating a New Project + +Create a new project using this template: + +```bash +cargo generate --git https://github.com/skanehira/rust-cli-template.git +``` + +Follow the prompts to customize your project. 
+ +### Running Tests + +```bash +cargo test +``` + +### Running Benchmarks + +Benchmarks require the nightly Rust channel: + +```bash +cargo +nightly bench +``` + +### Release Process + +This template includes an automated release workflow. Follow these steps to +create a release: + +1. Push a tag with your changes: + ```bash + git tag v0.1.0 # Replace with the appropriate version number + git push origin v0.1.0 + ``` + +2. When the tag is pushed, the GitHub Actions `release.yml` workflow will + automatically execute. This workflow: + - Builds cross-platform binaries (Linux, macOS, Windows) + - Creates a GitHub Release + - Uploads binaries and changelog + +The release configuration is managed in the `.github/workflows/release.yml` and +`goreleasser.yaml` files. + +--- + +Feel free to customize this template to fit your specific needs! diff --git a/rust-toolchain.toml b/rust-toolchain.toml new file mode 100644 index 0000000..73328e0 --- /dev/null +++ b/rust-toolchain.toml @@ -0,0 +1,2 @@ +[toolchain] +channel = "1.90" diff --git a/src/db.rs b/src/db.rs new file mode 100644 index 0000000..09d6790 --- /dev/null +++ b/src/db.rs @@ -0,0 +1,83 @@ +use anyhow::Result; +use rusqlite::{params, Connection, Transaction}; + +use crate::parser::SignatureEntry; + +pub struct Database { + conn: Connection, +} + +impl Database { + pub fn new(path: &str) -> Result { + let conn = Connection::open(path)?; + let db = Self { conn }; + db.init_schema()?; + Ok(db) + } + + fn init_schema(&self) -> Result<()> { + self.conn.execute_batch( + r#" + CREATE TABLE IF NOT EXISTS signature_entries ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + session_id TEXT NOT NULL, + timestamp TEXT NOT NULL, + app TEXT NOT NULL, + version TEXT NOT NULL, + offline_login_usage INTEGER NOT NULL, + is_password_autofill_enabled INTEGER NOT NULL, + camera_roll_usage INTEGER NOT NULL, + os TEXT NOT NULL, + app_name TEXT NOT NULL, + touch_id INTEGER NOT NULL, + is_offline_login_enabled INTEGER NOT NULL, + model TEXT NOT 
NULL, + device TEXT NOT NULL, + password_autofill_usage INTEGER NOT NULL + ); + + CREATE INDEX IF NOT EXISTS idx_session_id ON signature_entries(session_id); + CREATE INDEX IF NOT EXISTS idx_version ON signature_entries(version); + "#, + )?; + Ok(()) + } + + pub fn begin_transaction(&mut self) -> Result> { + Ok(self.conn.transaction()?) + } + + pub fn insert_signature_batch(tx: &Transaction<'_>, entries: &[SignatureEntry]) -> Result<()> { + let mut stmt = tx.prepare_cached( + r#" + INSERT INTO signature_entries ( + session_id, timestamp, app, version, + offline_login_usage, is_password_autofill_enabled, camera_roll_usage, + os, app_name, touch_id, is_offline_login_enabled, + model, device, password_autofill_usage + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + "#, + )?; + + for entry in entries { + stmt.execute(params![ + entry.session_id, + entry.timestamp.format("%Y-%m-%d %H:%M:%S").to_string(), + entry.app, + entry.version, + entry.offline_login_usage, + entry.is_password_autofill_enabled as i32, + entry.camera_roll_usage, + entry.os, + entry.app_name, + entry.touch_id as i32, + entry.is_offline_login_enabled as i32, + entry.model, + entry.device, + entry.password_autofill_usage, + ])?; + } + + Ok(()) + } +} diff --git a/src/files.rs b/src/files.rs new file mode 100644 index 0000000..f8557a2 --- /dev/null +++ b/src/files.rs @@ -0,0 +1,100 @@ +use anyhow::{anyhow, Result}; +use chrono::NaiveDate; +use flate2::read::GzDecoder; +use std::fs::File; +use std::io::{BufRead, BufReader}; +use std::path::PathBuf; + +/// Discovers log files for a given date range +pub struct LogFileDiscovery { + base_dir: PathBuf, + filename: String, +} + +impl LogFileDiscovery { + pub fn new(base_dir: PathBuf, filename: String) -> Self { + Self { base_dir, filename } + } + + /// Returns an iterator over all log files in the date range + pub fn discover(&self, from: NaiveDate, to: NaiveDate) -> Result> { + let mut files = Vec::new(); + + let mut current = from; + while current <= 
to { + if let Some(log_file) = self.find_log_for_date(current)? { + files.push(log_file); + } + current = current + .succ_opt() + .ok_or_else(|| anyhow!("Date overflow"))?; + } + + Ok(files) + } + + fn find_log_for_date(&self, date: NaiveDate) -> Result> { + // Build path: /yyyy/mm/dd/.gz or + let date_path = self + .base_dir + .join(date.format("%Y").to_string()) + .join(date.format("%m").to_string()) + .join(date.format("%d").to_string()); + + // Try gzipped first + let gz_path = date_path.join(format!("{}.gz", self.filename)); + if gz_path.exists() { + return Ok(Some(LogFile { + path: gz_path, + compressed: true, + date, + })); + } + + // Try uncompressed + let plain_path = date_path.join(&self.filename); + if plain_path.exists() { + return Ok(Some(LogFile { + path: plain_path, + compressed: false, + date, + })); + } + + // No file found for this date + Ok(None) + } +} + +#[derive(Debug)] +pub struct LogFile { + pub path: PathBuf, + pub compressed: bool, + pub date: NaiveDate, +} + +impl LogFile { + /// Returns a buffered reader for this log file, handling compression transparently + pub fn reader(&self) -> Result> { + let file = File::open(&self.path)?; + + if self.compressed { + let decoder = GzDecoder::new(file); + Ok(Box::new(BufReader::new(decoder))) + } else { + Ok(Box::new(BufReader::new(file))) + } + } +} + +/// For reading a single file directly (e.g., for testing) +pub fn read_log_file(path: &str) -> Result> { + let file = File::open(path)?; + + if path.ends_with(".gz") { + let decoder = GzDecoder::new(file); + Ok(Box::new(BufReader::new(decoder))) + } else { + Ok(Box::new(BufReader::new(file))) + } +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..cb5b2b9 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,174 @@ +use anyhow::{anyhow, Result}; +use chrono::NaiveDate; +use clap::Parser; +use std::io::BufRead; +use std::path::PathBuf; + +mod db; +mod files; +mod parser; + +use db::Database; +use files::{read_log_file, 
LogFileDiscovery}; +use parser::{ParsedMessage, ParserRegistry, SignatureEntry}; + +#[derive(Parser, Debug)] +#[command(author, version, about = "Load log files into SQLite database")] +struct Args { + /// Start date (YYYY/mm/dd) + #[arg(long)] + from: Option, + + /// End date (YYYY/mm/dd) + #[arg(long)] + to: Option, + + /// Base directory containing log files + #[arg(long)] + base_dir: Option, + + /// Log filename (without date path, e.g., "app.log") + #[arg(long)] + filename: Option, + + /// Single log file to process (alternative to date range) + #[arg(long)] + file: Option, + + /// Output SQLite database path + #[arg(long, short)] + output: String, + + /// Batch size for database inserts + #[arg(long, default_value = "10000")] + batch_size: usize, +} + +fn parse_date(s: &str) -> Result { + NaiveDate::parse_from_str(s, "%Y/%m/%d") + .map_err(|e| anyhow!("Invalid date format '{}': {}. Expected YYYY/mm/dd", s, e)) +} + +fn main() -> Result<()> { + let args = Args::parse(); + + let mut db = Database::new(&args.output)?; + let registry = ParserRegistry::new(); + + if let Some(file_path) = &args.file { + // Process single file + eprintln!("Processing single file: {}", file_path.display()); + let reader = read_log_file(file_path.to_str().unwrap())?; + process_reader(reader, ®istry, &mut db, args.batch_size)?; + } else { + // Process date range + let from = parse_date( + args.from + .as_ref() + .ok_or_else(|| anyhow!("--from is required when not using --file"))?, + )?; + let to = parse_date( + args.to + .as_ref() + .ok_or_else(|| anyhow!("--to is required when not using --file"))?, + )?; + let base_dir = args + .base_dir + .as_ref() + .ok_or_else(|| anyhow!("--base-dir is required when not using --file"))?; + let filename = args + .filename + .as_ref() + .ok_or_else(|| anyhow!("--filename is required when not using --file"))?; + + let discovery = LogFileDiscovery::new(base_dir.clone(), filename.clone()); + let log_files = discovery.discover(from, to)?; + + if 
log_files.is_empty() { + eprintln!("No log files found in the specified date range"); + return Ok(()); + } + + eprintln!("Found {} log files to process", log_files.len()); + + for log_file in log_files { + eprintln!( + "Processing: {} ({})", + log_file.path.display(), + if log_file.compressed { + "compressed" + } else { + "plain" + } + ); + let reader = log_file.reader()?; + process_reader(reader, ®istry, &mut db, args.batch_size)?; + } + } + + eprintln!("Done!"); + Ok(()) +} + +fn process_reader( + reader: Box, + registry: &ParserRegistry, + db: &mut Database, + batch_size: usize, +) -> Result<()> { + let mut signature_batch: Vec = Vec::with_capacity(batch_size); + let mut total_lines = 0u64; + let mut parsed_lines = 0u64; + let mut error_lines = 0u64; + + for line_result in reader.lines() { + let line = line_result?; + total_lines += 1; + + if let Some(parse_result) = registry.parse(&line) { + match parse_result { + Ok(ParsedMessage::Signature(entry)) => { + signature_batch.push(entry); + parsed_lines += 1; + + if signature_batch.len() >= batch_size { + flush_signature_batch(db, &mut signature_batch)?; + } + } + Err(e) => { + error_lines += 1; + if error_lines <= 10 { + eprintln!("Parse error: {}", e); + } + } + } + } + + if total_lines % 100_000 == 0 { + eprintln!( + "Progress: {} lines read, {} parsed, {} errors", + total_lines, parsed_lines, error_lines + ); + } + } + + // Flush remaining entries + if !signature_batch.is_empty() { + flush_signature_batch(db, &mut signature_batch)?; + } + + eprintln!( + "File complete: {} lines read, {} parsed, {} errors", + total_lines, parsed_lines, error_lines + ); + + Ok(()) +} + +fn flush_signature_batch(db: &mut Database, batch: &mut Vec) -> Result<()> { + let tx = db.begin_transaction()?; + Database::insert_signature_batch(&tx, batch)?; + tx.commit()?; + batch.clear(); + Ok(()) +} diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000..5fdb0f8 --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,274 @@ 
+use anyhow::{anyhow, Result};
+use chrono::NaiveDateTime;
+use regex::Regex;
+use std::sync::LazyLock;
+
+/// Represents a parsed signature log entry
+///
+/// The first four fields come from the log line itself; the remaining ones
+/// are the key/value pairs of the `details:` section of the message.
+#[derive(Debug, Clone)]
+pub struct SignatureEntry {
+    /// Value of the `sessionId=` field.
+    pub session_id: String,
+    /// Timestamp from the `dt="..."` field, at second precision.
+    pub timestamp: NaiveDateTime,
+    /// First `/`-separated component after `signature:` in the message.
+    pub app: String,
+    /// Second `/`-separated component after `signature:` in the message.
+    pub version: String,
+    /// `offlineLoginUsage` detail.
+    pub offline_login_usage: i64,
+    /// `isPasswordAutofillEnabled` detail.
+    pub is_password_autofill_enabled: bool,
+    /// `cameraRollUsage` detail.
+    pub camera_roll_usage: i64,
+    /// `OS` detail.
+    pub os: String,
+    /// `appName` detail.
+    pub app_name: String,
+    /// `touchID` detail.
+    pub touch_id: bool,
+    /// `isOfflineLoginEnabled` detail.
+    pub is_offline_login_enabled: bool,
+    /// `model` detail; the value may itself contain a comma.
+    pub model: String,
+    /// `device` detail; the value may itself contain a comma.
+    pub device: String,
+    /// `passwordAutofillUsage` detail.
+    pub password_autofill_usage: i64,
+}
+
+/// Trait for parsing different message types from logs.
+/// Implement this trait to add support for new message formats.
+pub trait MessageParser: Send + Sync {
+    /// Returns the message type identifier (e.g., "signature")
+    fn message_type(&self) -> &'static str;
+
+    /// Attempts to parse a log line.
+    ///
+    /// Returns `None` if this parser doesn't handle this message type,
+    /// `Some(Err(_))` if the line is of this parser's type but malformed, and
+    /// `Some(Ok(_))` with the parsed message otherwise.
+    fn parse(&self, line: &str) -> Option<Result<ParsedMessage>>;
+}
+
+/// Enum of all possible parsed message types.
+/// Extend this when adding new message parsers.
+#[derive(Debug, Clone)] +pub enum ParsedMessage { + Signature(SignatureEntry), +} + +static SESSION_ID_RE: LazyLock = + LazyLock::new(|| Regex::new(r"sessionId=([^,\s]+)").unwrap()); +static DATETIME_RE: LazyLock = + LazyLock::new(|| Regex::new(r#"dt="(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})"#).unwrap()); +static SIGNATURE_RE: LazyLock = + LazyLock::new(|| Regex::new(r#"msg="signature:([^/]+)/([^/]*)/\s*details:([^"]+)"#).unwrap()); + +pub struct SignatureParser; + +impl MessageParser for SignatureParser { + fn message_type(&self) -> &'static str { + "signature" + } + + fn parse(&self, line: &str) -> Option> { + // Check if this line contains a signature message + if !line.contains("msg=\"signature:") { + return None; + } + + Some(self.parse_signature_line(line)) + } +} + +impl SignatureParser { + fn parse_signature_line(&self, line: &str) -> Result { + // Extract session ID + let session_id = SESSION_ID_RE + .captures(line) + .and_then(|c| c.get(1)) + .map(|m| m.as_str().to_string()) + .ok_or_else(|| anyhow!("Missing sessionId"))?; + + // Extract timestamp + let datetime_str = DATETIME_RE + .captures(line) + .and_then(|c| c.get(1)) + .map(|m| m.as_str()) + .ok_or_else(|| anyhow!("Missing datetime"))?; + + let timestamp = NaiveDateTime::parse_from_str(datetime_str, "%Y-%m-%d %H:%M:%S") + .map_err(|e| anyhow!("Invalid datetime format: {}", e))?; + + // Extract signature details + let caps = SIGNATURE_RE + .captures(line) + .ok_or_else(|| anyhow!("Invalid signature format"))?; + + let app = caps.get(1).map(|m| m.as_str().to_string()).unwrap(); + let version = caps.get(2).map(|m| m.as_str().to_string()).unwrap(); + let details_str = caps.get(3).map(|m| m.as_str()).unwrap(); + + // Parse details key-value pairs + // Handle the tricky "device:iOS, Apple" case by parsing carefully + let details = parse_details(details_str)?; + + let entry = SignatureEntry { + session_id, + timestamp, + app, + version, + offline_login_usage: parse_number(&details, "offlineLoginUsage")?, + 
is_password_autofill_enabled: parse_bool(&details, "isPasswordAutofillEnabled")?, + camera_roll_usage: parse_number(&details, "cameraRollUsage")?, + os: get_string(&details, "OS")?, + app_name: get_string(&details, "appName")?, + touch_id: parse_bool(&details, "touchID")?, + is_offline_login_enabled: parse_bool(&details, "isOfflineLoginEnabled")?, + model: get_string(&details, "model")?, + device: get_string(&details, "device")?, + password_autofill_usage: parse_number(&details, "passwordAutofillUsage")?, + }; + + Ok(ParsedMessage::Signature(entry)) + } +} + +/// Parse the details string which has format like: +/// offlineLoginUsage:0,isPasswordAutofillEnabled:no,...,device:iOS, Apple,passwordAutofillUsage:0 +fn parse_details(details: &str) -> Result> { + let mut map = std::collections::HashMap::new(); + + // Known keys in order they appear + let known_keys = [ + "offlineLoginUsage", + "isPasswordAutofillEnabled", + "cameraRollUsage", + "OS", + "appName", + "touchID", + "isOfflineLoginEnabled", + "model", + "device", + "passwordAutofillUsage", + ]; + + // Find positions of each key + let mut key_positions: Vec<(usize, &str)> = known_keys + .iter() + .filter_map(|&key| { + let pattern = format!("{}:", key); + details.find(&pattern).map(|pos| (pos, key)) + }) + .collect(); + + // Sort by position + key_positions.sort_by_key(|&(pos, _)| pos); + + // Extract values between keys + for i in 0..key_positions.len() { + let (pos, key) = key_positions[i]; + let value_start = pos + key.len() + 1; // +1 for ':' + + let value_end = if i + 1 < key_positions.len() { + // Value ends at the comma before the next key + let next_pos = key_positions[i + 1].0; + // Find the comma before the next key + if next_pos > 0 && details.as_bytes().get(next_pos - 1) == Some(&b',') { + next_pos - 1 + } else { + next_pos + } + } else { + // Last key - value goes until " user-agent" or end + details + .find(" user-agent") + .unwrap_or(details.len()) + }; + + let value = 
details[value_start..value_end].trim().to_string(); + map.insert(key.to_string(), value); + } + + Ok(map) +} + +fn parse_number(map: &std::collections::HashMap, key: &str) -> Result { + map.get(key) + .ok_or_else(|| anyhow!("Missing key: {}", key))? + .parse() + .map_err(|e| anyhow!("Invalid number for {}: {}", key, e)) +} + +fn parse_bool(map: &std::collections::HashMap, key: &str) -> Result { + let value = map + .get(key) + .ok_or_else(|| anyhow!("Missing key: {}", key))?; + match value.to_lowercase().as_str() { + "yes" | "true" | "1" => Ok(true), + "no" | "false" | "0" => Ok(false), + _ => Err(anyhow!("Invalid boolean for {}: {}", key, value)), + } +} + +fn get_string(map: &std::collections::HashMap, key: &str) -> Result { + map.get(key) + .ok_or_else(|| anyhow!("Missing key: {}", key)) + .map(|s| s.to_string()) +} + +/// Registry of all available message parsers +pub struct ParserRegistry { + parsers: Vec>, +} + +impl ParserRegistry { + pub fn new() -> Self { + let mut registry = Self { + parsers: Vec::new(), + }; + // Register default parsers + registry.register(Box::new(SignatureParser)); + registry + } + + pub fn register(&mut self, parser: Box) { + self.parsers.push(parser); + } + + /// Try to parse a line with all registered parsers + pub fn parse(&self, line: &str) -> Option> { + for parser in &self.parsers { + if let Some(result) = parser.parse(line) { + return Some(result); + } + } + None + } +} + +impl Default for ParserRegistry { + fn default() -> Self { + Self::new() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_signature_line() { + let line = r#"Jan 21 00:00:06 tom013 m1s-kv dt="2026-01-21 00:00:06,154", ll=INFO, lc=CreateSessionStep, threadId=188, externalUserId=null, clientIp=***, correlationId=***, sessionId=test-session-123, request_id=[***]***.*** t@tid@.pnull_uid_X_2631582, userId=X, msg="signature:XAMARIN_APP/5.23.0/ 
details:offlineLoginUsage:0,isPasswordAutofillEnabled:no,cameraRollUsage:0,OS:26.2.0,appName:App,touchID:no,isOfflineLoginEnabled:yes,model:iPhone18,1,device:iOS, Apple,passwordAutofillUsage:0 user-agent:mobileApp/5.23.0", ex=""#; + + let registry = ParserRegistry::new(); + let result = registry.parse(line).unwrap().unwrap(); + + match result { + ParsedMessage::Signature(entry) => { + assert_eq!(entry.session_id, "test-session-123"); + assert_eq!(entry.app, "XAMARIN_APP"); + assert_eq!(entry.version, "5.23.0"); + assert_eq!(entry.offline_login_usage, 0); + assert!(!entry.is_password_autofill_enabled); + assert_eq!(entry.camera_roll_usage, 0); + assert_eq!(entry.os, "26.2.0"); + assert_eq!(entry.app_name, "App"); + assert!(!entry.touch_id); + assert!(entry.is_offline_login_enabled); + assert_eq!(entry.model, "iPhone18,1"); + assert_eq!(entry.device, "iOS, Apple"); + assert_eq!(entry.password_autofill_usage, 0); + } + } + } + + #[test] + fn test_parse_non_signature_line() { + let line = r#"Jan 21 00:00:06 tom013 m1s-kv dt="2026-01-21 00:00:06", msg="some other message""#; + let registry = ParserRegistry::new(); + assert!(registry.parse(line).is_none()); + } +}