diff --git a/.github/dependabot.yaml b/.github/dependabot.yaml deleted file mode 100644 index 903f8af..0000000 --- a/.github/dependabot.yaml +++ /dev/null @@ -1,14 +0,0 @@ -version: 2 -updates: - - package-ecosystem: github-actions - directory: / - schedule: - interval: weekly - time: "07:00" - timezone: "Asia/Tokyo" - - package-ecosystem: cargo - directory: / - schedule: - interval: weekly - time: "07:00" - timezone: "Asia/Tokyo" diff --git a/.github/workflows/audit.yaml b/.github/workflows/audit.yaml deleted file mode 100644 index 58597e6..0000000 --- a/.github/workflows/audit.yaml +++ /dev/null @@ -1,31 +0,0 @@ -name: Security audit - -on: - schedule: - - cron: "0 0 */3 * *" - - push: - branches: [main] - paths: - - "**/Cargo.toml" - - "**/Cargo.lock" - - pull_request: - paths: - - "**/Cargo.toml" - - "**/Cargo.lock" - -jobs: - audit: - name: Audit - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - - - name: Install cargo-audit - uses: taiki-e/install-action@30eab0fabba9ea3f522099957e668b21876aa39e # v2.66.6 - with: - tool: cargo-audit - - - name: Run audit - run: cargo audit diff --git a/.github/workflows/benchmark.yaml b/.github/workflows/benchmark.yaml deleted file mode 100644 index a794c2c..0000000 --- a/.github/workflows/benchmark.yaml +++ /dev/null @@ -1,39 +0,0 @@ -name: Benchmark -on: - push: - branches: - - main - paths: - - '**/*.rs' - - '**/Cargo.toml' - - '**/Cargo.lock' - - '.github/workflows/benchmark.yaml' - -permissions: - contents: write - deployments: write - -jobs: - benchmark: - name: Run Rust benchmark example - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - - name: Toolchain setup - run: rustup toolchain update nightly && rustup default nightly - - name: Run benchmark - run: cargo +nightly bench | tee output.txt - - - name: Store benchmark result - uses: benchmark-action/github-action-benchmark@4bdcce38c94cec68da58d012ac24b7b1155efe8b # v1.20.7 - with: - name: Rust Benchmark - tool: 'cargo' - output-file-path: output.txt - github-token: ${{ secrets.GITHUB_TOKEN }} - auto-push: true - # Show alert with commit comment on detecting possible performance regression - alert-threshold: '200%' - comment-on-alert: true - fail-on-alert: true - benchmark-data-dir-path: docs diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml deleted file mode 100644 index 019aa36..0000000 --- a/.github/workflows/ci.yaml +++ /dev/null @@ -1,97 +0,0 @@ -name: Rust CI - -on: - push: - branches: [main] - paths: - - '**/*.rs' - - '**/Cargo.toml' - - '**/Cargo.lock' - - '.github/workflows/ci.yaml' - - pull_request: - paths: - - '**/*.rs' - - '**/Cargo.toml' - - '**/Cargo.lock' - - '.github/workflows/ci.yaml' - -env: - CARGO_TERM_COLOR: always - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - check: - name: Check - strategy: - matrix: - os: [ubuntu-latest, macos-latest, windows-latest] - runs-on: ${{ matrix.os }} - steps: - - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - with: - fetch-depth: 0 - - - uses: actions-rust-lang/setup-rust-toolchain@1780873c7b576612439a134613cc4cc74ce5538c # v1.15.2 - with: - components: rustfmt, clippy - cache-shared-key: setup-rust-${{ runner.os }}-${{ runner.arch }}-${{ hashFiles('**/Cargo.lock') }} - - - name: Install reviewdog - uses: reviewdog/action-setup@d8a7baabd7f3e8544ee4dbde3ee41d0011c3a93f # v1.5.0 - - - name: Check format - run: | - cargo fmt --all -- --check - - - uses: giraffate/clippy-action@13b9d32482f25d29ead141b79e7e04e7900281e0 # v1.0.1 - with: - reporter: 'github-pr-review' - github_token: ${{ secrets.GITHUB_TOKEN }} - fail_on_error: true - filter_mode: nofilter - - - name: Build - run: cargo build - - test: - name: Test - strategy: - matrix: - os: [ubuntu-latest, macos-latest, windows-latest] - runs-on: ${{ matrix.os }} - permissions: - contents: write - pull-requests: write - steps: - - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - with: - fetch-depth: 0 - - - uses: actions-rust-lang/setup-rust-toolchain@1780873c7b576612439a134613cc4cc74ce5538c # v1.15.2 - with: - components: llvm-tools-preview - cache-shared-key: setup-rust-${{ runner.os }}-${{ runner.arch }}-${{ hashFiles('**/Cargo.lock') }} - - - name: Install tools - uses: taiki-e/install-action@30eab0fabba9ea3f522099957e668b21876aa39e # v2.66.6 - with: - tool: cargo-llvm-cov, cargo-nextest - - - name: Run test - if: runner.os != 'Linux' - run: | - cargo nextest run - - - name: Generate coverage - if: runner.os == 'Linux' - run: cargo llvm-cov nextest --lcov --output-path lcov.info - - - name: Upload coverage - if: runner.os == 'Linux' - uses: k1LoW/octocov-action@73d561f65d59e66899ed5c87e4621a913b5d5c20 # v1.5.0 - with: - github-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/dependabot-auto-merge.yaml b/.github/workflows/dependabot-auto-merge.yaml deleted file mode 100644 index 1bd95e6..0000000 --- a/.github/workflows/dependabot-auto-merge.yaml +++ /dev/null @@ -1,34 +0,0 @@ -name: Dependabot Auto-merge - -on: - pull_request: - types: - - opened - - synchronize - - reopened - -permissions: - contents: write - pull-requests: write - -jobs: - dependabot-automation: - runs-on: ubuntu-latest - if: ${{ github.actor == 'dependabot[bot]' }} - timeout-minutes: 13 - steps: - - name: Dependabot metadata - id: metadata - uses: dependabot/fetch-metadata@21025c705c08248db411dc16f3619e6b5f9ea21a # v2.5.0 - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - - name: Approve & enable auto-merge for Dependabot PR - if: | - steps.metadata.outputs.update-type == 'version-update:semver-patch' || - steps.metadata.outputs.update-type == 'version-update:semver-minor' - run: | - gh pr merge --auto -s "$PR_URL" - env: - PR_URL: ${{ github.event.pull_request.html_url }} - PR_TITLE: ${{ github.event.pull_request.title }} - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml deleted file mode 100644 index d290d21..0000000 --- a/.github/workflows/release.yaml +++ /dev/null @@ -1,134 +0,0 @@ -name: Release - -on: - push: - tags: - - 'v*' - workflow_dispatch: - -permissions: - contents: write - -jobs: - build: - name: Build - ${{ matrix.target }} - strategy: - matrix: - include: - - target: x86_64-unknown-linux-gnu - runner: ubuntu-latest - os: Linux - arch: x86_64 - ext: tar.gz - - target: aarch64-unknown-linux-gnu - runner: ubuntu-24.04-arm - os: Linux - arch: arm64 - ext: tar.gz - - target: x86_64-apple-darwin - runner: macos-15-intel - os: Darwin - arch: x86_64 - ext: tar.gz - - target: aarch64-apple-darwin - runner: macos-latest - os: Darwin - arch: arm64 - ext: tar.gz - - target: x86_64-pc-windows-msvc - runner: windows-latest - os: Windows - arch: x86_64 - ext: zip - runs-on: ${{ matrix.runner }} - steps: - - name: Checkout - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - - - name: Setup sccache - uses: mozilla-actions/sccache-action@7d986dd989559c6ecdb630a3fd2557667be217ad # v0.0.9 - - - name: Setup environment variables for sccache - shell: bash - run: | - echo "SCCACHE_GHA_ENABLED=true" >> "$GITHUB_ENV" - echo "RUSTC_WRAPPER=sccache" >> "$GITHUB_ENV" - - - name: Setup Rust - uses: actions-rust-lang/setup-rust-toolchain@1780873c7b576612439a134613cc4cc74ce5538c # v1.15.2 - with: - rustflags: "" - - - name: Get project name - id: project - shell: bash - run: | - name=$(cargo metadata --format-version 1 --no-deps | jq -r '.packages[0].name') - echo "name=$name" >> "$GITHUB_OUTPUT" - - - name: Build - run: cargo build --release - - - name: Create archive (Unix) - if: matrix.os != 'Windows' - shell: bash - run: | - name="${{ steps.project.outputs.name }}" - archive_name="${name}_${{ matrix.os }}_${{ matrix.arch }}.tar.gz" - tar -czvf "$archive_name" -C target/release "$name" - echo "archive_name=$archive_name" >> "$GITHUB_ENV" - - - name: Create archive (Windows) - if: matrix.os == 'Windows' - shell: pwsh - run: | - $name = "${{ steps.project.outputs.name }}" - $archiveName = "${name}_${{ matrix.os }}_${{ matrix.arch }}.zip" - Compress-Archive -Path "target/release/${name}.exe" -DestinationPath $archiveName - echo "archive_name=$archiveName" >> $env:GITHUB_ENV - - - name: Upload artifact - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 - with: - name: ${{ steps.project.outputs.name }}_${{ matrix.os }}_${{ matrix.arch }} - path: ${{ env.archive_name }} - if-no-files-found: error - - release: - name: Release - needs: build - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - with: - fetch-depth: 0 - - - name: Download all artifacts - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 - with: - path: artifacts - merge-multiple: true - - - name: Generate changelog - id: changelog - run: | - # Get the previous tag - prev_tag=$(git describe --tags --abbrev=0 HEAD^ 2>/dev/null || echo "") - - if [ -n "$prev_tag" ]; then - echo "## Changes since $prev_tag" > changelog.md - echo "" >> changelog.md - git log --pretty=format:"- %s" "$prev_tag"..HEAD >> changelog.md - else - echo "## Initial Release" > changelog.md - fi - - - name: Create release - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - gh release create "${{ github.ref_name }}" \ - --title "${{ github.ref_name }}" \ - --notes-file changelog.md \ - artifacts/* diff --git a/Cargo.lock b/Cargo.lock index 1b17b8a..e04b5ea 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -230,6 +230,22 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys", +] + [[package]] name = "fallible-iterator" version = "0.3.0" @@ -242,6 +258,12 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + [[package]] name = "find-msvc-tools" version = "0.1.8" @@ -264,6 +286,19 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +[[package]] +name = "getrandom" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "139ef39800118c7683f2fd3c98c1b23c09ae076556b435f8e9064ae108aaeeec" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", + "wasip3", +] + [[package]] name = "hashbrown" version = "0.15.5" @@ -273,13 +308,19 @@ dependencies = [ "foldhash", ] +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + [[package]] name = "hashlink" version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1" dependencies = [ - "hashbrown", + "hashbrown 0.15.5", ] [[package]] @@ -312,12 +353,36 @@ dependencies = [ "cc", ] +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + +[[package]] +name = "indexmap" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown 0.16.1", + "serde", + "serde_core", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + [[package]] name = "js-sys" version = "0.3.85" @@ -328,6 +393,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + [[package]] name = "libc" version = "0.2.180" @@ -345,6 +416,12 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "linux-raw-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" + [[package]] name = "log" version = "0.4.29" @@ -353,7 +430,7 @@ checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] name = "log_ingest" -version = "0.0.1" +version = "0.0.2" dependencies = [ "anyhow", "chrono", @@ -363,6 +440,7 @@ dependencies = [ "rayon", "regex", "rusqlite", + "tempfile", ] [[package]] @@ -402,6 +480,16 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + [[package]] name = "proc-macro2" version = "1.0.93" @@ -420,6 +508,12 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + [[package]] name = "rayon" version = "1.11.0" @@ -483,12 +577,73 @@ dependencies = [ "smallvec", ] +[[package]] +name = "rustix" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + [[package]] name = "rustversion" version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" +[[package]] +name = "semver" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + [[package]] name = "shlex" version = "1.3.0" @@ -515,21 +670,40 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "syn" -version = "2.0.98" +version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] +[[package]] +name = "tempfile" +version = "3.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1" +dependencies = [ + "fastrand", + "getrandom", + "once_cell", + "rustix", + "windows-sys", +] + [[package]] name = "unicode-ident" version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "00e2473a93778eb0bad35909dff6a10d28e63f792f16ed15e404fca9d5eeedbe" +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + [[package]] name = "utf8parse" version = "0.2.2" @@ -542,6 +716,24 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" +[[package]] +name = "wasip2" +version = "1.0.2+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen", +] + [[package]] name = "wasm-bindgen" version = "0.2.108" @@ -587,6 +779,40 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + [[package]] name = "windows-core" version = "0.62.2" @@ -718,3 +944,97 @@ name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/Cargo.toml b/Cargo.toml index 13351e7..6ac95f4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "log_ingest" -version = "0.0.1" +version = "0.0.2" authors = ["Alexandr Mansurov"] edition = "2024" @@ -13,3 +13,4 @@ flate2 = "1" anyhow = "1" rayon = "1" crossbeam-channel = "0.5" +tempfile = "3" diff --git a/README.md b/README.md index 87fc798..c6abf3a 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@ # log_ingest -A Rust CLI tool for loading log files into a SQLite database for analysis. +A Rust CLI tool for ingesting and searching application log files. ## Overview -Parses application logs containing signature messages and loads them into SQLite for querying. Designed to handle large log volumes (10GB+ per day) with batched inserts and efficient parsing. +Parses application logs containing signature messages and loads them into SQLite for querying. Also provides fast text search across log files with session-aware context expansion. Designed to handle large log volumes (10GB+ per day) with parallel processing. ## Features @@ -15,6 +15,9 @@ Parses application logs containing signature messages and loads them into SQLite - Parallel file processing for multi-day ingestion - Indexed columns (`session_id`, `version`) for efficient queries - Extensible parser architecture for adding new message types +- Full-text search with timestamp and message extraction +- Session-aware expanded search that follows `changeSessionId` chains and correlation IDs +- Exception search filtered by app signature ## Installation @@ -24,16 +27,20 @@ cargo build --release ## Usage -### Process a single file +The CLI uses subcommands: `signature`, `search`, and `search-exceptions`. + +### `signature` — Load log entries into SQLite + +#### Process a single file ```bash -log_ingest --file /path/to/logs.log --output output.db +log_ingest signature --file /path/to/logs.log --output output.db ``` -### Process a date range +#### Process a date range ```bash -log_ingest \ +log_ingest signature \ --from 2026/01/20 \ --to 2026/01/21 \ --base-dir /var/log/myapp \ @@ -43,22 +50,22 @@ log_ingest \ The tool will look for files at `/YYYY/MM/DD/.gz` or `/YYYY/MM/DD/` for each day in the range. -### Parallel processing +#### Parallel processing When processing multiple files, parsing runs in parallel by default using all available CPU cores. A single writer thread handles database inserts to avoid SQLite contention. ```bash # Use all CPU cores (default) -log_ingest --from 2026/01/01 --to 2026/01/31 ... +log_ingest signature --from 2026/01/01 --to 2026/01/31 ... # Limit to 4 threads -log_ingest --threads 4 --from 2026/01/01 --to 2026/01/31 ... +log_ingest signature --threads 4 --from 2026/01/01 --to 2026/01/31 ... # Sequential processing (disable parallelism) -log_ingest --threads 1 --from 2026/01/01 --to 2026/01/31 ... +log_ingest signature --threads 1 --from 2026/01/01 --to 2026/01/31 ... ``` -### Options +#### Options | Option | Description | |--------|-------------| @@ -71,6 +78,63 @@ log_ingest --threads 1 --from 2026/01/01 --to 2026/01/31 ... | `--batch-size ` | Batch size for inserts (default: 10000) | | `--threads ` | Number of parallel threads (0 = all cores, 1 = sequential) | +### `search` — Search log files for matching lines + +Searches a log file for lines containing a query string and prints the timestamp and message for each match. Supports both plain `.log` and gzip `.log.gz` files, with parallel chunk-based processing for plain files. + +```bash +# Basic search +log_ingest search --file /path/to/logs.log --query "NullPointerException" + +# Include correlationId in output +log_ingest search --file /path/to/logs.log --query "timeout" -c + +# Expanded search: find all lines sharing sessionId/correlationId with matches, +# following changeSessionId chains backward to the session start (signature line) +log_ingest search --file /path/to/logs.log --query "Exception" -e +``` + +Expand mode (`-e`) performs a two-pass search: +1. **Pass 1**: Scans the entire file to find matching lines and collects their session IDs and correlation IDs. Also builds a `changeSessionId` graph and tracks which sessions have signature lines. +2. **Expansion**: Follows `changeSessionId` chains backward from seed sessions, stopping at sessions that have a signature (session start boundary). +3. **Pass 2**: Re-reads the file and outputs all lines belonging to expanded session IDs or correlation IDs, stopping early when all expanded sessions are destroyed (`sessionDestroyed`). + +#### Options + +| Option | Description | +|--------|-------------| +| `--file ` | Log file to search | +| `--query ` | Text to search for in log lines | +| `-c, --correlation-id` | Include correlationId in output | +| `-e, --expand` | Expand results to full session context | +| `--threads ` | Number of parallel threads (0 = all cores, 1 = sequential) | + +### `search-exceptions` — Search for exceptions filtered by app + +A specialized search that finds `Exception` lines and expands them to full session context, filtered to only sessions belonging to specific apps (identified by their `signature:` line). + +```bash +# Find exceptions for a specific app +log_ingest search-exceptions --file /path/to/logs.log --app XAMARIN_APP + +# Filter to multiple apps +log_ingest search-exceptions --file /path/to/logs.log --app XAMARIN_APP --app ANOTHER_APP +``` + +This uses the same two-pass expand approach as `search -e`, with an additional app-filtering step that: +- Matches expanded sessions to their app via the `signature:APP/...` line +- Propagates app membership forward through `changeSessionId` chains +- Filters correlation IDs to only those originating from matching-app sessions +- Uses strict app isolation in pass 2 to prevent lines from non-matching apps leaking through shared correlation IDs + +#### Options + +| Option | Description | +|--------|-------------| +| `--file ` | Log file to search | +| `--app ` | Filter to sessions with this app signature (repeatable) | +| `--threads ` | Number of parallel threads (0 = all cores, 1 = sequential) | + ## Database Schema The schema uses normalized lookup tables to minimize disk usage for large datasets. diff --git a/src/main.rs b/src/main.rs index 9eb1411..23524ed 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,6 @@ use anyhow::{Result, anyhow}; use chrono::NaiveDate; -use clap::Parser; +use clap::{Parser, Subcommand}; use crossbeam_channel::{Sender, bounded}; use rayon::prelude::*; use std::collections::HashMap; @@ -13,14 +13,31 @@ use std::thread; mod db; mod files; mod parser; +mod search; use db::Database; use files::{LogFile, LogFileDiscovery, LogReader, read_log_file}; use parser::{ParsedMessage, ParserRegistry, SignatureEntry}; #[derive(Parser, Debug)] -#[command(author, version, about = "Load log files into SQLite database")] +#[command(author, version, about = "Log file analysis tool")] struct Args { + #[command(subcommand)] + command: Command, +} + +#[derive(Subcommand, Debug)] +enum Command { + /// Load signature log entries into SQLite database + Signature(SignatureArgs), + /// Search log file for lines matching a query and print timestamp + message + Search(SearchArgs), + /// Search for Exception lines with expand, filtered by app signature + SearchExceptions(SearchExceptionsArgs), +} + +#[derive(Parser, Debug)] +struct SignatureArgs { /// Start date (YYYY/mm/dd) #[arg(long)] from: Option, @@ -54,6 +71,45 @@ struct Args { threads: usize, } +#[derive(Parser, Debug)] +struct SearchArgs { + /// Log file to search + #[arg(long)] + file: PathBuf, + + /// Text to search for in log lines + #[arg(long)] + query: String, + + /// Include correlationId in output + #[arg(short = 'c', long = "correlation-id")] + correlation_id: bool, + + /// Expand results: find all lines sharing sessionId/correlationId, + /// follow changeSessionId chains backward to session start (signature line) + #[arg(short = 'e', long = "expand")] + expand: bool, + + /// Number of parallel threads (0 = use all available cores, 1 = sequential) + #[arg(long, default_value = "0")] + threads: usize, +} + +#[derive(Parser, Debug)] +struct SearchExceptionsArgs { + /// Log file to search + #[arg(long)] + file: PathBuf, + + /// Filter results to sessions with signature: only (repeatable) + #[arg(long, required = true)] + app: Vec, + + /// Number of parallel threads (0 = use all available cores, 1 = sequential) + #[arg(long, default_value = "0")] + threads: usize, +} + fn parse_date(s: &str) -> Result { NaiveDate::parse_from_str(s, "%Y/%m/%d") .map_err(|e| anyhow!("Invalid date format '{}': {}. Expected YYYY/mm/dd", s, e)) @@ -62,6 +118,32 @@ fn parse_date(s: &str) -> Result { fn main() -> Result<()> { let args = Args::parse(); + match args.command { + Command::Signature(sig_args) => run_signature(sig_args), + Command::Search(search_args) => { + let path = search_args + .file + .to_str() + .ok_or_else(|| anyhow!("File path contains invalid UTF-8"))?; + search::run_search( + path, + &search_args.query, + search_args.correlation_id, + search_args.expand, + search_args.threads, + ) + } + Command::SearchExceptions(args) => { + let path = args + .file + .to_str() + .ok_or_else(|| anyhow!("File path contains invalid UTF-8"))?; + search::run_search_exceptions(path, &args.app, args.threads) + } + } +} + +fn run_signature(args: SignatureArgs) -> Result<()> { // Configure rayon thread pool if threads specified if args.threads > 0 && let Err(e) = rayon::ThreadPoolBuilder::new() diff --git a/src/search.rs b/src/search.rs new file mode 100644 index 0000000..15a2894 --- /dev/null +++ b/src/search.rs @@ -0,0 +1,1263 @@ +use anyhow::Result; +use rayon::prelude::*; +use regex::Regex; +use std::collections::{HashMap, HashSet}; +use std::io::{BufRead, BufReader, Read as _, Seek, SeekFrom}; +use std::sync::LazyLock; + +use crate::files::read_log_file; + +static SYSLOG_TIMESTAMP_RE: LazyLock = + LazyLock::new(|| Regex::new(r"^(\w{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2})").unwrap()); + +static MSG_RE: LazyLock = LazyLock::new(|| Regex::new(r#"msg="([^"]+)""#).unwrap()); + +static CORRELATION_ID_RE: LazyLock = + LazyLock::new(|| Regex::new(r"correlationId=([^,\s]+)").unwrap()); + +static SESSION_ID_RE: LazyLock = + LazyLock::new(|| Regex::new(r"sessionId=([^,\s]+)").unwrap()); + +/// Matches sessionDestroyed lines and captures the sid value. +/// Example: sessionDestroyed #s=8 sid=2010F74...node003 isnew=false +static SESSION_DESTROYED_RE: LazyLock = + LazyLock::new(|| Regex::new(r"sessionDestroyed\b.*?\bsid=([^,\s]+)").unwrap()); + +/// Extracts the app name from any signature line format. +/// Matches both `msg="signature:APP/..."` and `msg="MOBILE_CLIENT_LOG: signature:APP/..."`. +static SIGNATURE_APP_RE: LazyLock = + LazyLock::new(|| Regex::new(r"signature:([^/\s]+)/").unwrap()); + +/// Matches changeSessionId messages and captures the long-form new and old session IDs. +/// Example: changeSessionId: newSessionId: sDF080BBD / DF080BBD...node011 replaces oldSessionId: sF9EE9D52 / F9EE9D52...node011 +static CHANGE_SESSION_RE: LazyLock = LazyLock::new(|| { + Regex::new( + r#"changeSessionId:.*?newSessionId:\s*\S+\s*/\s*([^,\s"]+).*?replaces\s+oldSessionId:\s*\S+\s*/\s*([^,\s"]+)"#, + ) + .unwrap() +}); + +/// Strips the `.nodeXXX` suffix from a session ID for comparison purposes. +/// "DF080BBD8D5E954C642F6C3B5639D6EE.node011" -> "DF080BBD8D5E954C642F6C3B5639D6EE" +/// "noSession" -> "noSession" +fn normalize_session_id(sid: &str) -> &str { + if let Some(dot_pos) = sid.rfind('.') { + let suffix = &sid[dot_pos + 1..]; + if suffix.starts_with("node") { + return &sid[..dot_pos]; + } + } + sid +} + +fn build_thread_pool(threads: usize) -> Result { + let mut builder = rayon::ThreadPoolBuilder::new(); + if threads > 0 { + builder = builder.num_threads(threads); + } + builder + .build() + .map_err(|e| anyhow::anyhow!("Failed to build thread pool: {}", e)) +} + +/// Split a file into byte-offset chunks for parallel processing. +/// Each chunk is a (start, end) pair. Threads adjust to line boundaries at runtime. +fn compute_chunks(file_size: u64, num_chunks: usize) -> Vec<(u64, u64)> { + if file_size == 0 || num_chunks == 0 { + return vec![]; + } + let effective = num_chunks.min(file_size as usize); + let chunk_size = file_size / effective as u64; + (0..effective) + .map(|i| { + let start = i as u64 * chunk_size; + let end = if i == effective - 1 { + file_size + } else { + (i as u64 + 1) * chunk_size + }; + (start, end) + }) + .collect() +} + +/// Returns true if parallel chunk processing can be used (plain file + multiple threads). +fn can_parallelize(file_path: &str, num_threads: usize) -> bool { + num_threads > 1 && !file_path.ends_with(".gz") +} + +/// Stream all lines from a file (works with gzip and plain). Calls `process` for each line. +fn for_each_line_streaming(file_path: &str, mut process: F) -> Result<()> +where + F: FnMut(&str), +{ + let mut reader = read_log_file(file_path)?; + let mut line = String::new(); + loop { + line.clear(); + if reader.read_line(&mut line)? == 0 { + break; + } + process(line.trim_end()); + } + Ok(()) +} + +/// Process lines in a byte-offset chunk of a plain file. Calls `process` for each complete line. +/// +/// Chunk boundaries may fall mid-line. Convention: +/// - The chunk that started reading a line owns it (reads past `end` to finish it). +/// - The next chunk checks byte `start-1`: if it's `\n`, we're at a line start; otherwise +/// skip the partial first line (the previous chunk already handled it). +fn for_each_line_in_chunk(file_path: &str, start: u64, end: u64, mut process: F) -> Result<()> +where + F: FnMut(&str), +{ + let file = std::fs::File::open(file_path)?; + let mut reader = BufReader::with_capacity(256 * 1024, file); + let mut pos = start; + + if start > 0 { + // Check if we're at a line boundary or mid-line + reader.seek(SeekFrom::Start(start - 1))?; + let mut byte = [0u8; 1]; + reader.read_exact(&mut byte)?; + // Now positioned at `start` + if byte[0] != b'\n' { + // Mid-line: skip remainder (previous chunk owns this line) + let mut skip = String::new(); + let n = reader.read_line(&mut skip)?; + pos += n as u64; + } + } + + let mut line = String::new(); + while pos < end { + line.clear(); + let n = reader.read_line(&mut line)?; + if n == 0 { + break; + } + pos += n as u64; + process(line.trim_end()); + } + Ok(()) +} + +pub fn run_search( + file_path: &str, + query: &str, + show_correlation_id: bool, + expand: bool, + threads: usize, +) -> Result<()> { + let pool = build_thread_pool(threads)?; + if expand { + run_search_expanded(file_path, query, &pool) + } else { + run_search_simple(file_path, query, show_correlation_id, &pool) + } +} + +fn format_simple_match(line: &str, show_correlation_id: bool) -> String { + let ts = SYSLOG_TIMESTAMP_RE + .captures(line) + .map(|c| c.get(1).unwrap().as_str()) + .unwrap_or("?"); + let msg = MSG_RE + .captures(line) + .map(|c| c.get(1).unwrap().as_str()) + .unwrap_or(""); + if show_correlation_id { + let cid = CORRELATION_ID_RE + .captures(line) + .map(|c| c.get(1).unwrap().as_str()); + if let Some(cid) = cid { + return format!("[{}] [{}] {}", ts, cid, msg); + } + } + format!("[{}] {}", ts, msg) +} + +fn run_search_simple( + file_path: &str, + query: &str, + show_correlation_id: bool, + pool: &rayon::ThreadPool, +) -> Result<()> { + let num_threads = pool.current_num_threads(); + + if !can_parallelize(file_path, num_threads) { + // Sequential: stream directly (works for gzip and plain) + let mut match_count = 0u64; + for_each_line_streaming(file_path, |trimmed| { + if trimmed.contains(query) { + println!("{}", format_simple_match(trimmed, show_correlation_id)); + match_count += 1; + } + })?; + eprintln!("{} matching lines found", match_count); + return Ok(()); + } + + // Parallel: split plain file into chunks + let file_size = std::fs::metadata(file_path)?.len(); + let chunks = compute_chunks(file_size, num_threads); + + eprintln!( + "Searching with {} threads across {} chunks", + num_threads, + chunks.len() + ); + + let results: Vec, u64)>> = pool.install(|| { + chunks + .par_iter() + .map(|&(start, end)| { + let mut lines = Vec::new(); + let mut count = 0u64; + for_each_line_in_chunk(file_path, start, end, |trimmed| { + if trimmed.contains(query) { + lines.push(format_simple_match(trimmed, show_correlation_id)); + count += 1; + } + })?; + Ok((lines, count)) + }) + .collect() + }); + + let mut total = 0u64; + for result in results { + let (lines, count) = result?; + for line in lines { + println!("{}", line); + } + total += count; + } + + eprintln!("{} matching lines found", total); + Ok(()) +} + +// --- Expand mode --- + +#[derive(Default)] +struct Pass1Result { + seed_session_ids: HashSet, + seed_correlation_ids: HashSet, + change_session_map: HashMap, + sessions_with_signature: HashSet, + /// Maps normalized session ID to app name from its signature line + session_app_map: HashMap, + /// Maps seed correlation ID to the session ID from the same line (for app filtering) + seed_cid_sessions: HashMap, + line_count: u64, +} + +impl Pass1Result { + fn merge(mut self, other: Pass1Result) -> Self { + self.seed_session_ids.extend(other.seed_session_ids); + self.seed_correlation_ids.extend(other.seed_correlation_ids); + self.change_session_map.extend(other.change_session_map); + self.sessions_with_signature + .extend(other.sessions_with_signature); + self.session_app_map.extend(other.session_app_map); + self.seed_cid_sessions.extend(other.seed_cid_sessions); + self.line_count += other.line_count; + self + } +} + +fn process_line_pass1(trimmed: &str, query: &str, result: &mut Pass1Result) { + let sid = SESSION_ID_RE + .captures(trimmed) + .and_then(|c| c.get(1)) + .map(|m| normalize_session_id(m.as_str())); + + // Detect signature lines using broad regex (matches both msg="signature:APP/..." + // and msg="MOBILE_CLIENT_LOG: signature:APP/...") + if let Some(sig_caps) = SIGNATURE_APP_RE.captures(trimmed) { + let app = sig_caps.get(1).unwrap().as_str(); + if let Some(s) = sid { + result.sessions_with_signature.insert(s.to_string()); + result + .session_app_map + .insert(s.to_string(), app.to_string()); + } + } + + if trimmed.contains("changeSessionId:") + && let Some(caps) = CHANGE_SESSION_RE.captures(trimmed) + { + let new_sid = normalize_session_id(caps.get(1).unwrap().as_str()).to_string(); + let old_sid = normalize_session_id(caps.get(2).unwrap().as_str()).to_string(); + result.change_session_map.insert(new_sid, old_sid); + } + + if trimmed.contains(query) { + if let Some(s) = sid + && s != "noSession" + { + result.seed_session_ids.insert(s.to_string()); + // Track which correlation IDs belong to which sessions (for app filtering) + if let Some(cid) = CORRELATION_ID_RE + .captures(trimmed) + .and_then(|c| c.get(1)) + { + result + .seed_cid_sessions + .insert(cid.as_str().to_string(), s.to_string()); + } + } + if let Some(cid) = CORRELATION_ID_RE + .captures(trimmed) + .and_then(|c| c.get(1)) + { + result + .seed_correlation_ids + .insert(cid.as_str().to_string()); + } + } + + result.line_count += 1; +} + +fn run_search_expanded( + file_path: &str, + query: &str, + pool: &rayon::ThreadPool, +) -> Result<()> { + let num_threads = pool.current_num_threads(); + let use_parallel = can_parallelize(file_path, num_threads); + + // Pass 1: collect metadata + let pass1 = run_pass1(file_path, query, use_parallel, pool)?; + + // Expansion (in-memory graph traversal — inherently sequential) + let (expanded_sids, expanded_cids) = expand_seeds( + &pass1.seed_session_ids, + &pass1.seed_correlation_ids, + &pass1.change_session_map, + &pass1.sessions_with_signature, + ); + + if expanded_sids.is_empty() && expanded_cids.is_empty() { + eprintln!("0 matching lines found (no sessions or correlations to expand)"); + return Ok(()); + } + + eprintln!( + "Expanding: {} session IDs, {} correlation IDs", + expanded_sids.len(), + expanded_cids.len() + ); + + // Pass 2: filter and print (re-reads file; for gzip this re-decompresses from stream) + let match_count = run_pass2(file_path, query, &expanded_sids, &expanded_cids, use_parallel, pool, false)?; + + eprintln!("{} lines output", match_count); + Ok(()) +} + +fn run_pass1( + file_path: &str, + query: &str, + use_parallel: bool, + pool: &rayon::ThreadPool, +) -> Result { + if !use_parallel { + eprintln!("Pass 1: scanning sequentially..."); + let mut result = Pass1Result::default(); + for_each_line_streaming(file_path, |trimmed| { + process_line_pass1(trimmed, query, &mut result); + })?; + eprintln!( + "Pass 1 complete: {} lines, {} seed sessions, {} seed correlations, {} session changes", + result.line_count, + result.seed_session_ids.len(), + result.seed_correlation_ids.len(), + result.change_session_map.len() + ); + return Ok(result); + } + + let file_size = std::fs::metadata(file_path)?.len(); + let num_threads = pool.current_num_threads(); + let chunks = compute_chunks(file_size, num_threads); + + eprintln!("Pass 1: scanning with {} threads...", chunks.len()); + + let results: Vec> = pool.install(|| { + chunks + .par_iter() + .map(|&(start, end)| { + let mut chunk_result = Pass1Result::default(); + for_each_line_in_chunk(file_path, start, end, |trimmed| { + process_line_pass1(trimmed, query, &mut chunk_result); + })?; + Ok(chunk_result) + }) + .collect() + }); + + let mut merged = Pass1Result::default(); + for r in results { + merged = merged.merge(r?); + } + + eprintln!( + "Pass 1 complete: {} lines, {} seed sessions, {} seed correlations, {} session changes", + merged.line_count, + merged.seed_session_ids.len(), + merged.seed_correlation_ids.len(), + merged.change_session_map.len() + ); + + Ok(merged) +} + +/// Expand seed session IDs by following changeSessionId chains backward. +/// Stops recursion when an old session has a signature line (session start). +fn expand_seeds( + seed_sids: &HashSet, + seed_cids: &HashSet, + change_map: &HashMap, + sig_sessions: &HashSet, +) -> (HashSet, HashSet) { + let mut expanded_sids = seed_sids.clone(); + let mut work_queue: Vec = seed_sids.iter().cloned().collect(); + + while let Some(current) = work_queue.pop() { + if let Some(old_sid) = change_map.get(¤t) + && expanded_sids.insert(old_sid.clone()) + { + // Newly added — include its lines. + // If it has a signature, stop recursing from it. + if !sig_sessions.contains(old_sid) { + work_queue.push(old_sid.clone()); + } + } + } + + (expanded_sids, seed_cids.clone()) +} + +fn format_pass2_match( + trimmed: &str, + query: &str, + expanded_sids: &HashSet, + expanded_cids: &HashSet, + strict_app_isolation: bool, +) -> Option { + let is_direct_match = trimmed.contains(query); + + let sid = SESSION_ID_RE + .captures(trimmed) + .and_then(|c| c.get(1)) + .map(|m| normalize_session_id(m.as_str())); + + let cid = CORRELATION_ID_RE + .captures(trimmed) + .and_then(|c| c.get(1)) + .map(|m| m.as_str()); + + let sid_match = sid.is_some_and(|s| expanded_sids.contains(s)); + let cid_match = cid.is_some_and(|c| expanded_cids.contains(c)); + + // When strict_app_isolation is enabled (search-exceptions), a CID match + // alone is not enough — the line's session must also be in the filtered set + // (or absent). This prevents leaking lines from non-matching apps that + // happen to share a correlation ID. + let effective_cid_match = + cid_match && !(strict_app_isolation && sid.is_some() && !sid_match); + + if !is_direct_match && !sid_match && !effective_cid_match { + return None; + } + + let timestamp = SYSLOG_TIMESTAMP_RE + .captures(trimmed) + .map(|c| c.get(1).unwrap().as_str()) + .unwrap_or("?"); + + let msg = MSG_RE + .captures(trimmed) + .map(|c| c.get(1).unwrap().as_str()) + .unwrap_or(""); + + let sid_display = sid.unwrap_or("-"); + let cid_display = cid.unwrap_or("-"); + let prefix = if is_direct_match { "*" } else { " " }; + + Some(format!( + "{} [{}] [cid:{}] [sid:{}] {}", + prefix, timestamp, cid_display, sid_display, msg + )) +} + +/// Events collected per-line during pass2 parallel chunk processing. +/// Preserves file order so the assembly phase can stop at sessionDestroyed. +enum Pass2Event { + /// Line matched and should be output. + Match(String), + /// An expanded session was destroyed (no output for this line). + Destroy(String), + /// Line matched AND an expanded session was destroyed on this line. + MatchAndDestroy(String, String), +} + +/// Check whether a line destroys one of the expanded sessions. +fn check_session_destroyed(trimmed: &str, expanded_sids: &HashSet) -> Option { + if trimmed.contains("sessionDestroyed") + && let Some(caps) = SESSION_DESTROYED_RE.captures(trimmed) + { + let sid = normalize_session_id(caps.get(1).unwrap().as_str()); + if expanded_sids.contains(sid) { + return Some(sid.to_string()); + } + } + None +} + +fn run_pass2( + file_path: &str, + query: &str, + expanded_sids: &HashSet, + expanded_cids: &HashSet, + use_parallel: bool, + pool: &rayon::ThreadPool, + strict_app_isolation: bool, +) -> Result { + if !use_parallel { + eprintln!("Pass 2: filtering sequentially..."); + let mut count = 0u64; + let mut reader = read_log_file(file_path)?; + let mut line = String::new(); + let mut remaining_sids: HashSet<&str> = + expanded_sids.iter().map(|s| s.as_str()).collect(); + + loop { + line.clear(); + if reader.read_line(&mut line)? == 0 { + break; + } + let trimmed = line.trim_end(); + + // Check for sessionDestroyed + if trimmed.contains("sessionDestroyed") + && let Some(caps) = SESSION_DESTROYED_RE.captures(trimmed) + { + let destroyed_sid = normalize_session_id(caps.get(1).unwrap().as_str()); + if remaining_sids.remove(destroyed_sid) { + // Still output the sessionDestroyed line itself if it matches + if let Some(formatted) = + format_pass2_match(trimmed, query, expanded_sids, expanded_cids, strict_app_isolation) + { + println!("{}", formatted); + count += 1; + } + if remaining_sids.is_empty() { + eprintln!( + "All {} expanded sessions destroyed, stopping early", + expanded_sids.len() + ); + break; + } + continue; + } + } + + if let Some(formatted) = + format_pass2_match(trimmed, query, expanded_sids, expanded_cids, strict_app_isolation) + { + println!("{}", formatted); + count += 1; + } + } + return Ok(count); + } + + let file_size = std::fs::metadata(file_path)?.len(); + let num_threads = pool.current_num_threads(); + let chunks = compute_chunks(file_size, num_threads); + + eprintln!("Pass 2: filtering with {} threads...", chunks.len()); + + // Each chunk collects events preserving file order so the assembly + // phase can apply the same sessionDestroyed stop logic as sequential. + let results: Vec>> = pool.install(|| { + chunks + .par_iter() + .map(|&(start, end)| { + let mut events = Vec::new(); + for_each_line_in_chunk(file_path, start, end, |trimmed| { + let formatted = format_pass2_match( + trimmed, query, expanded_sids, expanded_cids, strict_app_isolation, + ); + let destroyed = check_session_destroyed(trimmed, expanded_sids); + match (formatted, destroyed) { + (Some(f), Some(d)) => events.push(Pass2Event::MatchAndDestroy(f, d)), + (Some(f), None) => events.push(Pass2Event::Match(f)), + (None, Some(d)) => events.push(Pass2Event::Destroy(d)), + (None, None) => {} + } + })?; + Ok(events) + }) + .collect() + }); + + // Assemble in chunk order, stopping when all expanded sessions are destroyed. + let mut remaining_sids: HashSet<&str> = + expanded_sids.iter().map(|s| s.as_str()).collect(); + let mut total = 0u64; + 'outer: for result in results { + let events = result?; + for event in events { + match event { + Pass2Event::Match(line) => { + println!("{}", line); + total += 1; + } + Pass2Event::Destroy(sid) => { + remaining_sids.remove(sid.as_str()); + if remaining_sids.is_empty() { + break 'outer; + } + } + Pass2Event::MatchAndDestroy(line, sid) => { + println!("{}", line); + total += 1; + remaining_sids.remove(sid.as_str()); + if remaining_sids.is_empty() { + break 'outer; + } + } + } + } + } + + Ok(total) +} + +// --- search_exceptions --- + +/// Filter expanded session IDs to only those in chains containing a matching-app signature. +/// Builds a reverse change_session_map (old → [new]) and propagates forward from matching roots. +fn filter_expanded_by_app( + expanded_sids: &HashSet, + change_map: &HashMap, + session_app_map: &HashMap, + app_filters: &[String], +) -> HashSet { + // Build reverse map: old → [new1, new2, ...] + let mut reverse_map: HashMap<&str, Vec<&str>> = HashMap::new(); + for (new_sid, old_sid) in change_map { + reverse_map + .entry(old_sid.as_str()) + .or_default() + .push(new_sid.as_str()); + } + + // Find all sessions with any matching app + let matching_roots: Vec<&str> = session_app_map + .iter() + .filter(|(_, app)| app_filters.iter().any(|f| f == app.as_str())) + .map(|(sid, _)| sid.as_str()) + .collect(); + + // Propagate forward from matching roots through the reverse map + let mut matching_sessions: HashSet = HashSet::new(); + let mut work_queue: Vec<&str> = matching_roots; + while let Some(current) = work_queue.pop() { + if !matching_sessions.insert(current.to_string()) { + continue; + } + if let Some(nexts) = reverse_map.get(current) { + work_queue.extend(nexts.iter().copied()); + } + } + + // Intersect with expanded_sids + expanded_sids + .intersection(&matching_sessions) + .cloned() + .collect() +} + +pub fn run_search_exceptions(file_path: &str, app_filters: &[String], threads: usize) -> Result<()> { + let pool = build_thread_pool(threads)?; + let query = "Exception"; + let num_threads = pool.current_num_threads(); + let use_parallel = can_parallelize(file_path, num_threads); + + // Pass 1: collect metadata + let pass1 = run_pass1(file_path, query, use_parallel, &pool)?; + + // Expand seeds + let (expanded_sids, expanded_cids) = expand_seeds( + &pass1.seed_session_ids, + &pass1.seed_correlation_ids, + &pass1.change_session_map, + &pass1.sessions_with_signature, + ); + + // Filter by app + let filtered_sids = filter_expanded_by_app( + &expanded_sids, + &pass1.change_session_map, + &pass1.session_app_map, + app_filters, + ); + + // Filter correlation IDs: keep only those whose seed line's session is in the filtered set + let filtered_cids: HashSet = expanded_cids + .iter() + .filter(|cid| { + pass1 + .seed_cid_sessions + .get(cid.as_str()) + .is_some_and(|sid| filtered_sids.contains(sid)) + }) + .cloned() + .collect(); + + if filtered_sids.is_empty() && filtered_cids.is_empty() { + eprintln!( + "0 matching lines found (no sessions matching apps {:?})", + app_filters + ); + return Ok(()); + } + + eprintln!( + "Expanding: {} session IDs (filtered from {}), {} correlation IDs (filtered from {})", + filtered_sids.len(), + expanded_sids.len(), + filtered_cids.len(), + expanded_cids.len() + ); + + // Pass 2: filter and print (strict isolation prevents CID leaking across apps) + let match_count = run_pass2( + file_path, + query, + &filtered_sids, + &filtered_cids, + use_parallel, + &pool, + true, + )?; + + eprintln!("{} lines output", match_count); + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write; + + #[test] + fn test_syslog_timestamp_extraction() { + let line = r#"Jan 27 17:21:17 a.b.c.d m1s-kv dt="2026-01-27 17:21:17,524", ll=INFO, msg="hello""#; + let caps = SYSLOG_TIMESTAMP_RE.captures(line).unwrap(); + assert_eq!(caps.get(1).unwrap().as_str(), "Jan 27 17:21:17"); + } + + #[test] + fn test_msg_extraction() { + let line = r#"some prefix msg="getUnreadFilesCount(externalUserId=abc123)", ex=""#; + let caps = MSG_RE.captures(line).unwrap(); + assert_eq!( + caps.get(1).unwrap().as_str(), + "getUnreadFilesCount(externalUserId=abc123)" + ); + } + + #[test] + fn test_full_line_extraction() { + let line = r#"Jan 27 17:21:17 a.b.c.d m1s-kv dt="2026-01-27 17:21:17,524", ll=INFO, lc=com.a.b.c.d.e.v5.endpoint.f, threadId=183, externalUserId=null, clientIp=1.1.1.1, xsrfToken=null, correlationId=abcd, sessionId=noSession, securityContext=CA_LOGGED_IN, userId=123, request_id=[(null)]snoSessio.abc, msg="getUnreadFilesCount(externalUserId=aaaaa,externalTeamSafeIds=bbbbb)", ex=""#; + + let ts = SYSLOG_TIMESTAMP_RE + .captures(line) + .map(|c| c.get(1).unwrap().as_str()) + .unwrap(); + assert_eq!(ts, "Jan 27 17:21:17"); + + let msg = MSG_RE + .captures(line) + .map(|c| c.get(1).unwrap().as_str()) + .unwrap(); + assert_eq!( + msg, + "getUnreadFilesCount(externalUserId=aaaaa,externalTeamSafeIds=bbbbb)" + ); + } + + #[test] + fn test_no_timestamp() { + let line = r#"some garbage line without proper timestamp msg="hello""#; + assert!(SYSLOG_TIMESTAMP_RE.captures(line).is_none()); + } + + #[test] + fn test_no_msg() { + let line = "Jan 27 17:21:17 some line without msg field"; + assert!(MSG_RE.captures(line).is_none()); + } + + // --- normalize_session_id tests --- + + #[test] + fn test_normalize_session_id_with_node_suffix() { + assert_eq!( + normalize_session_id("DF080BBD8D5E954C642F6C3B5639D6EE.node011"), + "DF080BBD8D5E954C642F6C3B5639D6EE" + ); + } + + #[test] + fn test_normalize_session_id_without_suffix() { + assert_eq!( + normalize_session_id("DF080BBD8D5E954C642F6C3B5639D6EE"), + "DF080BBD8D5E954C642F6C3B5639D6EE" + ); + } + + #[test] + fn test_normalize_session_id_no_session() { + assert_eq!(normalize_session_id("noSession"), "noSession"); + } + + #[test] + fn test_normalize_session_id_non_node_dot() { + assert_eq!(normalize_session_id("some.session.id"), "some.session.id"); + } + + // --- CHANGE_SESSION_RE tests --- + + #[test] + fn test_change_session_id_regex() { + let line = r#"msg="changeSessionId: newSessionId: sDF080BBD / DF080BBD8D5E954C642F6C3B5639D6EE.node011 replaces oldSessionId: sF9EE9D52 / F9EE9D52FDB4502EB5CE6FFA24194AFD.node011""#; + let caps = CHANGE_SESSION_RE.captures(line).unwrap(); + assert_eq!( + caps.get(1).unwrap().as_str(), + "DF080BBD8D5E954C642F6C3B5639D6EE.node011" + ); + assert_eq!( + caps.get(2).unwrap().as_str(), + "F9EE9D52FDB4502EB5CE6FFA24194AFD.node011" + ); + } + + // --- SESSION_ID_RE tests --- + + #[test] + fn test_session_id_extraction() { + let line = "sessionId=ABC123DEF456.node005, something else"; + let caps = SESSION_ID_RE.captures(line).unwrap(); + assert_eq!(caps.get(1).unwrap().as_str(), "ABC123DEF456.node005"); + } + + // --- expand_seeds tests --- + + #[test] + fn test_expand_seeds_no_chain() { + let seed_sids: HashSet = ["A".to_string()].into(); + let seed_cids: HashSet = ["c1".to_string()].into(); + let change_map = HashMap::new(); + let sig_sessions = HashSet::new(); + + let (sids, cids) = expand_seeds(&seed_sids, &seed_cids, &change_map, &sig_sessions); + assert_eq!(sids, seed_sids); + assert_eq!(cids, seed_cids); + } + + #[test] + fn test_expand_seeds_single_chain() { + // B replaced A (A is old, B is new). Seed is B. + let seed_sids: HashSet = ["B".to_string()].into(); + let seed_cids: HashSet = HashSet::new(); + let change_map: HashMap = [("B".to_string(), "A".to_string())].into(); + let sig_sessions: HashSet = ["A".to_string()].into(); + + let (sids, _) = expand_seeds(&seed_sids, &seed_cids, &change_map, &sig_sessions); + assert!(sids.contains("A")); + assert!(sids.contains("B")); + assert_eq!(sids.len(), 2); + } + + #[test] + fn test_expand_seeds_multi_hop_chain() { + // C replaced B, B replaced A. Seed is C. + let seed_sids: HashSet = ["C".to_string()].into(); + let seed_cids: HashSet = HashSet::new(); + let change_map: HashMap = [ + ("C".to_string(), "B".to_string()), + ("B".to_string(), "A".to_string()), + ] + .into(); + let sig_sessions: HashSet = ["A".to_string()].into(); + + let (sids, _) = expand_seeds(&seed_sids, &seed_cids, &change_map, &sig_sessions); + assert!(sids.contains("A")); + assert!(sids.contains("B")); + assert!(sids.contains("C")); + assert_eq!(sids.len(), 3); + } + + #[test] + fn test_expand_seeds_stops_at_signature() { + // D replaced C, C replaced B, B replaced A. B has signature. Seed is D. + let seed_sids: HashSet = ["D".to_string()].into(); + let seed_cids: HashSet = HashSet::new(); + let change_map: HashMap = [ + ("D".to_string(), "C".to_string()), + ("C".to_string(), "B".to_string()), + ("B".to_string(), "A".to_string()), + ] + .into(); + let sig_sessions: HashSet = ["B".to_string()].into(); + + let (sids, _) = expand_seeds(&seed_sids, &seed_cids, &change_map, &sig_sessions); + assert!(sids.contains("D")); + assert!(sids.contains("C")); + assert!(sids.contains("B")); + assert!(!sids.contains("A")); + assert_eq!(sids.len(), 3); + } + + #[test] + fn test_expand_seeds_cycle_protection() { + // A -> B -> A (cycle) + let seed_sids: HashSet = ["A".to_string()].into(); + let seed_cids: HashSet = HashSet::new(); + let change_map: HashMap = [ + ("A".to_string(), "B".to_string()), + ("B".to_string(), "A".to_string()), + ] + .into(); + let sig_sessions: HashSet = HashSet::new(); + + let (sids, _) = expand_seeds(&seed_sids, &seed_cids, &change_map, &sig_sessions); + assert!(sids.contains("A")); + assert!(sids.contains("B")); + assert_eq!(sids.len(), 2); + } + + // --- Integration test --- + + #[test] + fn test_expand_integration() -> Result<()> { + let dir = tempfile::tempdir()?; + let log_path = dir.path().join("test.log"); + let mut file = std::fs::File::create(&log_path)?; + + // Line 1: signature for session A + writeln!( + file, + r#"Jan 01 00:00:01 host app dt="2026-01-01 00:00:01,000", sessionId=AAAA.node001, correlationId=c1, msg="signature:APP/1.0/ details:OS:1""# + )?; + // Line 2: normal line for session A, matches query + writeln!( + file, + r#"Jan 01 00:00:02 host app dt="2026-01-01 00:00:02,000", sessionId=AAAA.node001, correlationId=c1, msg="findme something""# + )?; + // Line 3: changeSessionId: B replaces A + writeln!( + file, + r#"Jan 01 00:00:03 host app dt="2026-01-01 00:00:03,000", sessionId=BBBB.node001, correlationId=c2, msg="changeSessionId: newSessionId: sBBBB / BBBB.node001 replaces oldSessionId: sAAAA / AAAA.node001""# + )?; + // Line 4: normal line for session B + writeln!( + file, + r#"Jan 01 00:00:04 host app dt="2026-01-01 00:00:04,000", sessionId=BBBB.node001, correlationId=c2, msg="some other action""# + )?; + // Line 5: unrelated session X + writeln!( + file, + r#"Jan 01 00:00:05 host app dt="2026-01-01 00:00:05,000", sessionId=XXXX.node002, correlationId=c9, msg="unrelated""# + )?; + + // "findme" matches line 2 (session AAAA, correlation c1). + // Session AAAA is in seeds. No changeSessionId has AAAA as new → no backward expansion. + // Correlation c1 is in seeds. + // Expected output: lines 1 and 2 (both have session AAAA or correlation c1). + // Lines 3,4 (session BBBB) should NOT be included (AAAA is old, not new). + // Line 5 (session XXXX) should NOT be included. + run_search(log_path.to_str().unwrap(), "findme", false, true, 1)?; + Ok(()) + } + + #[test] + fn test_expand_follows_change_session_backward() -> Result<()> { + let dir = tempfile::tempdir()?; + let log_path = dir.path().join("test.log"); + let mut file = std::fs::File::create(&log_path)?; + + // Line 1: signature for session OLD + writeln!( + file, + r#"Jan 01 00:00:01 host app dt="2026-01-01 00:00:01,000", sessionId=OLD.node001, correlationId=c0, msg="signature:APP/1.0/ details:OS:1""# + )?; + // Line 2: normal line for session OLD + writeln!( + file, + r#"Jan 01 00:00:02 host app dt="2026-01-01 00:00:02,000", sessionId=OLD.node001, correlationId=c1, msg="doing stuff""# + )?; + // Line 3: changeSessionId: NEW replaces OLD + writeln!( + file, + r#"Jan 01 00:00:03 host app dt="2026-01-01 00:00:03,000", sessionId=NEW.node001, correlationId=c2, msg="changeSessionId: newSessionId: sNEW / NEW.node001 replaces oldSessionId: sOLD / OLD.node001""# + )?; + // Line 4: normal line for session NEW, matches query + writeln!( + file, + r#"Jan 01 00:00:04 host app dt="2026-01-01 00:00:04,000", sessionId=NEW.node001, correlationId=c3, msg="findme in new session""# + )?; + + // "findme" matches line 4 (session NEW). changeSessionId maps NEW→OLD. + // OLD has a signature → include OLD but stop recursing. + // Expected: lines 1-4 all included (sessions OLD and NEW). + run_search(log_path.to_str().unwrap(), "findme", false, true, 1)?; + Ok(()) + } + + // --- Chunk boundary tests --- + + #[test] + fn test_chunk_boundary_no_lost_lines() -> Result<()> { + // Write lines of known byte sizes, then split exactly on a line boundary + let dir = tempfile::tempdir()?; + let path = dir.path().join("test.log"); + let mut f = std::fs::File::create(&path)?; + // 3 lines: "line1\n", "line2\n", "line3\n" (6 bytes each) + write!(f, "line1\nline2\nline3\n")?; + drop(f); + + // Split at byte 6 (exactly between line1 and line2) + let mut collected = Vec::new(); + for_each_line_in_chunk(path.to_str().unwrap(), 0, 6, |l| { + collected.push(l.to_string()); + })?; + for_each_line_in_chunk(path.to_str().unwrap(), 6, 18, |l| { + collected.push(l.to_string()); + })?; + assert_eq!(collected, vec!["line1", "line2", "line3"]); + Ok(()) + } + + #[test] + fn test_chunk_boundary_mid_line() -> Result<()> { + let dir = tempfile::tempdir()?; + let path = dir.path().join("test.log"); + let mut f = std::fs::File::create(&path)?; + write!(f, "line1\nline2\nline3\n")?; + drop(f); + + // Split at byte 3 (middle of "line1") + let mut collected = Vec::new(); + for_each_line_in_chunk(path.to_str().unwrap(), 0, 3, |l| { + collected.push(l.to_string()); + })?; + for_each_line_in_chunk(path.to_str().unwrap(), 3, 18, |l| { + collected.push(l.to_string()); + })?; + assert_eq!(collected, vec!["line1", "line2", "line3"]); + Ok(()) + } + + #[test] + fn test_chunk_four_way_split() -> Result<()> { + let dir = tempfile::tempdir()?; + let path = dir.path().join("test.log"); + let mut f = std::fs::File::create(&path)?; + for i in 0..20 { + writeln!(f, "line {:02}", i)?; + } + drop(f); + + let file_size = std::fs::metadata(path.as_path())?.len(); + let chunks = compute_chunks(file_size, 4); + + let mut collected = Vec::new(); + for (start, end) in chunks { + for_each_line_in_chunk(path.to_str().unwrap(), start, end, |l| { + collected.push(l.to_string()); + })?; + } + let expected: Vec = (0..20).map(|i| format!("line {:02}", i)).collect(); + assert_eq!(collected, expected); + Ok(()) + } + + // --- Regression: thread-consistency (P1) --- + // Both sequential and parallel pass2 must stop at sessionDestroyed + // and produce identical results regardless of thread count. + + #[test] + fn test_pass2_stops_at_session_destroyed() -> Result<()> { + let dir = tempfile::tempdir()?; + let log_path = dir.path().join("test.log"); + let mut file = std::fs::File::create(&log_path)?; + + // Session AAAA with signature + writeln!( + file, + r#"Jan 01 00:00:01 host app sessionId=AAAA.node001, correlationId=c1, msg="signature:APP_A/1.0/ details:OS:1""# + )?; + // Line that matches query, with CID c1 + writeln!( + file, + r#"Jan 01 00:00:02 host app sessionId=AAAA.node001, correlationId=c1, msg="findme error""# + )?; + // Session destroyed for AAAA + writeln!( + file, + r#"Jan 01 00:00:03 host app msg="sessionDestroyed #s=1 sid=AAAA.node001 isnew=false""# + )?; + // Post-destroy line with same CID c1 — must NOT be included + writeln!( + file, + r#"Jan 01 00:00:04 host app sessionId=noSession, correlationId=c1, msg="async callback after destroy""# + )?; + + // Run pass1 + expand + let pool = build_thread_pool(1)?; + let pass1 = run_pass1(log_path.to_str().unwrap(), "findme", false, &pool)?; + let (expanded_sids, expanded_cids) = expand_seeds( + &pass1.seed_session_ids, + &pass1.seed_correlation_ids, + &pass1.change_session_map, + &pass1.sessions_with_signature, + ); + + assert!(expanded_cids.contains("c1")); + + // Sequential: lines 1 (sid match) + 2 (query+sid+cid) = 2, then + // line 3 destroys AAAA (the only expanded session) → stop. + // Line 4 must NOT be included. + let seq_count = run_pass2( + log_path.to_str().unwrap(), + "findme", + &expanded_sids, + &expanded_cids, + false, + &pool, + false, + )?; + assert_eq!(seq_count, 2, "sequential pass2 must stop at sessionDestroyed"); + + // Parallel: must produce the same count. + let par_pool = build_thread_pool(2)?; + let par_count = run_pass2( + log_path.to_str().unwrap(), + "findme", + &expanded_sids, + &expanded_cids, + true, + &par_pool, + false, + )?; + assert_eq!(par_count, seq_count, "parallel pass2 must match sequential"); + + Ok(()) + } + + #[test] + fn test_session_destroyed_regex() { + let line = "sessionDestroyed #s=8 sid=2010F74498079D00A5647F3777545A64.node003 isnew=false age=693s last=644s attrs=loginState,lastRequest,userSessionData"; + let caps = SESSION_DESTROYED_RE.captures(line).unwrap(); + assert_eq!( + caps.get(1).unwrap().as_str(), + "2010F74498079D00A5647F3777545A64.node003" + ); + } + + // --- Regression: strict app isolation (P1) --- + // search-exceptions --app APP_A must not include lines from APP_B even when + // they share a correlation ID with an APP_A session. + + #[test] + fn test_search_exceptions_strict_app_isolation() -> Result<()> { + let dir = tempfile::tempdir()?; + let log_path = dir.path().join("test.log"); + let mut file = std::fs::File::create(&log_path)?; + + // APP_A session + writeln!( + file, + r#"Jan 01 00:00:01 host app sessionId=AAAA.node001, correlationId=c1, msg="signature:APP_A/1.0/ details:OS:1""# + )?; + // APP_A Exception line with shared CID + writeln!( + file, + r#"Jan 01 00:00:02 host app sessionId=AAAA.node001, correlationId=shared_cid, msg="Exception in APP_A""# + )?; + // APP_B session with same shared CID + writeln!( + file, + r#"Jan 01 00:00:03 host app sessionId=BBBB.node001, correlationId=c2, msg="signature:APP_B/2.0/ details:OS:1""# + )?; + writeln!( + file, + r#"Jan 01 00:00:04 host app sessionId=BBBB.node001, correlationId=shared_cid, msg="handling request from APP_B""# + )?; + + // Run the full pipeline as search-exceptions does + let pool = build_thread_pool(1)?; + let query = "Exception"; + let pass1 = run_pass1(log_path.to_str().unwrap(), query, false, &pool)?; + + let (expanded_sids, expanded_cids) = expand_seeds( + &pass1.seed_session_ids, + &pass1.seed_correlation_ids, + &pass1.change_session_map, + &pass1.sessions_with_signature, + ); + + let app_filters = vec!["APP_A".to_string()]; + let filtered_sids = filter_expanded_by_app( + &expanded_sids, + &pass1.change_session_map, + &pass1.session_app_map, + &app_filters, + ); + + let filtered_cids: HashSet = expanded_cids + .iter() + .filter(|cid| { + pass1 + .seed_cid_sessions + .get(cid.as_str()) + .is_some_and(|sid| filtered_sids.contains(sid)) + }) + .cloned() + .collect(); + + // shared_cid should be in filtered_cids (it came from APP_A's session) + assert!(filtered_cids.contains("shared_cid")); + // BBBB should NOT be in filtered_sids + assert!(!filtered_sids.contains("BBBB")); + + // Run pass2 with strict_app_isolation=true + let count = run_pass2( + log_path.to_str().unwrap(), + query, + &filtered_sids, + &filtered_cids, + false, + &pool, + true, + )?; + + // Line 1 (sig for APP_A, sid match) = included + // Line 2 (Exception, sid+cid match) = included + // Line 3 (sig for APP_B, sid NOT in filtered) = excluded + // Line 4 (APP_B line, cid=shared_cid but sid=BBBB not in filtered) = excluded by strict isolation + assert_eq!(count, 2, "APP_B lines must not leak through shared CID"); + Ok(()) + } +}