From 78a6e4a418e4eb2c9c49a55b927d966f8e507d3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Lesimple?= Date: Sun, 19 Apr 2026 09:14:21 +0000 Subject: [PATCH] chore: move cron vuln-watch workflow script files to their own branch --- .github/workflows/daily_vuln_scan_prompt.md | 199 -------------------- .github/workflows/vuln-scan.yml | 129 ------------- .github/workflows/vuln-watch.yml | 163 ++++++++++++++++ 3 files changed, 163 insertions(+), 328 deletions(-) delete mode 100644 .github/workflows/daily_vuln_scan_prompt.md delete mode 100644 .github/workflows/vuln-scan.yml create mode 100644 .github/workflows/vuln-watch.yml diff --git a/.github/workflows/daily_vuln_scan_prompt.md b/.github/workflows/daily_vuln_scan_prompt.md deleted file mode 100644 index 7c4eeda..0000000 --- a/.github/workflows/daily_vuln_scan_prompt.md +++ /dev/null @@ -1,199 +0,0 @@ -# Daily transient-execution vulnerability scan - -You are a scheduled agent running inside a GitHub Actions job. Your job -is to audit public news/advisory sources for **transient-execution and -CPU side-channel vulnerabilities** that may need to be added to -**spectre-meltdown-checker** (this repository). - -## What counts as "relevant" - -spectre-meltdown-checker detects, reports, and suggests mitigations for -CPU vulnerabilities such as: Spectre v1/v2/v4, Meltdown, Foreshadow/L1TF, -MDS (ZombieLoad/RIDL/Fallout), TAA, SRBDS, iTLB Multihit, Zenbleed, -Downfall (GDS), Retbleed, Inception, SRSO, BHI, RFDS, Reptar, FP-DSS, -and any similar microarchitectural side-channel or speculative-execution -issue on x86 (Intel/AMD) or ARM CPUs. It also surfaces related hardware -mitigation features (SMAP/SMEP/UMIP/IBPB/eIBRS/STIBP…) when they gate -the remediation for a tracked CVE. - -It does **not** track generic software CVEs, GPU driver bugs, networking -stacks, filesystem bugs, userspace crypto issues, or unrelated kernel -subsystems. 
- -## Inputs handed to you by the workflow - -- Working directory: the repo root (`/github/workspace` in Actions, or - wherever `actions/checkout` placed it). You may `grep` the repo to - check whether a CVE or codename is already covered. -- `state/seen.json` — memory carried over from the previous run, with - shape: - - ```json - { - "last_run": "2026-04-17T08:00:12Z", - "seen": { - "<stable-id-1>": { "bucket": "unrelated", "seen_at": "2026-04-17T08:00:12Z", "source": "phoronix" }, - "<stable-id-2>": { "bucket": "tocheck", "seen_at": "2026-04-17T08:00:12Z", "source": "oss-sec", "cve": "CVE-2026-1234" } - } - } - ``` - - On the very first run, or when the prior artifact has expired, - the file exists but `seen` is empty and `last_run` is `null`. - -- Environment: `SCAN_DATE` (ISO-8601 timestamp of the run start, set by - the workflow). Treat this as "now" for all time-window decisions. - -## Time window - -This is a belt-and-suspenders design — use **both** mechanisms: - -1. **Primary: stable-id dedup.** If an item's stable identifier (see - below) is already present in `state.seen`, skip it entirely — it - was classified on a previous day. -2. **Secondary: 25-hour window.** Among *new* items, prefer those whose - publication/update timestamp is within the last 25 h relative to - `SCAN_DATE`. This bounds work when the prior artifact expired - (90-day retention) or when `last_run` is stale (missed runs). - If `last_run` is older than 25 h, widen the window to - `now - last_run + 1h` so no items are lost across missed runs. -3. Items without a parseable timestamp: include them (fail-safe). - -## Sources to poll - -Fetch each URL with -`curl -sS -A "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36" -L --max-time 20`. -On non-2xx or timeout, record the failure in the run summary and -continue — do not abort. 
- -### RSS / Atom feeds (primary — parse feed timestamps) - -| Short name | URL | -|-----------------|-----| -| phoronix | https://www.phoronix.com/rss.php | -| oss-sec | https://seclists.org/rss/oss-sec.rss | -| lwn | https://lwn.net/headlines/newrss | -| project-zero | https://googleprojectzero.blogspot.com/feeds/posts/default | -| vusec | https://www.vusec.net/feed/ | -| comsec-eth | https://comsec.ethz.ch/category/news/feed/ | -| msrc | https://msrc.microsoft.com/update-guide/rss | -| cisa | https://www.cisa.gov/cybersecurity-advisories/all.xml | -| cert-cc | https://www.kb.cert.org/vuls/atomfeed/ | - -### HTML pages (no RSS — fetch, extract dated entries) - -| Short name | URL | -|-----------------|-----| -| intel-psirt | https://www.intel.com/content/www/us/en/security-center/default.html | -| amd-psirt | https://www.amd.com/en/resources/product-security.html | -| arm-spec | https://developer.arm.com/Arm%20Security%20Center/Speculative%20Processor%20Vulnerability | -| transient-fail | https://transient.fail/ | - -For HTML pages: look for advisory tables or listings with dates. Extract -the advisory title, permalink, and date. If a page has no dates at all, -compare its content against `state.seen` — any new advisory IDs not yet -classified count as "new this run". - -## Stable identifier per source - -Use the first available of these, in order, as the dedup key: - -1. Vendor advisory ID (`INTEL-SA-01234`, `AMD-SB-7001`, `ARM-2024-0042`, - `VU#123456`, `CVE-YYYY-NNNNN`) -2. RSS `<guid>` / Atom `<id>` -3. Permalink URL (`<link>`) - -Always also record the permalink URL in the output file so a human can -click through. - -## Classification rules - -For each **new** item (not in `state.seen`) that passes the time window, -pick exactly one bucket: - -- **toimplement** — a clearly-identified new transient-execution / CPU - side-channel vulnerability in scope, **and not already covered by - this repo**. 
Verify the second half by grepping the repo for the CVE - ID *and* the codename before classifying; if either matches existing - code, demote to `tocheck`. -- **tocheck** — plausibly in-scope but ambiguous: mitigation-only - feature (LASS, IBT, APIC-virt, etc.); item seemingly already - implemented but worth confirming scope; unclear applicability - (e.g. embedded-only ARM SKU); CVE-ID pending; contradictory info - across sources. State clearly what would resolve the ambiguity. -- **unrelated** — everything else. - -Tie-breakers: prefer `tocheck` over `unrelated` when uncertain. Prefer -`tocheck` over `toimplement` when the CVE ID is still "reserved" / -"pending" — false positives in `toimplement` waste human time more than -false positives in `tocheck`. - -## Outputs - -Compute `TODAY=$(date -u -d "$SCAN_DATE" +%F)`. Write these files under -the repo root, overwriting if they already exist (they shouldn't unless -the workflow re-ran the same day): - -- `rss_${TODAY}_toimplement.md` -- `rss_${TODAY}_tocheck.md` -- `rss_${TODAY}_unrelated.md` - -Each file uses level-2 headers per source short-name, then one bullet -per item: the stable ID (if any), the permalink URL, and 1–2 sentences. -Keep entries terse — a human skims these daily. - -```markdown -## oss-sec -- **CVE-2026-1234** — https://www.openwall.com/lists/oss-security/2026/04/18/3 - New Intel transient-execution bug "Foo" disclosed today; affects - Redwood Cove cores, microcode fix pending. Not yet covered by this - repo (grepped for CVE-2026-1234 and "Foo" — no matches). - -## phoronix -- https://www.phoronix.com/news/Some-Article - Linux 7.2 drops a compiler-target flag; unrelated to CPU side channels. -``` - -If a bucket has no items, write the file with a single line -`(no new items in this window)` so it is obvious the job ran. 
- -### Run summary - -Append this block to the **tocheck** file (creating it if empty): - -```markdown -## Run summary -- SCAN_DATE: <value> -- window cutoff: <computed cutoff timestamp> -- prior state size: <N> entries, last_run=<value> -- per-source new item counts: phoronix=<n>, oss-sec=<n>, lwn=<n>, ... -- fetch failures: <list, or "none"> -- total classified this run: toimplement=<n>, tocheck=<n>, unrelated=<n> -``` - -### State update - -Rewrite `state/seen.json` with: - -- `last_run` = `SCAN_DATE` -- `seen` = union of (pruned prior `seen`) ∪ (all items classified this - run, keyed by stable ID, with `{bucket, seen_at=SCAN_DATE, source, cve?}`) - -Pruning (keep state bounded): drop any entry whose `seen_at` is older -than 30 days before `SCAN_DATE`. The workflow step also does this as -a safety net, but do it here too so the in-memory view is consistent. - -## Guardrails - -- Do NOT modify any repo source code. Only write the three markdown - output files and `state/seen.json`. -- Do NOT create commits, branches, or PRs. -- Do NOT call any tool that posts externally (Slack, GitHub comments, - issues, email, etc.). -- Do NOT follow links off-site for deeper investigation unless strictly - needed to resolve a `tocheck` ambiguity — budget of at most 5 such - follow-ups per run. -- If a source returns unexpectedly large content, truncate to the first - ~200 items before parsing. -- If total runtime exceeds 15 minutes, finish whatever you can, - write partial outputs, and note it in the run summary. 
diff --git a/.github/workflows/vuln-scan.yml b/.github/workflows/vuln-scan.yml
deleted file mode 100644
index 62ca910..0000000
--- a/.github/workflows/vuln-scan.yml
+++ /dev/null
@@ -1,129 +0,0 @@
-name: Online search for vulns
-
-on:
-  schedule:
-    - cron: '42 8 * * *'
-  workflow_dispatch: {}  # allow manual trigger
-
-permissions:
-  contents: read
-  actions: read  # needed to list/download previous run artifacts
-  id-token: write  # needed to mint OIDC token
-
-concurrency:
-  group: vuln-scan
-  cancel-in-progress: true
-
-jobs:
-  scan:
-    runs-on: ubuntu-latest
-    timeout-minutes: 20
-
-    steps:
-      - name: Checkout repository (for grep-based dedup against existing checks)
-        uses: actions/checkout@v5
-        with:
-          fetch-depth: 1
-          persist-credentials: false
-
-      # ---- Load previous state ---------------------------------------------
-      # Find the most recent successful run of THIS workflow (other than the
-      # current one) and pull its `vuln-scan-state` artifact. On the very
-      # first run there will be none — that's fine, we start empty.
-      - name: Find previous successful run id
-        id: prev
-        env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: |
-          set -e
-          run_id=$(gh run list \
-            --workflow="${{ github.workflow }}" \
-            --status=success \
-            --limit 1 \
-            --json databaseId \
-            --jq '.[0].databaseId // empty')
-          echo "run_id=${run_id}" >> "$GITHUB_OUTPUT"
-          if [ -n "$run_id" ]; then
-            echo "Found previous successful run: $run_id"
-          else
-            echo "No previous successful run — starting from empty state."
-          fi
-
-      - name: Download previous state artifact
-        if: steps.prev.outputs.run_id != ''
-        uses: actions/download-artifact@v4
-        continue-on-error: true  # tolerate retention expiry
-        with:
-          name: vuln-scan-state
-          path: state/
-          run-id: ${{ steps.prev.outputs.run_id }}
-          github-token: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Ensure state file exists
-        run: |
-          mkdir -p state
-          if [ ! -f state/seen.json ]; then
-            echo '{"last_run": null, "seen": {}}' > state/seen.json
-            echo "Initialized empty state."
-          fi
-          echo "State size: $(wc -c < state/seen.json) bytes"
-
-      # ---- Run the scan ----------------------------------------------------
-      # Runs Claude Code against daily_vuln_scan_prompt.md.
-      # That prompt file fully specifies: sources to poll, how to read
-      # state/seen.json, the 25-hour window, the output files to write,
-      # and how to rewrite state/seen.json at the end of the run.
-      - name: Research for online mentions of new vulns
-        id: scan
-        uses: anthropics/claude-code-action@v1
-        env:
-          SCAN_DATE: ${{ github.run_started_at }}
-        with:
-          claude_args: |
-            --model claude-opus-4-7 --allowedTools "Read,Write,Edit,Bash,Grep,Glob,WebFetch"
-          prompt: |
-            Read the full task instructions from .github/workflows/daily_vuln_scan_prompt.md and execute them end-to-end. That file fully specifies: sources to poll, how to read and update state/seen.json, the 25-hour window, which rss_YYYY-MM-DD_*.md files to write, and the run guardrails. Use $SCAN_DATE (env var) as "now" for time-window decisions.
-          claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
-
-      - name: Upload Claude execution log
-        if: ${{ always() && steps.scan.outputs.execution_file != '' }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: claude-execution-log-${{ github.run_id }}
-          path: ${{ steps.scan.outputs.execution_file }}
-          retention-days: 30
-          if-no-files-found: warn
-
-      # ---- Persist outputs -------------------------------------------------
-      - name: Prune state (keep only entries from the last 30 days)
-        run: |
-          python3 - <<'PY'
-          import json, datetime, pathlib
-          p = pathlib.Path("state/seen.json")
-          data = json.loads(p.read_text())
-          cutoff = (datetime.datetime.utcnow() - datetime.timedelta(days=30)).isoformat()
-          before = len(data.get("seen", {}))
-          data["seen"] = {
-              k: v for k, v in data.get("seen", {}).items()
-              if v.get("seen_at", "9999") >= cutoff
-          }
-          after = len(data["seen"])
-          p.write_text(json.dumps(data, indent=2, sort_keys=True))
-          print(f"Pruned state: {before} -> {after} entries")
-          PY
-
-      - name: Upload new state artifact
-        uses: actions/upload-artifact@v4
-        with:
-          name: vuln-scan-state
-          path: state/seen.json
-          retention-days: 90
-          if-no-files-found: error
-
-      - name: Upload daily report
-        uses: actions/upload-artifact@v4
-        with:
-          name: vuln-scan-report-${{ github.run_id }}
-          path: rss_*.md
-          retention-days: 90
-          if-no-files-found: warn
diff --git a/.github/workflows/vuln-watch.yml b/.github/workflows/vuln-watch.yml
new file mode 100644
index 0000000..fe5b0bb
--- /dev/null
+++ b/.github/workflows/vuln-watch.yml
@@ -0,0 +1,163 @@
+name: Online search for vulns
+
+on:
+  schedule:
+    - cron: '42 8 * * *'
+  workflow_dispatch: {}  # allow manual trigger
+
+permissions:
+  contents: read
+  actions: read  # needed to list/download previous run artifacts
+  id-token: write  # needed by claude-code-action for OIDC auth
+
+concurrency:
+  group: vuln-watch
+  cancel-in-progress: true
+
+jobs:
+  watch:
+    runs-on: ubuntu-latest
+    timeout-minutes: 20
+
+    steps:
+      # The `github` context has no run-start timestamp; stamp it once here and
+      # export via $GITHUB_ENV so every later step sees the same SCAN_DATE.
+      - name: Record scan start time (exported as SCAN_DATE)
+        run: |
+          echo "SCAN_DATE=$(date -u +%Y-%m-%dT%H:%M:%SZ)" >> "$GITHUB_ENV"
+
+      # The scripts driving this workflow live on the `vuln-watch` branch so
+      # they don't clutter master (which is what ships to production). The
+      # workflow file itself MUST stay on the default branch, as GitHub only
+      # honors `schedule:` triggers on the default branch.
+      - name: Checkout vuln-watch branch (scripts + prompt)
+        uses: actions/checkout@v5
+        with:
+          ref: vuln-watch
+          fetch-depth: 1
+          persist-credentials: false
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+
+      - name: Install Python dependencies
+        run: python -m pip install --quiet feedparser
+
+      # ---- Load previous state ---------------------------------------------
+      # Find the most recent successful run of THIS workflow (other than the
+      # current one) and pull its `vuln-watch-state` artifact. On the very
+      # first run there will be none — that's fine, we start empty.
+      - name: Find previous successful run id
+        id: prev
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          set -e
+          run_id=$(gh run list \
+            --workflow="${{ github.workflow }}" \
+            --status=success \
+            --limit 1 \
+            --json databaseId \
+            --jq '.[0].databaseId // empty')
+          echo "run_id=${run_id}" >> "$GITHUB_OUTPUT"
+          if [ -n "$run_id" ]; then
+            echo "Found previous successful run: $run_id"
+          else
+            echo "No previous successful run — starting from empty state."
+          fi
+
+      - name: Download previous state artifact
+        if: steps.prev.outputs.run_id != ''
+        uses: actions/download-artifact@v5
+        continue-on-error: true  # tolerate retention expiry
+        with:
+          name: vuln-watch-state
+          path: state/
+          run-id: ${{ steps.prev.outputs.run_id }}
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+
+      # ---- Fetch + diff (token-free; runs every time) ---------------------
+      # Performs conditional GETs (ETag / If-Modified-Since) against every
+      # source, parses RSS/Atom/HTML, dedups against state.seen + state.aliases,
+      # applies the time-window filter, and emits new_items.json.
+      # Updates state.sources (HTTP cache metadata + per-source high-water
+      # marks) in place so the cache survives even when Claude doesn't run.
+      - name: Fetch + diff all sources
+        id: diff
+        run: python -m scripts.vuln_watch.fetch_and_diff
+
+      # ---- Fetch checker code so Claude can grep it for coverage ---------
+      # The orphan vuln-watch branch has none of the actual checker code,
+      # so we pull the `test` branch (the dev branch where coded-but-
+      # unreleased CVE checks live) into ./checker/. The prompt tells
+      # Claude this is the canonical source of truth for "is CVE-X already
+      # implemented?". Only fetched on days with something to classify.
+      - name: Checkout checker code (test branch) for coverage grep
+        if: steps.diff.outputs.new_count != '0'
+        uses: actions/checkout@v5
+        with:
+          ref: test
+          path: checker
+          fetch-depth: 1
+          persist-credentials: false
+
+      # ---- Classify new items with Claude (skipped when nothing is new) ---
+      - name: Run classifier with Claude Opus
+        id: classify
+        if: steps.diff.outputs.new_count != '0'
+        uses: anthropics/claude-code-action@v1
+        with:
+          prompt: |
+            Read the full task instructions from scripts/daily_vuln_watch_prompt.md
+            and execute them end-to-end. Your input is new_items.json (already
+            deduped, windowed, and pre-filtered — do NOT re-fetch sources).
+            Write the three watch_${TODAY}_*.md files and classifications.json.
+            Use $SCAN_DATE as the canonical timestamp.
+          claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
+          # model + tool allowlist now pass through claude_args (v1 dropped
+          # the dedicated `model:` and `allowed_tools:` inputs). Job-level
+          # `timeout-minutes: 20` above bounds total runtime.
+          claude_args: |
+            --model claude-opus-4-7
+            --allowedTools "Read,Write,Edit,Bash,Grep,Glob,WebFetch"
+
+      - name: Upload Claude execution log
+        if: ${{ always() && steps.classify.outputs.execution_file != '' }}
+        uses: actions/upload-artifact@v5
+        with:
+          name: claude-execution-log-${{ github.run_id }}
+          path: ${{ steps.classify.outputs.execution_file }}
+          retention-days: 30
+          if-no-files-found: warn
+
+      # ---- Merge classifications back into state --------------------------
+      # Also writes stub watch_*.md files if the classify step was skipped, so
+      # the report artifact is consistent across runs.
+      - name: Merge classifications into state
+        if: always()
+        run: python -m scripts.vuln_watch.merge_state
+
+      - name: Upload new state artifact
+        if: always()
+        uses: actions/upload-artifact@v5
+        with:
+          name: vuln-watch-state
+          path: state/seen.json
+          retention-days: 90
+          if-no-files-found: error
+
+      - name: Upload daily report
+        if: always()
+        uses: actions/upload-artifact@v5
+        with:
+          name: vuln-watch-report-${{ github.run_id }}
+          path: |
+            watch_*.md
+            current_toimplement.md
+            current_tocheck.md
+            new_items.json
+            classifications.json
+          retention-days: 90
+          if-no-files-found: warn