# Daily vulnerability watch.
#
# Pipeline: restore the state artifact of the last successful run, fetch and
# diff all sources (no model tokens spent), classify anything new with Claude,
# merge the classifications back into the state, then publish the new state
# and the daily report as artifacts.
name: Online search for vulns

on:
  schedule:
    - cron: '42 8 * * *'
  workflow_dispatch:
    inputs:
      model:
        description: 'Claude model to use (cron runs default to Sonnet)'
        required: false
        type: choice
        default: claude-sonnet-4-6
        options:
          - claude-sonnet-4-6
          - claude-opus-4-7
          - claude-haiku-4-5-20251001
      window_hours:
        description: 'Lookback window in hours (cron runs use 25)'
        required: false
        type: string
        default: '25'
      reconsider_age_days:
        description: 'Only reconsider backlog entries last reviewed ≥ N days ago (0 = all, default 7)'
        required: false
        type: string
        default: '7'

permissions:
  contents: read
  actions: read     # needed to list/download previous run artifacts
  id-token: write   # needed by claude-code-action for OIDC auth

# Only one watch run at a time; a newer run cancels the one in progress.
concurrency:
  group: vuln-watch
  cancel-in-progress: true

jobs:
  watch:
    runs-on: ubuntu-latest
    timeout-minutes: 20
    steps:
      # The scripts driving this workflow live on the `vuln-watch` branch so
      # they don't clutter master (which is what ships to production). The
      # workflow file itself MUST stay on the default branch, as GitHub only
      # honors `schedule:` triggers on the default branch.
      - name: Checkout vuln-watch branch (scripts + prompt)
        uses: actions/checkout@v5
        with:
          ref: vuln-watch
          fetch-depth: 1
          persist-credentials: false

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Install Python dependencies
        run: python -m pip install --quiet feedparser

      # ---- Load previous state ---------------------------------------------
      # Find the most recent successful run of THIS workflow and pull its
      # `vuln-watch-state` artifact. The current run is still in progress, so
      # the `--status=success` filter never returns it. On the very first run
      # there will be none — that's fine, we start empty.
      - name: Find previous successful run id
        id: prev
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          # Passed through the environment rather than interpolated into the
          # script text, so the value is never parsed as shell syntax.
          WORKFLOW_NAME: ${{ github.workflow }}
        run: |
          set -euo pipefail
          run_id=$(gh run list \
            --workflow="$WORKFLOW_NAME" \
            --status=success \
            --limit 1 \
            --json databaseId \
            --jq '.[0].databaseId // empty')
          echo "run_id=${run_id}" >> "$GITHUB_OUTPUT"
          if [ -n "$run_id" ]; then
            echo "Found previous successful run: $run_id"
          else
            echo "No previous successful run — starting from empty state."
          fi

      - name: Download previous state artifact
        if: steps.prev.outputs.run_id != ''
        uses: actions/download-artifact@v5
        continue-on-error: true  # tolerate retention expiry
        with:
          name: vuln-watch-state
          path: state/
          run-id: ${{ steps.prev.outputs.run_id }}
          github-token: ${{ secrets.GITHUB_TOKEN }}

      # ---- Fetch + diff (token-free; runs every time) ---------------------
      # Performs conditional GETs (ETag / If-Modified-Since) against every
      # source, parses RSS/Atom/HTML, dedups against state.seen + state.aliases,
      # applies the time-window filter, and emits new_items.json.
      # Updates state.sources (HTTP cache metadata + per-source high-water
      # marks) in place so the cache survives even when Claude doesn't run.
      - name: Fetch + diff all sources
        id: diff
        env:
          # NOTE(review): `run_started_at` does not appear in the documented
          # list of `github` context properties — confirm this expression
          # resolves to a non-empty value here and in the later steps that
          # reuse it, otherwise SCAN_DATE reaches the scripts empty.
          SCAN_DATE: ${{ github.run_started_at }}
          # Scheduled runs supply no input values, so the fallbacks kick in.
          WINDOW_HOURS: ${{ inputs.window_hours || '25' }}
          RECONSIDER_AGE_DAYS: ${{ inputs.reconsider_age_days || '7' }}
        run: python -m scripts.vuln_watch.fetch_and_diff

      # ---- Fetch checker code so Claude can grep it for coverage ---------
      # The orphan vuln-watch branch has none of the actual checker code,
      # so we pull the `test` branch (the dev branch where coded-but-
      # unreleased CVE checks live) into ./checker/. The prompt tells
      # Claude this is the canonical source of truth for "is CVE-X already
      # implemented?". Only fetched on days with something to classify.
      - name: Checkout checker code (test branch) for coverage grep
        if: steps.diff.outputs.new_count != '0' || steps.diff.outputs.reconsider_count != '0'
        uses: actions/checkout@v5
        with:
          ref: test
          path: checker
          fetch-depth: 1
          persist-credentials: false

      # ---- Classify new items with Claude (skipped when nothing is new) ---
      # Model selection: a manual workflow_dispatch run picks from a dropdown
      # (defaulting to Sonnet). Scheduled cron runs supply no input values,
      # so the `|| 'claude-sonnet-4-6'` fallback kicks in — cron always uses
      # Sonnet to keep the daily cost floor low.
      - name: Run classifier with Claude
        id: classify
        if: steps.diff.outputs.new_count != '0' || steps.diff.outputs.reconsider_count != '0'
        uses: anthropics/claude-code-action@v1
        env:
          SCAN_DATE: ${{ github.run_started_at }}
        with:
          prompt: |
            Read the full task instructions from scripts/daily_vuln_watch_prompt.md
            and execute them end-to-end. Your input is new_items.json (already
            deduped, windowed, and pre-filtered — do NOT re-fetch sources). Write
            the three watch_${TODAY}_*.md files and classifications.json. Use
            $SCAN_DATE as the canonical timestamp.
          claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
          # model + tool allowlist pass through claude_args (v1 dropped the
          # dedicated `model:` and `allowed_tools:` inputs). Job-level
          # `timeout-minutes: 20` above bounds total runtime.
          claude_args: |
            --model ${{ inputs.model || 'claude-sonnet-4-6' }}
            --allowedTools "Read,Write,Edit,Bash,Grep,Glob,WebFetch"

      # Purely diagnostic, so it keeps `always()`: the log is uploaded
      # whenever the classifier produced one, whatever the job status.
      - name: Upload Claude execution log
        if: ${{ always() && steps.classify.outputs.execution_file != '' }}
        uses: actions/upload-artifact@v5
        with:
          name: claude-execution-log-${{ github.run_id }}
          path: ${{ steps.classify.outputs.execution_file }}
          retention-days: 30
          if-no-files-found: warn

      # ---- Merge classifications back into state --------------------------
      # Also writes stub watch_*.md files if the classify step was skipped, so
      # the report artifact is consistent across runs.
      #
      # `!cancelled()` (rather than `always()`) still runs these steps after
      # an earlier failure, but not once the run has been cancelled — e.g. by
      # the `cancel-in-progress` concurrency rule. A cancelled run is never
      # selected by the `--status=success` lookup above, so its state would
      # not be read back anyway.
      - name: Merge classifications into state
        if: ${{ !cancelled() }}
        env:
          SCAN_DATE: ${{ github.run_started_at }}
        run: python -m scripts.vuln_watch.merge_state

      - name: Upload new state artifact
        if: ${{ !cancelled() }}
        uses: actions/upload-artifact@v5
        with:
          name: vuln-watch-state
          path: state/seen.json
          retention-days: 90
          if-no-files-found: error

      - name: Upload daily report
        if: ${{ !cancelled() }}
        uses: actions/upload-artifact@v5
        with:
          name: vuln-watch-report-${{ github.run_id }}
          path: |
            watch_*.md
            current_toimplement.md
            current_tocheck.md
            new_items.json
            classifications.json
          retention-days: 90
          if-no-files-found: warn