ci: Rework Claude review workflow to use CLI directly

author Daan De Meyer <daan@amutable.com>

Tue, 31 Mar 2026 19:18:12 +0000 (21:18 +0200)

committer Daan De Meyer <daan.j.demeyer@gmail.com>

Wed, 1 Apr 2026 08:34:54 +0000 (10:34 +0200)
author Daan De Meyer <daan@amutable.com>
Tue, 31 Mar 2026 19:18:12 +0000 (21:18 +0200)
committer Daan De Meyer <daan.j.demeyer@gmail.com>
Wed, 1 Apr 2026 08:34:54 +0000 (10:34 +0200)
diff --git a/.github/workflows/claude-review.yml b/.github/workflows/claude-review.yml

index f05ea14d2d5db49dc75b47fefd0b429798fb08eb..eea4a5ed3375649cd162d0ef87e0d841c95aef5e 100644 (file)
--- a/.github/workflows/claude-review.yml
+++ b/.github/workflows/claude-review.yml
@@ -139,6 +139,7 @@ jobs:
    review:
      runs-on: ubuntu-latest
      needs: setup
+    timeout-minutes: 60
  
      permissions:
        contents: read
@@ -147,21 +148,29 @@ jobs:
      steps:
        - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
          with:
-          # Need full history so git diff <sha>~1..<sha> works for all PR commits.
+          # Need full history for git worktree add to work on all PR commits.
            fetch-depth: 0
  
-      - name: Fetch PR branch
-        env:
-          PR_NUMBER: ${{ needs.setup.outputs.pr_number }}
-        run: git fetch origin "pull/${PR_NUMBER}/head:pr-review"
-
        - name: Download PR context
          uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c
          with:
            name: pr-context.json
  
        - name: Prettify PR context
-        run: python3 -m json.tool pr-context.json > pr-context-pretty.json && mv pr-context-pretty.json pr-context.json
+        run: |
+          jq . pr-context.json > pr-context-pretty.json
+          mv pr-context-pretty.json pr-context.json
+
+      # Check out every PR commit into its own worktree at worktrees/<sha>/ and
+      # drop that commit's diff and message next to the sources.
+      - name: Prepare PR worktrees
+        env:
+          PR_NUMBER: ${{ needs.setup.outputs.pr_number }}
+        run: |
+          # Fetch the PR head; git records it as FETCH_HEAD.
+          git fetch origin "pull/${PR_NUMBER}/head"
+          # Commits reachable from FETCH_HEAD but not from HEAD, oldest first.
+          for sha in $(git log --reverse --format=%H HEAD..FETCH_HEAD); do
+            git worktree add "worktrees/$sha" "$sha"
+            # Inside the worktree HEAD is $sha, so this is the diff against
+            # its first parent.
+            git -C "worktrees/$sha" diff HEAD~..HEAD > "worktrees/$sha/commit.patch"
+            # %B is the raw commit message (subject and body).
+            git -C "worktrees/$sha" log -1 --format='%B' HEAD > "worktrees/$sha/commit-message.txt"
+          done
  
        - name: Install sandbox dependencies
          run: |
@@ -175,226 +184,189 @@ jobs:
            role-session-name: GitHubActions-Claude-${{ github.run_id }}
            aws-region: us-east-1
  
+      - name: Install Claude Code
+        # A step without an explicit `shell:` runs under `bash -e {0}`, which
+        # does not enable pipefail: if curl failed, bash would read an empty
+        # script, exit 0, and the step would pass with nothing installed.
+        run: |
+          set -o pipefail
+          curl -fsSL https://claude.ai/install.sh | bash
+
        - name: Run Claude Code
-        id: claude
-        uses: anthropics/claude-code-action@26ec041249acb0a944c0a47b6c0c13f05dbc5b44
-        with:
-          use_bedrock: "true"
-          # Required by claude-code-action even though Claude itself doesn't
-          # call the GitHub API — the action uses it for permission checks.
-          github_token: ${{ secrets.GITHUB_TOKEN }}
-          # Safe because the workflow's `if` condition already restricts
-          # execution to trusted actors (MEMBER/OWNER/COLLABORATOR) or PRs
-          # that a trusted actor explicitly labeled, and this job only has
-          # read-only permissions.
-          allowed_non_write_users: "*"
-          track_progress: false
-          show_full_output: "true"
-          # Sandbox Bash commands to prevent network access and restrict
-          # filesystem writes to the working directory.
-          settings: |
-            {
-              "permissions": {
-                "allow": ["*"]
-              },
-              "sandbox": {
-                "enabled": true,
-                "autoAllowBashIfSandboxed": true,
-                "allowUnsandboxedCommands": false,
-                "filesystem": {
-                  "allowWrite": ["//tmp", "//var/tmp"]
-                }
+        env:
+          CLAUDE_CODE_DISABLE_BACKGROUND_TASKS: "1"
+          CLAUDE_CODE_USE_BEDROCK: "1"
+        run: |
+          mkdir -p ~/.claude
+
+          cat > ~/.claude/settings.json << 'SETTINGS'
+          {
+            "permissions": {
+              "allow": [
+                "Bash",
+                "Read",
+                "Edit(//${{ github.workspace }}/**)",
+                "Write(//${{ github.workspace }}/**)",
+                "Grep",
+                "Glob",
+                "Agent",
+                "Task",
+                "TaskOutput",
+                "ToolSearch"
+              ]
+            },
+            "sandbox": {
+              "enabled": true,
+              "autoAllowBashIfSandboxed": true,
+              "allowUnsandboxedCommands": false,
+              "filesystem": {
+                "allowWrite": ["/tmp", "/var/tmp", "${{ github.workspace }}"]
                }
              }
-          claude_args: |
-            --model us.anthropic.claude-opus-4-6-v1
-            --effort max
-            --max-turns 200
-            --disallowedTools "WebFetch,WebSearch"
-            --setting-sources user
-          prompt: |
-              REPO: ${{ github.repository }}
-              PR NUMBER: ${{ needs.setup.outputs.pr_number }}
-
-              You are a code reviewer for the ${{ github.repository }} project. Review this pull request and
-              produce a JSON result containing your review and write it to
-              `review-result.json` in the repo root. Do NOT attempt to post
-              comments yourself. You are in the upstream repo with the PR branch
-              available as `pr-review`. Do not apply or merge the patch.
-              You have no network access — all required context has been
-              pre-fetched locally.
-
-              ## Phase 1: Read context
-
-              All PR data has been pre-fetched. Read `pr-context.json` from the repo root.
-              It contains a JSON object with:
-              - `pr` — full GitHub PR object (title, body, user, head SHA, etc.)
-              - `reviews` — array of PR reviews from the GitHub API
-              - `issue_comments` — array of issue comments on the PR from the GitHub API
-              - `tracking_comment` — body of the existing tracking comment (null on first run);
-                if present, use it as the basis for your `summary` in Phase 3
-              - `review_comments` — array of ALL inline review comments on the PR from the
-                GitHub API. Use these as context, but observe the following rules:
-                - Only re-check your own comments (user.login == "github-actions[bot]" and
-                  body starts with "Claude: "). Do NOT validate, re-raise, respond to, or
-                  duplicate comments from other authors.
-                - Items checked off in the tracking comment (`- [x]`) are resolved. Do NOT
-                  re-check or re-raise review comments that correspond to resolved items.
-                - You will need the `id` fields of your own unresolved comments in Phase 3
-                  to populate the `resolve` array.
-
-              The PR branch has been fetched locally as `pr-review`. Use
-              `git log --reverse --format=%H HEAD..pr-review` to list the PR commits, and
-              `git show <sha>` or `git diff <sha>~1..<sha>` to access commit diffs.
-
-              ## Phase 2: Review commits
-
-              Review every commit in the PR. Use subagents to parallelize the
-              work — decide how many subagents to spawn and how to divide commits
-              between them and the main conversation based on the number and size
-              of commits. Very large commits can be assigned to multiple subagents
-              for extra thoroughness. Always review some commits yourself so you
-              have useful work to do while subagents run in the background. For
-              single-commit PRs with small diffs, just review directly without
-              subagents. For large single-commit PRs, have multiple subagents
-              review it independently to maximize coverage.
-
-              IMPORTANT: Always spawn subagents with `isolation: "worktree"` so
-              each gets its own git worktree. This prevents concurrent git
-              operations from interfering with each other. Because worktrees
-              do not include untracked files, first `git add pr-context.json`
-              so it is available in worktrees.
-
-              Each reviewer (you or a subagent) uses `git show <sha>` or
-              `git diff <sha>~1..<sha>` to fetch diffs, reads `pr-context.json`
-              for PR context, and reads the codebase to verify findings.
-
-              Each reviewer reviews code quality, style, potential bugs, and security
-              implications. It must return a JSON array of issues matching this schema:
-
-              ```json
-              {
+          }
+          SETTINGS
+
+          cat > review-schema.json << 'SCHEMA'
+          {
+            "type": "object",
+            "required": ["summary", "comments"],
+            "properties": {
+              "summary": { "type": "string" },
+              "comments": {
                  "type": "array",
                  "items": {
                    "type": "object",
                    "required": ["path", "line", "severity", "body", "commit"],
                    "properties": {
-                    "path":       { "type": "string", "description": "File path relative to repo root" },
-                    "line":       { "type": "integer", "description": "Diff line number (last line for multi-line)" },
-                    "side":       { "enum": ["LEFT", "RIGHT"], "description": "Diff side: LEFT for deletions, RIGHT for additions/context (default: RIGHT)" },
-                    "start_line": { "type": "integer", "description": "First line of a multi-line comment range" },
-                    "start_side": { "enum": ["LEFT", "RIGHT"], "description": "Diff side for start_line" },
+                    "path":       { "type": "string" },
+                    "line":       { "type": "integer" },
+                    "side":       { "enum": ["LEFT", "RIGHT"] },
+                    "start_line": { "type": "integer" },
+                    "start_side": { "enum": ["LEFT", "RIGHT"] },
                      "severity":   { "enum": ["must-fix", "suggestion", "nit"] },
-                    "body":       { "type": "string", "description": "Review comment in markdown" },
-                    "commit":     { "type": "string", "description": "SHA of the commit being reviewed" }
+                    "body":       { "type": "string" },
+                    "commit":     { "type": "string" }
                    }
                  }
-              }
+              },
+              "resolve": { "type": "array", "items": { "type": "integer" } }
+            }
+          }
+          SCHEMA
+
+          cat > /tmp/review-prompt.txt << 'PROMPT'
+          You are a code reviewer for the ${{ github.repository }} project.
+          Review this pull request. All required context has been
+          pre-fetched into local files.
+
+          ## Phase 1: Review commits
+
+          Read `pr-context.json` from the repository root. `pr-context.json` contains
+          PR metadata from the GitHub API. Rules for its `review_comments` field:
+          - Only re-check your own comments (user.login == "github-actions[bot]" and
+            body starts with "Claude: ").
+          - Items checked off in the `tracking_comment` (`- [x]`) are resolved.
+          - You will need the `id` fields of your own unresolved comments in Phase 2
+            to populate the `resolve` array.
+          - If `tracking_comment` is non-null, use it as the basis for your summary
+            in Phase 2.
+
+          Then, list the directories in `worktrees/` — there is one per commit. Each
+          worktree at `worktrees/<sha>/` contains the full source tree checked out at
+          that commit, plus `commit.patch` (the diff) and `commit-message.txt`
+          (the commit message). Spawn one
+          review subagent per worktree, all in a single message so they run concurrently.
+          Do NOT pre-compute diffs or read any other files before spawning — the subagents
+          will do that themselves.
+
+          Each reviewer reviews design, code quality, style, potential bugs, and
+          security implications.
+
+          Each subagent prompt must include:
+          - Instructions to read `pr-context.json` in the repository root for context.
+          - Instructions to read `review-schema.json` in the repository root and
+            return a JSON array matching the `comments` items schema from that file.
+          - The worktree path.
+          - Instructions to read `commit-message.txt` and `commit.patch` in the
+            worktree for the commit message and diff.
+          - Instructions to verify every `line` and `start_line` value
+            against the hunk ranges in `commit.patch` before returning.
+
+          ## Phase 2: Collect, deduplicate, and summarize
+
+          After all reviews (yours and any subagents') are done:
+          1. Collect all issues. Merge duplicates (same file, lines within 3 of each other, same problem).
+          2. Drop low-confidence findings.
+          3. Check the existing inline review comments fetched in Phase 1. Do NOT include a
+              comment if one already exists on the same file and line about the same problem.
+              Also check for author replies that dismiss or reject a previous comment — do NOT
+              re-raise an issue the PR author has already responded to disagreeing with.
+              Populate the `resolve` array with the REST API `id` (integer) of existing
+              review comments whose threads should be resolved. A thread should be resolved if:
+              - The issue it raised has been addressed in the current PR (i.e. your review
+                no longer flags it), or
+              - The PR author (or another reviewer) left a reply disagreeing with or
+                dismissing the comment.
+              Only include the `id` of the **first** comment in each thread (the one that
+              started the conversation). Do not resolve threads for issues that are still
+              present and unaddressed.
+          4. Write a `summary` field in markdown for a top-level tracking comment.
+
+              **If no existing tracking comment was found (first run):**
+              Use this format:
+
                ```
+              ## Claude review of PR #<number> (<HEAD SHA>)
  
-              The `commit` field MUST be the SHA of the commit being reviewed. Only
-              comment on changes in that commit — not preceding commits.
-
-              `line` should be a line number from the diff **that appears inside a
-              diff hunk**. GitHub rejects lines outside the diff context. `side`
-              indicates which side of the diff (`LEFT` for deletions, `RIGHT` for
-              additions or context lines); defaults to `RIGHT` if omitted. For
-              multi-line comments, set `start_line` and `start_side` to the first
-              line of the range and `line`/`side` to the last.
-
-              Each reviewer MUST verify findings before returning them:
-              - For style/convention claims, check at least 3 existing examples in the
-                codebase to confirm the pattern actually exists before flagging a violation.
-              - For "use X instead of Y" suggestions, confirm X actually exists and works.
-              - If unsure, don't include the issue.
-
-              ## Phase 3: Collect, deduplicate, and summarize
-
-              After all reviews (yours and any subagents') are done:
-              1. Collect all issues. Merge duplicates (same file, lines within 3 of each other, same problem).
-              2. Drop low-confidence findings.
-              3. Check the existing inline review comments fetched in Phase 1. Do NOT include a
-                 comment if one already exists on the same file and line about the same problem.
-                 Also check for author replies that dismiss or reject a previous comment — do NOT
-                 re-raise an issue the PR author has already responded to disagreeing with.
-                 Populate the `resolve` array with the REST API `id` (integer) of existing
-                 review comments whose threads should be resolved. A thread should be resolved if:
-                 - The issue it raised has been addressed in the current PR (i.e. your review
-                   no longer flags it), OR
-                 - The PR author (or another reviewer) left a reply disagreeing with or
-                   dismissing the comment.
-                 Only include the `id` of the **first** comment in each thread (the one that
-                 started the conversation). Do NOT resolve threads for issues that are still
-                 present and unaddressed.
-              4. Do NOT prefix `body` with a severity tag — the severity is already
-                 captured in the `severity` field and will be added automatically when
-                 posting inline comments.
-              5. Write a `summary` field in markdown for a top-level tracking comment.
-
-                 **If no existing tracking comment was found (first run):**
-                 Use this format:
-
-                 ```
-                 ## Claude review of PR #<number> (<HEAD SHA>)
-
-                 <!-- claude-pr-review -->
-
-                 ### Must fix
-                 - [ ] **short title** — `path:line` — brief explanation
-
-                 ### Suggestions
-                 - [ ] **short title** — `path:line` — brief explanation
-
-                 ### Nits
-                 - [ ] **short title** — `path:line` — brief explanation
-                 ```
-
-                 Omit empty sections. Each checkbox item must correspond to an entry in `comments`.
-                 If there are no issues at all, write a short message saying the PR looks good.
-
-                 **If an existing tracking comment was found (subsequent run):**
-                 Use the existing comment as the starting point. Preserve the order and wording
-                 of all existing items. Then apply these updates:
-                 - Update the HEAD SHA in the header line.
-                 - For each existing item, re-check whether the issue is still present in the
-                   current diff. If it has been fixed, mark it checked: `- [x]`.
-                 - If the PR author replied dismissing an item, mark it:
-                   `- [x] ~~short title~~ (dismissed)`.
-                 - Preserve checkbox state that was already set by previous runs or by hand.
-                 - Append any NEW issues found in this run that aren't already listed,
-                   in the appropriate severity section, after the existing items.
-                 - Do NOT reorder, reword, or remove existing items.
-
-              ## Error tracking
-
-              If any errors prevented you from doing your job fully (tools that were
-              not available, git commands that failed, etc.), append a `### Errors`
-              section to the summary listing each failed action and the error message.
-
-              ## Output formatting
-
-              Do NOT escape characters in `body` or `summary`. Write plain markdown — no
-              backslash escaping of `!` or other characters. In particular, HTML comments
-              like `<!-- ... -->` must be written verbatim, never as `<\!-- ... -->`.
-
-              ## CRITICAL: Write review result to file
-
-              Your FINAL action must be to write `review-result.json` in the repo
-              root. The file must contain a JSON object with the following schema:
-
-              ```json
-              {
-                "type": "object",
-                "required": ["summary", "comments"],
-                "properties": {
-                  "summary":  { "type": "string", "description": "Markdown summary for the tracking comment" },
-                  "comments": { "description": "Array of review comments (same schema as the reviewer output above)" },
-                  "resolve":  { "type": "array", "items": { "type": "integer" }, "description": "REST API IDs of review comment threads to resolve" }
-                }
-              }
+              <!-- claude-pr-review -->
+
+              ### Must fix
+              - [ ] **short title** — `path:line` — brief explanation
+
+              ### Suggestions
+              - [ ] **short title** — `path:line` — brief explanation
+
+              ### Nits
+              - [ ] **short title** — `path:line` — brief explanation
                ```
  
-              Do NOT attempt to post comments or use any MCP tools to modify the PR.
+              Omit empty sections. Each checkbox item must correspond to an entry in `comments`.
+              If there are no issues at all, write a short message saying the PR looks good.
+
+              **If an existing tracking comment was found (subsequent run):**
+              Use the existing comment as the starting point. Preserve the order and wording
+              of all existing items. Then apply these updates:
+              - Update the HEAD SHA in the header line.
+              - For each existing item, re-check whether the issue is still present in the
+                current diff. If it has been fixed, mark it checked: `- [x]`.
+              - If the PR author replied dismissing an item, mark it:
+                `- [x] ~~short title~~ (dismissed)`.
+              - Preserve checkbox state that was already set by previous runs or by hand.
+              - Append any new issues found in this run that aren't already listed,
+                in the appropriate severity section, after the existing items.
+              - Do not reorder, reword, or remove existing items.
+
+          ## Error tracking
+
+          If any errors prevented you from doing your job fully (tools that were
+          not available, git commands that failed, etc.), append a `### Errors`
+          section to the summary listing each failed action and the error message.
+
+          ## Review result
+
+          Produce your review result as structured output. The fields are:
+          - `summary`: Markdown summary for the tracking comment.
+          - `comments`: Array of review comments (matching the `comments` items schema in `review-schema.json`).
+          - `resolve`: REST API IDs of review comment threads to resolve.
+          PROMPT
+
+          # This step runs under the default `bash -e {0}` shell, which leaves
+          # pipefail off. Enable it so a failing (or missing) `claude` fails
+          # the step instead of being hidden behind tee's exit status.
+          set -o pipefail
+
+          # Stream the JSON events to the job log while capturing them in
+          # claude.json for the extraction below.
+          claude \
+            --model us.anthropic.claude-opus-4-6-v1 \
+            --effort max \
+            --max-turns 200 \
+            --setting-sources user \
+            --output-format stream-json \
+            --json-schema "$(cat review-schema.json)" \
+            --verbose \
+            -p "$(cat /tmp/review-prompt.txt)" \
+            | tee claude.json
+
+          # Keep only the non-null `.structured_output` values from the
+          # captured event stream.
+          jq '.structured_output | select(. != null)' claude.json > review-result.json
  
        - name: Upload review result
          if: always()
author	Daan De Meyer <daan@amutable.com>
	Tue, 31 Mar 2026 19:18:12 +0000 (21:18 +0200)
committer	Daan De Meyer <daan.j.demeyer@gmail.com>
	Wed, 1 Apr 2026 08:34:54 +0000 (10:34 +0200)