From 45f43ef943a7bba63b80914bccdb9c6215972f8e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 17 May 2026 16:51:56 +0000 Subject: [PATCH 01/15] Initial plan From 944d5cfb351258a0c428e9a9b60ae6165eed2626 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 17 May 2026 17:04:45 +0000 Subject: [PATCH 02/15] feat: infer gh CLI permissions for activation job pre-steps Create a gh CLI command-to-permissions mapping (data/gh_cli_permissions.json and gh_cli_permissions.go) and integrate it into buildActivationPermissions so that activation job pre-steps calling `gh pr diff`, `gh issue view`, etc. automatically get the required GitHub Actions permissions (pull-requests: read, issues: read, actions: read, etc.) without manual declarations. Fixes: compiler omitting pull-requests: read from the activation job even when a pre-step calls `gh pr diff` (as reported for the gh-aw-docs-review workflow). Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- .../compiler_activation_job_builder.go | 11 + pkg/workflow/data/gh_cli_permissions.json | 73 +++++ pkg/workflow/gh_cli_permissions.go | 223 +++++++++++++++ pkg/workflow/gh_cli_permissions_test.go | 263 ++++++++++++++++++ 4 files changed, 570 insertions(+) create mode 100644 pkg/workflow/data/gh_cli_permissions.json create mode 100644 pkg/workflow/gh_cli_permissions.go create mode 100644 pkg/workflow/gh_cli_permissions_test.go diff --git a/pkg/workflow/compiler_activation_job_builder.go b/pkg/workflow/compiler_activation_job_builder.go index b4299bd736..fc39d901db 100644 --- a/pkg/workflow/compiler_activation_job_builder.go +++ b/pkg/workflow/compiler_activation_job_builder.go @@ -541,6 +541,17 @@ func (c *Compiler) buildActivationPermissions(ctx *activationJobBuildContext) st permsMap[PermissionDiscussions] = PermissionWrite } } + // Infer permissions required by gh CLI calls in jobs.activation.pre-steps run scripts. + // This ensures that user-defined pre-steps that call `gh pr diff`, `gh issue view`, etc. + // get the permissions they need without requiring manual permission declarations. + activationPreStepScripts := extractRunScriptsFromJobPreSteps(ctx.data.Jobs, string(constants.ActivationJobName)) + if len(activationPreStepScripts) > 0 { + for scope, level := range inferPermissionsFromShellScripts(activationPreStepScripts) { + if _, exists := permsMap[scope]; !exists { + permsMap[scope] = level + } + } + } return NewPermissionsFromMap(permsMap).RenderToYAML() } diff --git a/pkg/workflow/data/gh_cli_permissions.json b/pkg/workflow/data/gh_cli_permissions.json new file mode 100644 index 0000000000..4bdf4d5f1a --- /dev/null +++ b/pkg/workflow/data/gh_cli_permissions.json @@ -0,0 +1,73 @@ +{ + "version": "1.0", + "description": "GitHub CLI (gh) subcommand patterns and their required GitHub Actions permissions. Used by the compiler to infer permissions needed by pre-steps that call the gh CLI.", + "subcommand_groups": { + "pr": { + "description": "Pull request operations", + "read_subcommands": ["checks", "comment", "diff", "list", "status", "view"], + "write_subcommands": ["close", "convert-to-draft", "create", "edit", "lock", "merge", "ready", "reopen", "review", "unlock"], + "read_permissions": ["pull-requests"], + "write_permissions": ["pull-requests"] + }, + "issue": { + "description": "Issue operations", + "read_subcommands": ["comment", "list", "status", "view"], + "write_subcommands": ["close", "create", "delete", "edit", "lock", "pin", "reopen", "transfer", "unlock", "unpin"], + "read_permissions": ["issues"], + "write_permissions": ["issues"] + }, + "workflow": { + "description": "GitHub Actions workflow operations", + "read_subcommands": ["list", "view"], + "write_subcommands": ["disable", "enable", "run"], + "read_permissions": ["actions"], + "write_permissions": ["actions"] + }, + "run": { + "description": "GitHub Actions run operations", + "read_subcommands": ["download", "list", "view", "watch"], + "write_subcommands": ["cancel", "delete", "rerun"], + "read_permissions": ["actions"], + "write_permissions": ["actions"] + }, + "release": { + "description": "Release operations", + "read_subcommands": ["download", "list", "view"], + "write_subcommands": ["create", "delete", "delete-asset", "edit", "upload"], + "read_permissions": ["contents"], + "write_permissions": ["contents"] + } + }, + "api_path_patterns": [ + { + "pattern": "^/repos/[^/\\s]+/[^/\\s]+/pulls(/|$|\\?)", + "description": "Pull requests REST API", + "permissions": ["pull-requests"] + }, + { + "pattern": "^/repos/[^/\\s]+/[^/\\s]+/issues(/|$|\\?)", + "description": "Issues REST API", + "permissions": ["issues"] + }, + { + "pattern": "^/repos/[^/\\s]+/[^/\\s]+/actions(/|$|\\?)", + "description": "GitHub Actions REST API", + "permissions": ["actions"] + }, + { + "pattern": "^repos/[^/\\s]+/[^/\\s]+/pulls(/|$|\\?)", + "description": "Pull requests REST API (without leading slash)", + "permissions": ["pull-requests"] + }, + { + "pattern": "^repos/[^/\\s]+/[^/\\s]+/issues(/|$|\\?)", + "description": "Issues REST API (without leading slash)", + "permissions": ["issues"] + }, + { + "pattern": "^repos/[^/\\s]+/[^/\\s]+/actions(/|$|\\?)", + "description": "GitHub Actions REST API (without leading slash)", + "permissions": ["actions"] + } + ] +} diff --git a/pkg/workflow/gh_cli_permissions.go b/pkg/workflow/gh_cli_permissions.go new file mode 100644 index 0000000000..79450b177b --- /dev/null +++ b/pkg/workflow/gh_cli_permissions.go @@ -0,0 +1,223 @@ +package workflow + +import ( + _ "embed" + "encoding/json" + "fmt" + "regexp" + "strings" +) + +//go:embed data/gh_cli_permissions.json +var ghCLIPermissionsJSON []byte + +// ghCLISubcommandGroup maps a gh subcommand group (e.g. "pr", "issue") to its permissions. +type ghCLISubcommandGroup struct { + Description string `json:"description"` + ReadSubcommands []string `json:"read_subcommands"` + WriteSubcommands []string `json:"write_subcommands"` + ReadPermissions []string `json:"read_permissions"` + WritePermissions []string `json:"write_permissions"` +} + +// ghCLIAPIPathPattern maps a REST API path pattern to the required permissions. +type ghCLIAPIPathPattern struct { + Pattern string `json:"pattern"` + Description string `json:"description"` + Permissions []string `json:"permissions"` +} + +// ghCLIPermissionsData is the top-level structure of gh_cli_permissions.json. +type ghCLIPermissionsData struct { + Version string `json:"version"` + Description string `json:"description"` + SubcommandGroups map[string]ghCLISubcommandGroup `json:"subcommand_groups"` + APIPathPatterns []ghCLIAPIPathPattern `json:"api_path_patterns"` +} + +// compiledGHCLIPermissions holds pre-compiled lookup data built from the JSON. +type compiledGHCLIPermissions struct { + // readCommands maps "group action" (e.g. "pr diff") to read permission scopes. + readCommands map[string][]PermissionScope + // writeCommands maps "group action" (e.g. "pr create") to write permission scopes. + writeCommands map[string][]PermissionScope + // groupReadPermissions maps a subcommand group name (e.g. "pr") to read permission scopes + // used as fallback when the specific action is not recognised. + groupReadPermissions map[string][]PermissionScope + // apiPathPatterns holds compiled regexps paired with required permission scopes. + apiPathPatterns []compiledAPIPathPattern +} + +type compiledAPIPathPattern struct { + re *regexp.Regexp + permissions []PermissionScope +} + +var ghCLIPermissions compiledGHCLIPermissions + +func init() { + var data ghCLIPermissionsData + if err := json.Unmarshal(ghCLIPermissionsJSON, &data); err != nil { + panic(fmt.Sprintf("failed to load gh CLI permissions from JSON: %v", err)) + } + + cp := compiledGHCLIPermissions{ + readCommands: make(map[string][]PermissionScope), + writeCommands: make(map[string][]PermissionScope), + groupReadPermissions: make(map[string][]PermissionScope), + } + + for group, sg := range data.SubcommandGroups { + readPerms := make([]PermissionScope, len(sg.ReadPermissions)) + for i, p := range sg.ReadPermissions { + readPerms[i] = PermissionScope(p) + } + writePerms := make([]PermissionScope, len(sg.WritePermissions)) + for i, p := range sg.WritePermissions { + writePerms[i] = PermissionScope(p) + } + + // Store group-level fallback (used when specific action is unknown). + cp.groupReadPermissions[group] = readPerms + + for _, action := range sg.ReadSubcommands { + key := group + " " + action + cp.readCommands[key] = readPerms + } + for _, action := range sg.WriteSubcommands { + key := group + " " + action + cp.writeCommands[key] = writePerms + } + } + + for _, ap := range data.APIPathPatterns { + re, err := regexp.Compile(ap.Pattern) + if err != nil { + panic(fmt.Sprintf("invalid gh API path pattern %q in gh_cli_permissions.json: %v", ap.Pattern, err)) + } + perms := make([]PermissionScope, len(ap.Permissions)) + for i, p := range ap.Permissions { + perms[i] = PermissionScope(p) + } + cp.apiPathPatterns = append(cp.apiPathPatterns, compiledAPIPathPattern{re: re, permissions: perms}) + } + + ghCLIPermissions = cp +} + +// ghSubcommandRE matches invocations of the gh CLI followed by a known subcommand group +// and an action word. It is designed to handle both: +// - simple single-line calls (gh pr diff "$PR" --name-only) +// - multi-line shell scripts where the `gh` binary may be preceded by whitespace. +// +// Capture groups: (1) subcommand group, (2) action word. +var ghSubcommandRE = regexp.MustCompile(`(?m)(?:^|[\s|;])gh\s+(pr|issue|workflow|run|release)\s+([\w][\w-]*)`) + +// ghAPIRE matches `gh api ` invocations. +// Capture group: (1) API path (up to the first whitespace, pipe, or quote). +var ghAPIRE = regexp.MustCompile(`(?m)(?:^|[\s|;])gh\s+api\s+([^\s|;&"'\\]+)`) + +// inferPermissionsFromShellScripts scans one or more shell script strings for +// gh CLI invocations and returns the minimum set of GitHub Actions permissions +// required to run those commands. +// +// Only read-level permissions are inferred here; write-level operations in +// pre-steps are intentionally not auto-escalated because they would require +// explicit user intent and additional security review. +func inferPermissionsFromShellScripts(scripts []string) map[PermissionScope]PermissionLevel { + perms := make(map[PermissionScope]PermissionLevel) + + for _, script := range scripts { + // Match gh patterns. + for _, m := range ghSubcommandRE.FindAllStringSubmatch(script, -1) { + group := strings.ToLower(m[1]) + action := strings.ToLower(m[2]) + key := group + " " + action + + // Check explicit read mapping first. + if readPerms, ok := ghCLIPermissions.readCommands[key]; ok { + for _, scope := range readPerms { + if _, exists := perms[scope]; !exists { + perms[scope] = PermissionRead + } + } + continue + } + // Check explicit write mapping (we still grant at least read for pre-steps). + if readPerms, ok := ghCLIPermissions.writeCommands[key]; ok { + for _, scope := range readPerms { + if _, exists := perms[scope]; !exists { + perms[scope] = PermissionRead + } + } + continue + } + // Fall back to group-level read permissions for unrecognised actions. + if readPerms, ok := ghCLIPermissions.groupReadPermissions[group]; ok { + for _, scope := range readPerms { + if _, exists := perms[scope]; !exists { + perms[scope] = PermissionRead + } + } + } + } + + // Match gh api patterns. + for _, m := range ghAPIRE.FindAllStringSubmatch(script, -1) { + path := m[1] + for _, ap := range ghCLIPermissions.apiPathPatterns { + if ap.re.MatchString(path) { + for _, scope := range ap.permissions { + if _, exists := perms[scope]; !exists { + perms[scope] = PermissionRead + } + } + } + } + } + } + + return perms +} + +// extractRunScriptsFromJobPreSteps returns the `run` script text from every +// pre-step in the named job configuration inside the frontmatter jobs map. +// +// It is a read-only extraction: it never mutates the jobs map. +func extractRunScriptsFromJobPreSteps(jobs map[string]any, jobName string) []string { + if len(jobs) == 0 { + return nil + } + + jobConfig, ok := jobs[jobName] + if !ok { + return nil + } + + configMap, ok := jobConfig.(map[string]any) + if !ok { + return nil + } + + raw, ok := configMap["pre-steps"] + if !ok { + return nil + } + + stepsList, ok := raw.([]any) + if !ok { + return nil + } + + var scripts []string + for _, step := range stepsList { + stepMap, ok := step.(map[string]any) + if !ok { + continue + } + if runVal, ok := stepMap["run"].(string); ok && runVal != "" { + scripts = append(scripts, runVal) + } + } + return scripts +} diff --git a/pkg/workflow/gh_cli_permissions_test.go b/pkg/workflow/gh_cli_permissions_test.go new file mode 100644 index 0000000000..a74bba99f7 --- /dev/null +++ b/pkg/workflow/gh_cli_permissions_test.go @@ -0,0 +1,263 @@ +//go:build !integration + +package workflow + +import ( + "os" + "path/filepath" + "testing" + + "github.com/github/gh-aw/pkg/constants" + "github.com/github/gh-aw/pkg/stringutil" + "github.com/github/gh-aw/pkg/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestInferPermissionsFromShellScripts_GhPrDiff verifies that `gh pr diff` in a +// shell script is recognized as requiring pull-requests: read. +func TestInferPermissionsFromShellScripts_GhPrDiff(t *testing.T) { + scripts := []string{ + `gh pr diff "$PR_NUMBER" --name-only | awk '/\.md$/' > /tmp/changed.txt`, + } + perms := inferPermissionsFromShellScripts(scripts) + assert.Equal(t, PermissionRead, perms[PermissionPullRequests], "gh pr diff should require pull-requests: read") +} + +// TestInferPermissionsFromShellScripts_GhPrView verifies pull-requests: read for gh pr view. +func TestInferPermissionsFromShellScripts_GhPrView(t *testing.T) { + scripts := []string{`gh pr view 123 --json title`} + perms := inferPermissionsFromShellScripts(scripts) + assert.Equal(t, PermissionRead, perms[PermissionPullRequests]) +} + +// TestInferPermissionsFromShellScripts_GhIssueList verifies issues: read for gh issue list. +func TestInferPermissionsFromShellScripts_GhIssueList(t *testing.T) { + scripts := []string{`gh issue list --label bug --json number`} + perms := inferPermissionsFromShellScripts(scripts) + assert.Equal(t, PermissionRead, perms[PermissionIssues]) +} + +// TestInferPermissionsFromShellScripts_GhWorkflowList verifies actions: read for gh workflow list. +func TestInferPermissionsFromShellScripts_GhWorkflowList(t *testing.T) { + scripts := []string{`gh workflow list`} + perms := inferPermissionsFromShellScripts(scripts) + assert.Equal(t, PermissionRead, perms[PermissionActions]) +} + +// TestInferPermissionsFromShellScripts_GhRunView verifies actions: read for gh run view. +func TestInferPermissionsFromShellScripts_GhRunView(t *testing.T) { + scripts := []string{`gh run view $RUN_ID --json conclusion`} + perms := inferPermissionsFromShellScripts(scripts) + assert.Equal(t, PermissionRead, perms[PermissionActions]) +} + +// TestInferPermissionsFromShellScripts_GhAPI verifies pull-requests: read for gh api pulls endpoint. +func TestInferPermissionsFromShellScripts_GhAPI(t *testing.T) { + scripts := []string{`gh api /repos/owner/repo/pulls/1 --jq .title`} + perms := inferPermissionsFromShellScripts(scripts) + assert.Equal(t, PermissionRead, perms[PermissionPullRequests], "gh api /repos/.../pulls should require pull-requests: read") +} + +// TestInferPermissionsFromShellScripts_GhAPIIssues verifies issues: read for gh api issues endpoint. +func TestInferPermissionsFromShellScripts_GhAPIIssues(t *testing.T) { + scripts := []string{`gh api /repos/owner/repo/issues --jq '.[].number'`} + perms := inferPermissionsFromShellScripts(scripts) + assert.Equal(t, PermissionRead, perms[PermissionIssues], "gh api /repos/.../issues should require issues: read") +} + +// TestInferPermissionsFromShellScripts_NoGhCommand verifies no permissions are inferred when +// there are no gh CLI calls in the script. +func TestInferPermissionsFromShellScripts_NoGhCommand(t *testing.T) { + scripts := []string{`echo "hello" && ls /tmp`} + perms := inferPermissionsFromShellScripts(scripts) + assert.Empty(t, perms, "no gh commands should produce no permission requirements") +} + +// TestInferPermissionsFromShellScripts_MultiLine verifies multi-line shell scripts work correctly. +func TestInferPermissionsFromShellScripts_MultiLine(t *testing.T) { + scripts := []string{ + `gh pr diff "$PR_NUMBER" --name-only \ + | awk '/\.md$/' \ + > /tmp/gh-aw/docs-review-data/changed-md.txt`, + } + perms := inferPermissionsFromShellScripts(scripts) + assert.Equal(t, PermissionRead, perms[PermissionPullRequests], "multi-line gh pr diff should require pull-requests: read") +} + +// TestInferPermissionsFromShellScripts_MultipleCommands verifies multiple gh commands are aggregated. +func TestInferPermissionsFromShellScripts_MultipleCommands(t *testing.T) { + scripts := []string{ + `gh pr diff "$PR_NUMBER" --name-only > /tmp/changed.txt +gh issue view $ISSUE_NUMBER --json body > /tmp/issue.json`, + } + perms := inferPermissionsFromShellScripts(scripts) + assert.Equal(t, PermissionRead, perms[PermissionPullRequests], "should infer pull-requests: read") + assert.Equal(t, PermissionRead, perms[PermissionIssues], "should infer issues: read") +} + +// TestExtractRunScriptsFromJobPreSteps verifies extraction of run scripts from jobs map. +func TestExtractRunScriptsFromJobPreSteps(t *testing.T) { + jobs := map[string]any{ + "activation": map[string]any{ + "pre-steps": []any{ + map[string]any{ + "name": "Get changed markdown files", + "run": `gh pr diff "$PR_NUMBER" --name-only | awk '/\.md$/' > /tmp/changed.txt`, + }, + map[string]any{ + "name": "Echo step", + "run": `echo "hello"`, + }, + }, + }, + } + + scripts := extractRunScriptsFromJobPreSteps(jobs, "activation") + require.Len(t, scripts, 2) + assert.Contains(t, scripts[0], "gh pr diff") + assert.Contains(t, scripts[1], "echo") +} + +// TestExtractRunScriptsFromJobPreSteps_NoPreSteps verifies nil return when pre-steps absent. +func TestExtractRunScriptsFromJobPreSteps_NoPreSteps(t *testing.T) { + jobs := map[string]any{ + "activation": map[string]any{ + "permissions": map[string]any{"contents": "read"}, + }, + } + scripts := extractRunScriptsFromJobPreSteps(jobs, "activation") + assert.Nil(t, scripts) +} + +// TestExtractRunScriptsFromJobPreSteps_NonRunSteps verifies that non-run steps (uses: ...) are skipped. +func TestExtractRunScriptsFromJobPreSteps_NonRunSteps(t *testing.T) { + jobs := map[string]any{ + "activation": map[string]any{ + "pre-steps": []any{ + map[string]any{ + "name": "Checkout", + "uses": "actions/checkout@v4", + }, + }, + }, + } + scripts := extractRunScriptsFromJobPreSteps(jobs, "activation") + assert.Empty(t, scripts, "uses-only steps should not produce any scripts") +} + +// TestActivationJobPermissionsWithGhPrDiffPreStep is an integration test that verifies +// the compiler adds pull-requests: read to the activation job when a pre-step calls +// `gh pr diff`. This reproduces the issue reported for the gh-aw-docs-review workflow. +func TestActivationJobPermissionsWithGhPrDiffPreStep(t *testing.T) { + tmpDir := testutil.TempDir(t, "activation-perms-gh-pr-diff") + testFile := filepath.Join(tmpDir, "docs-review.md") + testContent := `--- +on: + pull_request: + types: [opened, synchronize] +permissions: + contents: read + pull-requests: read +engine: copilot +jobs: + activation: + pre-steps: + - name: Get changed markdown files + run: | + gh pr diff "$PR_NUMBER" --name-only \ + | awk '/\.md$/' \ + > /tmp/gh-aw/docs-review-data/changed-md.txt +--- + +# Docs review workflow with Vale pre-step +` + err := os.WriteFile(testFile, []byte(testContent), 0644) + require.NoError(t, err, "failed to write test workflow") + + compiler := NewCompiler() + err = compiler.CompileWorkflow(testFile) + require.NoError(t, err, "failed to compile workflow") + + lockContent, err := os.ReadFile(stringutil.MarkdownToLockFile(testFile)) + require.NoError(t, err, "failed to read generated lock file") + + activationJobSection := extractJobSection(string(lockContent), string(constants.ActivationJobName)) + assert.Contains(t, activationJobSection, "pull-requests: read", + "activation job should include pull-requests: read when pre-step calls gh pr diff") +} + +// TestActivationJobPermissionsWithGhIssuePreStep verifies issues: read is added when +// an activation pre-step calls `gh issue view`. +func TestActivationJobPermissionsWithGhIssuePreStep(t *testing.T) { + tmpDir := testutil.TempDir(t, "activation-perms-gh-issue") + testFile := filepath.Join(tmpDir, "issue-workflow.md") + testContent := `--- +on: + issues: + types: [opened] +permissions: + contents: read + issues: read +engine: copilot +jobs: + activation: + pre-steps: + - name: Fetch issue data + run: | + gh issue view "$ISSUE_NUMBER" --json body > /tmp/issue.json +--- + +# Issue workflow with gh issue pre-step +` + err := os.WriteFile(testFile, []byte(testContent), 0644) + require.NoError(t, err, "failed to write test workflow") + + compiler := NewCompiler() + err = compiler.CompileWorkflow(testFile) + require.NoError(t, err, "failed to compile workflow") + + lockContent, err := os.ReadFile(stringutil.MarkdownToLockFile(testFile)) + require.NoError(t, err, "failed to read generated lock file") + + activationJobSection := extractJobSection(string(lockContent), string(constants.ActivationJobName)) + assert.Contains(t, activationJobSection, "issues: read", + "activation job should include issues: read when pre-step calls gh issue view") +} + +// TestActivationJobPermissionsNoPreStepChanges verifies that the activation job permissions +// are unchanged when there are no pre-steps with gh commands. +func TestActivationJobPermissionsNoPreStepChanges(t *testing.T) { + tmpDir := testutil.TempDir(t, "activation-perms-no-gh") + testFile := filepath.Join(tmpDir, "basic-workflow.md") + testContent := `--- +on: + pull_request: + types: [opened] +permissions: + contents: read + pull-requests: read +engine: copilot +--- + +# Basic workflow without activation pre-steps +` + err := os.WriteFile(testFile, []byte(testContent), 0644) + require.NoError(t, err, "failed to write test workflow") + + compiler := NewCompiler() + err = compiler.CompileWorkflow(testFile) + require.NoError(t, err, "failed to compile workflow") + + lockContent, err := os.ReadFile(stringutil.MarkdownToLockFile(testFile)) + require.NoError(t, err, "failed to read generated lock file") + + activationJobSection := extractJobSection(string(lockContent), string(constants.ActivationJobName)) + // The activation job should have its standard permissions but NOT pull-requests: read + // since there are no pre-steps requiring it (the main job handles pull-requests via filtered + // permissions, but the activation job only needs what its own steps require). + assert.Contains(t, activationJobSection, "contents: read", + "activation job should always include contents: read") + assert.NotContains(t, activationJobSection, "pull-requests", + "activation job should NOT include pull-requests when no pre-step requires it") +} From 992db26460f1d543ddb951d1826adc46e9f1f2cc Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 17 May 2026 17:07:41 +0000 Subject: [PATCH 03/15] fix: address code review feedback on gh CLI permissions inference - Add word boundary \b to ghSubcommandRE to avoid false positives on partial action words - Add early len(ctx.data.Jobs) > 0 guard in buildActivationPermissions before extracting pre-step scripts - Clarify TestActivationJobPermissionsNoPreStepChanges: remove pull-requests: read from frontmatter and add comment explaining the activation job computes permissions independently from the main job Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- pkg/workflow/compiler_activation_job_builder.go | 12 +++++++----- pkg/workflow/gh_cli_permissions.go | 3 ++- pkg/workflow/gh_cli_permissions_test.go | 9 ++++----- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/pkg/workflow/compiler_activation_job_builder.go b/pkg/workflow/compiler_activation_job_builder.go index fc39d901db..3a389426d7 100644 --- a/pkg/workflow/compiler_activation_job_builder.go +++ b/pkg/workflow/compiler_activation_job_builder.go @@ -544,11 +544,13 @@ func (c *Compiler) buildActivationPermissions(ctx *activationJobBuildContext) st // Infer permissions required by gh CLI calls in jobs.activation.pre-steps run scripts. // This ensures that user-defined pre-steps that call `gh pr diff`, `gh issue view`, etc. // get the permissions they need without requiring manual permission declarations. - activationPreStepScripts := extractRunScriptsFromJobPreSteps(ctx.data.Jobs, string(constants.ActivationJobName)) - if len(activationPreStepScripts) > 0 { - for scope, level := range inferPermissionsFromShellScripts(activationPreStepScripts) { - if _, exists := permsMap[scope]; !exists { - permsMap[scope] = level + if len(ctx.data.Jobs) > 0 { + activationPreStepScripts := extractRunScriptsFromJobPreSteps(ctx.data.Jobs, string(constants.ActivationJobName)) + if len(activationPreStepScripts) > 0 { + for scope, level := range inferPermissionsFromShellScripts(activationPreStepScripts) { + if _, exists := permsMap[scope]; !exists { + permsMap[scope] = level + } } } } diff --git a/pkg/workflow/gh_cli_permissions.go b/pkg/workflow/gh_cli_permissions.go index 79450b177b..12cd470755 100644 --- a/pkg/workflow/gh_cli_permissions.go +++ b/pkg/workflow/gh_cli_permissions.go @@ -111,7 +111,8 @@ func init() { // - multi-line shell scripts where the `gh` binary may be preceded by whitespace. // // Capture groups: (1) subcommand group, (2) action word. -var ghSubcommandRE = regexp.MustCompile(`(?m)(?:^|[\s|;])gh\s+(pr|issue|workflow|run|release)\s+([\w][\w-]*)`) +// The trailing `\b` ensures the action word is complete (avoids matching partial words). +var ghSubcommandRE = regexp.MustCompile(`(?m)(?:^|[\s|;])gh\s+(pr|issue|workflow|run|release)\s+([\w][\w-]*)\b`) // ghAPIRE matches `gh api ` invocations. // Capture group: (1) API path (up to the first whitespace, pipe, or quote). diff --git a/pkg/workflow/gh_cli_permissions_test.go b/pkg/workflow/gh_cli_permissions_test.go index a74bba99f7..abc2fbce1d 100644 --- a/pkg/workflow/gh_cli_permissions_test.go +++ b/pkg/workflow/gh_cli_permissions_test.go @@ -226,7 +226,10 @@ jobs: } // TestActivationJobPermissionsNoPreStepChanges verifies that the activation job permissions -// are unchanged when there are no pre-steps with gh commands. +// are unchanged when there are no pre-steps with gh commands. Even if the workflow-level +// frontmatter declares pull-requests: read, the activation job should NOT receive that +// permission unless its own steps actually need it (the activation job computes its permissions +// independently of the main job's filtered permissions). func TestActivationJobPermissionsNoPreStepChanges(t *testing.T) { tmpDir := testutil.TempDir(t, "activation-perms-no-gh") testFile := filepath.Join(tmpDir, "basic-workflow.md") @@ -236,7 +239,6 @@ on: types: [opened] permissions: contents: read - pull-requests: read engine: copilot --- @@ -253,9 +255,6 @@ engine: copilot require.NoError(t, err, "failed to read generated lock file") activationJobSection := extractJobSection(string(lockContent), string(constants.ActivationJobName)) - // The activation job should have its standard permissions but NOT pull-requests: read - // since there are no pre-steps requiring it (the main job handles pull-requests via filtered - // permissions, but the activation job only needs what its own steps require). assert.Contains(t, activationJobSection, "contents: read", "activation job should always include contents: read") assert.NotContains(t, activationJobSection, "pull-requests", From f90549bb84c39227a9b78b2d012bf6f2c6134682 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 17 May 2026 17:19:48 +0000 Subject: [PATCH 04/15] feat: expand gh CLI permissions mapping and detect write commands in activation pre-steps Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- pkg/workflow/compiler_activation_job.go | 7 +- .../compiler_activation_job_builder.go | 13 +- pkg/workflow/data/gh_cli_permissions.json | 88 +++++++--- pkg/workflow/gh_cli_permissions.go | 37 +++- pkg/workflow/gh_cli_permissions_test.go | 164 ++++++++++++++++++ 5 files changed, 280 insertions(+), 29 deletions(-) diff --git a/pkg/workflow/compiler_activation_job.go b/pkg/workflow/compiler_activation_job.go index 87b8db073a..3c26079e70 100644 --- a/pkg/workflow/compiler_activation_job.go +++ b/pkg/workflow/compiler_activation_job.go @@ -63,12 +63,17 @@ func (c *Compiler) buildActivationJob(data *WorkflowData, preActivationJobCreate ctx.steps = append(ctx.steps, c.generateScriptModeCleanupStep()) } + permissions, err := c.buildActivationPermissions(ctx) + if err != nil { + return nil, err + } + return &Job{ Name: string(constants.ActivationJobName), If: ctx.activationCondition, HasWorkflowRunSafetyChecks: workflowRunRepoSafety != "", RunsOn: c.formatFrameworkJobRunsOn(data), - Permissions: c.buildActivationPermissions(ctx), + Permissions: permissions, Environment: c.buildActivationEnvironment(ctx), Steps: ctx.steps, Outputs: ctx.outputs, diff --git a/pkg/workflow/compiler_activation_job_builder.go b/pkg/workflow/compiler_activation_job_builder.go index 3a389426d7..bd53d5d561 100644 --- a/pkg/workflow/compiler_activation_job_builder.go +++ b/pkg/workflow/compiler_activation_job_builder.go @@ -492,7 +492,8 @@ func (c *Compiler) addActivationArtifactUploadStep(ctx *activationJobBuildContex } // buildActivationPermissions builds activation job permissions from workflow features and selected interactions. -func (c *Compiler) buildActivationPermissions(ctx *activationJobBuildContext) string { +// Returns an error if activation pre-steps contain write gh CLI commands that would require write permissions. +func (c *Compiler) buildActivationPermissions(ctx *activationJobBuildContext) (string, error) { permsMap := map[PermissionScope]PermissionLevel{ PermissionContents: PermissionRead, } @@ -547,6 +548,14 @@ func (c *Compiler) buildActivationPermissions(ctx *activationJobBuildContext) st if len(ctx.data.Jobs) > 0 { activationPreStepScripts := extractRunScriptsFromJobPreSteps(ctx.data.Jobs, string(constants.ActivationJobName)) if len(activationPreStepScripts) > 0 { + // Detect write commands first — these are not permitted in activation pre-steps + // because the activation job intentionally operates with read-only permissions. + if writeCmds := detectWriteCommandsInShellScripts(activationPreStepScripts); len(writeCmds) > 0 { + return "", fmt.Errorf( + "activation pre-step uses write gh command(s) [%s]; write operations are not permitted in activation job pre-steps because the activation job runs with read-only permissions. Move write operations to the agent job steps or use safe-outputs", + strings.Join(writeCmds, ", "), + ) + } for scope, level := range inferPermissionsFromShellScripts(activationPreStepScripts) { if _, exists := permsMap[scope]; !exists { permsMap[scope] = level @@ -554,7 +563,7 @@ func (c *Compiler) buildActivationPermissions(ctx *activationJobBuildContext) st } } } - return NewPermissionsFromMap(permsMap).RenderToYAML() + return NewPermissionsFromMap(permsMap).RenderToYAML(), nil } // buildActivationEnvironment returns manual-approval environment YAML, with ANSI removed. diff --git a/pkg/workflow/data/gh_cli_permissions.json b/pkg/workflow/data/gh_cli_permissions.json index 4bdf4d5f1a..ab3abcf9c0 100644 --- a/pkg/workflow/data/gh_cli_permissions.json +++ b/pkg/workflow/data/gh_cli_permissions.json @@ -1,21 +1,28 @@ { - "version": "1.0", + "version": "1.1", "description": "GitHub CLI (gh) subcommand patterns and their required GitHub Actions permissions. Used by the compiler to infer permissions needed by pre-steps that call the gh CLI.", "subcommand_groups": { "pr": { "description": "Pull request operations", - "read_subcommands": ["checks", "comment", "diff", "list", "status", "view"], - "write_subcommands": ["close", "convert-to-draft", "create", "edit", "lock", "merge", "ready", "reopen", "review", "unlock"], + "read_subcommands": ["checkout", "checks", "diff", "list", "status", "view"], + "write_subcommands": ["close", "comment", "convert-to-draft", "create", "edit", "lock", "merge", "ready", "reopen", "review", "revert", "unlock", "update-branch"], "read_permissions": ["pull-requests"], "write_permissions": ["pull-requests"] }, "issue": { "description": "Issue operations", - "read_subcommands": ["comment", "list", "status", "view"], - "write_subcommands": ["close", "create", "delete", "edit", "lock", "pin", "reopen", "transfer", "unlock", "unpin"], + "read_subcommands": ["list", "status", "view"], + "write_subcommands": ["close", "comment", "create", "delete", "develop", "edit", "lock", "pin", "reopen", "transfer", "unlock", "unpin"], "read_permissions": ["issues"], "write_permissions": ["issues"] }, + "release": { + "description": "Release operations", + "read_subcommands": ["download", "list", "verify", "verify-asset", "view"], + "write_subcommands": ["create", "delete", "delete-asset", "edit", "upload"], + "read_permissions": ["contents"], + "write_permissions": ["contents"] + }, "workflow": { "description": "GitHub Actions workflow operations", "read_subcommands": ["list", "view"], @@ -30,44 +37,83 @@ "read_permissions": ["actions"], "write_permissions": ["actions"] }, - "release": { - "description": "Release operations", - "read_subcommands": ["download", "list", "view"], - "write_subcommands": ["create", "delete", "delete-asset", "edit", "upload"], + "cache": { + "description": "GitHub Actions cache operations", + "read_subcommands": ["list"], + "write_subcommands": ["delete"], + "read_permissions": ["actions"], + "write_permissions": ["actions"] + }, + "repo": { + "description": "Repository operations", + "read_subcommands": ["clone", "gitignore", "license", "list", "set-default", "view"], + "write_subcommands": ["archive", "autolink", "create", "delete", "deploy-key", "edit", "fork", "rename", "sync", "unarchive"], "read_permissions": ["contents"], "write_permissions": ["contents"] + }, + "label": { + "description": "Label operations (labels use the issues permission scope)", + "read_subcommands": ["list"], + "write_subcommands": ["clone", "create", "delete", "edit"], + "read_permissions": ["issues"], + "write_permissions": ["issues"] } }, "api_path_patterns": [ { - "pattern": "^/repos/[^/\\s]+/[^/\\s]+/pulls(/|$|\\?)", + "pattern": "^/?repos/[^/\\s]+/[^/\\s]+/pulls(/|$|\\?)", "description": "Pull requests REST API", "permissions": ["pull-requests"] }, { - "pattern": "^/repos/[^/\\s]+/[^/\\s]+/issues(/|$|\\?)", + "pattern": "^/?repos/[^/\\s]+/[^/\\s]+/issues(/|$|\\?)", "description": "Issues REST API", "permissions": ["issues"] }, { - "pattern": "^/repos/[^/\\s]+/[^/\\s]+/actions(/|$|\\?)", + "pattern": "^/?repos/[^/\\s]+/[^/\\s]+/labels(/|$|\\?)", + "description": "Labels REST API (uses issues permission scope)", + "permissions": ["issues"] + }, + { + "pattern": "^/?repos/[^/\\s]+/[^/\\s]+/actions(/|$|\\?)", "description": "GitHub Actions REST API", "permissions": ["actions"] }, { - "pattern": "^repos/[^/\\s]+/[^/\\s]+/pulls(/|$|\\?)", - "description": "Pull requests REST API (without leading slash)", - "permissions": ["pull-requests"] + "pattern": "^/?repos/[^/\\s]+/[^/\\s]+/releases(/|$|\\?)", + "description": "Releases REST API", + "permissions": ["contents"] }, { - "pattern": "^repos/[^/\\s]+/[^/\\s]+/issues(/|$|\\?)", - "description": "Issues REST API (without leading slash)", - "permissions": ["issues"] + "pattern": "^/?repos/[^/\\s]+/[^/\\s]+/(git|contents|commits|branches|tags|compare)(/|$|\\?)", + "description": "Repository contents REST API", + "permissions": ["contents"] }, { - "pattern": "^repos/[^/\\s]+/[^/\\s]+/actions(/|$|\\?)", - "description": "GitHub Actions REST API (without leading slash)", - "permissions": ["actions"] + "pattern": "^/?repos/[^/\\s]+/[^/\\s]+/deployments(/|$|\\?)", + "description": "Deployments REST API", + "permissions": ["deployments"] + }, + { + "pattern": "^/?repos/[^/\\s]+/[^/\\s]+/(statuses|commits/[^/\\s]+/statuses)(/|$|\\?)", + "description": "Commit statuses REST API", + "permissions": ["statuses"] + }, + { + "pattern": "^/?repos/[^/\\s]+/[^/\\s]+/(check-runs|check-suites)(/|$|\\?)", + "description": "Check runs/suites REST API", + "permissions": ["checks"] + }, + { + "pattern": "^/?repos/[^/\\s]+/[^/\\s]+/pages(/|$|\\?)", + "description": "GitHub Pages REST API", + "permissions": ["pages"] + }, + { + "pattern": "^/?repos/[^/\\s]+/[^/\\s]+/packages(/|$|\\?)", + "description": "Packages REST API", + "permissions": ["packages"] } ] } diff --git a/pkg/workflow/gh_cli_permissions.go b/pkg/workflow/gh_cli_permissions.go index 12cd470755..82ae0220e4 100644 --- a/pkg/workflow/gh_cli_permissions.go +++ b/pkg/workflow/gh_cli_permissions.go @@ -112,7 +112,7 @@ func init() { // // Capture groups: (1) subcommand group, (2) action word. // The trailing `\b` ensures the action word is complete (avoids matching partial words). -var ghSubcommandRE = regexp.MustCompile(`(?m)(?:^|[\s|;])gh\s+(pr|issue|workflow|run|release)\s+([\w][\w-]*)\b`) +var ghSubcommandRE = regexp.MustCompile(`(?m)(?:^|[\s|;])gh\s+(pr|issue|release|workflow|run|cache|repo|label)\s+([\w][\w-]*)\b`) // ghAPIRE matches `gh api ` invocations. // Capture group: (1) API path (up to the first whitespace, pipe, or quote). @@ -122,9 +122,9 @@ var ghAPIRE = regexp.MustCompile(`(?m)(?:^|[\s|;])gh\s+api\s+([^\s|;&"'\\]+)`) // gh CLI invocations and returns the minimum set of GitHub Actions permissions // required to run those commands. // -// Only read-level permissions are inferred here; write-level operations in -// pre-steps are intentionally not auto-escalated because they would require -// explicit user intent and additional security review. +// Only read-level permissions are inferred here; write-level operations are +// intentionally not auto-escalated. Use detectWriteCommandsInShellScripts to +// surface write commands as validation errors. func inferPermissionsFromShellScripts(scripts []string) map[PermissionScope]PermissionLevel { perms := make(map[PermissionScope]PermissionLevel) @@ -144,7 +144,8 @@ func inferPermissionsFromShellScripts(scripts []string) map[PermissionScope]Perm } continue } - // Check explicit write mapping (we still grant at least read for pre-steps). + // Write commands only need read-level permissions in the activation job context. + // (Full write escalation is rejected by detectWriteCommandsInShellScripts instead.) if readPerms, ok := ghCLIPermissions.writeCommands[key]; ok { for _, scope := range readPerms { if _, exists := perms[scope]; !exists { @@ -181,6 +182,32 @@ func inferPermissionsFromShellScripts(scripts []string) map[PermissionScope]Perm return perms } +// detectWriteCommandsInShellScripts returns all write gh CLI commands found in the +// given scripts, formatted as "gh " (e.g. "gh pr create"). +// The slice contains no duplicates and is sorted deterministically in discovery order. +func detectWriteCommandsInShellScripts(scripts []string) []string { + var found []string + seen := make(map[string]struct{}) + + for _, script := range scripts { + for _, m := range ghSubcommandRE.FindAllStringSubmatch(script, -1) { + group := strings.ToLower(m[1]) + action := strings.ToLower(m[2]) + key := group + " " + action + + if _, isWrite := ghCLIPermissions.writeCommands[key]; isWrite { + cmd := "gh " + key + if _, already := seen[cmd]; !already { + seen[cmd] = struct{}{} + found = append(found, cmd) + } + } + } + } + + return found +} + // extractRunScriptsFromJobPreSteps returns the `run` script text from every // pre-step in the named job configuration inside the frontmatter jobs map. // diff --git a/pkg/workflow/gh_cli_permissions_test.go b/pkg/workflow/gh_cli_permissions_test.go index abc2fbce1d..09b8a145bf 100644 --- a/pkg/workflow/gh_cli_permissions_test.go +++ b/pkg/workflow/gh_cli_permissions_test.go @@ -260,3 +260,167 @@ engine: copilot assert.NotContains(t, activationJobSection, "pull-requests", "activation job should NOT include pull-requests when no pre-step requires it") } + +// TestDetectWriteCommandsInShellScripts_GhPrCreate verifies that `gh pr create` is detected as a write command. +func TestDetectWriteCommandsInShellScripts_GhPrCreate(t *testing.T) { + scripts := []string{`gh pr create --title "Fix bug" --body "details"`} + cmds := detectWriteCommandsInShellScripts(scripts) + require.Len(t, cmds, 1) + assert.Equal(t, "gh pr create", cmds[0]) +} + +// TestDetectWriteCommandsInShellScripts_GhIssueClose verifies that `gh issue close` is detected. +func TestDetectWriteCommandsInShellScripts_GhIssueClose(t *testing.T) { + scripts := []string{`gh issue close $ISSUE_NUMBER`} + cmds := detectWriteCommandsInShellScripts(scripts) + require.Len(t, cmds, 1) + assert.Equal(t, "gh issue close", cmds[0]) +} + +// TestDetectWriteCommandsInShellScripts_ReadCommandNotDetected verifies that a read command +// (e.g. `gh pr diff`) is NOT flagged as a write command. +func TestDetectWriteCommandsInShellScripts_ReadCommandNotDetected(t *testing.T) { + scripts := []string{`gh pr diff "$PR_NUMBER" --name-only`} + cmds := detectWriteCommandsInShellScripts(scripts) + assert.Empty(t, cmds, "gh pr diff is a read command and should not be detected as write") +} + +// TestDetectWriteCommandsInShellScripts_Deduplicated verifies that duplicate write commands +// are reported only once. +func TestDetectWriteCommandsInShellScripts_Deduplicated(t *testing.T) { + scripts := []string{ + `gh pr create --title "Fix 1" +gh pr create --title "Fix 2"`, + } + cmds := detectWriteCommandsInShellScripts(scripts) + assert.Len(t, cmds, 1, "duplicate write commands should be deduplicated") + assert.Equal(t, "gh pr create", cmds[0]) +} + +// TestDetectWriteCommandsInShellScripts_MultipleWriteCommands verifies detection of +// multiple distinct write commands. +func TestDetectWriteCommandsInShellScripts_MultipleWriteCommands(t *testing.T) { + scripts := []string{ + `gh pr merge $PR_NUMBER --squash +gh issue comment $ISSUE_NUMBER --body "done"`, + } + cmds := detectWriteCommandsInShellScripts(scripts) + assert.Len(t, cmds, 2) + assert.Contains(t, cmds, "gh pr merge") + assert.Contains(t, cmds, "gh issue comment") +} + +// TestInferPermissionsFromShellScripts_GhCacheList verifies actions: read for gh cache list. +func TestInferPermissionsFromShellScripts_GhCacheList(t *testing.T) { + scripts := []string{`gh cache list --json key`} + perms := inferPermissionsFromShellScripts(scripts) + assert.Equal(t, PermissionRead, perms[PermissionActions], "gh cache list should require actions: read") +} + +// TestInferPermissionsFromShellScripts_GhRepoView verifies contents: read for gh repo view. +func TestInferPermissionsFromShellScripts_GhRepoView(t *testing.T) { + scripts := []string{`gh repo view owner/repo --json description`} + perms := inferPermissionsFromShellScripts(scripts) + assert.Equal(t, PermissionRead, perms[PermissionContents], "gh repo view should require contents: read") +} + +// TestInferPermissionsFromShellScripts_GhLabelList verifies issues: read for gh label list. +func TestInferPermissionsFromShellScripts_GhLabelList(t *testing.T) { + scripts := []string{`gh label list --json name`} + perms := inferPermissionsFromShellScripts(scripts) + assert.Equal(t, PermissionRead, perms[PermissionIssues], "gh label list should require issues: read") +} + +// TestInferPermissionsFromShellScripts_GhIssueComment verifies that `gh issue comment` +// (a write command) still causes issues: read to be inferred so the permission is present +// in the activation job — the write-command check is separate. +func TestInferPermissionsFromShellScripts_GhIssueComment(t *testing.T) { + scripts := []string{`gh issue comment $ISSUE_NUMBER --body "hello"`} + perms := inferPermissionsFromShellScripts(scripts) + assert.Equal(t, PermissionRead, perms[PermissionIssues], "write commands still require at minimum read-level permission for the scope") +} + +// TestInferPermissionsFromShellScripts_GhAPIReleases verifies contents: read for gh api releases. +func TestInferPermissionsFromShellScripts_GhAPIReleases(t *testing.T) { + scripts := []string{`gh api /repos/owner/repo/releases --jq '.[0].tag_name'`} + perms := inferPermissionsFromShellScripts(scripts) + assert.Equal(t, PermissionRead, perms[PermissionContents], "gh api /repos/.../releases should require contents: read") +} + +// TestInferPermissionsFromShellScripts_GhAPILabels verifies issues: read for gh api labels endpoint. +func TestInferPermissionsFromShellScripts_GhAPILabels(t *testing.T) { + scripts := []string{`gh api /repos/owner/repo/labels`} + perms := inferPermissionsFromShellScripts(scripts) + assert.Equal(t, PermissionRead, perms[PermissionIssues], "gh api /repos/.../labels should require issues: read") +} + +// TestActivationJobWriteCommandInPreStepReturnsError verifies that the compiler returns +// an error when an activation pre-step calls a write gh command. +func TestActivationJobWriteCommandInPreStepReturnsError(t *testing.T) { + tmpDir := testutil.TempDir(t, "activation-write-cmd-error") + testFile := filepath.Join(tmpDir, "bad-workflow.md") + testContent := `--- +on: + pull_request: + types: [opened] +permissions: + contents: read + pull-requests: read +engine: copilot +jobs: + activation: + pre-steps: + - name: Create PR comment + run: | + gh pr comment "$PR_NUMBER" --body "Starting review..." +--- + +# Workflow whose activation pre-step illegally calls a write gh command +` + err := os.WriteFile(testFile, []byte(testContent), 0644) + require.NoError(t, err, "failed to write test workflow") + + compiler := NewCompiler() + err = compiler.CompileWorkflow(testFile) + require.Error(t, err, "compiler should reject write gh commands in activation pre-steps") + assert.Contains(t, err.Error(), "gh pr comment", "error should mention the offending command") + assert.Contains(t, err.Error(), "write", "error should explain the write-permission restriction") +} + +// TestActivationJobPermissionsWithGhCachePreStep verifies actions: read is added when +// an activation pre-step calls `gh cache list`. +func TestActivationJobPermissionsWithGhCachePreStep(t *testing.T) { + tmpDir := testutil.TempDir(t, "activation-perms-gh-cache") + testFile := filepath.Join(tmpDir, "cache-workflow.md") + testContent := `--- +on: + pull_request: + types: [opened] +permissions: + contents: read + actions: read +engine: copilot +jobs: + activation: + pre-steps: + - name: List caches + run: | + gh cache list --json key > /tmp/caches.json +--- + +# Workflow that lists caches in activation pre-step +` + err := os.WriteFile(testFile, []byte(testContent), 0644) + require.NoError(t, err, "failed to write test workflow") + + compiler := NewCompiler() + err = compiler.CompileWorkflow(testFile) + require.NoError(t, err, "failed to compile workflow") + + lockContent, err := os.ReadFile(stringutil.MarkdownToLockFile(testFile)) + require.NoError(t, err, "failed to read generated lock file") + + activationJobSection := extractJobSection(string(lockContent), string(constants.ActivationJobName)) + assert.Contains(t, activationJobSection, "actions: read", + "activation job should include actions: read when pre-step calls gh cache list") +} From 98aea308a7c27706953802204b0014347b14c805 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 17 May 2026 17:21:05 +0000 Subject: [PATCH 05/15] fix: add docs reference to write-command error message Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- pkg/workflow/compiler_activation_job_builder.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/workflow/compiler_activation_job_builder.go b/pkg/workflow/compiler_activation_job_builder.go index bd53d5d561..aca5fe9583 100644 --- a/pkg/workflow/compiler_activation_job_builder.go +++ b/pkg/workflow/compiler_activation_job_builder.go @@ -552,7 +552,7 @@ func (c *Compiler) buildActivationPermissions(ctx *activationJobBuildContext) (s // because the activation job intentionally operates with read-only permissions. if writeCmds := detectWriteCommandsInShellScripts(activationPreStepScripts); len(writeCmds) > 0 { return "", fmt.Errorf( - "activation pre-step uses write gh command(s) [%s]; write operations are not permitted in activation job pre-steps because the activation job runs with read-only permissions. Move write operations to the agent job steps or use safe-outputs", + "activation pre-step uses write gh command(s) [%s]; write operations are not permitted in activation job pre-steps because the activation job runs with read-only permissions. Move write operations to the agent job steps or use safe-outputs. See: https://github.github.com/gh-aw/reference/safe-outputs/", strings.Join(writeCmds, ", "), ) } From f15ab0405d4385f7954922ac54d4f8e85edd06b5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 17 May 2026 17:48:14 +0000 Subject: [PATCH 06/15] feat: map gh CLI groups to GitHub App permissions Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- .../compiler_activation_job_builder.go | 12 ++ pkg/workflow/data/gh_cli_permissions.json | 108 ++++++++++++--- pkg/workflow/gh_cli_permissions.go | 123 ++++++++++++------ pkg/workflow/gh_cli_permissions_test.go | 50 +++++++ 4 files changed, 233 insertions(+), 60 deletions(-) diff --git a/pkg/workflow/compiler_activation_job_builder.go b/pkg/workflow/compiler_activation_job_builder.go index aca5fe9583..1f87ae7151 100644 --- a/pkg/workflow/compiler_activation_job_builder.go +++ b/pkg/workflow/compiler_activation_job_builder.go @@ -162,6 +162,18 @@ func (c *Compiler) addActivationFeedbackAndValidationSteps(ctx *activationJobBui if ctx.needsAppTokenForAccess { appPerms.Set(PermissionContents, PermissionRead) } + // Add GitHub App-only permissions inferred from pre-step gh CLI commands so the + // minted App token includes the scopes those commands require (e.g. codespaces: read + // for `gh codespace list`). Only App-only scopes are passed here — standard GitHub + // Actions scopes (pull-requests, issues, etc.) are already covered by the GITHUB_TOKEN + // permissions block and do not need to be re-declared on the App token. + for scope, level := range inferPermissionsFromShellScripts( + extractRunScriptsFromJobPreSteps(data.Jobs, string(constants.ActivationJobName)), + ) { + if IsGitHubAppOnlyScope(scope) { + appPerms.Set(scope, level) + } + } ctx.steps = append(ctx.steps, c.buildActivationAppTokenMintStep(data.ActivationGitHubApp, appPerms)...) ctx.outputs["activation_app_token_minting_failed"] = "${{ steps.activation-app-token.outcome == 'failure' }}" } diff --git a/pkg/workflow/data/gh_cli_permissions.json b/pkg/workflow/data/gh_cli_permissions.json index ab3abcf9c0..159161e8b6 100644 --- a/pkg/workflow/data/gh_cli_permissions.json +++ b/pkg/workflow/data/gh_cli_permissions.json @@ -1,119 +1,185 @@ { - "version": "1.1", - "description": "GitHub CLI (gh) subcommand patterns and their required GitHub Actions permissions. Used by the compiler to infer permissions needed by pre-steps that call the gh CLI.", + "version": "1.2", + "description": "GitHub CLI (gh) subcommand patterns and their required GitHub Actions and GitHub App permissions. Used by the compiler to infer permissions needed by pre-steps that call the gh CLI.", "subcommand_groups": { "pr": { "description": "Pull request operations", "read_subcommands": ["checkout", "checks", "diff", "list", "status", "view"], "write_subcommands": ["close", "comment", "convert-to-draft", "create", "edit", "lock", "merge", "ready", "reopen", "review", "revert", "unlock", "update-branch"], "read_permissions": ["pull-requests"], - "write_permissions": ["pull-requests"] + "write_permissions": ["pull-requests"], + "app_read_permissions": ["pull-requests"], + "app_write_permissions": ["pull-requests"] }, "issue": { "description": "Issue operations", "read_subcommands": ["list", "status", "view"], "write_subcommands": ["close", "comment", "create", "delete", "develop", "edit", "lock", "pin", "reopen", "transfer", "unlock", "unpin"], "read_permissions": ["issues"], - "write_permissions": ["issues"] + "write_permissions": ["issues"], + "app_read_permissions": ["issues"], + "app_write_permissions": ["issues"] }, "release": { "description": "Release operations", "read_subcommands": ["download", "list", "verify", "verify-asset", "view"], "write_subcommands": ["create", "delete", "delete-asset", "edit", "upload"], "read_permissions": ["contents"], - "write_permissions": ["contents"] + "write_permissions": ["contents"], + "app_read_permissions": ["contents"], + "app_write_permissions": ["contents"] }, "workflow": { "description": "GitHub Actions workflow operations", "read_subcommands": ["list", "view"], "write_subcommands": ["disable", "enable", "run"], "read_permissions": ["actions"], - "write_permissions": ["actions"] + "write_permissions": ["actions"], + "app_read_permissions": ["actions"], + "app_write_permissions": ["actions", "workflows"] }, "run": { "description": "GitHub Actions run operations", "read_subcommands": ["download", "list", "view", "watch"], "write_subcommands": ["cancel", "delete", "rerun"], "read_permissions": ["actions"], - "write_permissions": ["actions"] + "write_permissions": ["actions"], + "app_read_permissions": ["actions"], + "app_write_permissions": ["actions"] }, "cache": { "description": "GitHub Actions cache operations", "read_subcommands": ["list"], "write_subcommands": ["delete"], "read_permissions": ["actions"], - "write_permissions": ["actions"] + "write_permissions": ["actions"], + "app_read_permissions": ["actions"], + "app_write_permissions": ["actions"] }, "repo": { "description": "Repository operations", "read_subcommands": ["clone", "gitignore", "license", "list", "set-default", "view"], "write_subcommands": ["archive", "autolink", "create", "delete", "deploy-key", "edit", "fork", "rename", "sync", "unarchive"], "read_permissions": ["contents"], - "write_permissions": ["contents"] + "write_permissions": ["contents"], + "app_read_permissions": ["contents"], + "app_write_permissions": ["contents", "administration"] }, "label": { "description": "Label operations (labels use the issues permission scope)", "read_subcommands": ["list"], "write_subcommands": ["clone", "create", "delete", "edit"], "read_permissions": ["issues"], - "write_permissions": ["issues"] + "write_permissions": ["issues"], + "app_read_permissions": ["issues"], + "app_write_permissions": ["issues"] + }, + "codespace": { + "description": "Codespace operations (requires GitHub App with codespaces permission; not available via GITHUB_TOKEN)", + "read_subcommands": ["list", "logs", "ports", "view"], + "write_subcommands": ["create", "delete", "edit", "rebuild", "rename", "restart", "start", "stop"], + "read_permissions": [], + "write_permissions": [], + "app_read_permissions": ["codespaces"], + "app_write_permissions": ["codespaces"] } }, "api_path_patterns": [ { "pattern": "^/?repos/[^/\\s]+/[^/\\s]+/pulls(/|$|\\?)", "description": "Pull requests REST API", - "permissions": ["pull-requests"] + "permissions": ["pull-requests"], + "app_permissions": ["pull-requests"] }, { "pattern": "^/?repos/[^/\\s]+/[^/\\s]+/issues(/|$|\\?)", "description": "Issues REST API", - "permissions": ["issues"] + "permissions": ["issues"], + "app_permissions": ["issues"] }, { "pattern": "^/?repos/[^/\\s]+/[^/\\s]+/labels(/|$|\\?)", "description": "Labels REST API (uses issues permission scope)", - "permissions": ["issues"] + "permissions": ["issues"], + "app_permissions": ["issues"] }, { "pattern": "^/?repos/[^/\\s]+/[^/\\s]+/actions(/|$|\\?)", "description": "GitHub Actions REST API", - "permissions": ["actions"] + "permissions": ["actions"], + "app_permissions": ["actions"] }, { "pattern": "^/?repos/[^/\\s]+/[^/\\s]+/releases(/|$|\\?)", "description": "Releases REST API", - "permissions": ["contents"] + "permissions": ["contents"], + "app_permissions": ["contents"] }, { "pattern": "^/?repos/[^/\\s]+/[^/\\s]+/(git|contents|commits|branches|tags|compare)(/|$|\\?)", "description": "Repository contents REST API", - "permissions": ["contents"] + "permissions": ["contents"], + "app_permissions": ["contents"] }, { "pattern": "^/?repos/[^/\\s]+/[^/\\s]+/deployments(/|$|\\?)", "description": "Deployments REST API", - "permissions": ["deployments"] + "permissions": ["deployments"], + "app_permissions": ["deployments"] }, { "pattern": "^/?repos/[^/\\s]+/[^/\\s]+/(statuses|commits/[^/\\s]+/statuses)(/|$|\\?)", "description": "Commit statuses REST API", - "permissions": ["statuses"] + "permissions": ["statuses"], + "app_permissions": ["statuses"] }, { "pattern": "^/?repos/[^/\\s]+/[^/\\s]+/(check-runs|check-suites)(/|$|\\?)", "description": "Check runs/suites REST API", - "permissions": ["checks"] + "permissions": ["checks"], + "app_permissions": ["checks"] }, { "pattern": "^/?repos/[^/\\s]+/[^/\\s]+/pages(/|$|\\?)", "description": "GitHub Pages REST API", - "permissions": ["pages"] + "permissions": ["pages"], + "app_permissions": ["pages"] }, { "pattern": "^/?repos/[^/\\s]+/[^/\\s]+/packages(/|$|\\?)", "description": "Packages REST API", - "permissions": ["packages"] + "permissions": ["packages"], + "app_permissions": ["packages"] + }, + { + "pattern": "^/?repos/[^/\\s]+/[^/\\s]+/environments(/|$|\\?)", + "description": "Environments REST API (GitHub App only)", + "permissions": [], + "app_permissions": ["environments"] + }, + { + "pattern": "^/?repos/[^/\\s]+/[^/\\s]+/hooks(/|$|\\?)", + "description": "Repository webhooks REST API (GitHub App only)", + "permissions": [], + "app_permissions": ["repository-hooks"] + }, + { + "pattern": "^/?orgs/[^/\\s]+/members(/|$|\\?)", + "description": "Organization members REST API (GitHub App only)", + "permissions": [], + "app_permissions": ["members"] + }, + { + "pattern": "^/?orgs/[^/\\s]+/teams(/|$|\\?)", + "description": "Organization teams REST API (GitHub App only)", + "permissions": [], + "app_permissions": ["members"] + }, + { + "pattern": "^/?orgs/[^/\\s]+/hooks(/|$|\\?)", + "description": "Organization webhooks REST API (GitHub App only)", + "permissions": [], + "app_permissions": ["organization-hooks"] } ] } diff --git a/pkg/workflow/gh_cli_permissions.go b/pkg/workflow/gh_cli_permissions.go index 82ae0220e4..9f6bb686e4 100644 --- a/pkg/workflow/gh_cli_permissions.go +++ b/pkg/workflow/gh_cli_permissions.go @@ -13,18 +13,21 @@ var ghCLIPermissionsJSON []byte // ghCLISubcommandGroup maps a gh subcommand group (e.g. "pr", "issue") to its permissions. type ghCLISubcommandGroup struct { - Description string `json:"description"` - ReadSubcommands []string `json:"read_subcommands"` - WriteSubcommands []string `json:"write_subcommands"` - ReadPermissions []string `json:"read_permissions"` - WritePermissions []string `json:"write_permissions"` + Description string `json:"description"` + ReadSubcommands []string `json:"read_subcommands"` + WriteSubcommands []string `json:"write_subcommands"` + ReadPermissions []string `json:"read_permissions"` + WritePermissions []string `json:"write_permissions"` + AppReadPermissions []string `json:"app_read_permissions"` + AppWritePermissions []string `json:"app_write_permissions"` } // ghCLIAPIPathPattern maps a REST API path pattern to the required permissions. type ghCLIAPIPathPattern struct { - Pattern string `json:"pattern"` - Description string `json:"description"` - Permissions []string `json:"permissions"` + Pattern string `json:"pattern"` + Description string `json:"description"` + Permissions []string `json:"permissions"` + AppPermissions []string `json:"app_permissions"` } // ghCLIPermissionsData is the top-level structure of gh_cli_permissions.json. @@ -44,13 +47,21 @@ type compiledGHCLIPermissions struct { // groupReadPermissions maps a subcommand group name (e.g. "pr") to read permission scopes // used as fallback when the specific action is not recognised. groupReadPermissions map[string][]PermissionScope + // appReadCommands maps "group action" to GitHub App read permission scopes. + appReadCommands map[string][]PermissionScope + // appWriteCommands maps "group action" to GitHub App write permission scopes. + appWriteCommands map[string][]PermissionScope + // groupAppReadPermissions maps a subcommand group name to GitHub App read permission scopes + // used as fallback when the specific action is not recognised. + groupAppReadPermissions map[string][]PermissionScope // apiPathPatterns holds compiled regexps paired with required permission scopes. apiPathPatterns []compiledAPIPathPattern } type compiledAPIPathPattern struct { - re *regexp.Regexp - permissions []PermissionScope + re *regexp.Regexp + permissions []PermissionScope + appPermissions []PermissionScope } var ghCLIPermissions compiledGHCLIPermissions @@ -62,9 +73,12 @@ func init() { } cp := compiledGHCLIPermissions{ - readCommands: make(map[string][]PermissionScope), - writeCommands: make(map[string][]PermissionScope), - groupReadPermissions: make(map[string][]PermissionScope), + readCommands: make(map[string][]PermissionScope), + writeCommands: make(map[string][]PermissionScope), + groupReadPermissions: make(map[string][]PermissionScope), + appReadCommands: make(map[string][]PermissionScope), + appWriteCommands: make(map[string][]PermissionScope), + groupAppReadPermissions: make(map[string][]PermissionScope), } for group, sg := range data.SubcommandGroups { @@ -76,17 +90,28 @@ func init() { for i, p := range sg.WritePermissions { writePerms[i] = PermissionScope(p) } + appReadPerms := make([]PermissionScope, len(sg.AppReadPermissions)) + for i, p := range sg.AppReadPermissions { + appReadPerms[i] = PermissionScope(p) + } + appWritePerms := make([]PermissionScope, len(sg.AppWritePermissions)) + for i, p := range sg.AppWritePermissions { + appWritePerms[i] = PermissionScope(p) + } // Store group-level fallback (used when specific action is unknown). cp.groupReadPermissions[group] = readPerms + cp.groupAppReadPermissions[group] = appReadPerms for _, action := range sg.ReadSubcommands { key := group + " " + action cp.readCommands[key] = readPerms + cp.appReadCommands[key] = appReadPerms } for _, action := range sg.WriteSubcommands { key := group + " " + action cp.writeCommands[key] = writePerms + cp.appWriteCommands[key] = appWritePerms } } @@ -99,7 +124,15 @@ func init() { for i, p := range ap.Permissions { perms[i] = PermissionScope(p) } - cp.apiPathPatterns = append(cp.apiPathPatterns, compiledAPIPathPattern{re: re, permissions: perms}) + appPerms := make([]PermissionScope, len(ap.AppPermissions)) + for i, p := range ap.AppPermissions { + appPerms[i] = PermissionScope(p) + } + cp.apiPathPatterns = append(cp.apiPathPatterns, compiledAPIPathPattern{ + re: re, + permissions: perms, + appPermissions: appPerms, + }) } ghCLIPermissions = cp @@ -112,15 +145,20 @@ func init() { // // Capture groups: (1) subcommand group, (2) action word. // The trailing `\b` ensures the action word is complete (avoids matching partial words). -var ghSubcommandRE = regexp.MustCompile(`(?m)(?:^|[\s|;])gh\s+(pr|issue|release|workflow|run|cache|repo|label)\s+([\w][\w-]*)\b`) +var ghSubcommandRE = regexp.MustCompile(`(?m)(?:^|[\s|;])gh\s+(pr|issue|release|workflow|run|cache|repo|label|codespace)\s+([\w][\w-]*)\b`) // ghAPIRE matches `gh api ` invocations. // Capture group: (1) API path (up to the first whitespace, pipe, or quote). var ghAPIRE = regexp.MustCompile(`(?m)(?:^|[\s|;])gh\s+api\s+([^\s|;&"'\\]+)`) // inferPermissionsFromShellScripts scans one or more shell script strings for -// gh CLI invocations and returns the minimum set of GitHub Actions permissions -// required to run those commands. +// gh CLI invocations and returns the minimum set of GitHub Actions and GitHub App +// permissions required to run those commands. +// +// The returned map includes both GitHub Actions (GITHUB_TOKEN) scopes and GitHub +// App-only scopes. Callers should use IsGitHubAppOnlyScope to distinguish them: +// App-only scopes are skipped when rendering GITHUB_TOKEN permissions but are passed +// to the GitHub App token minting step when a GitHub App is configured. // // Only read-level permissions are inferred here; write-level operations are // intentionally not auto-escalated. Use detectWriteCommandsInShellScripts to @@ -128,6 +166,14 @@ var ghAPIRE = regexp.MustCompile(`(?m)(?:^|[\s|;])gh\s+api\s+([^\s|;&"'\\]+)`) func inferPermissionsFromShellScripts(scripts []string) map[PermissionScope]PermissionLevel { perms := make(map[PermissionScope]PermissionLevel) + addScopes := func(scopes []PermissionScope) { + for _, scope := range scopes { + if _, exists := perms[scope]; !exists { + perms[scope] = PermissionRead + } + } + } + for _, script := range scripts { // Match gh patterns. for _, m := range ghSubcommandRE.FindAllStringSubmatch(script, -1) { @@ -135,33 +181,35 @@ func inferPermissionsFromShellScripts(scripts []string) map[PermissionScope]Perm action := strings.ToLower(m[2]) key := group + " " + action + matched := false // Check explicit read mapping first. if readPerms, ok := ghCLIPermissions.readCommands[key]; ok { - for _, scope := range readPerms { - if _, exists := perms[scope]; !exists { - perms[scope] = PermissionRead - } - } + addScopes(readPerms) + matched = true + } + if appReadPerms, ok := ghCLIPermissions.appReadCommands[key]; ok { + addScopes(appReadPerms) + matched = true + } + if matched { continue } // Write commands only need read-level permissions in the activation job context. // (Full write escalation is rejected by detectWriteCommandsInShellScripts instead.) if readPerms, ok := ghCLIPermissions.writeCommands[key]; ok { - for _, scope := range readPerms { - if _, exists := perms[scope]; !exists { - perms[scope] = PermissionRead - } - } + addScopes(readPerms) + matched = true + } + if appWritePerms, ok := ghCLIPermissions.appWriteCommands[key]; ok { + addScopes(appWritePerms) + matched = true + } + if matched { continue } // Fall back to group-level read permissions for unrecognised actions. - if readPerms, ok := ghCLIPermissions.groupReadPermissions[group]; ok { - for _, scope := range readPerms { - if _, exists := perms[scope]; !exists { - perms[scope] = PermissionRead - } - } - } + addScopes(ghCLIPermissions.groupReadPermissions[group]) + addScopes(ghCLIPermissions.groupAppReadPermissions[group]) } // Match gh api patterns. @@ -169,11 +217,8 @@ func inferPermissionsFromShellScripts(scripts []string) map[PermissionScope]Perm path := m[1] for _, ap := range ghCLIPermissions.apiPathPatterns { if ap.re.MatchString(path) { - for _, scope := range ap.permissions { - if _, exists := perms[scope]; !exists { - perms[scope] = PermissionRead - } - } + addScopes(ap.permissions) + addScopes(ap.appPermissions) } } } diff --git a/pkg/workflow/gh_cli_permissions_test.go b/pkg/workflow/gh_cli_permissions_test.go index 09b8a145bf..e51db91e16 100644 --- a/pkg/workflow/gh_cli_permissions_test.go +++ b/pkg/workflow/gh_cli_permissions_test.go @@ -424,3 +424,53 @@ jobs: assert.Contains(t, activationJobSection, "actions: read", "activation job should include actions: read when pre-step calls gh cache list") } + +// TestInferPermissionsFromShellScripts_GhCodespaceList verifies that `gh codespace list` +// returns the GitHub App-only codespaces: read permission (no GITHUB_TOKEN equivalent). +func TestInferPermissionsFromShellScripts_GhCodespaceList(t *testing.T) { + scripts := []string{`gh codespace list --json name`} + perms := inferPermissionsFromShellScripts(scripts) + assert.Equal(t, PermissionRead, perms[PermissionCodespaces], + "gh codespace list should require codespaces: read (GitHub App-only)") +} + +// TestInferPermissionsFromShellScripts_GhAPIOrgsMembers verifies that `gh api /orgs/.../members` +// returns the GitHub App-only members: read permission. +func TestInferPermissionsFromShellScripts_GhAPIOrgsMembers(t *testing.T) { + scripts := []string{`gh api /orgs/myorg/members --jq '.[].login'`} + perms := inferPermissionsFromShellScripts(scripts) + assert.Equal(t, PermissionRead, perms[PermissionMembers], + "gh api /orgs/.../members should require members: read (GitHub App-only)") +} + +// TestInferPermissionsFromShellScripts_AppAndActionsPermissions verifies that a script +// combining standard and App-only gh commands returns both sets of permissions. +func TestInferPermissionsFromShellScripts_AppAndActionsPermissions(t *testing.T) { + scripts := []string{ + `gh pr diff "$PR_NUMBER" --name-only +gh codespace list --json name`, + } + perms := inferPermissionsFromShellScripts(scripts) + assert.Equal(t, PermissionRead, perms[PermissionPullRequests], + "gh pr diff should require pull-requests: read") + assert.Equal(t, PermissionRead, perms[PermissionCodespaces], + "gh codespace list should require codespaces: read (GitHub App-only)") +} + +// TestInferPermissionsFromShellScripts_GhRepoWriteHasAppAdminPerm verifies that `gh repo archive` +// (a write command) is still inferred to need administration: read (GitHub App-only) at minimum. +func TestInferPermissionsFromShellScripts_GhRepoWriteHasAppAdminPerm(t *testing.T) { + scripts := []string{`gh repo archive owner/repo`} + perms := inferPermissionsFromShellScripts(scripts) + assert.Equal(t, PermissionRead, perms[PermissionAdministration], + "gh repo archive (write) should infer administration: read for GitHub App") +} + +// TestInferPermissionsFromShellScripts_GhAPIRepoEnvironments verifies environments: read +// (GitHub App-only) for the environments REST API path. +func TestInferPermissionsFromShellScripts_GhAPIRepoEnvironments(t *testing.T) { + scripts := []string{`gh api /repos/owner/repo/environments --jq '.[].name'`} + perms := inferPermissionsFromShellScripts(scripts) + assert.Equal(t, PermissionRead, perms[PermissionEnvironments], + "gh api /repos/.../environments should require environments: read (GitHub App-only)") +} From 7b0880a712ebef45af9c9044c158f20d0cd0bc20 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 17 May 2026 17:53:07 +0000 Subject: [PATCH 07/15] refactor: dynamic regex from JSON keys, cache pre-step inference in build context Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- .../compiler_activation_job_builder.go | 45 +++++++++++-------- pkg/workflow/gh_cli_permissions.go | 28 +++++++----- 2 files changed, 44 insertions(+), 29 deletions(-) diff --git a/pkg/workflow/compiler_activation_job_builder.go b/pkg/workflow/compiler_activation_job_builder.go index 1f87ae7151..0aedd85c13 100644 --- a/pkg/workflow/compiler_activation_job_builder.go +++ b/pkg/workflow/compiler_activation_job_builder.go @@ -39,6 +39,13 @@ type activationJobBuildContext struct { customJobsBeforeActivation []string activationNeeds []string activationCondition string + + // activationPreStepScripts holds the `run` scripts extracted from + // jobs.activation.pre-steps, cached here to avoid repeated extraction. + activationPreStepScripts []string + // activationPreStepInferredPerms holds the permissions inferred from + // activationPreStepScripts, cached here to avoid repeated inference. + activationPreStepInferredPerms map[PermissionScope]PermissionLevel } // newActivationJobBuildContext initializes activation-job state with setup, aw_info, and base outputs. @@ -75,6 +82,11 @@ func (c *Compiler) newActivationJobBuildContext( } ctx.shouldRemoveLabel = ctx.hasLabelCommand && data.LabelCommandRemoveLabel + // Cache pre-step scripts and inferred permissions once to avoid redundant extraction + // and inference calls in buildActivationPermissions and addActivationFeedbackAndValidationSteps. + ctx.activationPreStepScripts = extractRunScriptsFromJobPreSteps(data.Jobs, string(constants.ActivationJobName)) + ctx.activationPreStepInferredPerms = inferPermissionsFromShellScripts(ctx.activationPreStepScripts) + ctx.steps = append(ctx.steps, c.generateCheckoutActionsFolder(data)...) activationSetupTraceID := "" activationSetupParentSpanID := "" @@ -167,9 +179,8 @@ func (c *Compiler) addActivationFeedbackAndValidationSteps(ctx *activationJobBui // for `gh codespace list`). Only App-only scopes are passed here — standard GitHub // Actions scopes (pull-requests, issues, etc.) are already covered by the GITHUB_TOKEN // permissions block and do not need to be re-declared on the App token. - for scope, level := range inferPermissionsFromShellScripts( - extractRunScriptsFromJobPreSteps(data.Jobs, string(constants.ActivationJobName)), - ) { + // Uses the cached inferred permissions to avoid redundant computation. + for scope, level := range ctx.activationPreStepInferredPerms { if IsGitHubAppOnlyScope(scope) { appPerms.Set(scope, level) } @@ -557,21 +568,19 @@ func (c *Compiler) buildActivationPermissions(ctx *activationJobBuildContext) (s // Infer permissions required by gh CLI calls in jobs.activation.pre-steps run scripts. // This ensures that user-defined pre-steps that call `gh pr diff`, `gh issue view`, etc. // get the permissions they need without requiring manual permission declarations. - if len(ctx.data.Jobs) > 0 { - activationPreStepScripts := extractRunScriptsFromJobPreSteps(ctx.data.Jobs, string(constants.ActivationJobName)) - if len(activationPreStepScripts) > 0 { - // Detect write commands first — these are not permitted in activation pre-steps - // because the activation job intentionally operates with read-only permissions. - if writeCmds := detectWriteCommandsInShellScripts(activationPreStepScripts); len(writeCmds) > 0 { - return "", fmt.Errorf( - "activation pre-step uses write gh command(s) [%s]; write operations are not permitted in activation job pre-steps because the activation job runs with read-only permissions. Move write operations to the agent job steps or use safe-outputs. See: https://github.github.com/gh-aw/reference/safe-outputs/", - strings.Join(writeCmds, ", "), - ) - } - for scope, level := range inferPermissionsFromShellScripts(activationPreStepScripts) { - if _, exists := permsMap[scope]; !exists { - permsMap[scope] = level - } + // Scripts and inferred permissions are cached in ctx to avoid redundant computation. + if len(ctx.activationPreStepScripts) > 0 { + // Detect write commands first — these are not permitted in activation pre-steps + // because the activation job intentionally operates with read-only permissions. + if writeCmds := detectWriteCommandsInShellScripts(ctx.activationPreStepScripts); len(writeCmds) > 0 { + return "", fmt.Errorf( + "activation pre-step uses write gh command(s) [%s]; write operations are not permitted in activation job pre-steps because the activation job runs with read-only permissions. Move write operations to the agent job steps or use safe-outputs. See: https://github.github.com/gh-aw/reference/safe-outputs/", + strings.Join(writeCmds, ", "), + ) + } + for scope, level := range ctx.activationPreStepInferredPerms { + if _, exists := permsMap[scope]; !exists { + permsMap[scope] = level } } } diff --git a/pkg/workflow/gh_cli_permissions.go b/pkg/workflow/gh_cli_permissions.go index 9f6bb686e4..34689e15ac 100644 --- a/pkg/workflow/gh_cli_permissions.go +++ b/pkg/workflow/gh_cli_permissions.go @@ -5,6 +5,7 @@ import ( "encoding/json" "fmt" "regexp" + "sort" "strings" ) @@ -40,6 +41,10 @@ type ghCLIPermissionsData struct { // compiledGHCLIPermissions holds pre-compiled lookup data built from the JSON. type compiledGHCLIPermissions struct { + // subcommandRE is dynamically compiled from the subcommand_groups keys so that adding + // a new group to the JSON automatically extends the pattern without a code change. + // Capture groups: (1) subcommand group, (2) action word. + subcommandRE *regexp.Regexp // readCommands maps "group action" (e.g. "pr diff") to read permission scopes. readCommands map[string][]PermissionScope // writeCommands maps "group action" (e.g. "pr create") to write permission scopes. @@ -81,6 +86,16 @@ func init() { groupAppReadPermissions: make(map[string][]PermissionScope), } + // Build the subcommand regex dynamically from the JSON group keys so that adding a new + // group to gh_cli_permissions.json automatically extends the pattern without a code change. + groups := make([]string, 0, len(data.SubcommandGroups)) + for group := range data.SubcommandGroups { + groups = append(groups, regexp.QuoteMeta(group)) + } + sort.Strings(groups) // deterministic alternation order + subcommandPattern := `(?m)(?:^|[\s|;])gh\s+(` + strings.Join(groups, "|") + `)\s+([\w][\w-]*)\b` + cp.subcommandRE = regexp.MustCompile(subcommandPattern) + for group, sg := range data.SubcommandGroups { readPerms := make([]PermissionScope, len(sg.ReadPermissions)) for i, p := range sg.ReadPermissions { @@ -138,15 +153,6 @@ func init() { ghCLIPermissions = cp } -// ghSubcommandRE matches invocations of the gh CLI followed by a known subcommand group -// and an action word. It is designed to handle both: -// - simple single-line calls (gh pr diff "$PR" --name-only) -// - multi-line shell scripts where the `gh` binary may be preceded by whitespace. -// -// Capture groups: (1) subcommand group, (2) action word. -// The trailing `\b` ensures the action word is complete (avoids matching partial words). -var ghSubcommandRE = regexp.MustCompile(`(?m)(?:^|[\s|;])gh\s+(pr|issue|release|workflow|run|cache|repo|label|codespace)\s+([\w][\w-]*)\b`) - // ghAPIRE matches `gh api ` invocations. // Capture group: (1) API path (up to the first whitespace, pipe, or quote). var ghAPIRE = regexp.MustCompile(`(?m)(?:^|[\s|;])gh\s+api\s+([^\s|;&"'\\]+)`) @@ -176,7 +182,7 @@ func inferPermissionsFromShellScripts(scripts []string) map[PermissionScope]Perm for _, script := range scripts { // Match gh patterns. - for _, m := range ghSubcommandRE.FindAllStringSubmatch(script, -1) { + for _, m := range ghCLIPermissions.subcommandRE.FindAllStringSubmatch(script, -1) { group := strings.ToLower(m[1]) action := strings.ToLower(m[2]) key := group + " " + action @@ -235,7 +241,7 @@ func detectWriteCommandsInShellScripts(scripts []string) []string { seen := make(map[string]struct{}) for _, script := range scripts { - for _, m := range ghSubcommandRE.FindAllStringSubmatch(script, -1) { + for _, m := range ghCLIPermissions.subcommandRE.FindAllStringSubmatch(script, -1) { group := strings.ToLower(m[1]) action := strings.ToLower(m[2]) key := group + " " + action From 6d9632ab01d590c009077e27611d0db84c4706cb Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 17 May 2026 17:54:24 +0000 Subject: [PATCH 08/15] fix: skip permission inference when no pre-step scripts present Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- pkg/workflow/compiler_activation_job_builder.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pkg/workflow/compiler_activation_job_builder.go b/pkg/workflow/compiler_activation_job_builder.go index 0aedd85c13..dbbb1e8866 100644 --- a/pkg/workflow/compiler_activation_job_builder.go +++ b/pkg/workflow/compiler_activation_job_builder.go @@ -85,7 +85,9 @@ func (c *Compiler) newActivationJobBuildContext( // Cache pre-step scripts and inferred permissions once to avoid redundant extraction // and inference calls in buildActivationPermissions and addActivationFeedbackAndValidationSteps. ctx.activationPreStepScripts = extractRunScriptsFromJobPreSteps(data.Jobs, string(constants.ActivationJobName)) - ctx.activationPreStepInferredPerms = inferPermissionsFromShellScripts(ctx.activationPreStepScripts) + if len(ctx.activationPreStepScripts) > 0 { + ctx.activationPreStepInferredPerms = inferPermissionsFromShellScripts(ctx.activationPreStepScripts) + } ctx.steps = append(ctx.steps, c.generateCheckoutActionsFolder(data)...) activationSetupTraceID := "" From ec59c93d114fee43241122131faa7649bdc149ba Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 17 May 2026 18:19:32 +0000 Subject: [PATCH 09/15] fix: apply gh CLI write-detector and read-inference to agent job pre-steps Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- pkg/workflow/compiler_main_job.go | 27 +++ pkg/workflow/gh_cli_permissions.go | 25 +++ pkg/workflow/gh_cli_permissions_test.go | 221 ++++++++++++++++++++++++ pkg/workflow/permissions_operations.go | 40 +++++ 4 files changed, 313 insertions(+) diff --git a/pkg/workflow/compiler_main_job.go b/pkg/workflow/compiler_main_job.go index a1d4d2a84b..7127207707 100644 --- a/pkg/workflow/compiler_main_job.go +++ b/pkg/workflow/compiler_main_job.go @@ -336,6 +336,33 @@ func (c *Compiler) buildMainJob(data *WorkflowData, activationJobCreated bool) ( } } + // Infer permissions required by gh CLI calls in agent job pre-steps. + // Scans both data.PreSteps (top-level pre-steps:) and jobs.agent.pre-steps, + // detects write commands (which are not permitted since the agent job is read-only), + // and merges inferred read permissions into the existing permissions block. + // Skipped only when the user explicitly opted out of all permissions (permissions: {}). + agentPreStepScripts := extractRunScriptsFromPreStepsYAML(data.PreSteps) + if data.Jobs != nil { + agentPreStepScripts = append(agentPreStepScripts, extractRunScriptsFromJobPreSteps(data.Jobs, string(constants.AgentJobName))...) + } + if len(agentPreStepScripts) > 0 { + if writeCmds := detectWriteCommandsInShellScripts(agentPreStepScripts); len(writeCmds) > 0 { + return nil, fmt.Errorf( + "agent job pre-step uses write gh command(s) [%s]; write operations are not permitted in agent job pre-steps because the agent job runs with read-only permissions. Use safe-outputs for write operations. See: https://github.github.com/gh-aw/reference/safe-outputs/", + strings.Join(writeCmds, ", "), + ) + } + // Infer read permissions unless the user explicitly zeroed out all permissions. + // Check data.Permissions (the original value) since needsContentsRead above may have + // already expanded "permissions: {}" to "permissions:\n contents: read". + if strings.TrimSpace(data.Permissions) != "permissions: {}" && permissions != "" { + inferred := inferPermissionsFromShellScripts(agentPreStepScripts) + if len(inferred) > 0 { + permissions = mergeInferredIntoPermissionsYAML(permissions, inferred) + } + } + } + // In script mode, explicitly add a cleanup step (mirrors post.js in dev/release/action mode). if c.actionMode.IsScript() { steps = append(steps, c.generateScriptModeCleanupStep()) diff --git a/pkg/workflow/gh_cli_permissions.go b/pkg/workflow/gh_cli_permissions.go index 34689e15ac..8040d0710f 100644 --- a/pkg/workflow/gh_cli_permissions.go +++ b/pkg/workflow/gh_cli_permissions.go @@ -7,6 +7,8 @@ import ( "regexp" "sort" "strings" + + "github.com/goccy/go-yaml" ) //go:embed data/gh_cli_permissions.json @@ -259,6 +261,29 @@ func detectWriteCommandsInShellScripts(scripts []string) []string { return found } +// extractRunScriptsFromPreStepsYAML parses a pre-steps YAML string (as stored in +// WorkflowData.PreSteps) and returns the `run` script text from every step. +func extractRunScriptsFromPreStepsYAML(preStepsYAML string) []string { + if preStepsYAML == "" { + return nil + } + var wrapper map[string][]map[string]any + if err := yaml.Unmarshal([]byte(preStepsYAML), &wrapper); err != nil { + return nil + } + steps := wrapper["pre-steps"] + if len(steps) == 0 { + return nil + } + var scripts []string + for _, step := range steps { + if runVal, ok := step["run"].(string); ok && runVal != "" { + scripts = append(scripts, runVal) + } + } + return scripts +} + // extractRunScriptsFromJobPreSteps returns the `run` script text from every // pre-step in the named job configuration inside the frontmatter jobs map. // diff --git a/pkg/workflow/gh_cli_permissions_test.go b/pkg/workflow/gh_cli_permissions_test.go index e51db91e16..3a2078f22c 100644 --- a/pkg/workflow/gh_cli_permissions_test.go +++ b/pkg/workflow/gh_cli_permissions_test.go @@ -474,3 +474,224 @@ func TestInferPermissionsFromShellScripts_GhAPIRepoEnvironments(t *testing.T) { assert.Equal(t, PermissionRead, perms[PermissionEnvironments], "gh api /repos/.../environments should require environments: read (GitHub App-only)") } + +// --- extractRunScriptsFromPreStepsYAML --- + +// TestExtractRunScriptsFromPreStepsYAML_Basic verifies extraction of run scripts from +// a YAML string matching the WorkflowData.PreSteps format. +func TestExtractRunScriptsFromPreStepsYAML_Basic(t *testing.T) { + yamlStr := `pre-steps: + - name: Lint changed files + run: | + gh pr diff "$PR_NUMBER" --name-only | awk '/\.md$/' + - uses: some-org/action@abc123def456abc123def456abc123def456abc1 +` + scripts := extractRunScriptsFromPreStepsYAML(yamlStr) + assert.Len(t, scripts, 1) + assert.Contains(t, scripts[0], "gh pr diff") +} + +// TestExtractRunScriptsFromPreStepsYAML_Empty verifies nil return for empty input. +func TestExtractRunScriptsFromPreStepsYAML_Empty(t *testing.T) { + assert.Nil(t, extractRunScriptsFromPreStepsYAML("")) +} + +// TestExtractRunScriptsFromPreStepsYAML_NoRunSteps verifies nil return when no run steps present. +func TestExtractRunScriptsFromPreStepsYAML_NoRunSteps(t *testing.T) { + yamlStr := `pre-steps: + - uses: some-org/action@abc123def456abc123def456abc123def456abc1 +` + assert.Nil(t, extractRunScriptsFromPreStepsYAML(yamlStr)) +} + +// TestExtractRunScriptsFromPreStepsYAML_MultipleRunSteps verifies all run scripts are returned. +func TestExtractRunScriptsFromPreStepsYAML_MultipleRunSteps(t *testing.T) { + yamlStr := `pre-steps: + - name: Step one + run: gh pr diff "$PR_NUMBER" + - name: Step two + run: gh issue list +` + scripts := extractRunScriptsFromPreStepsYAML(yamlStr) + assert.Len(t, scripts, 2) +} + +// --- mergeInferredIntoPermissionsYAML --- + +// TestMergeInferredIntoPermissionsYAML_AddsNewScope verifies that a new inferred scope +// is added when the existing permissions block does not include it. +func TestMergeInferredIntoPermissionsYAML_AddsNewScope(t *testing.T) { + existing := "permissions:\n contents: read" + inferred := map[PermissionScope]PermissionLevel{ + PermissionPullRequests: PermissionRead, + } + result := mergeInferredIntoPermissionsYAML(existing, inferred) + assert.Contains(t, result, "pull-requests: read") + assert.Contains(t, result, "contents: read") +} + +// TestMergeInferredIntoPermissionsYAML_DoesNotOverride verifies that an explicitly +// declared scope is never overridden by inferred values. +func TestMergeInferredIntoPermissionsYAML_DoesNotOverride(t *testing.T) { + existing := "permissions:\n pull-requests: read" + inferred := map[PermissionScope]PermissionLevel{ + PermissionPullRequests: PermissionWrite, + } + result := mergeInferredIntoPermissionsYAML(existing, inferred) + assert.Contains(t, result, "pull-requests: read") + assert.NotContains(t, result, "pull-requests: write") +} + +// TestMergeInferredIntoPermissionsYAML_EmptyInputUnchanged verifies that an empty +// permissions string is returned unchanged (no implicit block is created). +func TestMergeInferredIntoPermissionsYAML_EmptyInputUnchanged(t *testing.T) { + inferred := map[PermissionScope]PermissionLevel{ + PermissionPullRequests: PermissionRead, + } + result := mergeInferredIntoPermissionsYAML("", inferred) + assert.Empty(t, result) +} + +// TestMergeInferredIntoPermissionsYAML_SkipsAppOnlyScopes verifies that GitHub +// App-only scopes are not added to the job-level permissions block. +func TestMergeInferredIntoPermissionsYAML_SkipsAppOnlyScopes(t *testing.T) { + existing := "permissions:\n contents: read" + inferred := map[PermissionScope]PermissionLevel{ + PermissionCodespaces: PermissionRead, // GitHub App-only + } + result := mergeInferredIntoPermissionsYAML(existing, inferred) + assert.NotContains(t, result, "codespaces") +} + +// --- Agent job integration tests --- + +// TestAgentJobPreStepsWriteCommandErrors verifies that when an agent job pre-step +// contains a write gh command, the compiler returns an error. +func TestAgentJobPreStepsWriteCommandErrors(t *testing.T) { + tmpDir := testutil.TempDir(t, "agent-prestep-write-error") + + content := `--- +on: push +permissions: + contents: read +pre-steps: + - name: Comment on PR + run: gh pr comment "$PR_NUMBER" --body "processing" +engine: claude +strict: false +--- + +Test agent pre-steps write command triggers error. +` + testFile := filepath.Join(tmpDir, "workflow.md") + require.NoError(t, os.WriteFile(testFile, []byte(content), 0644)) + + compiler := NewCompiler() + err := compiler.CompileWorkflow(testFile) + require.Error(t, err, "compiler should error when agent pre-step uses a write gh command") + assert.Contains(t, err.Error(), "agent job pre-step uses write gh command(s)") + assert.Contains(t, err.Error(), "gh pr comment") + assert.Contains(t, err.Error(), "safe-outputs") +} + +// TestAgentJobPreStepsInferReadPermission verifies that when an agent job pre-step +// contains `gh pr diff` (a read command) and the user has explicit permissions, +// the compiler automatically adds pull-requests: read. +func TestAgentJobPreStepsInferReadPermission(t *testing.T) { + tmpDir := testutil.TempDir(t, "agent-prestep-read-perm") + + content := `--- +on: push +permissions: + contents: read +pre-steps: + - name: Get changed files + run: gh pr diff "$PR_NUMBER" --name-only > /tmp/changed.txt +engine: claude +strict: false +--- + +Test agent pre-steps read permission inference. +` + testFile := filepath.Join(tmpDir, "workflow.md") + require.NoError(t, os.WriteFile(testFile, []byte(content), 0644)) + + compiler := NewCompiler() + require.NoError(t, compiler.CompileWorkflow(testFile)) + + lockFile := filepath.Join(tmpDir, "workflow.lock.yml") + raw, err := os.ReadFile(lockFile) + require.NoError(t, err) + lockContent := string(raw) + + agentJob := extractJobSection(lockContent, string(constants.AgentJobName)) + assert.Contains(t, agentJob, "pull-requests: read", + "agent job should have pull-requests: read inferred from pre-step gh pr diff") +} + +// TestAgentJobPreStepsInferWithDefaultPermissions verifies that when the user has not +// declared an explicit permissions block, the default permissions (contents: read) are +// still augmented with inferred scopes required by pre-step gh commands. +func TestAgentJobPreStepsInferWithDefaultPermissions(t *testing.T) { + tmpDir := testutil.TempDir(t, "agent-prestep-default-perms") + + content := `--- +on: push +pre-steps: + - name: Get changed files + run: gh pr diff "$PR_NUMBER" --name-only > /tmp/changed.txt +engine: claude +strict: false +--- + +Test: inference applies even without explicit permissions declaration. +` + testFile := filepath.Join(tmpDir, "workflow.md") + require.NoError(t, os.WriteFile(testFile, []byte(content), 0644)) + + compiler := NewCompiler() + require.NoError(t, compiler.CompileWorkflow(testFile)) + + lockFile := filepath.Join(tmpDir, "workflow.lock.yml") + raw, err := os.ReadFile(lockFile) + require.NoError(t, err) + lockContent := string(raw) + + agentJob := extractJobSection(lockContent, string(constants.AgentJobName)) + assert.Contains(t, agentJob, "pull-requests: read", + "agent job should have pull-requests: read inferred from pre-step gh pr diff even without explicit permissions block") +} + +// TestAgentJobPreStepsNoInferForEmptyPermissions verifies that when the user explicitly +// sets permissions: {} (empty/no permissions), pre-step inference is skipped because +// the user intentionally opted out of all permissions. +func TestAgentJobPreStepsNoInferForEmptyPermissions(t *testing.T) { + tmpDir := testutil.TempDir(t, "agent-prestep-empty-perms") + + content := `--- +on: push +permissions: {} +pre-steps: + - name: Get changed files + run: gh pr diff "$PR_NUMBER" --name-only > /tmp/changed.txt +engine: claude +strict: false +--- + +Test: no inference when user explicitly opts out of permissions. +` + testFile := filepath.Join(tmpDir, "workflow.md") + require.NoError(t, os.WriteFile(testFile, []byte(content), 0644)) + + compiler := NewCompiler() + require.NoError(t, compiler.CompileWorkflow(testFile)) + + lockFile := filepath.Join(tmpDir, "workflow.lock.yml") + raw, err := os.ReadFile(lockFile) + require.NoError(t, err) + lockContent := string(raw) + + agentJob := extractJobSection(lockContent, string(constants.AgentJobName)) + assert.NotContains(t, agentJob, "pull-requests:", + "agent job should NOT have pull-requests when user explicitly set permissions: {}") +} diff --git a/pkg/workflow/permissions_operations.go b/pkg/workflow/permissions_operations.go index 7453bb6c8c..9d920485d9 100644 --- a/pkg/workflow/permissions_operations.go +++ b/pkg/workflow/permissions_operations.go @@ -417,3 +417,43 @@ func (p *Permissions) RenderToYAML() string { return strings.Join(lines, "\n") } + +// mergeInferredIntoPermissionsYAML merges a map of inferred permissions into an existing +// permissions YAML string and returns the updated YAML string (2-space indented, suitable +// for filterJobLevelPermissions / indentYAMLLines callers). +// +// Rules: +// - GitHub App-only scopes are skipped (they are not valid job-level permissions). +// - An inferred scope is added only when not already declared by the user. +// - An inferred scope at PermissionNone is always ignored. +// +// If permissionsYAML is empty the function returns an empty string unchanged, because +// adding a new explicit block to a job that currently inherits workflow-level permissions +// would unintentionally restrict those permissions. +func mergeInferredIntoPermissionsYAML(permissionsYAML string, inferred map[PermissionScope]PermissionLevel) string { +if permissionsYAML == "" || len(inferred) == 0 { +return permissionsYAML +} + +parsedPerms := NewPermissionsParser(permissionsYAML).ToPermissions() + +changed := false +for scope, level := range inferred { +if IsGitHubAppOnlyScope(scope) { +continue +} +if level == PermissionNone { +continue +} +if _, exists := parsedPerms.Get(scope); !exists { +parsedPerms.Set(scope, level) +changed = true +} +} + +if !changed { +return permissionsYAML +} + +return filterJobLevelPermissions(parsedPerms.RenderToYAML()) +} From a686aac88c719d1a8780709c35d74c0cdba5f679 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 17 May 2026 18:22:23 +0000 Subject: [PATCH 10/15] refactor: address code review - split conditions, document permissions check, fix test SHA Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- pkg/workflow/compiler_main_job.go | 6 ++-- pkg/workflow/gh_cli_permissions_test.go | 4 +-- pkg/workflow/permissions_operations.go | 47 ++++++++++++++----------- 3 files changed, 32 insertions(+), 25 deletions(-) diff --git a/pkg/workflow/compiler_main_job.go b/pkg/workflow/compiler_main_job.go index 7127207707..02f4b63d69 100644 --- a/pkg/workflow/compiler_main_job.go +++ b/pkg/workflow/compiler_main_job.go @@ -354,8 +354,10 @@ func (c *Compiler) buildMainJob(data *WorkflowData, activationJobCreated bool) ( } // Infer read permissions unless the user explicitly zeroed out all permissions. // Check data.Permissions (the original value) since needsContentsRead above may have - // already expanded "permissions: {}" to "permissions:\n contents: read". - if strings.TrimSpace(data.Permissions) != "permissions: {}" && permissions != "" { + // already expanded "permissions: {}" into an explicit block. + // Uses the same exact-string check as tools.go (the YAML parser always normalizes + // "permissions: {}" to this canonical form when parsing the frontmatter). + if data.Permissions != "permissions: {}" && permissions != "" { inferred := inferPermissionsFromShellScripts(agentPreStepScripts) if len(inferred) > 0 { permissions = mergeInferredIntoPermissionsYAML(permissions, inferred) diff --git a/pkg/workflow/gh_cli_permissions_test.go b/pkg/workflow/gh_cli_permissions_test.go index 3a2078f22c..5c431d1ab7 100644 --- a/pkg/workflow/gh_cli_permissions_test.go +++ b/pkg/workflow/gh_cli_permissions_test.go @@ -484,7 +484,7 @@ func TestExtractRunScriptsFromPreStepsYAML_Basic(t *testing.T) { - name: Lint changed files run: | gh pr diff "$PR_NUMBER" --name-only | awk '/\.md$/' - - uses: some-org/action@abc123def456abc123def456abc123def456abc1 + - uses: some-org/action@0000000000000000000000000000000000000000 ` scripts := extractRunScriptsFromPreStepsYAML(yamlStr) assert.Len(t, scripts, 1) @@ -499,7 +499,7 @@ func TestExtractRunScriptsFromPreStepsYAML_Empty(t *testing.T) { // TestExtractRunScriptsFromPreStepsYAML_NoRunSteps verifies nil return when no run steps present. func TestExtractRunScriptsFromPreStepsYAML_NoRunSteps(t *testing.T) { yamlStr := `pre-steps: - - uses: some-org/action@abc123def456abc123def456abc123def456abc1 + - uses: some-org/action@0000000000000000000000000000000000000000 ` assert.Nil(t, extractRunScriptsFromPreStepsYAML(yamlStr)) } diff --git a/pkg/workflow/permissions_operations.go b/pkg/workflow/permissions_operations.go index 9d920485d9..ca87bb8d4b 100644 --- a/pkg/workflow/permissions_operations.go +++ b/pkg/workflow/permissions_operations.go @@ -431,29 +431,34 @@ func (p *Permissions) RenderToYAML() string { // adding a new explicit block to a job that currently inherits workflow-level permissions // would unintentionally restrict those permissions. func mergeInferredIntoPermissionsYAML(permissionsYAML string, inferred map[PermissionScope]PermissionLevel) string { -if permissionsYAML == "" || len(inferred) == 0 { -return permissionsYAML -} + if permissionsYAML == "" { + // No existing permissions block: adding one would unintentionally narrow the + // workflow-level permissions that the job currently inherits. + return permissionsYAML + } + if len(inferred) == 0 { + return permissionsYAML + } -parsedPerms := NewPermissionsParser(permissionsYAML).ToPermissions() + parsedPerms := NewPermissionsParser(permissionsYAML).ToPermissions() -changed := false -for scope, level := range inferred { -if IsGitHubAppOnlyScope(scope) { -continue -} -if level == PermissionNone { -continue -} -if _, exists := parsedPerms.Get(scope); !exists { -parsedPerms.Set(scope, level) -changed = true -} -} + changed := false + for scope, level := range inferred { + if IsGitHubAppOnlyScope(scope) { + continue + } + if level == PermissionNone { + continue + } + if _, exists := parsedPerms.Get(scope); !exists { + parsedPerms.Set(scope, level) + changed = true + } + } -if !changed { -return permissionsYAML -} + if !changed { + return permissionsYAML + } -return filterJobLevelPermissions(parsedPerms.RenderToYAML()) + return filterJobLevelPermissions(parsedPerms.RenderToYAML()) } From 68a1ca205a617f1fbac2391025ed3a8f5f58790e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 17 May 2026 18:35:14 +0000 Subject: [PATCH 11/15] feat: extend gh CLI scan to all step section types (steps, post-steps, pre-agent-steps) Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- .../compiler_activation_job_builder.go | 51 +-- pkg/workflow/compiler_main_job.go | 24 +- pkg/workflow/gh_cli_permissions.go | 37 +- pkg/workflow/gh_cli_permissions_test.go | 319 +++++++++++++++++- 4 files changed, 388 insertions(+), 43 deletions(-) diff --git a/pkg/workflow/compiler_activation_job_builder.go b/pkg/workflow/compiler_activation_job_builder.go index dbbb1e8866..6eff397f80 100644 --- a/pkg/workflow/compiler_activation_job_builder.go +++ b/pkg/workflow/compiler_activation_job_builder.go @@ -40,12 +40,12 @@ type activationJobBuildContext struct { activationNeeds []string activationCondition string - // activationPreStepScripts holds the `run` scripts extracted from - // jobs.activation.pre-steps, cached here to avoid repeated extraction. - activationPreStepScripts []string - // activationPreStepInferredPerms holds the permissions inferred from - // activationPreStepScripts, cached here to avoid repeated inference. - activationPreStepInferredPerms map[PermissionScope]PermissionLevel + // activationAllScripts holds the `run` scripts extracted from all step sections + // in jobs.activation (pre-steps, steps, post-steps), cached to avoid repeated extraction. + activationAllScripts []string + // activationInferredPerms holds the permissions inferred from activationAllScripts, + // cached here to avoid repeated inference. + activationInferredPerms map[PermissionScope]PermissionLevel } // newActivationJobBuildContext initializes activation-job state with setup, aw_info, and base outputs. @@ -82,11 +82,15 @@ func (c *Compiler) newActivationJobBuildContext( } ctx.shouldRemoveLabel = ctx.hasLabelCommand && data.LabelCommandRemoveLabel - // Cache pre-step scripts and inferred permissions once to avoid redundant extraction - // and inference calls in buildActivationPermissions and addActivationFeedbackAndValidationSteps. - ctx.activationPreStepScripts = extractRunScriptsFromJobPreSteps(data.Jobs, string(constants.ActivationJobName)) - if len(ctx.activationPreStepScripts) > 0 { - ctx.activationPreStepInferredPerms = inferPermissionsFromShellScripts(ctx.activationPreStepScripts) + // Cache scripts from all step sections and inferred permissions once to avoid redundant + // extraction and inference calls in buildActivationPermissions and + // addActivationFeedbackAndValidationSteps. + activationJobName := string(constants.ActivationJobName) + for _, section := range []string{"pre-steps", "steps", "post-steps"} { + ctx.activationAllScripts = append(ctx.activationAllScripts, extractRunScriptsFromJobSection(data.Jobs, activationJobName, section)...) + } + if len(ctx.activationAllScripts) > 0 { + ctx.activationInferredPerms = inferPermissionsFromShellScripts(ctx.activationAllScripts) } ctx.steps = append(ctx.steps, c.generateCheckoutActionsFolder(data)...) @@ -176,13 +180,13 @@ func (c *Compiler) addActivationFeedbackAndValidationSteps(ctx *activationJobBui if ctx.needsAppTokenForAccess { appPerms.Set(PermissionContents, PermissionRead) } - // Add GitHub App-only permissions inferred from pre-step gh CLI commands so the + // Add GitHub App-only permissions inferred from activation job gh CLI commands so the // minted App token includes the scopes those commands require (e.g. codespaces: read // for `gh codespace list`). Only App-only scopes are passed here — standard GitHub // Actions scopes (pull-requests, issues, etc.) are already covered by the GITHUB_TOKEN // permissions block and do not need to be re-declared on the App token. // Uses the cached inferred permissions to avoid redundant computation. - for scope, level := range ctx.activationPreStepInferredPerms { + for scope, level := range ctx.activationInferredPerms { if IsGitHubAppOnlyScope(scope) { appPerms.Set(scope, level) } @@ -517,7 +521,7 @@ func (c *Compiler) addActivationArtifactUploadStep(ctx *activationJobBuildContex } // buildActivationPermissions builds activation job permissions from workflow features and selected interactions. -// Returns an error if activation pre-steps contain write gh CLI commands that would require write permissions. +// Returns an error if any activation job step section contains write gh CLI commands that would require write permissions. func (c *Compiler) buildActivationPermissions(ctx *activationJobBuildContext) (string, error) { permsMap := map[PermissionScope]PermissionLevel{ PermissionContents: PermissionRead, @@ -567,20 +571,21 @@ func (c *Compiler) buildActivationPermissions(ctx *activationJobBuildContext) (s permsMap[PermissionDiscussions] = PermissionWrite } } - // Infer permissions required by gh CLI calls in jobs.activation.pre-steps run scripts. - // This ensures that user-defined pre-steps that call `gh pr diff`, `gh issue view`, etc. - // get the permissions they need without requiring manual permission declarations. + // Infer permissions required by gh CLI calls in jobs.activation step sections + // (pre-steps, steps, post-steps). This ensures that user-defined steps that call + // `gh pr diff`, `gh issue view`, etc. get the permissions they need without requiring + // manual permission declarations. // Scripts and inferred permissions are cached in ctx to avoid redundant computation. - if len(ctx.activationPreStepScripts) > 0 { - // Detect write commands first — these are not permitted in activation pre-steps - // because the activation job intentionally operates with read-only permissions. - if writeCmds := detectWriteCommandsInShellScripts(ctx.activationPreStepScripts); len(writeCmds) > 0 { + if len(ctx.activationAllScripts) > 0 { + // Detect write commands first — these are not permitted in the activation job + // because it intentionally operates with read-only permissions. + if writeCmds := detectWriteCommandsInShellScripts(ctx.activationAllScripts); len(writeCmds) > 0 { return "", fmt.Errorf( - "activation pre-step uses write gh command(s) [%s]; write operations are not permitted in activation job pre-steps because the activation job runs with read-only permissions. Move write operations to the agent job steps or use safe-outputs. See: https://github.github.com/gh-aw/reference/safe-outputs/", + "activation job uses write gh command(s) [%s]; write operations are not permitted in activation job steps because the activation job runs with read-only permissions. Move write operations to the agent job steps or use safe-outputs. See: https://github.github.com/gh-aw/reference/safe-outputs/", strings.Join(writeCmds, ", "), ) } - for scope, level := range ctx.activationPreStepInferredPerms { + for scope, level := range ctx.activationInferredPerms { if _, exists := permsMap[scope]; !exists { permsMap[scope] = level } diff --git a/pkg/workflow/compiler_main_job.go b/pkg/workflow/compiler_main_job.go index 02f4b63d69..bc3682a989 100644 --- a/pkg/workflow/compiler_main_job.go +++ b/pkg/workflow/compiler_main_job.go @@ -336,19 +336,25 @@ func (c *Compiler) buildMainJob(data *WorkflowData, activationJobCreated bool) ( } } - // Infer permissions required by gh CLI calls in agent job pre-steps. - // Scans both data.PreSteps (top-level pre-steps:) and jobs.agent.pre-steps, - // detects write commands (which are not permitted since the agent job is read-only), + // Infer permissions required by gh CLI calls in all agent job step sections + // (pre-steps, steps, post-steps, pre-agent-steps). + // Detects write commands (which are not permitted since the agent job is read-only), // and merges inferred read permissions into the existing permissions block. // Skipped only when the user explicitly opted out of all permissions (permissions: {}). - agentPreStepScripts := extractRunScriptsFromPreStepsYAML(data.PreSteps) + agentJobName := string(constants.AgentJobName) + agentAllScripts := extractRunScriptsFromSectionYAML(data.PreSteps, "pre-steps") + agentAllScripts = append(agentAllScripts, extractRunScriptsFromSectionYAML(data.CustomSteps, "steps")...) + agentAllScripts = append(agentAllScripts, extractRunScriptsFromSectionYAML(data.PreAgentSteps, "pre-agent-steps")...) + agentAllScripts = append(agentAllScripts, extractRunScriptsFromSectionYAML(data.PostSteps, "post-steps")...) if data.Jobs != nil { - agentPreStepScripts = append(agentPreStepScripts, extractRunScriptsFromJobPreSteps(data.Jobs, string(constants.AgentJobName))...) + for _, section := range []string{"pre-steps", "steps", "pre-agent-steps", "post-steps"} { + agentAllScripts = append(agentAllScripts, extractRunScriptsFromJobSection(data.Jobs, agentJobName, section)...) + } } - if len(agentPreStepScripts) > 0 { - if writeCmds := detectWriteCommandsInShellScripts(agentPreStepScripts); len(writeCmds) > 0 { + if len(agentAllScripts) > 0 { + if writeCmds := detectWriteCommandsInShellScripts(agentAllScripts); len(writeCmds) > 0 { return nil, fmt.Errorf( - "agent job pre-step uses write gh command(s) [%s]; write operations are not permitted in agent job pre-steps because the agent job runs with read-only permissions. Use safe-outputs for write operations. See: https://github.github.com/gh-aw/reference/safe-outputs/", + "agent job uses write gh command(s) [%s]; write operations are not permitted in agent job steps because the agent job runs with read-only permissions. Use safe-outputs for write operations. See: https://github.github.com/gh-aw/reference/safe-outputs/", strings.Join(writeCmds, ", "), ) } @@ -358,7 +364,7 @@ func (c *Compiler) buildMainJob(data *WorkflowData, activationJobCreated bool) ( // Uses the same exact-string check as tools.go (the YAML parser always normalizes // "permissions: {}" to this canonical form when parsing the frontmatter). if data.Permissions != "permissions: {}" && permissions != "" { - inferred := inferPermissionsFromShellScripts(agentPreStepScripts) + inferred := inferPermissionsFromShellScripts(agentAllScripts) if len(inferred) > 0 { permissions = mergeInferredIntoPermissionsYAML(permissions, inferred) } diff --git a/pkg/workflow/gh_cli_permissions.go b/pkg/workflow/gh_cli_permissions.go index 8040d0710f..5b78999179 100644 --- a/pkg/workflow/gh_cli_permissions.go +++ b/pkg/workflow/gh_cli_permissions.go @@ -261,17 +261,19 @@ func detectWriteCommandsInShellScripts(scripts []string) []string { return found } -// extractRunScriptsFromPreStepsYAML parses a pre-steps YAML string (as stored in -// WorkflowData.PreSteps) and returns the `run` script text from every step. -func extractRunScriptsFromPreStepsYAML(preStepsYAML string) []string { - if preStepsYAML == "" { +// extractRunScriptsFromSectionYAML parses a step-section YAML string (e.g. as stored in +// WorkflowData.PreSteps, PostSteps, PreAgentSteps, or CustomSteps) and returns the `run` +// script text from every step. sectionName must match the top-level key in the YAML +// (e.g. "pre-steps", "post-steps", "pre-agent-steps", "steps"). +func extractRunScriptsFromSectionYAML(sectionYAML, sectionName string) []string { + if sectionYAML == "" { return nil } var wrapper map[string][]map[string]any - if err := yaml.Unmarshal([]byte(preStepsYAML), &wrapper); err != nil { + if err := yaml.Unmarshal([]byte(sectionYAML), &wrapper); err != nil { return nil } - steps := wrapper["pre-steps"] + steps := wrapper[sectionName] if len(steps) == 0 { return nil } @@ -284,11 +286,18 @@ func extractRunScriptsFromPreStepsYAML(preStepsYAML string) []string { return scripts } -// extractRunScriptsFromJobPreSteps returns the `run` script text from every -// pre-step in the named job configuration inside the frontmatter jobs map. +// extractRunScriptsFromPreStepsYAML parses a pre-steps YAML string (as stored in +// WorkflowData.PreSteps) and returns the `run` script text from every step. +func extractRunScriptsFromPreStepsYAML(preStepsYAML string) []string { + return extractRunScriptsFromSectionYAML(preStepsYAML, "pre-steps") +} + +// extractRunScriptsFromJobSection returns the `run` script text from every step in the +// named section (e.g. "pre-steps", "steps", "post-steps") of the named job configuration +// inside the frontmatter jobs map. // // It is a read-only extraction: it never mutates the jobs map. -func extractRunScriptsFromJobPreSteps(jobs map[string]any, jobName string) []string { +func extractRunScriptsFromJobSection(jobs map[string]any, jobName, sectionName string) []string { if len(jobs) == 0 { return nil } @@ -303,7 +312,7 @@ func extractRunScriptsFromJobPreSteps(jobs map[string]any, jobName string) []str return nil } - raw, ok := configMap["pre-steps"] + raw, ok := configMap[sectionName] if !ok { return nil } @@ -325,3 +334,11 @@ func extractRunScriptsFromJobPreSteps(jobs map[string]any, jobName string) []str } return scripts } + +// extractRunScriptsFromJobPreSteps returns the `run` script text from every +// pre-step in the named job configuration inside the frontmatter jobs map. +// +// It is a read-only extraction: it never mutates the jobs map. +func extractRunScriptsFromJobPreSteps(jobs map[string]any, jobName string) []string { + return extractRunScriptsFromJobSection(jobs, jobName, "pre-steps") +} diff --git a/pkg/workflow/gh_cli_permissions_test.go b/pkg/workflow/gh_cli_permissions_test.go index 5c431d1ab7..d62345d31a 100644 --- a/pkg/workflow/gh_cli_permissions_test.go +++ b/pkg/workflow/gh_cli_permissions_test.go @@ -589,7 +589,7 @@ Test agent pre-steps write command triggers error. compiler := NewCompiler() err := compiler.CompileWorkflow(testFile) require.Error(t, err, "compiler should error when agent pre-step uses a write gh command") - assert.Contains(t, err.Error(), "agent job pre-step uses write gh command(s)") + assert.Contains(t, err.Error(), "agent job uses write gh command(s)") assert.Contains(t, err.Error(), "gh pr comment") assert.Contains(t, err.Error(), "safe-outputs") } @@ -695,3 +695,320 @@ Test: no inference when user explicitly opts out of permissions. assert.NotContains(t, agentJob, "pull-requests:", "agent job should NOT have pull-requests when user explicitly set permissions: {}") } + +// --- extractRunScriptsFromSectionYAML --- + +// TestExtractRunScriptsFromSectionYAML_Steps verifies extraction from a `steps:` section. +func TestExtractRunScriptsFromSectionYAML_Steps(t *testing.T) { + yamlStr := `steps: + - name: List issues + run: gh issue list --json number + - uses: some-org/action@0000000000000000000000000000000000000000 +` + scripts := extractRunScriptsFromSectionYAML(yamlStr, "steps") + assert.Len(t, scripts, 1) + assert.Contains(t, scripts[0], "gh issue list") +} + +// TestExtractRunScriptsFromSectionYAML_PostSteps verifies extraction from a `post-steps:` section. +func TestExtractRunScriptsFromSectionYAML_PostSteps(t *testing.T) { + yamlStr := `post-steps: + - name: Clean up + run: gh run list --json databaseId +` + scripts := extractRunScriptsFromSectionYAML(yamlStr, "post-steps") + assert.Len(t, scripts, 1) + assert.Contains(t, scripts[0], "gh run list") +} + +// TestExtractRunScriptsFromSectionYAML_PreAgentSteps verifies extraction from a `pre-agent-steps:` section. +func TestExtractRunScriptsFromSectionYAML_PreAgentSteps(t *testing.T) { + yamlStr := `pre-agent-steps: + - name: Fetch release info + run: gh release view --json tagName +` + scripts := extractRunScriptsFromSectionYAML(yamlStr, "pre-agent-steps") + assert.Len(t, scripts, 1) + assert.Contains(t, scripts[0], "gh release view") +} + +// TestExtractRunScriptsFromSectionYAML_WrongKey verifies nil is returned when the YAML key +// does not match the requested section name. +func TestExtractRunScriptsFromSectionYAML_WrongKey(t *testing.T) { + yamlStr := `pre-steps: + - name: A step + run: gh pr diff +` + scripts := extractRunScriptsFromSectionYAML(yamlStr, "post-steps") + assert.Nil(t, scripts) +} + +// --- extractRunScriptsFromJobSection --- + +// TestExtractRunScriptsFromJobSection_Steps verifies extraction from jobs..steps. +func TestExtractRunScriptsFromJobSection_Steps(t *testing.T) { + jobs := map[string]any{ + "activation": map[string]any{ + "steps": []any{ + map[string]any{ + "name": "List workflows", + "run": `gh workflow list --json name`, + }, + }, + }, + } + scripts := extractRunScriptsFromJobSection(jobs, "activation", "steps") + require.Len(t, scripts, 1) + assert.Contains(t, scripts[0], "gh workflow list") +} + +// TestExtractRunScriptsFromJobSection_PostSteps verifies extraction from jobs..post-steps. +func TestExtractRunScriptsFromJobSection_PostSteps(t *testing.T) { + jobs := map[string]any{ + "agent": map[string]any{ + "post-steps": []any{ + map[string]any{ + "name": "Summary", + "run": `gh pr view "$PR_NUMBER" --json state`, + }, + }, + }, + } + scripts := extractRunScriptsFromJobSection(jobs, "agent", "post-steps") + require.Len(t, scripts, 1) + assert.Contains(t, scripts[0], "gh pr view") +} + +// TestExtractRunScriptsFromJobSection_MissingSectionReturnsNil verifies nil when section absent. +func TestExtractRunScriptsFromJobSection_MissingSectionReturnsNil(t *testing.T) { + jobs := map[string]any{ + "activation": map[string]any{ + "pre-steps": []any{}, + }, + } + assert.Nil(t, extractRunScriptsFromJobSection(jobs, "activation", "post-steps")) +} + +// --- Agent job integration tests for steps / post-steps / pre-agent-steps --- + +// TestAgentJobStepsInferReadPermission verifies that `gh issue list` in a top-level `steps:` +// block causes the compiler to add issues: read to the agent job. +func TestAgentJobStepsInferReadPermission(t *testing.T) { + tmpDir := testutil.TempDir(t, "agent-steps-read-perm") + + content := `--- +on: push +permissions: + contents: read +steps: + - name: List issues + run: gh issue list --json number > /tmp/issues.json +engine: claude +strict: false +--- + +Test agent steps read permission inference. +` + testFile := filepath.Join(tmpDir, "workflow.md") + require.NoError(t, os.WriteFile(testFile, []byte(content), 0644)) + + compiler := NewCompiler() + require.NoError(t, compiler.CompileWorkflow(testFile)) + + lockFile := filepath.Join(tmpDir, "workflow.lock.yml") + raw, err := os.ReadFile(lockFile) + require.NoError(t, err) + + agentJob := extractJobSection(string(raw), string(constants.AgentJobName)) + assert.Contains(t, agentJob, "issues: read", + "agent job should have issues: read inferred from steps gh issue list") +} + +// TestAgentJobPostStepsInferReadPermission verifies that `gh run list` in a top-level `post-steps:` +// block causes the compiler to add actions: read to the agent job. +func TestAgentJobPostStepsInferReadPermission(t *testing.T) { + tmpDir := testutil.TempDir(t, "agent-poststeps-read-perm") + + content := `--- +on: push +permissions: + contents: read +post-steps: + - name: Show runs + run: gh run list --json databaseId > /tmp/runs.json +engine: claude +strict: false +--- + +Test agent post-steps read permission inference. +` + testFile := filepath.Join(tmpDir, "workflow.md") + require.NoError(t, os.WriteFile(testFile, []byte(content), 0644)) + + compiler := NewCompiler() + require.NoError(t, compiler.CompileWorkflow(testFile)) + + lockFile := filepath.Join(tmpDir, "workflow.lock.yml") + raw, err := os.ReadFile(lockFile) + require.NoError(t, err) + + agentJob := extractJobSection(string(raw), string(constants.AgentJobName)) + assert.Contains(t, agentJob, "actions: read", + "agent job should have actions: read inferred from post-steps gh run list") +} + +// TestAgentJobPreAgentStepsInferReadPermission verifies that `gh pr diff` in a top-level +// `pre-agent-steps:` block causes the compiler to add pull-requests: read to the agent job. +func TestAgentJobPreAgentStepsInferReadPermission(t *testing.T) { + tmpDir := testutil.TempDir(t, "agent-preagent-read-perm") + + content := `--- +on: push +permissions: + contents: read +pre-agent-steps: + - name: Get diff + run: gh pr diff "$PR_NUMBER" --name-only > /tmp/diff.txt +engine: claude +strict: false +--- + +Test agent pre-agent-steps read permission inference. +` + testFile := filepath.Join(tmpDir, "workflow.md") + require.NoError(t, os.WriteFile(testFile, []byte(content), 0644)) + + compiler := NewCompiler() + require.NoError(t, compiler.CompileWorkflow(testFile)) + + lockFile := filepath.Join(tmpDir, "workflow.lock.yml") + raw, err := os.ReadFile(lockFile) + require.NoError(t, err) + + agentJob := extractJobSection(string(raw), string(constants.AgentJobName)) + assert.Contains(t, agentJob, "pull-requests: read", + "agent job should have pull-requests: read inferred from pre-agent-steps gh pr diff") +} + +// TestAgentJobWriteCommandInStepsErrors verifies that a write gh command in a top-level +// `steps:` block triggers a compile error. +func TestAgentJobWriteCommandInStepsErrors(t *testing.T) { + tmpDir := testutil.TempDir(t, "agent-steps-write-error") + + content := `--- +on: push +permissions: + contents: read +steps: + - name: Create issue + run: gh issue create --title "bug" --body "found a bug" +engine: claude +strict: false +--- + +Test agent steps write command triggers error. +` + testFile := filepath.Join(tmpDir, "workflow.md") + require.NoError(t, os.WriteFile(testFile, []byte(content), 0644)) + + compiler := NewCompiler() + err := compiler.CompileWorkflow(testFile) + require.Error(t, err, "compiler should error when agent steps use a write gh command") + assert.Contains(t, err.Error(), "agent job uses write gh command(s)") + assert.Contains(t, err.Error(), "gh issue create") +} + +// TestAgentJobWriteCommandInPostStepsErrors verifies that a write gh command in a top-level +// `post-steps:` block triggers a compile error. +func TestAgentJobWriteCommandInPostStepsErrors(t *testing.T) { + tmpDir := testutil.TempDir(t, "agent-poststeps-write-error") + + content := `--- +on: push +permissions: + contents: read +post-steps: + - name: Close PR + run: gh pr close "$PR_NUMBER" +engine: claude +strict: false +--- + +Test agent post-steps write command triggers error. +` + testFile := filepath.Join(tmpDir, "workflow.md") + require.NoError(t, os.WriteFile(testFile, []byte(content), 0644)) + + compiler := NewCompiler() + err := compiler.CompileWorkflow(testFile) + require.Error(t, err, "compiler should error when agent post-steps use a write gh command") + assert.Contains(t, err.Error(), "agent job uses write gh command(s)") + assert.Contains(t, err.Error(), "gh pr close") +} + +// TestActivationJobStepsInferReadPermission verifies that `gh workflow list` in +// jobs.activation.steps causes the compiler to add actions: read to the activation job. +func TestActivationJobStepsInferReadPermission(t *testing.T) { + tmpDir := testutil.TempDir(t, "activation-steps-read-perm") + testFile := filepath.Join(tmpDir, "workflow.md") + testContent := `--- +on: + pull_request: + types: [opened] +permissions: + contents: read + actions: read +engine: copilot +jobs: + activation: + steps: + - name: List workflows + run: | + gh workflow list --json name > /tmp/workflows.json +--- + +# Workflow that lists workflows in activation steps +` + require.NoError(t, os.WriteFile(testFile, []byte(testContent), 0644)) + + compiler := NewCompiler() + require.NoError(t, compiler.CompileWorkflow(testFile)) + + lockContent, err := os.ReadFile(stringutil.MarkdownToLockFile(testFile)) + require.NoError(t, err) + + activationSection := extractJobSection(string(lockContent), string(constants.ActivationJobName)) + assert.Contains(t, activationSection, "actions: read", + "activation job should have actions: read inferred from steps gh workflow list") +} + +// TestActivationJobWriteCommandInStepsErrors verifies that a write gh command in +// jobs.activation.steps triggers a compile error. +func TestActivationJobWriteCommandInStepsErrors(t *testing.T) { + tmpDir := testutil.TempDir(t, "activation-steps-write-error") + testFile := filepath.Join(tmpDir, "bad-workflow.md") + testContent := `--- +on: + pull_request: + types: [opened] +permissions: + contents: read +engine: copilot +jobs: + activation: + steps: + - name: Create label + run: | + gh label create "reviewed" --color "#0075ca" +--- + +# Workflow whose activation steps illegally call a write gh command +` + require.NoError(t, os.WriteFile(testFile, []byte(testContent), 0644)) + + compiler := NewCompiler() + err := compiler.CompileWorkflow(testFile) + require.Error(t, err, "compiler should reject write gh commands in activation steps") + assert.Contains(t, err.Error(), "gh label create", "error should mention the offending command") + assert.Contains(t, err.Error(), "write", "error should explain the write-permission restriction") +} From ea1375b191be83ed6d476efec25569f9e3f34b4f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 17 May 2026 18:36:07 +0000 Subject: [PATCH 12/15] docs: clarify pre-agent-steps exclusion from activation job scan Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- pkg/workflow/compiler_activation_job_builder.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/workflow/compiler_activation_job_builder.go b/pkg/workflow/compiler_activation_job_builder.go index 6eff397f80..d794a35a30 100644 --- a/pkg/workflow/compiler_activation_job_builder.go +++ b/pkg/workflow/compiler_activation_job_builder.go @@ -42,6 +42,8 @@ type activationJobBuildContext struct { // activationAllScripts holds the `run` scripts extracted from all step sections // in jobs.activation (pre-steps, steps, post-steps), cached to avoid repeated extraction. + // pre-agent-steps is intentionally omitted: it is an agent-job-only concept (steps that + // run immediately before AI execution) and has no meaning in the activation job. activationAllScripts []string // activationInferredPerms holds the permissions inferred from activationAllScripts, // cached here to avoid repeated inference. From 56d1068344c297c27d0f9b167c0bf561f9f448fc Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sun, 17 May 2026 19:27:44 +0000 Subject: [PATCH 13/15] docs(adr): draft ADR-32849 for gh CLI permission inference --- ...er-gh-cli-permissions-from-step-scripts.md | 104 ++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 docs/adr/32849-infer-gh-cli-permissions-from-step-scripts.md diff --git a/docs/adr/32849-infer-gh-cli-permissions-from-step-scripts.md b/docs/adr/32849-infer-gh-cli-permissions-from-step-scripts.md new file mode 100644 index 0000000000..cadc36f0d5 --- /dev/null +++ b/docs/adr/32849-infer-gh-cli-permissions-from-step-scripts.md @@ -0,0 +1,104 @@ +# ADR-32849: Infer gh CLI Permissions from Step Scripts in Activation and Agent Jobs + +**Date**: 2026-05-17 +**Status**: Draft +**Deciders**: pelikhan, Copilot + +--- + +## Part 1 — Narrative (Human-Friendly) + +### Context + +The `gh aw compile` command emits an activation job and an agent job with `permissions:` blocks computed from workflow-level features (reaction targets, label events, safe outputs, etc.). Before this change, `buildActivationPermissions` ignored the contents of user-supplied `run` scripts in `jobs.activation.pre-steps`, `jobs.activation.steps`, and `jobs.activation.post-steps`, and the agent job's `buildMainJob` similarly ignored the contents of its `pre-steps`, `steps`, `pre-agent-steps`, and `post-steps`. As a result, a pre-step that called `gh pr diff "$PR_NUMBER" --name-only` would compile to a lock file with no `pull-requests: read` scope, causing the command to fail silently at runtime — Vale (and similar linters) appeared to lint nothing because the `gh` call returned an empty file list and exited non-zero without halting the job. Compounding the problem, an author could accidentally include a write `gh` command (e.g. `gh pr comment`) in a job whose permissions block is intentionally read-only, and the compiler would emit a workflow that fails opaquely at runtime rather than surfacing the misconfiguration at compile time. + +### Decision + +We will introduce a static, JSON-driven inference layer (`pkg/workflow/data/gh_cli_permissions.json` + `gh_cli_permissions.go`) that scans every `run:` script in the activation and agent jobs' step sections for `gh` subcommands and `gh api` REST path patterns, then merges the inferred minimum `permissions:` scopes into each job's compiled permission map. The scanner is regex-based, derives the `gh ` alternation dynamically from the JSON's subcommand-group keys, and recognises two failure modes: (1) any inferred *write* command in either job is a compile-time error directing the author to safe-outputs, because both jobs run with read-only permissions by contract; (2) inferred *read* scopes are merged additively into the existing permission map, never overriding scopes already set by higher-level features. GitHub-App-only scopes (e.g. `codespaces`, `environments`, organisation membership) discovered in activation job scripts are additionally injected into the App token mint step so the minted token covers what the script will call. + +### Alternatives Considered + +#### Alternative 1: Require Manual `permissions:` Declarations from Workflow Authors + +Make the workflow author responsible for declaring `permissions:` in the markdown frontmatter whenever a pre-step calls `gh`. This is the simplest implementation and avoids new parsing infrastructure. It was rejected because workflow authors are frequently non-engineers, and the failure mode (`gh pr diff` silently returning nothing) is sufficiently confusing that it would burn debugging time on every author who hit it. The whole point of compiler-derived permissions is that the lock file should match what the workflow actually does without authors having to keep two truth sources in sync. + +#### Alternative 2: Grant Broad Permissions to Any Job with Pre-Step Scripts + +Whenever a job has non-empty `pre-steps`, grant `pull-requests: read`, `issues: read`, `actions: read`, and `contents: read` unconditionally. This avoids parsing scripts entirely. It was rejected because it directly violates the principle of least privilege that the rest of the compiler enforces, and because it offers no path to detecting write commands at compile time — write `gh` calls would still fail opaquely at runtime, but now under broader scopes that obscure the misconfiguration. + +#### Alternative 3: Full Shell Parser (e.g. `mvdan/sh`) + +Use a real POSIX shell parser to extract `gh` invocations precisely, handling pipes, command substitution, conditionals, heredocs, and variable expansion correctly. This would eliminate the regex's known blind spots (e.g. `gh` invoked via `xargs`, indirection through shell variables). It was rejected because it adds a non-trivial dependency for a problem whose payoff is asymmetric — the regex catches the overwhelmingly common straight-line `gh ` form, and the failure mode of missing an inference is the same as today (the author adds a manual `permissions:` line). The complexity-to-coverage ratio did not justify the dependency. + +#### Alternative 4: Runtime Permission Probing + +Have the activation job attempt the operation and escalate permissions on failure. Rejected for the same reasons as in ADR-26535: it requires network round-trips, complicates error handling, and shifts misconfiguration discovery from compile time to runtime. Compile-time inference is the consistent pattern across this compiler. + +### Consequences + +#### Positive +- Authors can call `gh pr diff`, `gh issue view`, `gh workflow list`, `gh api /repos/.../pulls/...`, etc. in pre-steps and the compiler will mint a lock file with the minimum required scopes — no manual `permissions:` line needed. +- Write `gh` commands in either job are now caught at compile time with an actionable error pointing to safe-outputs, eliminating an entire class of silent-failure misconfigurations. +- The permission-mapping table is data-driven JSON, so adding a new `gh` subcommand group (e.g. a future `gh discussion`) requires no Go code change beyond an entry in `gh_cli_permissions.json`. +- GitHub-App-only scopes (codespaces, environments, organisation members, webhooks) are inferred from `gh api` calls and injected into the App token mint step, closing a gap where App-only scripts previously had to be hand-wired. + +#### Negative +- The regex-based scanner has known blind spots: `gh` invocations behind `xargs`, function indirection, dynamic command construction, or non-standard shell quoting will not be detected. Authors hitting these cases must still declare permissions manually, and there is no warning when this happens. +- The new permission inference runs on every compile and adds a small but non-zero amount of work even for workflows that don't use `gh` in pre-steps. Caching in `activationJobBuildContext` mitigates this within a single compile but does not amortise across compiles. +- The JSON file is now a new maintenance surface: when GitHub adds or renames a `gh` subcommand or REST path, `gh_cli_permissions.json` must be updated, and there is no automated check that it stays in sync with the actual `gh` CLI release. +- `pre-agent-steps` is scanned for the agent job but intentionally skipped for the activation job (the section has no meaning there). This asymmetry is documented in code comments but is a latent footgun for anyone adding new job types in the future. + +#### Neutral +- The activation job and the agent job now share the same write-command detector and read-permission inferrer, applied through separate call sites. Future changes to inference rules need to be made in one place (`gh_cli_permissions.go`) but verified at two call sites. +- Existing workflows that already declared `permissions:` for `gh` operations will continue to work; the inferred scopes merge additively and do not override explicit declarations. +- The `permissions: {}` opt-out is preserved for the agent job: an author who explicitly zeroed out permissions will not have inferred scopes injected. The activation job has no equivalent opt-out because its permission block is compiler-owned. + +--- + +## Part 2 — Normative Specification (RFC 2119) + +> The key words **MUST**, **MUST NOT**, **REQUIRED**, **SHALL**, **SHALL NOT**, **SHOULD**, **SHOULD NOT**, **RECOMMENDED**, **MAY**, and **OPTIONAL** in this section are to be interpreted as described in [RFC 2119](https://www.rfc-editor.org/rfc/rfc2119). + +### Permission Inference Source of Truth + +1. Implementations **MUST** load the `gh` subcommand-to-permission mapping from `pkg/workflow/data/gh_cli_permissions.json` at package init time and **MUST NOT** hardcode subcommand-to-permission mappings elsewhere in the compiler. +2. The JSON schema **MUST** preserve the existing top-level fields: `version`, `description`, `subcommand_groups`, and `api_path_patterns`. +3. Each subcommand group entry **MUST** declare `read_subcommands`, `write_subcommands`, `read_permissions`, `write_permissions`, `app_read_permissions`, and `app_write_permissions`. Empty arrays are permitted. +4. Implementations **MUST** dynamically build the `gh ` regex alternation from the JSON's `subcommand_groups` keys, so that adding a new group requires no Go code change. + +### Activation Job Inference Scope + +1. Implementations **MUST** scan `run:` scripts in `jobs.activation.pre-steps`, `jobs.activation.steps`, and `jobs.activation.post-steps`. +2. Implementations **MUST NOT** scan `jobs.activation.pre-agent-steps`; `pre-agent-steps` is an agent-job-only concept and has no meaning in the activation job. +3. Inferred read permissions **MUST** be merged additively into the activation job's permission map and **MUST NOT** override scopes already set by workflow-level feature derivation. +4. GitHub-App-only scopes inferred from activation job scripts **MUST** be added to the App token mint step's `appPerms` when an App token is being minted. + +### Agent Job Inference Scope + +1. Implementations **MUST** scan `run:` scripts in the agent job's `pre-steps`, `steps`, `pre-agent-steps`, and `post-steps` sections, drawing from both the top-level fields (`data.PreSteps`, `data.CustomSteps`, `data.PreAgentSteps`, `data.PostSteps`) and the corresponding entries under `jobs.`. +2. Implementations **MUST NOT** inject inferred permissions when the workflow frontmatter declares `permissions: {}` (the explicit-empty opt-out), detected by exact-string match against the canonical YAML form. + +### Write Command Detection + +1. Implementations **MUST** treat any `gh` subcommand whose action is listed in a group's `write_subcommands` array as a write command. +2. When one or more write commands are detected in the activation job's scanned scripts, implementations **MUST** return a compile-time error naming each detected command and **MUST** include a reference to safe-outputs as the supported alternative. +3. When one or more write commands are detected in the agent job's scanned scripts, implementations **MUST** return a compile-time error naming each detected command and **MUST** include a reference to safe-outputs as the supported alternative. +4. Implementations **MUST NOT** silently grant write scopes inferred from script content, regardless of the existing permission map. + +### Read Permission Inference + +1. When a recognised read subcommand (e.g. `gh pr diff`, `gh issue view`) is found, implementations **MUST** infer the scopes listed in the group's `read_permissions`. +2. When a `gh api` invocation matches a configured `api_path_patterns` regex, implementations **MUST** infer the scopes listed in that pattern's `permissions` (and `app_permissions` for App tokens) field. +3. Implementations **MAY** fall back to the group-level `read_permissions` when an unrecognised action follows a known subcommand group (e.g. `gh pr something-new`); this fallback **SHOULD NOT** be relied on for newly added actions and the JSON **SHOULD** be updated to list new actions explicitly. + +### Caching + +1. Within a single activation-job build, implementations **MUST** cache the extracted run scripts and inferred permissions on the `activationJobBuildContext` so the scan is not repeated by callers that need the same data (e.g. both `buildActivationPermissions` and `addActivationFeedbackAndValidationSteps`). + +### Conformance + +An implementation is considered conformant with this ADR if it satisfies all **MUST** and **MUST NOT** requirements above. Failure to meet any **MUST** or **MUST NOT** requirement constitutes non-conformance. + +--- + +*This is a DRAFT ADR generated by the [Design Decision Gate](https://github.com/github/gh-aw/actions/runs/26000015148) workflow. The PR author must review, complete, and finalize this document before the PR can merge.* From 68fd1326b432e6d008664e37b5f6923c67eefe4f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 17 May 2026 19:50:10 +0000 Subject: [PATCH 14/15] fix: restrict gh CLI scanning to actually-injected step sections Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- .../compiler_activation_job_builder.go | 17 ++-- pkg/workflow/compiler_main_job.go | 14 ++- pkg/workflow/gh_cli_permissions.go | 98 ++++++++++++++++++- pkg/workflow/gh_cli_permissions_test.go | 84 ++++++++-------- 4 files changed, 150 insertions(+), 63 deletions(-) diff --git a/pkg/workflow/compiler_activation_job_builder.go b/pkg/workflow/compiler_activation_job_builder.go index d794a35a30..f03bc8aa80 100644 --- a/pkg/workflow/compiler_activation_job_builder.go +++ b/pkg/workflow/compiler_activation_job_builder.go @@ -40,10 +40,9 @@ type activationJobBuildContext struct { activationNeeds []string activationCondition string - // activationAllScripts holds the `run` scripts extracted from all step sections - // in jobs.activation (pre-steps, steps, post-steps), cached to avoid repeated extraction. - // pre-agent-steps is intentionally omitted: it is an agent-job-only concept (steps that - // run immediately before AI execution) and has no meaning in the activation job. + // activationAllScripts holds the `run` scripts extracted from jobs.activation.pre-steps, + // cached to avoid repeated extraction. Only pre-steps are honored for built-in jobs; + // jobs.activation.steps and jobs.activation.post-steps are not injected by the compiler. activationAllScripts []string // activationInferredPerms holds the permissions inferred from activationAllScripts, // cached here to avoid repeated inference. @@ -84,13 +83,15 @@ func (c *Compiler) newActivationJobBuildContext( } ctx.shouldRemoveLabel = ctx.hasLabelCommand && data.LabelCommandRemoveLabel - // Cache scripts from all step sections and inferred permissions once to avoid redundant + // Cache scripts from pre-steps and inferred permissions once to avoid redundant // extraction and inference calls in buildActivationPermissions and // addActivationFeedbackAndValidationSteps. + // Only pre-steps are honored for built-in jobs: applyBuiltinJobPreSteps (compiler_jobs.go) + // inserts only jobs..pre-steps; jobs..steps and jobs..post-steps are + // ignored for built-in jobs, so scanning them would cause false-positive errors or + // unneeded permission grants. activationJobName := string(constants.ActivationJobName) - for _, section := range []string{"pre-steps", "steps", "post-steps"} { - ctx.activationAllScripts = append(ctx.activationAllScripts, extractRunScriptsFromJobSection(data.Jobs, activationJobName, section)...) - } + ctx.activationAllScripts = extractRunScriptsFromJobSection(data.Jobs, activationJobName, "pre-steps") if len(ctx.activationAllScripts) > 0 { ctx.activationInferredPerms = inferPermissionsFromShellScripts(ctx.activationAllScripts) } diff --git a/pkg/workflow/compiler_main_job.go b/pkg/workflow/compiler_main_job.go index bc3682a989..7f284c8dc7 100644 --- a/pkg/workflow/compiler_main_job.go +++ b/pkg/workflow/compiler_main_job.go @@ -336,20 +336,24 @@ func (c *Compiler) buildMainJob(data *WorkflowData, activationJobCreated bool) ( } } - // Infer permissions required by gh CLI calls in all agent job step sections - // (pre-steps, steps, post-steps, pre-agent-steps). + // Infer permissions required by gh CLI calls in all agent job step sections. // Detects write commands (which are not permitted since the agent job is read-only), // and merges inferred read permissions into the existing permissions block. // Skipped only when the user explicitly opted out of all permissions (permissions: {}). + // + // Top-level frontmatter sections (pre-steps, steps, pre-agent-steps, post-steps) are + // all applied to the agent job and must be fully scanned. + // For jobs.agent.* sections, only jobs.agent.pre-steps is actually injected by + // applyBuiltinJobPreSteps; jobs.agent.steps, jobs.agent.pre-agent-steps, and + // jobs.agent.post-steps are ignored for built-in jobs, so they are intentionally + // excluded to avoid false-positive errors or unneeded permission grants. agentJobName := string(constants.AgentJobName) agentAllScripts := extractRunScriptsFromSectionYAML(data.PreSteps, "pre-steps") agentAllScripts = append(agentAllScripts, extractRunScriptsFromSectionYAML(data.CustomSteps, "steps")...) agentAllScripts = append(agentAllScripts, extractRunScriptsFromSectionYAML(data.PreAgentSteps, "pre-agent-steps")...) agentAllScripts = append(agentAllScripts, extractRunScriptsFromSectionYAML(data.PostSteps, "post-steps")...) if data.Jobs != nil { - for _, section := range []string{"pre-steps", "steps", "pre-agent-steps", "post-steps"} { - agentAllScripts = append(agentAllScripts, extractRunScriptsFromJobSection(data.Jobs, agentJobName, section)...) - } + agentAllScripts = append(agentAllScripts, extractRunScriptsFromJobSection(data.Jobs, agentJobName, "pre-steps")...) } if len(agentAllScripts) > 0 { if writeCmds := detectWriteCommandsInShellScripts(agentAllScripts); len(writeCmds) > 0 { diff --git a/pkg/workflow/gh_cli_permissions.go b/pkg/workflow/gh_cli_permissions.go index 5b78999179..d875fe5b06 100644 --- a/pkg/workflow/gh_cli_permissions.go +++ b/pkg/workflow/gh_cli_permissions.go @@ -155,9 +155,98 @@ func init() { ghCLIPermissions = cp } -// ghAPIRE matches `gh api ` invocations. -// Capture group: (1) API path (up to the first whitespace, pipe, or quote). -var ghAPIRE = regexp.MustCompile(`(?m)(?:^|[\s|;])gh\s+api\s+([^\s|;&"'\\]+)`) +// ghAPICmdRE matches `gh api` at a command boundary, capturing the rest of the line. +var ghAPICmdRE = regexp.MustCompile(`(?m)(?:^|[\s|;])gh\s+api\s+(.+)`) + +// ghAPIValueFlags is the set of `gh api` flags that consume the next token as their value. +// These are skipped so that the API endpoint path can be located even when flags precede it. +var ghAPIValueFlags = map[string]bool{ + "-X": true, "--method": true, + "-H": true, "--header": true, + "-f": true, "--field": true, + "-F": true, "--raw-field": true, + "-q": true, "--jq": true, + "-t": true, "--template": true, + "--cache": true, "--paginate-limit": true, +} + +// extractGHAPIEndpoints extracts API endpoint paths from `gh api` invocations in a shell script. +// It handles common invocation patterns such as: +// +// gh api /repos/owner/repo/pulls +// gh api -H 'Accept: application/vnd.github+json' /repos/owner/repo/pulls +// gh api --method GET /repos/owner/repo/pulls +// gh api "/repos/owner/repo/pulls" +// +// Flags and their arguments are skipped until the first non-flag token is found. +func extractGHAPIEndpoints(script string) []string { + var endpoints []string + for _, m := range ghAPICmdRE.FindAllStringSubmatch(script, -1) { + // m[1] is everything after `gh api ` on the same line. + if ep := parseGHAPIEndpoint(m[1]); ep != "" { + endpoints = append(endpoints, ep) + } + } + return endpoints +} + +// parseGHAPIEndpoint returns the API endpoint path from the argument string that follows +// `gh api`, skipping flags and their values. Surrounding quotes are stripped. +func parseGHAPIEndpoint(args string) string { + tokens := splitShellTokens(strings.TrimSpace(args)) + i := 0 + for i < len(tokens) { + tok := tokens[i] + if strings.HasPrefix(tok, "-") { + // Flags with embedded value (e.g. --method=GET) consume only one token. + if strings.Contains(tok, "=") { + i++ + continue + } + i++ + // Flags that take a separate value argument: skip the next token too. + if ghAPIValueFlags[tok] && i < len(tokens) { + i++ + } + continue + } + // First non-flag token is the endpoint; strip surrounding quotes. + return strings.Trim(tok, `"'`) + } + return "" +} + +// splitShellTokens splits a shell argument string by whitespace while respecting +// single and double quoted regions. Quotes are preserved in the returned tokens so +// that the caller can strip them as needed. +func splitShellTokens(s string) []string { + var tokens []string + var cur strings.Builder + inSingle := false + inDouble := false + for i := 0; i < len(s); i++ { + c := s[i] + switch { + case c == '\'' && !inDouble: + inSingle = !inSingle + cur.WriteByte(c) + case c == '"' && !inSingle: + inDouble = !inDouble + cur.WriteByte(c) + case (c == ' ' || c == '\t') && !inSingle && !inDouble: + if cur.Len() > 0 { + tokens = append(tokens, cur.String()) + cur.Reset() + } + default: + cur.WriteByte(c) + } + } + if cur.Len() > 0 { + tokens = append(tokens, cur.String()) + } + return tokens +} // inferPermissionsFromShellScripts scans one or more shell script strings for // gh CLI invocations and returns the minimum set of GitHub Actions and GitHub App @@ -221,8 +310,7 @@ func inferPermissionsFromShellScripts(scripts []string) map[PermissionScope]Perm } // Match gh api patterns. - for _, m := range ghAPIRE.FindAllStringSubmatch(script, -1) { - path := m[1] + for _, path := range extractGHAPIEndpoints(script) { for _, ap := range ghCLIPermissions.apiPathPatterns { if ap.re.MatchString(path) { addScopes(ap.permissions) diff --git a/pkg/workflow/gh_cli_permissions_test.go b/pkg/workflow/gh_cli_permissions_test.go index d62345d31a..bf7a1c361d 100644 --- a/pkg/workflow/gh_cli_permissions_test.go +++ b/pkg/workflow/gh_cli_permissions_test.go @@ -59,6 +59,33 @@ func TestInferPermissionsFromShellScripts_GhAPI(t *testing.T) { assert.Equal(t, PermissionRead, perms[PermissionPullRequests], "gh api /repos/.../pulls should require pull-requests: read") } +// TestInferPermissionsFromShellScripts_GhAPIWithHeaderFlag verifies that flags before the +// endpoint are skipped, e.g. gh api -H 'Accept: ...' /repos/owner/repo/pulls. +func TestInferPermissionsFromShellScripts_GhAPIWithHeaderFlag(t *testing.T) { + scripts := []string{`gh api -H 'Accept: application/vnd.github+json' /repos/owner/repo/pulls`} + perms := inferPermissionsFromShellScripts(scripts) + assert.Equal(t, PermissionRead, perms[PermissionPullRequests], + "gh api with -H flag before endpoint should still infer pull-requests: read") +} + +// TestInferPermissionsFromShellScripts_GhAPIWithMethodFlag verifies that --method GET +// before the endpoint is skipped properly. +func TestInferPermissionsFromShellScripts_GhAPIWithMethodFlag(t *testing.T) { + scripts := []string{`gh api --method GET /repos/owner/repo/issues`} + perms := inferPermissionsFromShellScripts(scripts) + assert.Equal(t, PermissionRead, perms[PermissionIssues], + "gh api with --method GET before endpoint should still infer issues: read") +} + +// TestInferPermissionsFromShellScripts_GhAPIQuotedEndpoint verifies that a quoted endpoint +// is correctly extracted, e.g. gh api "/repos/owner/repo/pulls". +func TestInferPermissionsFromShellScripts_GhAPIQuotedEndpoint(t *testing.T) { + scripts := []string{`gh api "/repos/owner/repo/pulls"`} + perms := inferPermissionsFromShellScripts(scripts) + assert.Equal(t, PermissionRead, perms[PermissionPullRequests], + `gh api with quoted endpoint should infer pull-requests: read`) +} + // TestInferPermissionsFromShellScripts_GhAPIIssues verifies issues: read for gh api issues endpoint. func TestInferPermissionsFromShellScripts_GhAPIIssues(t *testing.T) { scripts := []string{`gh api /repos/owner/repo/issues --jq '.[].number'`} @@ -946,47 +973,15 @@ Test agent post-steps write command triggers error. assert.Contains(t, err.Error(), "gh pr close") } -// TestActivationJobStepsInferReadPermission verifies that `gh workflow list` in -// jobs.activation.steps causes the compiler to add actions: read to the activation job. -func TestActivationJobStepsInferReadPermission(t *testing.T) { - tmpDir := testutil.TempDir(t, "activation-steps-read-perm") +// TestActivationJobStepsNotScanned verifies that jobs.activation.steps is NOT scanned for +// gh CLI calls because the activation job is a built-in job and applyBuiltinJobPreSteps +// only injects jobs..pre-steps — steps/post-steps are silently ignored. +func TestActivationJobStepsNotScanned(t *testing.T) { + tmpDir := testutil.TempDir(t, "activation-steps-not-scanned") testFile := filepath.Join(tmpDir, "workflow.md") - testContent := `--- -on: - pull_request: - types: [opened] -permissions: - contents: read - actions: read -engine: copilot -jobs: - activation: - steps: - - name: List workflows - run: | - gh workflow list --json name > /tmp/workflows.json ---- - -# Workflow that lists workflows in activation steps -` - require.NoError(t, os.WriteFile(testFile, []byte(testContent), 0644)) - - compiler := NewCompiler() - require.NoError(t, compiler.CompileWorkflow(testFile)) - - lockContent, err := os.ReadFile(stringutil.MarkdownToLockFile(testFile)) - require.NoError(t, err) - - activationSection := extractJobSection(string(lockContent), string(constants.ActivationJobName)) - assert.Contains(t, activationSection, "actions: read", - "activation job should have actions: read inferred from steps gh workflow list") -} - -// TestActivationJobWriteCommandInStepsErrors verifies that a write gh command in -// jobs.activation.steps triggers a compile error. -func TestActivationJobWriteCommandInStepsErrors(t *testing.T) { - tmpDir := testutil.TempDir(t, "activation-steps-write-error") - testFile := filepath.Join(tmpDir, "bad-workflow.md") + // Note: jobs.activation.steps contains a write command (gh label create). + // If it were scanned this would produce a compile error; the absence of an error + // confirms that the section is intentionally skipped. testContent := `--- on: pull_request: @@ -1002,13 +997,12 @@ jobs: gh label create "reviewed" --color "#0075ca" --- -# Workflow whose activation steps illegally call a write gh command +# Workflow whose activation steps are silently ignored (only pre-steps are applied) ` require.NoError(t, os.WriteFile(testFile, []byte(testContent), 0644)) compiler := NewCompiler() - err := compiler.CompileWorkflow(testFile) - require.Error(t, err, "compiler should reject write gh commands in activation steps") - assert.Contains(t, err.Error(), "gh label create", "error should mention the offending command") - assert.Contains(t, err.Error(), "write", "error should explain the write-permission restriction") + // Should compile without error — the write command in jobs.activation.steps is not + // executed so it must not trigger a validation error. + require.NoError(t, compiler.CompileWorkflow(testFile)) } From 9f92da94a7607b01e21e2d277daff0093f0d3672 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 17 May 2026 19:52:37 +0000 Subject: [PATCH 15/15] fix: handle escape sequences in splitShellTokens and improve test coverage Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- pkg/workflow/gh_cli_permissions.go | 11 +++++++++++ pkg/workflow/gh_cli_permissions_test.go | 8 ++++++++ 2 files changed, 19 insertions(+) diff --git a/pkg/workflow/gh_cli_permissions.go b/pkg/workflow/gh_cli_permissions.go index d875fe5b06..0f9b8c5cd2 100644 --- a/pkg/workflow/gh_cli_permissions.go +++ b/pkg/workflow/gh_cli_permissions.go @@ -219,6 +219,11 @@ func parseGHAPIEndpoint(args string) string { // splitShellTokens splits a shell argument string by whitespace while respecting // single and double quoted regions. Quotes are preserved in the returned tokens so // that the caller can strip them as needed. +// +// Backslash escape sequences (e.g. \" inside a double-quoted string) are treated as +// opaque two-character sequences and passed through unchanged; the caller is +// responsible for any further unescaping. This is sufficient for the practical +// `gh api` invocation patterns handled here. func splitShellTokens(s string) []string { var tokens []string var cur strings.Builder @@ -227,6 +232,12 @@ func splitShellTokens(s string) []string { for i := 0; i < len(s); i++ { c := s[i] switch { + case c == '\\' && !inSingle && i+1 < len(s): + // Escape sequence outside single quotes: consume the backslash and the + // next character as a single unit so that \" does not close a quoted string. + cur.WriteByte(c) + i++ + cur.WriteByte(s[i]) case c == '\'' && !inDouble: inSingle = !inSingle cur.WriteByte(c) diff --git a/pkg/workflow/gh_cli_permissions_test.go b/pkg/workflow/gh_cli_permissions_test.go index bf7a1c361d..96e4a5aff5 100644 --- a/pkg/workflow/gh_cli_permissions_test.go +++ b/pkg/workflow/gh_cli_permissions_test.go @@ -1005,4 +1005,12 @@ jobs: // Should compile without error — the write command in jobs.activation.steps is not // executed so it must not trigger a validation error. require.NoError(t, compiler.CompileWorkflow(testFile)) + + // The compiled activation job must not contain the label-create step because + // jobs.activation.steps is not injected into built-in jobs. + lockContent, err := os.ReadFile(stringutil.MarkdownToLockFile(testFile)) + require.NoError(t, err) + activationSection := extractJobSection(string(lockContent), string(constants.ActivationJobName)) + assert.NotContains(t, activationSection, "gh label create", + "jobs.activation.steps should not be injected into the compiled activation job") }