Skip to content

Commit 1f6cf64

Browse files
Copilot and gh-aw-bot authored
Add awf-squid bootstrap retry for healthcheck race condition
Co-authored-by: gh-aw-bot <259018956+gh-aw-bot@users.noreply.github.com>
1 parent 431d4c6 commit 1f6cf64

11 files changed

Lines changed: 431 additions & 67 deletions

File tree

pkg/workflow/awf_config_test.go

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1045,6 +1045,78 @@ func TestBuildAWFCommand_UsesConfigFile(t *testing.T) {
10451045
assert.Contains(t, command, `"enabled":true`, "config JSON should have apiProxy enabled")
10461046
}
10471047

1048+
// TestBuildAWFCommand_SquidBootstrapRetry verifies that BuildAWFCommand wraps the AWF
1049+
// invocation in a bootstrap retry loop that fires on awf-squid healthcheck failures.
1050+
func TestBuildAWFCommand_SquidBootstrapRetry(t *testing.T) {
1051+
config := AWFCommandConfig{
1052+
EngineName: "copilot",
1053+
EngineCommand: "copilot --prompt-file /tmp/prompt.txt",
1054+
LogFile: "/tmp/gh-aw/agent-stdio.log",
1055+
AllowedDomains: "github.com",
1056+
WorkflowData: &WorkflowData{
1057+
EngineConfig: &EngineConfig{ID: "copilot"},
1058+
NetworkPermissions: &NetworkPermissions{
1059+
Firewall: &FirewallConfig{Enabled: true},
1060+
},
1061+
},
1062+
}
1063+
1064+
command := BuildAWFCommand(config)
1065+
1066+
// Retry loop scaffolding must be present.
1067+
assert.Contains(t, command, "awf_bootstrap_retry_max=2",
1068+
"expected retry max counter")
1069+
assert.Contains(t, command, "awf_bootstrap_retry_attempt=0",
1070+
"expected retry attempt counter initialised to 0")
1071+
assert.Contains(t, command, "while true; do",
1072+
"expected retry while-loop")
1073+
1074+
// Per-attempt temp log must be created with restricted permissions.
1075+
assert.Contains(t, command, "umask 177",
1076+
"expected restricted permissions on per-attempt log")
1077+
assert.Contains(t, command, `mktemp "${RUNNER_TEMP:-/tmp}/awf-startup-XXXXXX.log"`,
1078+
"expected mktemp for per-attempt log")
1079+
1080+
// set +e / set -e must bracket the AWF pipeline so PIPESTATUS can be read
1081+
// even when the GitHub Actions runner uses bash -e (errexit).
1082+
setEOffIdx := strings.Index(command, "set +e")
1083+
awfIdx := strings.Index(command, "sudo -E awf")
1084+
setEOnIdx := strings.Index(command, "set -e")
1085+
assert.GreaterOrEqual(t, setEOffIdx, 0, "expected set +e before AWF pipeline")
1086+
assert.Less(t, setEOffIdx, awfIdx, "set +e must precede the AWF invocation")
1087+
assert.Less(t, awfIdx, setEOnIdx, "set -e must follow the AWF invocation")
1088+
1089+
// Exit code must be captured via PIPESTATUS[0] (not $?).
1090+
assert.Contains(t, command, "awf_exit=${PIPESTATUS[0]}",
1091+
"expected PIPESTATUS[0] to capture AWF exit code through the pipeline")
1092+
1093+
// Output must be split: per-attempt log (for grep) and main log (accumulated).
1094+
assert.Contains(t, command, `tee "$awf_attempt_log"`,
1095+
"expected per-attempt log written via tee for grep check")
1096+
assert.Contains(t, command, `tee -a /tmp/gh-aw/agent-stdio.log`,
1097+
"expected main log appended via tee -a")
1098+
1099+
// Retry condition must detect the specific squid unhealthy pattern.
1100+
assert.Contains(t, command, "dependency failed to start: container awf-squid is unhealthy",
1101+
"expected squid healthcheck failure pattern in retry condition")
1102+
1103+
// WARN message on retry and ERROR message on terminal failure.
1104+
assert.Contains(t, command, "[WARN] AWF bootstrap: awf-squid healthcheck failure on attempt",
1105+
"expected WARN message on retry")
1106+
assert.Contains(t, command, "[ERROR] AWF bootstrap: awf-squid healthcheck failure after all attempts",
1107+
"expected ERROR message on terminal failure")
1108+
1109+
// Squid container logs must be captured on terminal failure.
1110+
assert.Contains(t, command, "docker logs awf-squid",
1111+
"expected docker logs awf-squid captured on terminal failure")
1112+
1113+
// AWF must still be invoked with the standard flags.
1114+
assert.Contains(t, command, "sudo -E awf",
1115+
"expected sudo -E awf invocation inside retry loop")
1116+
assert.Contains(t, command, "--skip-pull",
1117+
"expected --skip-pull flag passed to AWF")
1118+
}
1119+
10481120
func TestBuildAWFCommand_ModelMultipliersLoadedFromFile(t *testing.T) {
10491121
config := AWFCommandConfig{
10501122
EngineName: "copilot",

pkg/workflow/awf_helpers.go

Lines changed: 98 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,20 @@ fi`,
300300
// pre-agent overhead such as workspace audit and CLI proxy startup.
301301
writeAgentCLIStartMs := "printf '%s' \"$(date +%s%3N)\" > " + shellEscapeArg(AgentCLIStartMsPath)
302302

303+
// Build the AWF invocation with a one-shot bootstrap retry for transient awf-squid
304+
// startup healthcheck failures. AWF v0.25.57+ already increases the squid healthcheck
305+
// window (start_period=5s, retries=10, interval=2s), but this shell-level retry adds
306+
// defence-in-depth for severely loaded runners that may still exhaust the probe window.
307+
awfRunWithRetry := buildAWFSquidBootstrapRetry(
308+
awfCommand,
309+
expandableArgs,
310+
toolCacheMountRef,
311+
arcDindPrefixArgsRef,
312+
awfArgs,
313+
shellWrappedCommand,
314+
config.LogFile,
315+
)
316+
303317
// Build the complete command with proper formatting.
304318
// configFileSetup (if non-empty) writes the AWF config JSON immediately before the
305319
// AWF invocation so the file is present when AWF parses --config.
@@ -312,22 +326,14 @@ fi`,
312326
%s
313327
%s
314328
%s
315-
# shellcheck disable=SC1003
316-
%s %s %s %s %s \
317-
-- %s 2>&1 | tee -a %s`,
329+
%s`,
318330
writeAgentCLIStartMs,
319331
config.PathSetup,
320332
preCreateLog,
321333
configFileSetup,
322334
arcDindPrefixProbe,
323335
toolCacheMountProbe,
324-
awfCommand,
325-
expandableArgs,
326-
toolCacheMountRef,
327-
arcDindPrefixArgsRef,
328-
shellJoinArgs(awfArgs),
329-
shellWrappedCommand,
330-
shellEscapeArg(config.LogFile))
336+
awfRunWithRetry)
331337
} else if config.PathSetup != "" {
332338
// Include path setup before AWF command (runs on host before AWF)
333339
command = fmt.Sprintf(`set -o pipefail
@@ -336,69 +342,121 @@ fi`,
336342
%s
337343
%s
338344
%s
339-
# shellcheck disable=SC1003
340-
%s %s %s %s %s \
341-
-- %s 2>&1 | tee -a %s`,
345+
%s`,
342346
writeAgentCLIStartMs,
343347
config.PathSetup,
344348
preCreateLog,
345349
arcDindPrefixProbe,
346350
toolCacheMountProbe,
347-
awfCommand,
348-
expandableArgs,
349-
toolCacheMountRef,
350-
arcDindPrefixArgsRef,
351-
shellJoinArgs(awfArgs),
352-
shellWrappedCommand,
353-
shellEscapeArg(config.LogFile))
351+
awfRunWithRetry)
354352
} else if configFileSetup != "" {
355353
command = fmt.Sprintf(`set -o pipefail
356354
%s
357355
%s
358356
%s
359357
%s
360358
%s
361-
# shellcheck disable=SC1003
362-
%s %s %s %s %s \
363-
-- %s 2>&1 | tee -a %s`,
359+
%s`,
364360
writeAgentCLIStartMs,
365361
preCreateLog,
366362
configFileSetup,
367363
arcDindPrefixProbe,
368364
toolCacheMountProbe,
369-
awfCommand,
370-
expandableArgs,
371-
toolCacheMountRef,
372-
arcDindPrefixArgsRef,
373-
shellJoinArgs(awfArgs),
374-
shellWrappedCommand,
375-
shellEscapeArg(config.LogFile))
365+
awfRunWithRetry)
376366
} else {
377367
command = fmt.Sprintf(`set -o pipefail
378368
%s
379369
%s
380370
%s
381371
%s
382-
# shellcheck disable=SC1003
383-
%s %s %s %s %s \
384-
-- %s 2>&1 | tee -a %s`,
372+
%s`,
385373
writeAgentCLIStartMs,
386374
preCreateLog,
387375
arcDindPrefixProbe,
388376
toolCacheMountProbe,
389-
awfCommand,
390-
expandableArgs,
391-
toolCacheMountRef,
392-
arcDindPrefixArgsRef,
393-
shellJoinArgs(awfArgs),
394-
shellWrappedCommand,
395-
shellEscapeArg(config.LogFile))
377+
awfRunWithRetry)
396378
}
397379

398380
awfHelpersLog.Print("Successfully built AWF command")
399381
return command
400382
}
401383

384+
// buildAWFSquidBootstrapRetry returns the shell fragment that invokes AWF with a
385+
// one-shot retry for transient awf-squid startup healthcheck failures.
386+
//
387+
// Motivation: Docker compose uses a depends_on healthcheck condition for the
388+
// awf-squid container. On loaded runners the squid proxy can take longer to
389+
// bind its listening port than the healthcheck window allows, which causes
390+
// "dependency failed to start: container awf-squid is unhealthy" and an
391+
// immediate exit-1 with no agent output. AWF v0.25.57+ increased the probe
392+
// window (start_period 2s→5s, retries 5→10, interval 1s→2s), but a shell-level
393+
// retry provides defence-in-depth for edge cases on severely loaded runners.
394+
//
395+
// Retry policy:
396+
// - Max 2 attempts (awf_bootstrap_retry_max=2): one initial attempt + one retry.
397+
// - 10s sleep between attempts to let Docker settle.
398+
// - Retry fires only on the specific squid healthcheck error pattern; all other
399+
// AWF failures propagate immediately with the original exit code.
400+
// - On terminal failure (retries exhausted) the squid container logs are captured
401+
// and appended to the agent stdio log to aid diagnosis.
402+
//
403+
// The attempt log (awf_attempt_log) captures output from the current attempt only;
404+
// the main log file (logFile) accumulates output across all attempts via tee -a.
405+
// set +e / set -e wraps the pipeline so ${PIPESTATUS[0]} can be read even when the
406+
// GitHub Actions default shell uses -e (errexit).
407+
func buildAWFSquidBootstrapRetry(
408+
awfCommand string,
409+
expandableArgs string,
410+
toolCacheMountRef string,
411+
arcDindPrefixArgsRef string,
412+
awfArgs []string,
413+
shellWrappedCommand string,
414+
logFile string,
415+
) string {
416+
escapedLog := shellEscapeArg(logFile)
417+
return fmt.Sprintf(
418+
`awf_bootstrap_retry_max=2
419+
awf_bootstrap_retry_attempt=0
420+
while true; do
421+
awf_bootstrap_retry_attempt=$((awf_bootstrap_retry_attempt + 1))
422+
awf_attempt_log=$(umask 177 && mktemp "${RUNNER_TEMP:-/tmp}/awf-startup-XXXXXX.log")
423+
set +e
424+
# shellcheck disable=SC1003
425+
%s %s %s %s %s \
426+
-- %s 2>&1 | tee "$awf_attempt_log" | tee -a %s
427+
awf_exit=${PIPESTATUS[0]}
428+
set -e
429+
if [[ $awf_exit -eq 0 ]]; then
430+
rm -f "$awf_attempt_log"
431+
break
432+
fi
433+
if grep -Fq 'dependency failed to start: container awf-squid is unhealthy' "$awf_attempt_log" \
434+
&& [[ $awf_bootstrap_retry_attempt -lt $awf_bootstrap_retry_max ]]; then
435+
echo "[WARN] AWF bootstrap: awf-squid healthcheck failure on attempt $awf_bootstrap_retry_attempt/$awf_bootstrap_retry_max — retrying in 10s" | tee -a %s
436+
rm -f "$awf_attempt_log"
437+
sleep 10
438+
continue
439+
fi
440+
if grep -Fq 'dependency failed to start: container awf-squid is unhealthy' "$awf_attempt_log"; then
441+
echo "[ERROR] AWF bootstrap: awf-squid healthcheck failure after all attempts — capturing squid container logs" | tee -a %s
442+
docker logs awf-squid 2>&1 | tail -200 | sed 's/^/[awf-squid] /' | tee -a %s || true
443+
fi
444+
rm -f "$awf_attempt_log"
445+
exit "$awf_exit"
446+
done`,
447+
awfCommand,
448+
expandableArgs,
449+
toolCacheMountRef,
450+
arcDindPrefixArgsRef,
451+
shellJoinArgs(awfArgs),
452+
shellWrappedCommand,
453+
escapedLog,
454+
escapedLog,
455+
escapedLog,
456+
escapedLog,
457+
)
458+
}
459+
402460
// BuildAWFArgs constructs common AWF arguments from configuration.
403461
// This extracts the shared AWF argument building logic from engine implementations.
404462
//

pkg/workflow/testdata/TestWasmGolden_AllEngines/claude.golden

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -561,9 +561,35 @@ jobs:
561561
elif [ -d "/home/runner/work/_tool" ]; then
562562
GH_AW_TOOL_CACHE_MOUNT="/home/runner/work/_tool:/home/runner/work/_tool:ro"
563563
fi
564-
# shellcheck disable=SC1003
565-
sudo -E awf --config "${RUNNER_TEMP}/gh-aw/awf-config.json" --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" ${GH_AW_TOOL_CACHE_MOUNT:+--mount "$GH_AW_TOOL_CACHE_MOUNT"} ${GH_AW_DOCKER_HOST_PATH_PREFIX_ARGS} --tty --env-all --exclude-env ANTHROPIC_API_KEY --exclude-env GITHUB_MCP_SERVER_TOKEN --exclude-env MCP_GATEWAY_API_KEY --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --audit-dir /tmp/gh-aw/sandbox/firewall/audit --enable-host-access --allow-host-ports 80,443,8080 --skip-pull \
566-
-- /bin/bash -c 'set +o histexpand; GH_AW_TOOL_CACHE="${RUNNER_TOOL_CACHE:-/opt/hostedtoolcache}"; export PATH="$(find "$GH_AW_TOOL_CACHE" /opt/hostedtoolcache /home/runner/work/_tool -maxdepth 5 -type d -name bin 2>/dev/null | tr '\''\n'\'' '\'':'\'')$PATH"; [ -n "$GOROOT" ] && export PATH="$GOROOT/bin:$PATH" || true && GH_AW_NODE_EXEC="${GH_AW_NODE_BIN:-}"; if [ -z "$GH_AW_NODE_EXEC" ] || [ ! -x "$GH_AW_NODE_EXEC" ]; then GH_AW_NODE_EXEC="$(command -v node 2>/dev/null || true)"; fi; if [ -z "$GH_AW_NODE_EXEC" ]; then echo "node runtime missing on this runner — check runtimes.node in workflow YAML" >&2; exit 127; fi; GH_AW_NPM_GLOBAL_ROOT="$(npm root -g 2>/dev/null || true)"; if [ -n "$GH_AW_NPM_GLOBAL_ROOT" ]; then export NODE_PATH="${GH_AW_NPM_GLOBAL_ROOT}${NODE_PATH:+:${NODE_PATH}}"; fi; "$GH_AW_NODE_EXEC" ${RUNNER_TEMP}/gh-aw/actions/claude_harness.cjs claude --print --no-chrome --allowed-tools '\''Bash,BashOutput,Edit,Edit(/tmp/*),Edit(/tmp/gh-aw/agent/*),ExitPlanMode,Glob,Grep,KillBash,LS,MultiEdit,MultiEdit(/tmp/*),MultiEdit(/tmp/gh-aw/agent/*),NotebookEdit,NotebookRead,Read,Read(/tmp/*),Read(/tmp/gh-aw/agent/*),Task,TodoWrite,Write,Write(/tmp/*),Write(/tmp/gh-aw/agent/*),mcp__github__download_workflow_run_artifact,mcp__github__get_code_scanning_alert,mcp__github__get_commit,mcp__github__get_dependabot_alert,mcp__github__get_discussion,mcp__github__get_discussion_comments,mcp__github__get_file_contents,mcp__github__get_job_logs,mcp__github__get_label,mcp__github__get_latest_release,mcp__github__get_me,mcp__github__get_notification_details,mcp__github__get_pull_request,mcp__github__get_pull_request_comments,mcp__github__get_pull_request_diff,mcp__github__get_pull_request_files,mcp__github__get_pull_request_review_comments,mcp__github__get_pull_request_reviews,mcp__github__get_pull_request_status,mcp__github__get_release_by_tag,mcp__github__get_secret_scanning_alert,mcp__github__get_tag,mcp__github__get_workflow_run,mcp__github__get_workflow_run_logs,mcp__gith
ub__get_workflow_run_usage,mcp__github__issue_read,mcp__github__list_branches,mcp__github__list_code_scanning_alerts,mcp__github__list_commits,mcp__github__list_dependabot_alerts,mcp__github__list_discussion_categories,mcp__github__list_discussions,mcp__github__list_issue_types,mcp__github__list_issues,mcp__github__list_label,mcp__github__list_notifications,mcp__github__list_pull_requests,mcp__github__list_releases,mcp__github__list_secret_scanning_alerts,mcp__github__list_starred_repositories,mcp__github__list_tags,mcp__github__list_workflow_jobs,mcp__github__list_workflow_run_artifacts,mcp__github__list_workflow_runs,mcp__github__list_workflows,mcp__github__pull_request_read,mcp__github__search_code,mcp__github__search_issues,mcp__github__search_orgs,mcp__github__search_pull_requests,mcp__github__search_repositories,mcp__github__search_users'\'' --debug-file /tmp/gh-aw/agent-stdio.log --verbose --permission-mode acceptEdits --output-format stream-json --mcp-config "${RUNNER_TEMP}/gh-aw/mcp-config/mcp-servers.json" --prompt-file /tmp/gh-aw/aw-prompts/prompt.txt${GH_AW_MODEL_DETECTION_CLAUDE:+ --model "$GH_AW_MODEL_DETECTION_CLAUDE"}' 2>&1 | tee -a /tmp/gh-aw/agent-stdio.log
564+
awf_bootstrap_retry_max=2
565+
awf_bootstrap_retry_attempt=0
566+
while true; do
567+
awf_bootstrap_retry_attempt=$((awf_bootstrap_retry_attempt + 1))
568+
awf_attempt_log=$(umask 177 && mktemp "${RUNNER_TEMP:-/tmp}/awf-startup-XXXXXX.log")
569+
set +e
570+
# shellcheck disable=SC1003
571+
sudo -E awf --config "${RUNNER_TEMP}/gh-aw/awf-config.json" --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" ${GH_AW_TOOL_CACHE_MOUNT:+--mount "$GH_AW_TOOL_CACHE_MOUNT"} ${GH_AW_DOCKER_HOST_PATH_PREFIX_ARGS} --tty --env-all --exclude-env ANTHROPIC_API_KEY --exclude-env GITHUB_MCP_SERVER_TOKEN --exclude-env MCP_GATEWAY_API_KEY --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --audit-dir /tmp/gh-aw/sandbox/firewall/audit --enable-host-access --allow-host-ports 80,443,8080 --skip-pull \
572+
-- /bin/bash -c 'set +o histexpand; GH_AW_TOOL_CACHE="${RUNNER_TOOL_CACHE:-/opt/hostedtoolcache}"; export PATH="$(find "$GH_AW_TOOL_CACHE" /opt/hostedtoolcache /home/runner/work/_tool -maxdepth 5 -type d -name bin 2>/dev/null | tr '\''\n'\'' '\'':'\'')$PATH"; [ -n "$GOROOT" ] && export PATH="$GOROOT/bin:$PATH" || true && GH_AW_NODE_EXEC="${GH_AW_NODE_BIN:-}"; if [ -z "$GH_AW_NODE_EXEC" ] || [ ! -x "$GH_AW_NODE_EXEC" ]; then GH_AW_NODE_EXEC="$(command -v node 2>/dev/null || true)"; fi; if [ -z "$GH_AW_NODE_EXEC" ]; then echo "node runtime missing on this runner — check runtimes.node in workflow YAML" >&2; exit 127; fi; GH_AW_NPM_GLOBAL_ROOT="$(npm root -g 2>/dev/null || true)"; if [ -n "$GH_AW_NPM_GLOBAL_ROOT" ]; then export NODE_PATH="${GH_AW_NPM_GLOBAL_ROOT}${NODE_PATH:+:${NODE_PATH}}"; fi; "$GH_AW_NODE_EXEC" ${RUNNER_TEMP}/gh-aw/actions/claude_harness.cjs claude --print --no-chrome --allowed-tools '\''Bash,BashOutput,Edit,Edit(/tmp/*),Edit(/tmp/gh-aw/agent/*),ExitPlanMode,Glob,Grep,KillBash,LS,MultiEdit,MultiEdit(/tmp/*),MultiEdit(/tmp/gh-aw/agent/*),NotebookEdit,NotebookRead,Read,Read(/tmp/*),Read(/tmp/gh-aw/agent/*),Task,TodoWrite,Write,Write(/tmp/*),Write(/tmp/gh-aw/agent/*),mcp__github__download_workflow_run_artifact,mcp__github__get_code_scanning_alert,mcp__github__get_commit,mcp__github__get_dependabot_alert,mcp__github__get_discussion,mcp__github__get_discussion_comments,mcp__github__get_file_contents,mcp__github__get_job_logs,mcp__github__get_label,mcp__github__get_latest_release,mcp__github__get_me,mcp__github__get_notification_details,mcp__github__get_pull_request,mcp__github__get_pull_request_comments,mcp__github__get_pull_request_diff,mcp__github__get_pull_request_files,mcp__github__get_pull_request_review_comments,mcp__github__get_pull_request_reviews,mcp__github__get_pull_request_status,mcp__github__get_release_by_tag,mcp__github__get_secret_scanning_alert,mcp__github__get_tag,mcp__github__get_workflow_run,mcp__github__get_workflow_run_logs,mcp__gith
ub__get_workflow_run_usage,mcp__github__issue_read,mcp__github__list_branches,mcp__github__list_code_scanning_alerts,mcp__github__list_commits,mcp__github__list_dependabot_alerts,mcp__github__list_discussion_categories,mcp__github__list_discussions,mcp__github__list_issue_types,mcp__github__list_issues,mcp__github__list_label,mcp__github__list_notifications,mcp__github__list_pull_requests,mcp__github__list_releases,mcp__github__list_secret_scanning_alerts,mcp__github__list_starred_repositories,mcp__github__list_tags,mcp__github__list_workflow_jobs,mcp__github__list_workflow_run_artifacts,mcp__github__list_workflow_runs,mcp__github__list_workflows,mcp__github__pull_request_read,mcp__github__search_code,mcp__github__search_issues,mcp__github__search_orgs,mcp__github__search_pull_requests,mcp__github__search_repositories,mcp__github__search_users'\'' --debug-file /tmp/gh-aw/agent-stdio.log --verbose --permission-mode acceptEdits --output-format stream-json --mcp-config "${RUNNER_TEMP}/gh-aw/mcp-config/mcp-servers.json" --prompt-file /tmp/gh-aw/aw-prompts/prompt.txt${GH_AW_MODEL_DETECTION_CLAUDE:+ --model "$GH_AW_MODEL_DETECTION_CLAUDE"}' 2>&1 | tee "$awf_attempt_log" | tee -a /tmp/gh-aw/agent-stdio.log
573+
awf_exit=${PIPESTATUS[0]}
574+
set -e
575+
if [[ $awf_exit -eq 0 ]]; then
576+
rm -f "$awf_attempt_log"
577+
break
578+
fi
579+
if grep -Fq 'dependency failed to start: container awf-squid is unhealthy' "$awf_attempt_log" \
580+
&& [[ $awf_bootstrap_retry_attempt -lt $awf_bootstrap_retry_max ]]; then
581+
echo "[WARN] AWF bootstrap: awf-squid healthcheck failure on attempt $awf_bootstrap_retry_attempt/$awf_bootstrap_retry_max — retrying in 10s" | tee -a /tmp/gh-aw/agent-stdio.log
582+
rm -f "$awf_attempt_log"
583+
sleep 10
584+
continue
585+
fi
586+
if grep -Fq 'dependency failed to start: container awf-squid is unhealthy' "$awf_attempt_log"; then
587+
echo "[ERROR] AWF bootstrap: awf-squid healthcheck failure after all attempts — capturing squid container logs" | tee -a /tmp/gh-aw/agent-stdio.log
588+
docker logs awf-squid 2>&1 | tail -200 | sed 's/^/[awf-squid] /' | tee -a /tmp/gh-aw/agent-stdio.log || true
589+
fi
590+
rm -f "$awf_attempt_log"
591+
exit "$awf_exit"
592+
done
567593
env:
568594
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
569595
BASH_DEFAULT_TIMEOUT_MS: 60000

0 commit comments

Comments
 (0)