Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 5 additions & 37 deletions .sandcastle/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ import { SandboxRunner, SANDBOX_LABEL, PROJECT_LABEL_KEY, deriveProject } from "
import { ReviewerAdapter } from "./reviewer-adapter.ts";
import { readMemoryContext, writeMemoryEntry } from "./memory-store.ts";
import { parseMemoryTags, buildOutcomeEntry, type OutcomeAttempt } from "./memory.ts";
import { withRetry } from "./retry.ts";
export { withRetry };

export { SANDBOX_LABEL };

Expand Down Expand Up @@ -190,43 +192,9 @@ export function parseConcurrency(): number {
return Math.max(1, Number(process.env.AGENTIC_CONCURRENCY ?? "1") || 1);
}

/**
 * Retry `fn` with exponential backoff until it succeeds or the attempt budget
 * is spent.
 *
 * The delay before retry number `k` (1-based) is `baseDelayMs * 2 ** (k - 1)`;
 * there is no delay after the final failed attempt.
 *
 * @param fn - Async operation to run; it is invoked once per attempt.
 * @param opts - Optional tuning knobs.
 * @param opts.maxAttempts - Total number of attempts (default 4). Values that
 *   are NaN or below 1 are treated as 1 and fractional values are rounded
 *   down, so `fn` is always tried at least once.
 * @param opts.baseDelayMs - Delay before the first retry, in ms (default 2000).
 * @param opts.label - Name used in the warning logged before each retry.
 * @param opts.sleep - Injectable delay function so unit tests need no real
 *   timers.
 * @returns The value `fn` resolves with.
 * @throws The error from the last failed attempt once all attempts are used.
 */
export async function withRetry<T>(
  fn: () => Promise<T>,
  opts: {
    maxAttempts?: number;
    baseDelayMs?: number;
    label?: string;
    sleep?: (ms: number) => Promise<void>;
  } = {},
): Promise<T> {
  const {
    maxAttempts = 4,
    baseDelayMs = 2_000,
    label = "operation",
    sleep = (ms) => new Promise<void>((r) => setTimeout(r, ms)),
  } = opts;
  // Normalise the budget to an integer >= 1. Without this, 0 / negative / NaN
  // would skip the loop entirely and `throw undefined` without ever calling
  // `fn`, and a fractional value would trigger a useless sleep after the last
  // attempt. Infinity is preserved (Math.floor(Infinity) === Infinity).
  const attempts = Number.isNaN(maxAttempts) ? 1 : Math.max(1, Math.floor(maxAttempts));
  let lastErr: unknown;
  for (let attempt = 1; attempt <= attempts; attempt++) {
    try {
      return await fn();
    } catch (err) {
      lastErr = err;
      // Only back off when another attempt will actually follow.
      if (attempt < attempts) {
        const delay = baseDelayMs * 2 ** (attempt - 1);
        console.warn(
          `[retry] ${label} failed (attempt ${attempt}/${attempts}), retrying in ${delay}ms`,
        );
        await sleep(delay);
      }
    }
  }
  throw lastErr;
}
// withRetry now lives in retry.ts so that sandbox-runner.ts can share it.
// main.ts imports sandbox-runner.ts, so defining withRetry in main.ts would
// create a circular import. It is imported and re-exported at the top of this
// file for existing consumers.

// Function type: takes a `file` string and an `args` string array and resolves
// to an object whose `stdout` is a string or a Buffer.
// NOTE(review): presumably an injectable execFile-style process runner — confirm
// against the functions that accept it.
type ShellExec = (file: string, args: string[]) => Promise<{ stdout: string | Buffer }>;

Expand Down
2 changes: 1 addition & 1 deletion .sandcastle/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"postinstall": "patch-package",
"start": "tsx main.ts",
"typecheck": "tsc --noEmit",
"test": "node --import tsx --test reduce.test.ts sandbox-runner.test.ts reviewer-adapter.test.ts memory.test.ts memory-store.test.ts run-sh.test.ts init-sh.test.ts up-sh.test.ts afk-cmd.test.ts context-compressor.test.ts",
"test": "node --import tsx --test reduce.test.ts sandbox-runner.test.ts reviewer-adapter.test.ts memory.test.ts memory-store.test.ts run-sh.test.ts init-sh.test.ts up-sh.test.ts afk-cmd.test.ts context-compressor.test.ts retry.test.ts",
"test:integration": "SANDCASTLE_INTEGRATION=1 node --import tsx --test integration.test.ts"
},
"devDependencies": {
Expand Down
74 changes: 1 addition & 73 deletions .sandcastle/reduce.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import { test } from "node:test";
import assert from "node:assert/strict";
import { reduce, READY_LABEL, IN_PROGRESS_LABEL, IN_REVIEW_LABEL, reviewVerdict, reconcileFromLabels, type State, type CiStatus, type Pr, type ReviewOutput } from "./reduce.ts";
import { parseBlockedBy, formatReviewAsComment } from "./issue-source.ts";
import { sweepOrphanedSandboxes, ensureSandboxNetwork, parseConcurrency, withRetry, resetAgentBranch, refreshBase, validateSignature, classifyDelivery, parseSmeeEvent, parseOrchEnv, resolveCredentials, resolveRunMode, resolveDockerHost } from "./main.ts";
import { sweepOrphanedSandboxes, ensureSandboxNetwork, parseConcurrency, resetAgentBranch, refreshBase, validateSignature, classifyDelivery, parseSmeeEvent, parseOrchEnv, resolveCredentials, resolveRunMode, resolveDockerHost } from "./main.ts";
import { createHmac } from "node:crypto";
import { SANDBOX_LABEL, PROJECT_LABEL_KEY, deriveProject } from "./sandbox-runner.ts";

Expand Down Expand Up @@ -638,78 +638,6 @@ test("parseConcurrency: invalid value falls back to 1", () => {
}
});

// ─── withRetry ───────────────────────────────────────────────────────────────

test("withRetry: resolves immediately when fn succeeds on first attempt", async () => {
  // Record requested delays instead of waiting on real timers.
  const recordedDelays: number[] = [];
  const value = await withRetry(async () => 42, {
    sleep: async (ms) => {
      recordedDelays.push(ms);
    },
  });
  assert.equal(value, 42);
  assert.deepEqual(recordedDelays, [], "no sleep when fn succeeds first time");
});

test("withRetry: retries on failure and resolves when fn eventually succeeds", async () => {
  let invocations = 0;
  const recordedDelays: number[] = [];
  // Rejects on the first two calls, resolves on the third.
  const flaky = async (): Promise<string> => {
    invocations += 1;
    if (invocations >= 3) {
      return "ok";
    }
    throw new Error("transient");
  };
  const value = await withRetry(flaky, {
    maxAttempts: 4,
    baseDelayMs: 100,
    sleep: async (ms) => {
      recordedDelays.push(ms);
    },
  });
  assert.equal(value, "ok");
  assert.equal(invocations, 3, "fn called exactly 3 times");
  assert.equal(recordedDelays.length, 2, "slept between each failed attempt");
});

test("withRetry: throws last error after maxAttempts exhausted", async () => {
  let invocations = 0;
  const failure = new Error("persistent failure");
  // Always rejects with the very same error object so identity can be checked.
  const alwaysFails = async (): Promise<never> => {
    invocations += 1;
    throw failure;
  };
  const retryOptions = {
    maxAttempts: 3,
    baseDelayMs: 10,
    sleep: async () => {},
  };
  await assert.rejects(
    () => withRetry(alwaysFails, retryOptions),
    (caught: unknown) => caught === failure,
  );
  assert.equal(invocations, 3, "fn tried exactly maxAttempts times");
});

test("withRetry: sleeps with exponential backoff between attempts", async () => {
  const recordedDelays: number[] = [];
  const alwaysFails = async (): Promise<never> => {
    throw new Error("fail");
  };
  await assert.rejects(() =>
    withRetry(alwaysFails, {
      maxAttempts: 4,
      baseDelayMs: 50,
      sleep: async (ms) => {
        recordedDelays.push(ms);
      },
    }),
  );
  assert.deepEqual(recordedDelays, [50, 100, 200], "delays double each retry (2^0, 2^1, 2^2 * baseDelayMs)");
});

test("withRetry: does not sleep after the final failed attempt", async () => {
  const recordedDelays: number[] = [];
  const alwaysFails = async (): Promise<never> => {
    throw new Error("fail");
  };
  await assert.rejects(() =>
    withRetry(alwaysFails, {
      maxAttempts: 2,
      baseDelayMs: 100,
      sleep: async (ms) => {
        recordedDelays.push(ms);
      },
    }),
  );
  assert.equal(recordedDelays.length, 1, "only sleeps between attempts, not after the last failure");
});

// ─── Conflicting PR (issue #23) ──────────────────────────────────────────────

test("conflicting PR does not keep loop alive — Stop emitted when nothing else pending", () => {
Expand Down
108 changes: 108 additions & 0 deletions .sandcastle/retry.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
/**
* Unit tests for withRetry — the generic backoff-retry wrapper used both by
* main.ts (transient gh/GitHub API failures) and sandbox-runner.ts (transient
* agent-turn failures, gated by isTransientAgentError).
*
* Run: npm test (runs the test files listed explicitly in the package.json "test" script, including this one)
*/
import { test } from "node:test";
import assert from "node:assert/strict";
import { withRetry } from "./retry.ts";

test("withRetry: resolves immediately when fn succeeds on first attempt", async () => {
  // Capture every delay the wrapper asks for; nothing actually waits.
  const delaysSeen: number[] = [];
  const outcome = await withRetry(async () => 42, {
    sleep: async (ms) => {
      delaysSeen.push(ms);
    },
  });
  assert.equal(outcome, 42);
  assert.deepEqual(delaysSeen, [], "no sleep when fn succeeds first time");
});

test("withRetry: retries on failure and resolves when fn eventually succeeds", async () => {
  let attemptsMade = 0;
  const delaysSeen: number[] = [];
  // First two calls reject, the third resolves.
  const succeedsOnThirdCall = async (): Promise<string> => {
    attemptsMade += 1;
    if (attemptsMade >= 3) {
      return "ok";
    }
    throw new Error("transient");
  };
  const outcome = await withRetry(succeedsOnThirdCall, {
    maxAttempts: 4,
    baseDelayMs: 100,
    sleep: async (ms) => {
      delaysSeen.push(ms);
    },
  });
  assert.equal(outcome, "ok");
  assert.equal(attemptsMade, 3, "fn called exactly 3 times");
  assert.equal(delaysSeen.length, 2, "slept between each failed attempt");
});

test("withRetry: throws last error after maxAttempts exhausted", async () => {
  let attemptsMade = 0;
  const persistent = new Error("persistent failure");
  // Reuse one error instance so the rejection can be compared by identity.
  const neverSucceeds = async (): Promise<never> => {
    attemptsMade += 1;
    throw persistent;
  };
  const settings = {
    maxAttempts: 3,
    baseDelayMs: 10,
    sleep: async () => {},
  };
  await assert.rejects(
    () => withRetry(neverSucceeds, settings),
    (caught: unknown) => caught === persistent,
  );
  assert.equal(attemptsMade, 3, "fn tried exactly maxAttempts times");
});

test("withRetry: sleeps with exponential backoff between attempts", async () => {
  const delaysSeen: number[] = [];
  const neverSucceeds = async (): Promise<never> => {
    throw new Error("fail");
  };
  await assert.rejects(() =>
    withRetry(neverSucceeds, {
      maxAttempts: 4,
      baseDelayMs: 50,
      sleep: async (ms) => {
        delaysSeen.push(ms);
      },
    }),
  );
  assert.deepEqual(delaysSeen, [50, 100, 200], "delays double each retry (2^0, 2^1, 2^2 * baseDelayMs)");
});

test("withRetry: does not sleep after the final failed attempt", async () => {
  const delaysSeen: number[] = [];
  const neverSucceeds = async (): Promise<never> => {
    throw new Error("fail");
  };
  await assert.rejects(() =>
    withRetry(neverSucceeds, {
      maxAttempts: 2,
      baseDelayMs: 100,
      sleep: async (ms) => {
        delaysSeen.push(ms);
      },
    }),
  );
  assert.equal(delaysSeen.length, 1, "only sleeps between attempts, not after the last failure");
});

// ─── shouldRetry predicate ───────────────────────────────────────────────────

test("withRetry: shouldRetry=false throws immediately without further attempts", async () => {
  let attemptsMade = 0;
  const fatal = new Error("not worth retrying");
  // Always rejects with the same error instance; the predicate vetoes any retry.
  const neverSucceeds = async (): Promise<never> => {
    attemptsMade += 1;
    throw fatal;
  };
  const settings = {
    maxAttempts: 4,
    baseDelayMs: 10,
    sleep: async () => {},
    shouldRetry: () => false,
  };
  await assert.rejects(
    () => withRetry(neverSucceeds, settings),
    (caught: unknown) => caught === fatal,
  );
  assert.equal(attemptsMade, 1, "fn called exactly once — no retries when shouldRetry is false");
});

test("withRetry: shouldRetry defaults to true (retries on any error) when omitted", async () => {
  let attemptsMade = 0;
  // Rejects once, then resolves; no shouldRetry is supplied.
  const succeedsOnSecondCall = async (): Promise<string> => {
    attemptsMade += 1;
    if (attemptsMade < 2) {
      throw new Error("x");
    }
    return "ok";
  };
  const outcome = await withRetry(succeedsOnSecondCall, {
    maxAttempts: 2,
    baseDelayMs: 10,
    sleep: async () => {},
  });
  assert.equal(outcome, "ok");
  assert.equal(attemptsMade, 2);
});
45 changes: 45 additions & 0 deletions .sandcastle/retry.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/**
 * Retry `fn` with exponential backoff until it succeeds, the attempt budget is
 * spent, or `shouldRetry` rejects the error.
 *
 * The delay before retry number `k` (1-based) is `baseDelayMs * 2 ** (k - 1)`;
 * there is no delay after the final failed attempt.
 *
 * @param fn - Async operation to run; it is invoked once per attempt.
 * @param opts - Optional tuning knobs.
 * @param opts.maxAttempts - Total number of attempts (default 4). Values that
 *   are NaN or below 1 are treated as 1 and fractional values are rounded
 *   down, so `fn` is always tried at least once.
 * @param opts.baseDelayMs - Delay before the first retry, in ms (default 2000).
 * @param opts.label - Name used in the warning logged before each retry.
 * @param opts.sleep - Injectable delay function so unit tests need no real
 *   network or timers.
 * @param opts.shouldRetry - Predicate consulted after every failure; returning
 *   false rethrows that error immediately with no further attempts (e.g. a
 *   session-limit or auth failure that retrying won't fix, vs. a transient
 *   network blip). Defaults to retrying on any error.
 * @returns The value `fn` resolves with.
 * @throws The error `shouldRetry` declined, or the error from the last failed
 *   attempt once all attempts are used.
 */
export async function withRetry<T>(
  fn: () => Promise<T>,
  opts: {
    maxAttempts?: number;
    baseDelayMs?: number;
    label?: string;
    sleep?: (ms: number) => Promise<void>;
    shouldRetry?: (err: unknown) => boolean;
  } = {},
): Promise<T> {
  const {
    maxAttempts = 4,
    baseDelayMs = 2_000,
    label = "operation",
    sleep = (ms) => new Promise<void>((r) => setTimeout(r, ms)),
    shouldRetry = () => true,
  } = opts;
  // Normalise the budget to an integer >= 1. Without this, 0 / negative / NaN
  // would skip the loop entirely and `throw undefined` without ever calling
  // `fn`, and a fractional value would trigger a useless sleep after the last
  // attempt. Infinity is preserved (Math.floor(Infinity) === Infinity).
  const attempts = Number.isNaN(maxAttempts) ? 1 : Math.max(1, Math.floor(maxAttempts));
  let lastErr: unknown;
  for (let attempt = 1; attempt <= attempts; attempt++) {
    try {
      return await fn();
    } catch (err) {
      lastErr = err;
      // Non-retryable errors surface immediately, regardless of remaining budget.
      if (!shouldRetry(err)) {
        throw err;
      }
      // Only back off when another attempt will actually follow.
      if (attempt < attempts) {
        const delay = baseDelayMs * 2 ** (attempt - 1);
        console.warn(
          `[retry] ${label} failed (attempt ${attempt}/${attempts}), retrying in ${delay}ms`,
        );
        await sleep(delay);
      }
    }
  }
  throw lastErr;
}
30 changes: 29 additions & 1 deletion .sandcastle/sandbox-runner.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
*/
import { test } from "node:test";
import assert from "node:assert/strict";
import { buildAgentInput } from "./sandbox-runner.ts";
import { buildAgentInput, isTransientAgentError } from "./sandbox-runner.ts";

const STUB_ISSUE = { number: 42, title: "Fix the bug", body: "Detailed description" };

Expand Down Expand Up @@ -161,3 +161,31 @@ test("local tier is never proxy-routed regardless of HEADROOM_MODE", () => {
else process.env.HEADROOM_MODE = orig;
}
});

// ─── isTransientAgentError ───────────────────────────────────────────────────

test("isTransientAgentError: true for a network/server disconnect", () => {
  const disconnect = new Error("API Error: Server disconnected");
  assert.equal(isTransientAgentError(disconnect), true);
});

test("isTransientAgentError: true for common connection-reset style messages", () => {
  // Each message is checked in turn; the first one that fails aborts the test.
  const transientMessages = [
    "read ECONNRESET",
    "connect ETIMEDOUT 1.2.3.4:443",
    "socket hang up",
    "fetch failed",
  ];
  for (const message of transientMessages) {
    assert.equal(isTransientAgentError(new Error(message)), true);
  }
});

test("isTransientAgentError: false for a session-limit failure — retrying now can't help", () => {
  const sessionLimit = new Error("You've hit your session limit · resets 12:20pm (UTC)");
  assert.equal(isTransientAgentError(sessionLimit), false);
});

test("isTransientAgentError: false for an unrelated/logic error", () => {
  const unrelated = new Error("claude-code exited with code 1: some other reason");
  assert.equal(isTransientAgentError(unrelated), false);
});

test("isTransientAgentError: false for a non-Error thrown value", () => {
  const notAnError = "just a string";
  assert.equal(isTransientAgentError(notAnError), false);
});
Loading
Loading