Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [0.3.3] - 2026-06-30

### Added

- **Linked repositories (`/link`).** A new `/link` slash command opens an
Expand All @@ -26,7 +28,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
generated `AGENTS.md` is written to the repo-level `.orb/` directory and now
captures project structure, architecture, business-logic mapping, and code
patterns/conventions — the context an agent needs to start coding without
re-exploring.
re-exploring. The cap is now ~150 lines (up from ~60) so it can cover all
four sections without being truncated.
- **Repo-level agent data lives in `.orb/`.** The folder OrbCode creates in a
project for `AGENTS.md` (and now `links.json`) is `.orb/` — a single,
tool-neutral name shared by the IDE and the CLI. Machine settings are
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@matterailab/orbcode",
"version": "0.3.3",
"version": "0.3.4",
"description": "OrbCode CLI — agentic coding in your terminal, powered by Axon models by MatterAI",
"type": "module",
"bin": {
Expand Down
145 changes: 137 additions & 8 deletions src/core/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,45 @@ import { renderLinkedReposSection } from "../config/links.js"

const MAX_STEPS_PER_TURN = 50
const RESULT_PREVIEW_LINES = 6
/** How many times to automatically re-establish a model request that fails
* before producing any output (transient/connection errors). */
const MAX_STREAM_RETRIES = 3

/** Transient failures worth auto-retrying: any transport/connection error (no
* usable HTTP status — socket reset, DNS, timeout, TLS drop) plus 5xx/408/429
* server responses. Real 4xx client errors (auth, bad request) are not retried. */
function isRetryableStreamError(error: unknown): boolean {
const err = error as { status?: number; code?: number | string }
const status = Number(err?.status ?? err?.code)
if (Number.isFinite(status) && status !== 0) {
return status >= 500 || status === 408 || status === 429
}
return true
}

function retryBackoffMs(attempt: number): number {
return Math.min(500 * 2 ** attempt, 8000)
}

/** Sleep that settles early (rejecting with AbortError) if the signal fires, so
* a user interrupt isn't stuck waiting out a retry backoff. */
function interruptibleDelay(ms: number, signal: AbortSignal): Promise<void> {
return new Promise((resolve, reject) => {
if (signal.aborted) {
reject(new DOMException("aborted", "AbortError"))
return
}
const onAbort = () => {
clearTimeout(timer)
reject(new DOMException("aborted", "AbortError"))
}
const timer = setTimeout(() => {
signal.removeEventListener("abort", onAbort)
resolve()
}, ms)
signal.addEventListener("abort", onAbort, { once: true })
})
}

export interface AgentOptions {
cwd: string
Expand Down Expand Up @@ -479,7 +518,17 @@ User time zone: ${timeZone}, UTC${timeZoneOffsetStr}`
},
]
let summary = ""
for await (const chunk of this.client.createMessage(this.systemPrompt, request, [], signal)) {
for await (const chunk of this.streamWithRetry(
() => this.client.createMessage(this.systemPrompt, request, [], signal),
signal,
() => {
// Compaction only streams text (committed once at the end), so a
// mid-stream retry just discards the partial summary.
summary = ""
onEvent({ type: "stream-reset" })
return true
},
)) {
if (signal.aborted) throw new DOMException("aborted", "AbortError")
if (chunk.type === "text") {
summary += chunk.text
Expand Down Expand Up @@ -520,30 +569,109 @@ User time zone: ${timeZone}, UTC${timeZoneOffsetStr}`
}
}

/**
* Consume a model stream, automatically re-establishing the request up to
* MAX_STREAM_RETRIES times on a transient/connection failure. A user abort is
* never retried.
*
* Before the first chunk of an attempt nothing has streamed, so the retry is
* always clean. Once chunks have streamed, retrying would duplicate on-screen
* output — so we only retry mid-stream when the caller supplies `onRestart` and
* it returns true, meaning it rolled the partial output back (cleared buffers,
* reset accumulators). If it can't (e.g. a row was already committed), the error
* propagates.
*/
private async *streamWithRetry(
makeStream: () => ReturnType<LLMClient["createMessage"]>,
signal: AbortSignal,
onRestart?: () => boolean,
): ReturnType<LLMClient["createMessage"]> {
for (let attempt = 0; ; attempt++) {
let produced = false
try {
for await (const chunk of makeStream()) {
produced = true
yield chunk
}
return
} catch (error) {
if (signal.aborted || (error as Error).name === "AbortError") throw error
if (attempt >= MAX_STREAM_RETRIES || !isRetryableStreamError(error)) throw error
// Output already streamed this attempt: only retry if the caller can
// cleanly roll it back, otherwise a restart would duplicate it.
if (produced && !(onRestart?.() ?? false)) throw error
const delayMs = retryBackoffMs(attempt)
this.options.callbacks.onEvent({
type: "system",
message: `Connection to the model failed (${(error as Error).message}). Retrying ${attempt + 1}/${MAX_STREAM_RETRIES} in ${Math.ceil(delayMs / 1000)}s…`,
isError: false,
})
await interruptibleDelay(delayMs, signal)
}
}
}

/** Run one model request + tool execution round. Returns true when the turn is over. */
private async runStep(): Promise<boolean> {
const { onEvent } = this.options.callbacks
const signal = this.abortController!.signal

let assistantText = ""
let hadReasoning = false
// A reasoning segment is "open" from its first delta until visible content
// (text or a tool call) begins. We emit reasoning-done at that transition so
// "Thought for Ns" reflects only the thinking time — not the answer that
// follows — and the live "Thinking" block stops before the answer streams.
// A fresh segment can re-open if the model interleaves reasoning with content.
let reasoningOpen = false
let reasoningStart = 0
let reasoningDetails: unknown
// Once a reasoning-done row is committed to the transcript we can't roll it
// back, so a mid-stream retry after that point isn't clean.
let reasoningRowCommitted = false
const finalizeReasoning = () => {
if (reasoningOpen) {
reasoningOpen = false
reasoningRowCommitted = true
onEvent({ type: "reasoning-done", durationMs: Date.now() - reasoningStart })
}
}
const toolCallsByIndex = new Map<number, PendingToolCall>()
let nextSyntheticIndex = 10000

const stream = this.client.createMessage(this.systemPrompt, this.outgoingMessages(), getActiveTools(this.mcp), signal)
// Roll back this step's partial output so streamWithRetry can restart a
// dropped stream mid-flight. Tools only run after the stream completes, so
// nothing irreversible has happened yet; the one thing we can't undo is an
// already-committed reasoning row, so we decline the restart in that case.
const rollbackForRetry = (): boolean => {
if (reasoningRowCommitted) return false
assistantText = ""
reasoningOpen = false
reasoningStart = 0
reasoningDetails = undefined
toolCallsByIndex.clear()
nextSyntheticIndex = 10000
onEvent({ type: "stream-reset" })
return true
}

const stream = this.streamWithRetry(
() => this.client.createMessage(this.systemPrompt, this.outgoingMessages(), getActiveTools(this.mcp), signal),
signal,
rollbackForRetry,
)

for await (const chunk of stream) {
if (signal.aborted) throw new DOMException("aborted", "AbortError")
switch (chunk.type) {
case "text":
// Visible content begins — the reasoning phase (if any) is over.
finalizeReasoning()
assistantText += chunk.text
onEvent({ type: "text-delta", text: chunk.text })
break
case "reasoning":
if (!hadReasoning) {
hadReasoning = true
if (!reasoningOpen) {
reasoningOpen = true
reasoningStart = Date.now()
}
onEvent({ type: "reasoning-delta", text: chunk.text })
Expand All @@ -553,6 +681,8 @@ User time zone: ${timeZone}, UTC${timeZoneOffsetStr}`
reasoningDetails = chunk.details
break
case "native_tool_calls":
// A tool call also ends the reasoning phase.
finalizeReasoning()
for (const tc of chunk.toolCalls) {
const index = tc.index ?? nextSyntheticIndex++
let pending = toolCallsByIndex.get(index)
Expand All @@ -579,9 +709,8 @@ User time zone: ${timeZone}, UTC${timeZoneOffsetStr}`
}
}

if (hadReasoning) {
onEvent({ type: "reasoning-done", durationMs: Date.now() - reasoningStart })
}
// A reasoning-only turn (no following text/tool content) still needs closing.
finalizeReasoning()
if (assistantText) {
onEvent({ type: "text-done" })
}
Expand Down
2 changes: 2 additions & 0 deletions src/core/events.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ export type AgentEvent =
| { type: "reasoning-done"; durationMs: number }
| { type: "text-delta"; text: string }
| { type: "text-done" }
/** discard the in-flight streaming buffers before an auto-retry re-streams */
| { type: "stream-reset" }
| { type: "tool-start"; id: string; name: string; summary: string }
| {
type: "tool-end"
Expand Down
Loading
Loading