MatterAIOrg · code-crusher · Jul 2, 2026 · Jul 1, 2026 · Jul 1, 2026 · Jul 1, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.3.3] - 2026-06-30
+
 ### Added
 
 - **Linked repositories (`/link`).** A new `/link` slash command opens an
@@ -26,7 +28,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   generated `AGENTS.md` is written to the repo-level `.orb/` directory and now
   captures project structure, architecture, business-logic mapping, and code
   patterns/conventions — the context an agent needs to start coding without
-  re-exploring.
+  re-exploring. The cap is now ~150 lines (up from ~60) so it can cover all
+  four sections without being truncated.
 - **Repo-level agent data lives in `.orb/`.** The folder OrbCode creates in a
   project for `AGENTS.md` (and now `links.json`) is `.orb/` — a single,
   tool-neutral name shared by the IDE and the CLI. Machine settings are

diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@matterailab/orbcode",
-  "version": "0.3.3",
+  "version": "0.3.4",
   "description": "OrbCode CLI — agentic coding in your terminal, powered by Axon models by MatterAI",
   "type": "module",
   "bin": {

diff --git a/src/core/agent.ts b/src/core/agent.ts
@@ -25,6 +25,45 @@ import { renderLinkedReposSection } from "../config/links.js"
 
 const MAX_STEPS_PER_TURN = 50
 const RESULT_PREVIEW_LINES = 6
+/** How many times to automatically re-establish a model request that fails
+ *  before producing any output (transient/connection errors). */
+const MAX_STREAM_RETRIES = 3
+
+/** Transient failures worth auto-retrying: any transport/connection error (no
+ *  usable HTTP status — socket reset, DNS, timeout, TLS drop) plus 5xx/408/429
+ *  server responses. Real 4xx client errors (auth, bad request) are not retried. */
+function isRetryableStreamError(error: unknown): boolean {
+	const err = error as { status?: number; code?: number | string }
+	const status = Number(err?.status ?? err?.code)
+	if (Number.isFinite(status) && status !== 0) {
+		return status >= 500 || status === 408 || status === 429
+	}
+	return true
+}
+
+function retryBackoffMs(attempt: number): number {
+	return Math.min(500 * 2 ** attempt, 8000)
+}
+
+/** Sleep that settles early (rejecting with AbortError) if the signal fires, so
+ *  a user interrupt isn't stuck waiting out a retry backoff. */
+function interruptibleDelay(ms: number, signal: AbortSignal): Promise<void> {
+	return new Promise((resolve, reject) => {
+		if (signal.aborted) {
+			reject(new DOMException("aborted", "AbortError"))
+			return
+		}
+		const onAbort = () => {
+			clearTimeout(timer)
+			reject(new DOMException("aborted", "AbortError"))
+		}
+		const timer = setTimeout(() => {
+			signal.removeEventListener("abort", onAbort)
+			resolve()
+		}, ms)
+		signal.addEventListener("abort", onAbort, { once: true })
+	})
+}
 
 export interface AgentOptions {
 	cwd: string
@@ -479,7 +518,17 @@ User time zone: ${timeZone}, UTC${timeZoneOffsetStr}`
 				},
 			]
 			let summary = ""
-			for await (const chunk of this.client.createMessage(this.systemPrompt, request, [], signal)) {
+			for await (const chunk of this.streamWithRetry(
+				() => this.client.createMessage(this.systemPrompt, request, [], signal),
+				signal,
+				() => {
+					// Compaction only streams text (committed once at the end), so a
+					// mid-stream retry just discards the partial summary.
+					summary = ""
+					onEvent({ type: "stream-reset" })
+					return true
+				},
+			)) {
 				if (signal.aborted) throw new DOMException("aborted", "AbortError")
 				if (chunk.type === "text") {
 					summary += chunk.text
@@ -520,30 +569,109 @@ User time zone: ${timeZone}, UTC${timeZoneOffsetStr}`
 		}
 	}
 
+	/**
+	 * Consume a model stream, automatically re-establishing the request up to
+	 * MAX_STREAM_RETRIES times on a transient/connection failure. A user abort is
+	 * never retried.
+	 *
+	 * Before the first chunk of an attempt nothing has streamed, so the retry is
+	 * always clean. Once chunks have streamed, retrying would duplicate on-screen
+	 * output — so we only retry mid-stream when the caller supplies `onRestart` and
+	 * it returns true, meaning it rolled the partial output back (cleared buffers,
+	 * reset accumulators). If it can't (e.g. a row was already committed), the error
+	 * propagates.
+	 */
+	private async *streamWithRetry(
+		makeStream: () => ReturnType<LLMClient["createMessage"]>,
+		signal: AbortSignal,
+		onRestart?: () => boolean,
+	): ReturnType<LLMClient["createMessage"]> {
+		for (let attempt = 0; ; attempt++) {
+			let produced = false
+			try {
+				for await (const chunk of makeStream()) {
+					produced = true
+					yield chunk
+				}
+				return
+			} catch (error) {
+				if (signal.aborted || (error as Error).name === "AbortError") throw error
+				if (attempt >= MAX_STREAM_RETRIES || !isRetryableStreamError(error)) throw error
+				// Output already streamed this attempt: only retry if the caller can
+				// cleanly roll it back, otherwise a restart would duplicate it.
+				if (produced && !(onRestart?.() ?? false)) throw error
+				const delayMs = retryBackoffMs(attempt)
+				this.options.callbacks.onEvent({
+					type: "system",
+					message: `Connection to the model failed (${(error as Error).message}). Retrying ${attempt + 1}/${MAX_STREAM_RETRIES} in ${Math.ceil(delayMs / 1000)}s…`,
+					isError: false,
+				})
+				await interruptibleDelay(delayMs, signal)
+			}
+		}
+	}
+
 	/** Run one model request + tool execution round. Returns true when the turn is over. */
 	private async runStep(): Promise<boolean> {
 		const { onEvent } = this.options.callbacks
 		const signal = this.abortController!.signal
 
 		let assistantText = ""
-		let hadReasoning = false
+		// A reasoning segment is "open" from its first delta until visible content
+		// (text or a tool call) begins. We emit reasoning-done at that transition so
+		// "Thought for Ns" reflects only the thinking time — not the answer that
+		// follows — and the live "Thinking" block stops before the answer streams.
+		// A fresh segment can re-open if the model interleaves reasoning with content.
+		let reasoningOpen = false
 		let reasoningStart = 0
 		let reasoningDetails: unknown
+		// Once a reasoning-done row is committed to the transcript we can't roll it
+		// back, so a mid-stream retry after that point isn't clean.
+		let reasoningRowCommitted = false
+		const finalizeReasoning = () => {
+			if (reasoningOpen) {
+				reasoningOpen = false
+				reasoningRowCommitted = true
+				onEvent({ type: "reasoning-done", durationMs: Date.now() - reasoningStart })
+			}
+		}
 		const toolCallsByIndex = new Map<number, PendingToolCall>()
 		let nextSyntheticIndex = 10000
 
-		const stream = this.client.createMessage(this.systemPrompt, this.outgoingMessages(), getActiveTools(this.mcp), signal)
+		// Roll back this step's partial output so streamWithRetry can restart a
+		// dropped stream mid-flight. Tools only run after the stream completes, so
+		// nothing irreversible has happened yet; the one thing we can't undo is an
+		// already-committed reasoning row, so we decline the restart in that case.
+		const rollbackForRetry = (): boolean => {
+			if (reasoningRowCommitted) return false
+			assistantText = ""
+			reasoningOpen = false
+			reasoningStart = 0
+			reasoningDetails = undefined
+			toolCallsByIndex.clear()
+			nextSyntheticIndex = 10000
+			onEvent({ type: "stream-reset" })
+			return true
+		}
+
+		const stream = this.streamWithRetry(
+			() => this.client.createMessage(this.systemPrompt, this.outgoingMessages(), getActiveTools(this.mcp), signal),
+			signal,
+			rollbackForRetry,
+		)
 
 		for await (const chunk of stream) {
 			if (signal.aborted) throw new DOMException("aborted", "AbortError")
 			switch (chunk.type) {
 				case "text":
+					// Visible content begins — the reasoning phase (if any) is over.
+					finalizeReasoning()
 					assistantText += chunk.text
 					onEvent({ type: "text-delta", text: chunk.text })
 					break
 				case "reasoning":
-					if (!hadReasoning) {
-						hadReasoning = true
+					if (!reasoningOpen) {
+						reasoningOpen = true
 						reasoningStart = Date.now()
 					}
 					onEvent({ type: "reasoning-delta", text: chunk.text })
@@ -553,6 +681,8 @@ User time zone: ${timeZone}, UTC${timeZoneOffsetStr}`
 					reasoningDetails = chunk.details
 					break
 				case "native_tool_calls":
+					// A tool call also ends the reasoning phase.
+					finalizeReasoning()
 					for (const tc of chunk.toolCalls) {
 						const index = tc.index ?? nextSyntheticIndex++
 						let pending = toolCallsByIndex.get(index)
@@ -579,9 +709,8 @@ User time zone: ${timeZone}, UTC${timeZoneOffsetStr}`
 			}
 		}
 
-		if (hadReasoning) {
-			onEvent({ type: "reasoning-done", durationMs: Date.now() - reasoningStart })
-		}
+		// A reasoning-only turn (no following text/tool content) still needs closing.
+		finalizeReasoning()
 		if (assistantText) {
 			onEvent({ type: "text-done" })
 		}

diff --git a/src/core/events.ts b/src/core/events.ts
@@ -5,6 +5,8 @@ export type AgentEvent =
 	| { type: "reasoning-done"; durationMs: number }
 	| { type: "text-delta"; text: string }
 	| { type: "text-done" }
+	/** discard the in-flight streaming buffers before an auto-retry re-streams */
+	| { type: "stream-reset" }
 	| { type: "tool-start"; id: string; name: string; summary: string }
 	| {
 			type: "tool-end"