From 8872d179c6614a6c6a7d4938c36e0651a98170d3 Mon Sep 17 00:00:00 2001
From: "matterai-app[bot]" <matterai-app[bot]@users.noreply.github.com>
Date: Wed, 1 Jul 2026 17:17:20 +0530
Subject: [PATCH 1/5] chore(release): bump version to 0.3.4 and update
 changelog

Bump @matterailab/orbcode from 0.3.3 to 0.3.4 in package.json.

In CHANGELOG.md, cut the previously unreleased entries into a dated
"[0.3.3] - 2026-06-30" section and fold the AGENTS.md context cap bump
(~60 -> ~150 lines, so it can cover project structure, architecture,
business-logic mapping, and code patterns/conventions without
truncation) into the 0.3.3 entry it shipped under.
---
 CHANGELOG.md | 5 ++++-
 package.json | 2 +-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 11f8bb7..1493611 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.3.3] - 2026-06-30
+
 ### Added
 
 - **Linked repositories (`/link`).** A new `/link` slash command opens an
@@ -26,7 +28,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   generated `AGENTS.md` is written to the repo-level `.orb/` directory and now
   captures project structure, architecture, business-logic mapping, and code
   patterns/conventions — the context an agent needs to start coding without
-  re-exploring.
+  re-exploring. The cap is now ~150 lines (up from ~60) so it can cover all
+  four sections without being truncated.
 - **Repo-level agent data lives in `.orb/`.** The folder OrbCode creates in a
   project for `AGENTS.md` (and now `links.json`) is `.orb/` — a single,
   tool-neutral name shared by the IDE and the CLI. Machine settings are
diff --git a/package.json b/package.json
index 59335d7..a6c34f6 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@matterailab/orbcode",
-  "version": "0.3.3",
+  "version": "0.3.4",
   "description": "OrbCode CLI — agentic coding in your terminal, powered by Axon models by MatterAI",
   "type": "module",
   "bin": {

From 8750b22b6759f7fa667f816b66342000340eed01 Mon Sep 17 00:00:00 2001
From: "matterai-app[bot]" <matterai-app[bot]@users.noreply.github.com>
Date: Wed, 1 Jul 2026 17:17:33 +0530
Subject: [PATCH 2/5] fix(agent): auto-retry transient model stream failures
 and close reasoning phase on first content
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two related correctness/resilience fixes in the agent's per-turn
streaming pipeline:

1. Transient stream failures are now retried automatically. A request
   that fails before its first chunk is retried up to 3 times when the
   failure is a transport error with no usable HTTP status (DNS,
   socket reset, timeout, TLS drop) or a 5xx, 408, or 429 response.
   Other 4xx client errors are not retried. The backoff is exponential,
   starting at 500ms and capped at 8s. Retries only apply
   before any output is produced — once chunks have streamed we
   can't safely retry without duplicating on-screen content, so the
   error propagates. A user abort is never retried, and the backoff
   delay is interruptible so Ctrl+C doesn't get stuck waiting it out.
   A 'Connection to the model failed (...). Retrying n/3 in Ns…' line
   is emitted via the system event channel so the user sees progress.

2. The 'Thought for Ns' timer now reflects only the thinking phase.
   Previously, a single boolean 'hadReasoning' flag was set on the
   first reasoning delta and only checked after the stream ended,
   so a reasoning segment followed by text would report the entire
   reasoning+answer span as thinking time. Reasoning is now modeled
   as an open/close segment: it opens on the first reasoning delta
   and closes on the first text delta, tool call, or stream end —
   matching the on-screen 'Thinking' block behavior and supporting
   interleaved reasoning/content correctly.
---
 src/core/agent.ts | 111 ++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 103 insertions(+), 8 deletions(-)

diff --git a/src/core/agent.ts b/src/core/agent.ts
index 4953356..da45860 100644
--- a/src/core/agent.ts
+++ b/src/core/agent.ts
@@ -25,6 +25,45 @@ import { renderLinkedReposSection } from "../config/links.js"
 
 const MAX_STEPS_PER_TURN = 50
 const RESULT_PREVIEW_LINES = 6
+/** How many times to automatically re-establish a model request that fails
+ *  before producing any output (transient/connection errors). */
+const MAX_STREAM_RETRIES = 3
+
+/** Transient failures worth auto-retrying: any transport/connection error (no
+ *  usable HTTP status — socket reset, DNS, timeout, TLS drop) plus 5xx/408/429
+ *  server responses. Real 4xx client errors (auth, bad request) are not retried. */
+function isRetryableStreamError(error: unknown): boolean {
+	const err = error as { status?: number; code?: number | string }
+	const status = Number(err?.status ?? err?.code)
+	if (Number.isFinite(status) && status !== 0) {
+		return status >= 500 || status === 408 || status === 429
+	}
+	return true
+}
+
+function retryBackoffMs(attempt: number): number {
+	return Math.min(500 * 2 ** attempt, 8000)
+}
+
+/** Sleep that settles early (rejecting with AbortError) if the signal fires, so
+ *  a user interrupt isn't stuck waiting out a retry backoff. */
+function interruptibleDelay(ms: number, signal: AbortSignal): Promise<void> {
+	return new Promise((resolve, reject) => {
+		if (signal.aborted) {
+			reject(new DOMException("aborted", "AbortError"))
+			return
+		}
+		const onAbort = () => {
+			clearTimeout(timer)
+			reject(new DOMException("aborted", "AbortError"))
+		}
+		const timer = setTimeout(() => {
+			signal.removeEventListener("abort", onAbort)
+			resolve()
+		}, ms)
+		signal.addEventListener("abort", onAbort, { once: true })
+	})
+}
 
 export interface AgentOptions {
 	cwd: string
@@ -479,7 +518,10 @@ User time zone: ${timeZone}, UTC${timeZoneOffsetStr}`
 				},
 			]
 			let summary = ""
-			for await (const chunk of this.client.createMessage(this.systemPrompt, request, [], signal)) {
+			for await (const chunk of this.streamWithRetry(
+				() => this.client.createMessage(this.systemPrompt, request, [], signal),
+				signal,
+			)) {
 				if (signal.aborted) throw new DOMException("aborted", "AbortError")
 				if (chunk.type === "text") {
 					summary += chunk.text
@@ -520,30 +562,82 @@ User time zone: ${timeZone}, UTC${timeZoneOffsetStr}`
 		}
 	}
 
+	/**
+	 * Consume a model stream, automatically re-establishing the request up to
+	 * MAX_STREAM_RETRIES times when it fails BEFORE producing any chunk — the
+	 * common socket-closure case (connection dropped at setup or while waiting
+	 * for the first token). Once chunks have streamed we can't retry without
+	 * duplicating on-screen output, so the error propagates. A user abort is
+	 * never retried.
+	 */
+	private async *streamWithRetry(
+		makeStream: () => ReturnType<LLMClient["createMessage"]>,
+		signal: AbortSignal,
+	): ReturnType<LLMClient["createMessage"]> {
+		for (let attempt = 0; ; attempt++) {
+			let produced = false
+			try {
+				for await (const chunk of makeStream()) {
+					produced = true
+					yield chunk
+				}
+				return
+			} catch (error) {
+				if (signal.aborted || (error as Error).name === "AbortError") throw error
+				if (produced || attempt >= MAX_STREAM_RETRIES || !isRetryableStreamError(error)) {
+					throw error
+				}
+				const delayMs = retryBackoffMs(attempt)
+				this.options.callbacks.onEvent({
+					type: "system",
+					message: `Connection to the model failed (${(error as Error).message}). Retrying ${attempt + 1}/${MAX_STREAM_RETRIES} in ${Math.ceil(delayMs / 1000)}s…`,
+					isError: false,
+				})
+				await interruptibleDelay(delayMs, signal)
+			}
+		}
+	}
+
 	/** Run one model request + tool execution round. Returns true when the turn is over. */
 	private async runStep(): Promise<boolean> {
 		const { onEvent } = this.options.callbacks
 		const signal = this.abortController!.signal
 
 		let assistantText = ""
-		let hadReasoning = false
+		// A reasoning segment is "open" from its first delta until visible content
+		// (text or a tool call) begins. We emit reasoning-done at that transition so
+		// "Thought for Ns" reflects only the thinking time — not the answer that
+		// follows — and the live "Thinking" block stops before the answer streams.
+		// A fresh segment can re-open if the model interleaves reasoning with content.
+		let reasoningOpen = false
 		let reasoningStart = 0
 		let reasoningDetails: unknown
+		const finalizeReasoning = () => {
+			if (reasoningOpen) {
+				reasoningOpen = false
+				onEvent({ type: "reasoning-done", durationMs: Date.now() - reasoningStart })
+			}
+		}
 		const toolCallsByIndex = new Map<number, PendingToolCall>()
 		let nextSyntheticIndex = 10000
 
-		const stream = this.client.createMessage(this.systemPrompt, this.outgoingMessages(), getActiveTools(this.mcp), signal)
+		const stream = this.streamWithRetry(
+			() => this.client.createMessage(this.systemPrompt, this.outgoingMessages(), getActiveTools(this.mcp), signal),
+			signal,
+		)
 
 		for await (const chunk of stream) {
 			if (signal.aborted) throw new DOMException("aborted", "AbortError")
 			switch (chunk.type) {
 				case "text":
+					// Visible content begins — the reasoning phase (if any) is over.
+					finalizeReasoning()
 					assistantText += chunk.text
 					onEvent({ type: "text-delta", text: chunk.text })
 					break
 				case "reasoning":
-					if (!hadReasoning) {
-						hadReasoning = true
+					if (!reasoningOpen) {
+						reasoningOpen = true
 						reasoningStart = Date.now()
 					}
 					onEvent({ type: "reasoning-delta", text: chunk.text })
@@ -553,6 +647,8 @@ User time zone: ${timeZone}, UTC${timeZoneOffsetStr}`
 					reasoningDetails = chunk.details
 					break
 				case "native_tool_calls":
+					// A tool call also ends the reasoning phase.
+					finalizeReasoning()
 					for (const tc of chunk.toolCalls) {
 						const index = tc.index ?? nextSyntheticIndex++
 						let pending = toolCallsByIndex.get(index)
@@ -579,9 +675,8 @@ User time zone: ${timeZone}, UTC${timeZoneOffsetStr}`
 			}
 		}
 
-		if (hadReasoning) {
-			onEvent({ type: "reasoning-done", durationMs: Date.now() - reasoningStart })
-		}
+		// A reasoning-only turn (no following text/tool content) still needs closing.
+		finalizeReasoning()
 		if (assistantText) {
 			onEvent({ type: "text-done" })
 		}

From bd0c111a6abc6e7c7fd1ccab0c2f86c1aeb324bf Mon Sep 17 00:00:00 2001
From: "matterai-app[bot]" <matterai-app[bot]@users.noreply.github.com>
Date: Wed, 1 Jul 2026 17:17:45 +0530
Subject: [PATCH 3/5] feat(ui): add /task slash command to reference a previous
 task
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds a '/task' slash command that lets the user pull a prior session
from the same directory into the current conversation as context.

Behavior:
- '/task' (no argument) opens a SessionPicker over all sessions for
  the current cwd except the active one.
- On selection, the previous task's user/assistant messages are
  extracted (user messages unwrapped from <user_query> tags) and
  wrapped in a <previous_task title='...'> block inside a prompt
  asking the model to summarize it. The summary is then presented
  in the current conversation as the reference.
- Conversations longer than 8000 characters are cut to their first
  8000 characters and followed by a truncation marker so the prompt
  stays well under context limits.
- If no previous tasks exist in this directory, a friendly info
  row is shown instead of opening an empty picker.

Implementation:
- New 'taskPickerSessions' state in App holds the candidate list
  when the picker is open; it's added to the existing 'no-modal'
  guard so other modals (MCP picker, link manager, etc.) don't
  stack.
- 'handleTaskSelect' reuses the existing 'runTurn' path — the
  prompt is the user message, and the model produces the summary.
- SessionPicker gains an optional 'title' prop (default unchanged)
  so the same component reads correctly for both '/resume' and
  '/task'.
---
 src/ui/App.tsx                      | 94 +++++++++++++++++++++++++++++
 src/ui/components/SessionPicker.tsx |  5 +-
 2 files changed, 97 insertions(+), 2 deletions(-)

diff --git a/src/ui/App.tsx b/src/ui/App.tsx
index 3fbffd2..302754a 100644
--- a/src/ui/App.tsx
+++ b/src/ui/App.tsx
@@ -77,6 +77,10 @@ const SLASH_COMMANDS: SlashCommand[] = [
     description: "summarize the conversation to free up context",
   },
   { name: "/tasks", description: "show the current task list" },
+  {
+    name: "/task",
+    description: "reference a previous task in the current conversation",
+  },
   {
     name: "/status",
     description: "show session status (model, context, cost, account)",
@@ -223,6 +227,50 @@ Instructions:
 
 This is a review only — do NOT modify any files. Present the findings to the user.${userInput ? `\n\nThe user provided the following input with the code-review command:\n${userInput}` : ""}`;
 
+const MAX_TASK_CONVERSATION_CHARS = 8000;
+
+function extractConversation(session: SessionData): string {
+  const lines: string[] = [];
+  for (const message of session.messages) {
+    if (message.role === "user" && typeof message.content === "string") {
+      const match = /<user_query>\n?([\s\S]*?)\n?<\/user_query>/.exec(
+        message.content,
+      );
+      if (match) lines.push(`User: ${match[1]}`);
+    } else if (
+      message.role === "assistant" &&
+      typeof message.content === "string" &&
+      message.content.trim()
+    ) {
+      lines.push(`Assistant: ${message.content}`);
+    }
+  }
+  return lines.join("\n\n");
+}
+
+function buildTaskReferencePrompt(session: SessionData): string {
+  const conversation = extractConversation(session);
+  const truncated =
+    conversation.length > MAX_TASK_CONVERSATION_CHARS
+      ? conversation.slice(0, MAX_TASK_CONVERSATION_CHARS) +
+        "\n\n[... conversation truncated ...]"
+      : conversation;
+  return `The user has referenced a previous task. Here is the conversation from that task:
+
+<previous_task title="${session.title || session.id}">
+${truncated}
+</previous_task>
+
+Please summarize this previous task and add it as a reference for the current task. The summary should capture:
+- What the task was about
+- What was accomplished
+- Key decisions made
+- Files that were created or modified
+- Any remaining work
+
+Present the summary in a clear, organized format. This summary will serve as context for the current task.`;
+}
+
 interface PendingApproval {
   request: ApprovalRequest;
   resolve: (decision: ApprovalDecision) => void;
@@ -296,6 +344,9 @@ export function App({
   const [resumableSessions, setResumableSessions] = useState<
     SessionData[] | null
   >(null);
+  const [taskPickerSessions, setTaskPickerSessions] = useState<
+    SessionData[] | null
+  >(null);
   const [linkManagerOpen, setLinkManagerOpen] = useState(false);
   const [links, setLinks] = useState<LinkedRepo[]>([]);
   const [linkStatus, setLinkStatus] = useState("");
@@ -612,6 +663,20 @@ export function App({
     [createAgent, pushRow, resetTranscript],
   );
 
+  const handleTaskSelect = useCallback(
+    (session: SessionData) => {
+      setTaskPickerSessions(null);
+      pushRow({
+        kind: "user",
+        text: `/task (referencing: ${session.title || session.id})`,
+      });
+      setBusy(true);
+      setBusyLabel("Thinking");
+      void getAgent().runTurn(buildTaskReferencePrompt(session));
+    },
+    [getAgent, pushRow],
+  );
+
   const switchModel = useCallback(
     (modelId: string) => {
       const updated = { ...loadSettings(), model: modelId };
@@ -741,6 +806,24 @@ export function App({
           setResumableSessions(sessions);
           break;
         }
+        case "/task": {
+          if (!getAuthToken(settings)) {
+            setView("login");
+            break;
+          }
+          const taskSessions = listSessions(process.cwd()).filter(
+            (s) => s.id !== agentRef.current?.taskId,
+          );
+          if (taskSessions.length === 0) {
+            pushRow({
+              kind: "info",
+              text: "No previous tasks found for this directory.",
+            });
+            break;
+          }
+          setTaskPickerSessions(taskSessions);
+          break;
+        }
         case "/compact":
           if (!getAuthToken(settings)) {
             setView("login");
@@ -1216,6 +1299,7 @@ export function App({
     !mcpPickerOpen &&
     !mcpMigrationEntries &&
     !resumableSessions &&
+    !taskPickerSessions &&
     !linkManagerOpen;
 
   return (
@@ -1308,6 +1392,16 @@ export function App({
               />
             </Box>
           )}
+          {taskPickerSessions && (
+            <Box marginTop={1}>
+              <SessionPicker
+                sessions={taskPickerSessions}
+                title="Reference a previous task"
+                onSelect={handleTaskSelect}
+                onCancel={() => setTaskPickerSessions(null)}
+              />
+            </Box>
+          )}
           {linkManagerOpen && (
             <Box marginTop={1}>
               <LinkManager
diff --git a/src/ui/components/SessionPicker.tsx b/src/ui/components/SessionPicker.tsx
index ee237d7..ffc211c 100644
--- a/src/ui/components/SessionPicker.tsx
+++ b/src/ui/components/SessionPicker.tsx
@@ -10,6 +10,7 @@ interface SessionPickerProps {
 	sessions: SessionData[]
 	onSelect: (session: SessionData) => void
 	onCancel: () => void
+	title?: string
 }
 
 function relativeTime(iso: string): string {
@@ -23,7 +24,7 @@ function relativeTime(iso: string): string {
 	return `${days}d ago`
 }
 
-export function SessionPicker({ sessions, onSelect, onCancel }: SessionPickerProps) {
+export function SessionPicker({ sessions, onSelect, onCancel, title = "Resume a previous session" }: SessionPickerProps) {
 	const [selected, setSelected] = useState(0)
 
 	useInput((input, key) => {
@@ -55,7 +56,7 @@ export function SessionPicker({ sessions, onSelect, onCancel }: SessionPickerPro
 	return (
 		<Box flexDirection="column" borderStyle="round" borderColor={COLORS.primary} paddingX={1}>
 			<Text bold color={COLORS.primary}>
-				Resume a previous session
+				{title}
 			</Text>
 			{windowStart > 0 && <Text dimColor>  ↑ {windowStart} more</Text>}
 			{visible.map((session, i) => {

From 2260420c39165cf509979c2037bfd9fbe973d964 Mon Sep 17 00:00:00 2001
From: "matterai-app[bot]" <matterai-app[bot]@users.noreply.github.com>
Date: Thu, 2 Jul 2026 14:28:16 +0530
Subject: [PATCH 4/5] fix(agent): allow auto-retry mid-stream when partial
 output can be rolled back

Previously streamWithRetry only retried before the first chunk, because
once any text or reasoning had streamed, re-issuing the request would
duplicate on-screen output. The user-visible effect was that a dropped
connection after partial progress surfaced as a failed step, even
though the model was happy to continue.

This adds an optional onRestart callback to streamWithRetry. When the
caller can cleanly undo the partial output (cleared buffers, reset
accumulators) it returns true and the stream is re-issued. The main
agent loop installs a rollbackForRetry handler that:

  - resets assistantText, reasoningOpen, reasoningStart, reasoningDetails
  - clears pending tool calls and resets the synthetic tool-call index
  - emits a new 'stream-reset' event so the UI can drop its partial
    streaming/reasoning buffers
  - declines the restart if a reasoning row was already committed to
    the transcript, since that cannot be undone

The compaction path installs a simpler handler that clears its
in-memory summary buffer, emits the same 'stream-reset' event, and
always allows the restart, because compaction only streams text and
commits once at the end.

The UI handler for 'stream-reset' clears textBufferRef, streamingText,
reasoningBufferRef, and streamingReasoning, then resets the busy label
back to 'Working' so the spinner reflects the restarted attempt.
---
 src/core/agent.ts  | 48 +++++++++++++++++++++++++++++++++++++++-------
 src/core/events.ts |  2 ++
 src/ui/App.tsx     | 11 +++++++++++
 3 files changed, 54 insertions(+), 7 deletions(-)

diff --git a/src/core/agent.ts b/src/core/agent.ts
index da45860..eb5a83f 100644
--- a/src/core/agent.ts
+++ b/src/core/agent.ts
@@ -521,6 +521,13 @@ User time zone: ${timeZone}, UTC${timeZoneOffsetStr}`
 			for await (const chunk of this.streamWithRetry(
 				() => this.client.createMessage(this.systemPrompt, request, [], signal),
 				signal,
+				() => {
+					// Compaction only streams text (committed once at the end), so a
+					// mid-stream retry just discards the partial summary.
+					summary = ""
+					onEvent({ type: "stream-reset" })
+					return true
+				},
 			)) {
 				if (signal.aborted) throw new DOMException("aborted", "AbortError")
 				if (chunk.type === "text") {
@@ -564,15 +571,20 @@ User time zone: ${timeZone}, UTC${timeZoneOffsetStr}`
 
 	/**
 	 * Consume a model stream, automatically re-establishing the request up to
-	 * MAX_STREAM_RETRIES times when it fails BEFORE producing any chunk — the
-	 * common socket-closure case (connection dropped at setup or while waiting
-	 * for the first token). Once chunks have streamed we can't retry without
-	 * duplicating on-screen output, so the error propagates. A user abort is
+	 * MAX_STREAM_RETRIES times on a transient/connection failure. A user abort is
 	 * never retried.
+	 *
+	 * Before the first chunk of an attempt nothing has streamed, so the retry is
+	 * always clean. Once chunks have streamed, retrying would duplicate on-screen
+	 * output — so we only retry mid-stream when the caller supplies `onRestart` and
+	 * it returns true, meaning it rolled the partial output back (cleared buffers,
+	 * reset accumulators). If it can't (e.g. a row was already committed), the error
+	 * propagates.
 	 */
 	private async *streamWithRetry(
 		makeStream: () => ReturnType<LLMClient["createMessage"]>,
 		signal: AbortSignal,
+		onRestart?: () => boolean,
 	): ReturnType<LLMClient["createMessage"]> {
 		for (let attempt = 0; ; attempt++) {
 			let produced = false
@@ -584,9 +596,10 @@ User time zone: ${timeZone}, UTC${timeZoneOffsetStr}`
 				return
 			} catch (error) {
 				if (signal.aborted || (error as Error).name === "AbortError") throw error
-				if (produced || attempt >= MAX_STREAM_RETRIES || !isRetryableStreamError(error)) {
-					throw error
-				}
+				if (attempt >= MAX_STREAM_RETRIES || !isRetryableStreamError(error)) throw error
+				// Output already streamed this attempt: only retry if the caller can
+				// cleanly roll it back, otherwise a restart would duplicate it.
+				if (produced && !(onRestart?.() ?? false)) throw error
 				const delayMs = retryBackoffMs(attempt)
 				this.options.callbacks.onEvent({
 					type: "system",
@@ -612,18 +625,39 @@ User time zone: ${timeZone}, UTC${timeZoneOffsetStr}`
 		let reasoningOpen = false
 		let reasoningStart = 0
 		let reasoningDetails: unknown
+		// Once a reasoning-done row is committed to the transcript we can't roll it
+		// back, so a mid-stream retry after that point isn't clean.
+		let reasoningRowCommitted = false
 		const finalizeReasoning = () => {
 			if (reasoningOpen) {
 				reasoningOpen = false
+				reasoningRowCommitted = true
 				onEvent({ type: "reasoning-done", durationMs: Date.now() - reasoningStart })
 			}
 		}
 		const toolCallsByIndex = new Map<number, PendingToolCall>()
 		let nextSyntheticIndex = 10000
 
+		// Roll back this step's partial output so streamWithRetry can restart a
+		// dropped stream mid-flight. Tools only run after the stream completes, so
+		// nothing irreversible has happened yet; the one thing we can't undo is an
+		// already-committed reasoning row, so we decline the restart in that case.
+		const rollbackForRetry = (): boolean => {
+			if (reasoningRowCommitted) return false
+			assistantText = ""
+			reasoningOpen = false
+			reasoningStart = 0
+			reasoningDetails = undefined
+			toolCallsByIndex.clear()
+			nextSyntheticIndex = 10000
+			onEvent({ type: "stream-reset" })
+			return true
+		}
+
 		const stream = this.streamWithRetry(
 			() => this.client.createMessage(this.systemPrompt, this.outgoingMessages(), getActiveTools(this.mcp), signal),
 			signal,
+			rollbackForRetry,
 		)
 
 		for await (const chunk of stream) {
diff --git a/src/core/events.ts b/src/core/events.ts
index 6c63743..9edb215 100644
--- a/src/core/events.ts
+++ b/src/core/events.ts
@@ -5,6 +5,8 @@ export type AgentEvent =
 	| { type: "reasoning-done"; durationMs: number }
 	| { type: "text-delta"; text: string }
 	| { type: "text-done" }
+	/** discard the in-flight streaming buffers before an auto-retry re-streams */
+	| { type: "stream-reset" }
 	| { type: "tool-start"; id: string; name: string; summary: string }
 	| {
 			type: "tool-end"
diff --git a/src/ui/App.tsx b/src/ui/App.tsx
index 302754a..bd3251d 100644
--- a/src/ui/App.tsx
+++ b/src/ui/App.tsx
@@ -498,6 +498,17 @@ export function App({
           setStreamingText("");
           setBusyLabel("Working");
           break;
+        case "stream-reset":
+          // An auto-retry is re-streaming this step from scratch — drop the
+          // partial text/reasoning shown for the failed attempt so it doesn't
+          // duplicate. (Committed rows are untouched; the agent only resets when
+          // nothing has been committed yet.)
+          textBufferRef.current = "";
+          setStreamingText("");
+          reasoningBufferRef.current = "";
+          setStreamingReasoning("");
+          setBusyLabel("Working");
+          break;
         case "tool-start":
           setBusyLabel("Working");
           break;

From 7cb5b9a4a51b8ceac4343ff7c6aba0bc4ca4c1dd Mon Sep 17 00:00:00 2001
From: "matterai-app[bot]" <matterai-app[bot]@users.noreply.github.com>
Date: Thu, 2 Jul 2026 14:28:26 +0530
Subject: [PATCH 5/5] refactor(prompts): rewrite system prompt for speed and
 editing discipline

Replaces the 'always gather exhaustive context' guidance with a
'gather enough context, then act' principle. The model is now told
that a small, localized change typically needs about 3-6 tool calls
and that further exploration after the edit point is identified is
waste. This also relaxes the TODO list rule: a list is now required
only for multi-step tasks (3+ steps) instead of for work of any size.

The file_edit / multi_file_edit section adds an explicit editing
discipline block: copy old_string verbatim from a same-turn read,
treat earlier reads as stale after a successful edit, and never guess
at a corrected old_string when a multi_file_edit batch fails.

The read_file and search_files sections collapse their repetitive
parameter tables and examples into a short reference plus a 'Reading
Strategy' / 'Search Hygiene' set of rules (read whole regions in one
call, budget re-reads, verify the output matches the parameters sent,
exclude test/spec/mock paths by default, scope path narrowly).

Two new cross-cutting sections are added: 'Verifying tool results
and avoiding loops' (check that outputs match the sent parameters,
do not repeat an identical failing call) and 'Plan before editing'
(write the full change plan once, then execute edits in one batched
pass with a single typecheck/build at the end).

Also fixes two minor copy issues: 'prefer to let the user to that'
typo and a few list-formatting inconsistencies in the TODO list
section.
---
 src/prompts/system.ts | 209 +++++++++---------------------------------
 1 file changed, 41 insertions(+), 168 deletions(-)

diff --git a/src/prompts/system.ts b/src/prompts/system.ts
index ac62135..a79737d 100644
--- a/src/prompts/system.ts
+++ b/src/prompts/system.ts
@@ -37,13 +37,13 @@ You have tools at your disposal to solve the coding task. Follow these rules reg
 
 If you intend to call multiple tools and there are no dependencies between the tool calls, make all of the independent tool calls in parallel. Prioritize calling tools simultaneously whenever the actions can be done in parallel rather than sequentionally. For example, when reading 3 files, run 3 tool calls in parallel to read all 3 files into context at the same time. Maximize use of parallel tool calls where possible to increase speed and efficiency. However, if some tool calls depend on previous calls to inform dependent values like the parameters, do NOT call these tools in parallel and instead call them sequentially. Never use placeholders or guess missing parameters in tool calls.
 
-# Maximize Context Understanding
+# Gather Enough Context, Then Act
 
-Be THOROUGH when gathering information. Make sure you have the FULL picture before replying. Use additional tool calls or clarifying questions as needed.
-TRACE every symbol back to its definitions and usages so you fully understand it.
-Look past the first seemingly relevant result. EXPLORE alternative implementations, edge cases, and varied search terms until you have COMPREHENSIVE coverage of the topic.
+Speed matters: your goal is the correct change in the fewest tool calls, not exhaustive coverage. Scale exploration to the task. A small, localized change typically needs about 3-6 calls — locate the code, read the region and its immediate callers, check conventions — while only wide refactors justify long exploration.
 
-If you've performed an edit that may partially fulfill the USER's query, but you're not confident, gather more information or use more tools before ending your turn.
+You have enough context when you know exactly which files and lines to change, you have seen the surrounding code's conventions, and you know how the code you are touching is used. From that point, every further search or read is waste: stop exploring and make the edit. Before each additional call, ask whether its result could change your edit; if not, skip it. Trace only the symbols your change actually depends on, never re-read regions you have already seen, and never re-verify facts you have already established.
+
+Never edit code you have not read. If after an edit you are genuinely unsure it fulfills the USER's request, verify that specific doubt with one targeted check — do not relaunch broad exploration.
 
 Bias towards not asking the user for help if you can find the answer yourself.
 
@@ -70,7 +70,7 @@ Your system prompt may include an "Available Skills" section listing skills by n
 
 Tools whose names start with \`mcp__\` are provided by external MCP servers the user has configured. They work exactly like native tools — call them with the standard tool call format when the task requires their capabilities. Their descriptions and parameter schemas come from the MCP servers.
 
-CRITICAL: For any task, small or big, you will always and always use the update_todo_list tool to create the TODO list, always keep is upto date with updates to the status and updating/editing the list as needed.`
+Use the update_todo_list tool to create and maintain a TODO list for any multi-step task (3 or more steps), keeping statuses up to date as you work. For trivial tasks that need only one or two steps, skip the todo list and just do the work.`
 
 const toolGuide = `
 Common tool calls and explanations
@@ -123,20 +123,15 @@ Common tool calls and explanations
 }
 \`\`\`
 
-**Example** (editing across multiple files):
-\`\`\`json
-{
-  "edits": [
-    {"file_path": "/path/to/api.ts", "old_string": "v1", "new_string": "v2"},
-    {"file_path": "/path/to/config.ts", "old_string": "version: 1", "new_string": "version: 2"}
-  ]
-}
-\`\`\`
-
 **Guidance for choosing between file_edit and multi_file_edit**:
 - 1 edit → \`file_edit\`
 - 2+ edits → \`multi_file_edit\` (always)
 
+**Editing discipline (CRITICAL)**:
+- ALWAYS copy \`old_string\` verbatim from a read_file result obtained in the same turn. NEVER reconstruct indentation or whitespace from memory — this is especially important in tab-indented files, where a reconstructed \`old_string\` will silently mismatch.
+- After any successful edit, treat all earlier reads of that file as stale. Re-read the region with read_file before editing the same area of the file again.
+- If one edit in a \`multi_file_edit\` batch fails with a string mismatch, STOP and re-read the file before retrying that edit. Do not guess at a corrected \`old_string\` — guessed corrections compound the mismatch.
+
 ## read_file Tool Usage
 
 The \`read_file\` tool reads file contents with optional offset and limit. Use it to examine code before making changes or to discuss specific sections.
@@ -147,33 +142,9 @@ The \`read_file\` tool reads file contents with optional offset and limit. Use i
 - \`offset\` (optional): Starting line number (1-indexed). Defaults to 1.
 - \`limit\` (optional): Maximum number of lines to read. If not specified, reads the complete file. Default and maximum limit is 1000 lines.
 
-### Parameters Schema
-\`\`\`typescript
-{
-  file_path: string,    // Absolute path to file (required)
-  offset?: number,      // Starting line (1-indexed), defaults to 1
-  limit?: number        // Max lines to read, omit to read entire file
-}
-\`\`\`
-
-### Examples
-
-**Read entire file:**
-\`\`\`json
-{
-  "file_path": "/Users/username/project/src/App.tsx"
-}
-\`\`\`
-
-**Read first 50 lines:**
-\`\`\`json
-{
-  "file_path": "/Users/username/project/src/App.tsx",
-  "limit": 50
-}
-\`\`\`
+### Example
 
-**Read lines 100-150 (50 lines starting at line 100):**
+**Read 50 lines starting at line 100 (lines 100-149):**
 \`\`\`json
 {
   "file_path": "/Users/username/project/src/App.tsx",
@@ -182,43 +153,17 @@ The \`read_file\` tool reads file contents with optional offset and limit. Use i
 }
 \`\`\`
 
-### Workflow: When You Don't Know Line Numbers
-
-**Step 1:** Use \`search_files\` to find the code:
-\`\`\`json
-{
-  "path": "src",
-  "regex": "function handleSubmit",
-  "file_pattern": "*.ts"
-}
-\`\`\`
-
-**Step 2:** Note the line number from search results (e.g., line 45)
-
-**Step 3:** Read that section with \`read_file\`:
-\`\`\`json
-{
-  "file_path": "/Users/username/project/src/Form.tsx",
-  "offset": 40,
-  "limit": 50
-}
-\`\`\`
+Parameter rules: \`file_path\` must be an absolute path; \`offset\` and \`limit\` must be >= 1 if specified; omit \`limit\` to read from \`offset\` to the end of the file (capped at the 1000-line maximum). Call the tool multiple times to read multiple files.
 
-### Parameter Rules
+CRITICAL: \`offset\` is what targets a region — \`limit\` alone reads the TOP of the file. To inspect line N (e.g. from search results), you MUST pass \`offset\` ≈ N-20 (clamped to a minimum of 1) together with \`limit\`. Before sending the call, confirm \`offset\` is present whenever you are aiming at a specific line.
 
-1. \`file_path\` must be an absolute path
-2. \`offset\` must be >= 1 if specified
-3. \`limit\` must be >= 1 if specified
-4. If \`limit\` is omitted, the entire file is read from \`offset\`
+When you don't know line numbers: use \`search_files\` to locate the code, note the line number from the results, then \`read_file\` that region with surrounding context.
 
-### Common Patterns
+### Reading Strategy
 
-| Use Case | Parameters |
-|----------|-----------|
-| Read entire file | \`file_path\` only |
-| Read from start | \`limit: 50\` |
-| Read middle section | \`offset: 100, limit: 50\` |
-| Read from a specific line to end | \`offset: 200\` |
+- When investigating a bug, read whole functions or logical regions in ONE call rather than small slivers. Prefer one 150-line read over five 30-line reads — fragmented reads lose context and waste calls.
+- Budget your re-reads: if you have already read a region and have not edited it since, work from what you have instead of fetching it again. Re-read only when the file has changed or you genuinely lack the detail.
+- After every read, verify the output matches the parameters you sent. If you meant to read around line N but the result starts at line 1, you omitted \`offset\` — re-issue the call with \`offset\` set. NEVER re-read the top of the file expecting a different result.
 
 
 # execute_command
@@ -232,7 +177,9 @@ The tool accepts these parameters:
 - \`command\` (required): The CLI command to execute. Must be valid for the user's operating system.
 - \`cwd\` (optional): The working directory to execute the command in. If not provided, the current working directory is used. Ensure this is always an absolute path (starting with \`/\`, or a drive letter like \`C:\\\` on Windows). If you are running the command in the root directly, skip this parameter. The command executor is defaulted to run in the root directory. You already have the Current Workspace Directory in the Environment Details section.
 
-CRITICAL: If the command is a very long running process, prefer to let the user to that they can run it manually in thier terminal. If the user specifically requests to run a long running command, you may proceed.
+CRITICAL: If the command is a very long-running process, prefer to let the user know so they can run it manually in their terminal. If the user specifically requests to run a long-running command, you may proceed.
+
+Command validity rules: a command is never empty, never just \`:\`, never a bare single word with no arguments, and never contains tool-call markup tokens or angle-bracket tags of any kind. Commands must be valid for the user's operating system, shell, and current working directory.
 
 ## search_files
 
@@ -268,90 +215,29 @@ The \`search_files\` tool allows you to search for patterns across files in a di
   "file_pattern": null
 }
 
-// Search in JSX/TSX files only
-{
-  "path": "src/components",
-  "regex": "useState",
-  "file_pattern": "*.{jsx,tsx}"
-}
-
-// Search in nested directories
-{
-  "path": ".",
-  "regex": "API_KEY",
-  "file_pattern": "**/*.env*"
-}
-\`\`\`
-
-### ❌ INCORRECT Examples
-\`\`\`json
-// WRONG - Unquoted file_pattern (will cause JSON error)
-{
-  "path": "src",
-  "regex": "import",
-  "file_pattern": *.js
-}
-
-// WRONG - Missing file_pattern entirely
-{
-  "path": "src",
-  "regex": "import"
-}
-
-// WRONG - Empty string instead of null
-{
-  "path": "src",
-  "regex": "import",
-  "file_pattern": ""
-}
 \`\`\`
 
-### Regex Pattern Tips
-
-- Use Rust regex syntax (similar to PCRE)
-- Escape special characters: \`\\.\`, \`\\(\`, \`\\[\`, etc.
-- Common patterns:
-  - \`"word"\` - literal match
-  - \`"\\bword\\b"\` - word boundary match
-  - \`"function\\s+\\w+"\` - function declarations
-  - \`"import.*from\\s+['\\"].*['\\"]"\` - import statements
+The regex uses Rust syntax (similar to PCRE); escape special characters like \`\\.\` and \`\\(\`. \`file_pattern\` uses glob syntax: \`"*.ts"\`, \`"*.{jsx,tsx}"\`, \`"**/*.json"\`. When in doubt, use \`null\` to search all files.
 
-### File Pattern Glob Syntax
+### Search Hygiene
 
-When using a string value for \`file_pattern\`:
-- \`"*.js"\` - All .js files in directory
-- \`"*.{js,ts}"\` - All .js and .ts files
-- \`"**/*.json"\` - All .json files recursively
-- \`"test_*.py"\` - Files starting with test_
-- \`"src/**/*.tsx"\` - All .tsx files under src/
-
-**When in doubt, use \`null\` to search all files.**
-
-### Parameter Validation Checklist
-
-Before submitting, verify:
-- ✅ \`path\` is a string (directory path)
-- ✅ \`regex\` is a string (valid Rust regex)
-- ✅ \`file_pattern\` is EITHER a quoted string OR null
-- ✅ All three parameters are present
-- ✅ No unquoted glob patterns like \`*.js\`
+- Exclude test, spec, and mock paths from discovery searches by default (\`__tests__\`, \`*.spec.*\`, \`*.test.*\`, \`__mocks__\`) unless the task itself is about tests. They pollute results and bury the implementation you are looking for.
+- Scope \`path\` to the narrowest plausible directory instead of searching from the repository root.
+- If a search returns hundreds of hits, tighten the regex or \`file_pattern\` and search again. Do not scan through the dump.
 
 ### Remember
 
 **Always quote the file_pattern value or use null. Never use bare/unquoted glob patterns.**
 
-## execute_command
+## Verifying tool results and avoiding loops
+
+- After EVERY tool call, verify the output actually matches the parameters you sent (correct file, correct line range, correct directory). A result that does not reflect your parameters means the call was malformed — fix the call, do not reason from the bad output.
+- If two consecutive identical tool calls produce identical results, you are in a loop. Change the call or change the strategy. NEVER repeat the same call a third time.
 
-CRITICAL:
-1. A command never starts with \`:\`
-2. A command never contains tool-call markup tokens or angle-bracket tags of any kind
-3. A command is never empty or \`:\`
-4. A command is never a single word or a single word with a space
-5. Commands are always valid for the user's operating system
-6. Commands are always valid for the user's shell
-7. Commands are always valid with executable permissions
-8. Commands are always valid with the user's current working directory
+## Plan before editing
 
+- Investigate first, edit second. Once the root cause is confirmed, write out the full change plan — which files, the exact locations, and the edit order — BEFORE touching anything.
+- Then execute the edits in one pass (batched via \`multi_file_edit\`) and verify with a single typecheck/build at the end, rather than alternating between editing and checking.
 
 ## update_todo_list
 
@@ -359,27 +245,14 @@ CRITICAL:
 Replace the entire TODO list with an updated checklist reflecting the current state. Always provide the full list; the system will overwrite the previous one. This tool is designed for step-by-step task tracking, allowing you to confirm completion of each step before updating, update multiple task statuses at once (e.g., mark one as completed and start the next), and dynamically add new todos discovered during long or complex tasks.
 
 **Checklist Format:**
-- Use a single-level markdown checklist (no nesting or subtasks).
-- List todos in the intended execution order.
-- Status options:
-	 - [ ] Task description (pending)
-	 - [x] Task description (completed)
-	 - [-] Task description (in progress)
-
-**Status Rules:**
-- [ ] = pending (not started)
-- [x] = completed (fully finished, no unresolved issues)
-- [-] = in_progress (currently being worked on)
+- Use a single-level markdown checklist (no nesting or subtasks), in intended execution order.
+- Statuses: \`[ ]\` pending, \`[x]\` completed (fully finished, no unresolved issues), \`[-]\` in progress.
 
 **Core Principles:**
-- Before updating, always confirm which todos have been completed since the last update.
-- You may update multiple statuses in a single update (e.g., mark the previous as completed and the next as in progress).
-- When a new actionable item is discovered during a long or complex task, add it to the todo list immediately.
-- Do not remove any unfinished todos unless explicitly instructed.
-- Always retain all unfinished tasks, updating their status as needed.
-- Only mark a task as completed when it is fully accomplished (no partials, no unresolved dependencies).
-- If a task is blocked, keep it as in_progress and add a new todo describing what needs to be resolved.
-- Remove tasks only if they are no longer relevant or if the user requests deletion.
+- Update multiple statuses in a single call (e.g., mark the previous task completed and the next in progress).
+- Add newly discovered actionable items immediately. Retain all unfinished tasks; remove one only if it is no longer relevant or the user asks.
+- Mark a task completed only when fully accomplished. If blocked, keep it in_progress and add a todo describing what must be resolved.
+- Keep the todo list AHEAD of the work, not behind it: it is a steering tool, not a changelog. Lay out upcoming steps before you start them instead of only recording steps after they are finished.
 
 IMPORTANT: Use attempt_completion tool when you have completed the task. This signals that you are done.
 `