diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..7da8c73
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,5 @@
+# Shell scripts, the all-in-one entrypoint and localapp/serve.py MUST stay LF —
+# CRLF breaks a shebang in the Linux container ("bad interpreter: /usr/bin/env bash^M").
+*.sh        text eol=lf
+deploy/allinone/entrypoint.sh text eol=lf
+localapp/serve.py             text eol=lf
diff --git a/.github/workflows/docker-allinone.yml b/.github/workflows/docker-allinone.yml
new file mode 100644
index 0000000..29c3839
--- /dev/null
+++ b/.github/workflows/docker-allinone.yml
@@ -0,0 +1,74 @@
+name: docker-allinone
+
+# Build and publish the ALL-IN-ONE image (engine + bundled Postgres + web UI)
+# so anyone can `docker run` Vectorless with just an LLM key.
+#
+# Publishes to Docker Hub AND GitHub Container Registry:
+#   docker.io/<DOCKERHUB_USERNAME>/vectorless:latest|sha-<short>|vX.Y.Z
+#   ghcr.io/hallelx2/vectorless:latest|sha-<short>|vX.Y.Z
+#
+# Requires two repo secrets for the Docker Hub push:
+#   DOCKERHUB_USERNAME — your Docker Hub account/namespace
+#   DOCKERHUB_TOKEN    — a Docker Hub access token with Read/Write/Delete scope
+# (GHCR uses the built-in GITHUB_TOKEN — no extra secret.)
+
+on:
+  workflow_dispatch: {}        # run on demand (Actions tab / gh CLI); also tags :latest, from any ref
+  push:
+    branches: [main]           # publish :latest on every push to the default branch
+    tags: ["v*.*.*"]
+
+permissions:
+  contents: read
+  packages: write              # push to ghcr.io
+
+jobs:
+  publish:
+    name: build + push all-in-one
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+      - name: Log in to ghcr.io
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract tags + labels
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: |
+            docker.io/${{ secrets.DOCKERHUB_USERNAME }}/vectorless
+            ghcr.io/${{ github.repository_owner }}/vectorless
+          tags: |
+            type=raw,value=latest,enable={{is_default_branch}}
+            type=raw,value=latest,enable=${{ github.event_name == 'workflow_dispatch' }}
+            type=ref,event=tag
+            type=sha,prefix=sha-,format=short
+
+      - name: Build + push
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: ./Dockerfile.allinone
+          platforms: linux/amd64
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          build-args: |
+            VERSION=${{ github.ref_name }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
diff --git a/Dockerfile.allinone b/Dockerfile.allinone
new file mode 100644
index 0000000..21c82f3
--- /dev/null
+++ b/Dockerfile.allinone
@@ -0,0 +1,69 @@
+# ── All-in-one image: engine + bundled Postgres + viewer UI ──────────
+#
+# One `docker run` gives a fully working Vectorless: the retrieval engine,
+# a Postgres instance bundled in the same container, and the local web UI.
+# The only thing the user supplies is an LLM provider key.
+#
+#   docker run -p 8080:8080 -p 7654:7654 \
+#     -e VLE_LLM_ANTHROPIC_API_KEY=<your GLM key> \
+#     hallelx2/vectorless:latest
+#   # → UI:  http://localhost:8080
+#   # → API: http://localhost:7654
+#
+# Context: vectorless-engine/ directory.
+
+# ── Build stage ──────────────────────────────────────────────────────
+FROM golang:1.25-alpine AS build
+RUN apk add --no-cache ca-certificates
+WORKDIR /src
+COPY go.mod go.sum ./
+RUN go mod download
+COPY cmd/      ./cmd/
+COPY pkg/      ./pkg/
+COPY internal/ ./internal/
+ARG VERSION=dev
+RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 \
+    go build -trimpath -ldflags="-s -w -X main.version=${VERSION}" \
+      -o /bin/engine ./cmd/engine
+
+# ── Runtime stage: Postgres base + python + engine + viewer ──────────
+FROM postgres:16-bookworm
+
+RUN apt-get update \
+ && apt-get install -y --no-install-recommends python3 ca-certificates \
+ && rm -rf /var/lib/apt/lists/*
+
+COPY --from=build /bin/engine /usr/local/bin/engine
+COPY localapp/ /opt/vectorless-app/
+COPY deploy/allinone/entrypoint.sh /usr/local/bin/vl-entrypoint.sh
+RUN chmod +x /usr/local/bin/vl-entrypoint.sh
+
+# Bundled Postgres credentials — must match engine --local's expected DSN
+# (postgres://vectorless:vectorless@localhost:5432/vectorless).
+ENV POSTGRES_USER=vectorless \
+    POSTGRES_PASSWORD=vectorless \
+    POSTGRES_DB=vectorless
+
+# Defaults for `engine --local` (run by the entrypoint) and the viewer: minimal
+# ingest (fast, queryable in seconds), document bytes under /data (mount a
+# volume to persist), GLM via z.ai's Anthropic-compatible gateway, viewer on
+# :8080. Override any with -e; the user still supplies VLE_LLM_ANTHROPIC_API_KEY.
+ENV VLE_INGEST_MODE=minimal \
+    VLE_STORAGE_LOCAL_ROOT=/data/documents \
+    VLE_LLM_DRIVER=anthropic \
+    VLE_LLM_ANTHROPIC_BASE_URL=https://api.z.ai/api/anthropic/v1 \
+    VLE_LLM_ANTHROPIC_MODEL=glm-4.6 \
+    VIEWER_PORT=8080 \
+    ENGINE_URL=http://localhost:7654 \
+    HOST=0.0.0.0
+
+EXPOSE 8080 7654
+VOLUME ["/data", "/var/lib/postgresql/data"]
+
+ENTRYPOINT ["/usr/local/bin/vl-entrypoint.sh"]
+
+LABEL org.opencontainers.image.title="vectorless (all-in-one)"
+LABEL org.opencontainers.image.description="Vectorless retrieval engine + bundled Postgres + web UI in one container. Reasoning-based document retrieval — no chunking, no embeddings, no vector DB."
+LABEL org.opencontainers.image.source="https://github.com/hallelx2/vectorless-engine"
+LABEL org.opencontainers.image.licenses="Apache-2.0"
+LABEL org.opencontainers.image.vendor="Vectorless"
diff --git a/cmd/engine/main.go b/cmd/engine/main.go
index 73d4e1a..3d61d21 100644
--- a/cmd/engine/main.go
+++ b/cmd/engine/main.go
@@ -105,7 +105,16 @@ func run() error {
 
 	llmClient, err := buildLLM(cfg.LLM)
 	if err != nil {
-		return fmt.Errorf("init llm: %w", err)
+		// A missing provider key is non-fatal in local mode: the bundled UI
+		// (and any caller) can supply credentials per request via X-LLM-*
+		// headers (BYOK), so boot without a shared client and let those
+		// requests build their own. Any other init error is still fatal.
+		if config.LocalModeEnabled() && llmKeyMissing(cfg.LLM) {
+			logger.Warn("no LLM provider key configured — queries require a per-request key (BYOK via X-LLM-Api-Key), or set VLE_LLM_ANTHROPIC_API_KEY")
+			llmClient = nil
+		} else {
+			return fmt.Errorf("init llm: %w", err)
+		}
 	}
 	strategy := buildStrategy(cfg.Retrieval, llmClient, store)
 
@@ -231,12 +240,16 @@ func run() error {
 	// (gated by retrieval.treewalk.enabled), even on a deployment
 	// using chunked-tree as its default selection path.
 	var treeWalkStrategy *retrieval.TreeWalkStrategy
-	if cfg.Retrieval.TreeWalk.Enabled && llmClient != nil {
+	if cfg.Retrieval.TreeWalk.Enabled {
+		// Built even when llmClient is nil (no server key): the per-request
+		// BYOK path sets the strategy's client from X-LLM-Api-Key headers,
+		// so the endpoint stays available for callers that bring their own key.
 		treeWalkStrategy = buildTreeWalkStrategy(cfg.Retrieval, llmClient, store)
 		logger.Info("retrieval: treewalk answer endpoint enabled",
 			"max_hops", treeWalkStrategy.MaxHops,
 			"page_content_limit", treeWalkStrategy.PageContentLimit,
 			"model_override", cfg.Retrieval.TreeWalk.Model,
+			"server_key", llmClient != nil,
 		)
 	}
 
@@ -260,6 +273,9 @@ func run() error {
 		Abstain:          cfg.Retrieval.Abstain,
 		TreeWalkStrategy: treeWalkStrategy,
 		TreeWalk:         cfg.Retrieval.TreeWalk,
+		BuildLLM: func(provider, apiKey, baseURL, model string) (llmgate.Client, error) {
+			return buildLLMFrom(cfg.LLM, provider, apiKey, baseURL, model)
+		},
 	}
 
 	srv := &http.Server{
@@ -399,6 +415,66 @@ func buildLLM(c config.LLMConfig) (llmgate.Client, error) {
 	}
 }
 
+// llmKeyMissing reports whether the configured driver has an empty API key;
+// it is always false for a driver it does not recognise. run uses it to keep
+// local-mode boot non-fatal so per-request BYOK credentials can still work.
+func llmKeyMissing(c config.LLMConfig) bool {
+	switch c.Driver {
+	case "anthropic":
+		return c.Anthropic.APIKey == ""
+	case "openai":
+		return c.OpenAI.APIKey == ""
+	case "gemini":
+		return c.Gemini.APIKey == ""
+	}
+	return false
+}
+
+// buildLLMFrom constructs an llmgate client from caller-supplied (BYOK)
+// credentials, backing the per-request X-LLM-* headers. An empty provider or
+// model inherits the server's config; baseURL is used, and inherited when
+// empty, for the anthropic provider only. Unknown providers are an error.
+func buildLLMFrom(c config.LLMConfig, provider, apiKey, baseURL, model string) (llmgate.Client, error) {
+	if provider == "" {
+		provider = c.Driver
+	}
+	switch provider {
+	case "anthropic":
+		if model == "" {
+			model = c.Anthropic.Model
+		}
+		if baseURL == "" {
+			baseURL = c.Anthropic.BaseURL
+		}
+		return anthropic.New(anthropic.Config{
+			APIKey:         apiKey,
+			Model:          model,
+			ReasoningModel: c.Anthropic.ReasoningModel,
+			BaseURL:        baseURL,
+		})
+	case "openai":
+		if model == "" {
+			model = c.OpenAI.Model
+		}
+		return openai.New(openai.Config{
+			APIKey:         apiKey,
+			Model:          model,
+			ReasoningModel: c.OpenAI.ReasoningModel,
+		})
+	case "gemini":
+		if model == "" {
+			model = c.Gemini.Model
+		}
+		return gemini.New(gemini.Config{
+			APIKey:         apiKey,
+			Model:          model,
+			ReasoningModel: c.Gemini.ReasoningModel,
+		})
+	default:
+		return nil, fmt.Errorf("unknown llm provider: %q", provider)
+	}
+}
+
 func buildStrategy(c config.RetrievalConfig, client llmgate.Client, store storage.Storage) retrieval.Strategy {
 	switch c.Strategy {
 	case "single-pass":
diff --git a/deploy/allinone/entrypoint.sh b/deploy/allinone/entrypoint.sh
new file mode 100644
index 0000000..d5669ee
--- /dev/null
+++ b/deploy/allinone/entrypoint.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+# All-in-one entrypoint: Postgres + Vectorless engine + the local viewer UI,
+# all in one container. Postgres is bundled so `docker run` needs no external
+# services — the user only supplies an LLM provider key.
+set -euo pipefail
+
+PGUSER_="${POSTGRES_USER:-vectorless}"
+PGDB_="${POSTGRES_DB:-vectorless}"
+
+echo "[vectorless] starting bundled Postgres…"
+# The official postgres entrypoint runs first-run initdb (from POSTGRES_*) and
+# then execs postgres; background it and keep its PID so the wait can bail out.
+docker-entrypoint.sh postgres &
+PG_PID=$!
+
+echo "[vectorless] waiting for Postgres to accept connections…"
+until pg_isready -h localhost -U "$PGUSER_" -d "$PGDB_" >/dev/null 2>&1; do
+  kill -0 "$PG_PID" 2>/dev/null || { echo "[vectorless] ERROR: Postgres exited before becoming ready" >&2; exit 1; }
+  sleep 1
+done
+echo "[vectorless] Postgres ready."
+
+# Start the viewer UI (single-page app + same-origin engine proxy) in the background.
+if [ -f /opt/vectorless-app/serve.py ]; then
+  echo "[vectorless] starting viewer UI on :${VIEWER_PORT:-8080} → ${ENGINE_URL:-http://localhost:7654}"
+  PYTHONIOENCODING=utf-8 python3 /opt/vectorless-app/serve.py &
+fi
+
+if [ -z "${VLE_LLM_ANTHROPIC_API_KEY:-}" ] && [ -z "${VLE_LLM_OPENAI_API_KEY:-}" ] && [ -z "${VLE_LLM_GEMINI_API_KEY:-}" ]; then
+  echo "[vectorless] WARNING: no LLM provider key set. Ingestion will work, but"
+  echo "[vectorless]          queries need e.g. -e VLE_LLM_ANTHROPIC_API_KEY=<your GLM key>"
+fi
+
+echo "[vectorless] starting engine (local mode) on :7654 …"
+# exec so the engine replaces this shell as the main process and receives signals.
+exec engine --local
diff --git a/internal/api/server.go b/internal/api/server.go
index c34fc14..d9eadf2 100644
--- a/internal/api/server.go
+++ b/internal/api/server.go
@@ -59,6 +59,13 @@ type Deps struct {
 	// LLMModel is the default model name. Per-request overrides win.
 	LLMModel string
 
+	// BuildLLM constructs a per-request llmgate client from caller-supplied
+	// credentials (BYOK), inheriting server defaults for any empty field.
+	// Wired in main.go. When set, callers can pass their own key/base_url/
+	// model via X-LLM-* request headers; nil disables per-request keys and
+	// handlers fall back to the shared LLM client. See resolveLLM.
+	BuildLLM func(provider, apiKey, baseURL, model string) (llmgate.Client, error)
+
 	// AnswerSpan / Answer hold the relevant config blocks. Default
 	// values (AnswerSpan disabled, Answer.MaxSections=5) are safe.
 	AnswerSpan config.AnswerSpanBlock
@@ -140,6 +147,7 @@ func Router(d Deps) http.Handler {
 			r.Get("/{id}", d.handleGetDocument)
 			r.Delete("/{id}", d.handleDeleteDocument)
 			r.Get("/{id}/tree", d.handleGetTree)
+			r.Get("/{id}/source", d.handleGetSource)
 		})
 
 		r.Get("/sections/{id}", d.handleGetSection)
@@ -364,6 +372,50 @@ func (d Deps) handleDeleteDocument(w http.ResponseWriter, r *http.Request) {
 	w.WriteHeader(http.StatusNoContent)
 }
 
+// handleGetSource streams a document's original uploaded bytes (e.g. for a
+// PDF page preview in a viewer) inline under its stored content type, with
+// MIME sniffing disabled because the bytes are whatever was uploaded. It
+// answers 404 when the document, its SourceRef, or the stored object is missing.
+func (d Deps) handleGetSource(w http.ResponseWriter, r *http.Request) {
+	id := tree.DocumentID(chi.URLParam(r, "id"))
+	doc, err := d.DB.GetDocument(r.Context(), id, standaloneOrgID, "")
+	switch {
+	case errors.Is(err, db.ErrNotFound):
+		writeErr(w, http.StatusNotFound, "document not found")
+		return
+	case err != nil:
+		writeErr(w, http.StatusInternalServerError, err.Error())
+		return
+	case doc.SourceRef == "":
+		writeErr(w, http.StatusNotFound, "document has no stored source")
+		return
+	}
+	rc, meta, err := d.Storage.Get(r.Context(), doc.SourceRef)
+	switch {
+	case errors.Is(err, storage.ErrNotFound):
+		writeErr(w, http.StatusNotFound, "source object not found")
+		return
+	case err != nil:
+		writeErr(w, http.StatusInternalServerError, err.Error())
+		return
+	}
+	defer func() { _ = rc.Close() }()
+
+	ct := doc.ContentType
+	if ct == "" {
+		ct = "application/octet-stream"
+	}
+	w.Header().Set("Content-Type", ct)
+	w.Header().Set("X-Content-Type-Options", "nosniff")
+	if meta.Size > 0 {
+		w.Header().Set("Content-Length", strconv.FormatInt(meta.Size, 10))
+	}
+	w.Header().Set("Content-Disposition", "inline")
+	w.Header().Set("Cache-Control", "private, max-age=300")
+	w.WriteHeader(http.StatusOK)
+	_, _ = io.Copy(w, rc)
+}
+
 func (d Deps) handleGetTree(w http.ResponseWriter, r *http.Request) {
 	id := tree.DocumentID(chi.URLParam(r, "id"))
 	t, err := d.DB.LoadTree(r.Context(), id, standaloneOrgID, "")
diff --git a/internal/api/treewalk.go b/internal/api/treewalk.go
index 6bd895a..9337db2 100644
--- a/internal/api/treewalk.go
+++ b/internal/api/treewalk.go
@@ -78,8 +78,32 @@ type treeWalkAnswerRequest struct {
 //	  "stream"?: false, "reasoning"?: false }
 //
 // Response: see treeWalkAnswerResponse below.
+
+// resolveLLM picks the llmgate client for this request. With an X-LLM-Api-Key
+// header and a wired d.BuildLLM it builds a per-request (BYOK) client, passing
+// X-LLM-Provider / X-LLM-Base-Url / X-LLM-Model through as given; otherwise it
+// returns the shared d.LLM, which may be nil. The returned string is always
+// the X-LLM-Model header value (may be "").
+func (d Deps) resolveLLM(r *http.Request) (llmgate.Client, string, error) {
+	model := r.Header.Get("X-LLM-Model")
+	key := r.Header.Get("X-LLM-Api-Key")
+	if key == "" || d.BuildLLM == nil {
+		return d.LLM, model, nil
+	}
+	c, err := d.BuildLLM(
+		r.Header.Get("X-LLM-Provider"),
+		key,
+		r.Header.Get("X-LLM-Base-Url"),
+		model,
+	)
+	if err != nil {
+		return nil, model, err
+	}
+	return c, model, nil
+}
+
 func (d Deps) handleAnswerTreeWalk(w http.ResponseWriter, r *http.Request) {
-	if d.LLM == nil {
+	if d.LLM == nil && d.BuildLLM == nil {
 		writeErr(w, http.StatusNotImplemented, "answer/treewalk endpoint requires an LLM client")
 		return
 	}
@@ -126,9 +150,29 @@ func (d Deps) handleAnswerTreeWalk(w http.ResponseWriter, r *http.Request) {
 	if body.MaxPagesPerFetch > 0 {
 		perReq.PageContentLimit = body.MaxPagesPerFetch
 	}
+	// BYOK: if the caller supplies their own LLM credentials via X-LLM-*
+	// headers, build a per-request client and run BOTH the navigation loop
+	// and citation span-extraction through it. dReq is a value copy of Deps
+	// with the per-request client swapped in — Deps is passed by value so
+	// this never mutates the shared instance other goroutines read.
+	client, hdrModel, err := d.resolveLLM(r)
+	if err != nil {
+		writeErr(w, http.StatusBadRequest, "invalid LLM credentials: "+err.Error())
+		return
+	}
+	if client == nil {
+		writeErr(w, http.StatusBadRequest, "no LLM credentials: configure a server key or send an X-LLM-Api-Key header (BYOK)")
+		return
+	}
+	perReq.LLM = client
+	dReq := d
+	dReq.LLM = client
+	if body.Model == "" {
+		body.Model = hdrModel
+	}
+
 	// Per-request model override falls through to budget.ModelName
 	// the same way every other handler does.
-
 	budget := retrieval.ContextBudget{ModelName: body.Model}
 	if budget.ModelName == "" {
 		budget.ModelName = d.LLMModel
@@ -139,7 +183,7 @@ func (d Deps) handleAnswerTreeWalk(w http.ResponseWriter, r *http.Request) {
 	// Stream variant: hijack the response writer for SSE and emit
 	// one event per tool call.
 	if body.Stream {
-		d.serveAnswerTreeWalkStream(w, r, &perReq, t, body, budget, started)
+		dReq.serveAnswerTreeWalkStream(w, r, &perReq, t, body, budget, started)
 		return
 	}
 
@@ -164,7 +208,7 @@ func (d Deps) handleAnswerTreeWalk(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	citations := d.buildTreeWalkCitations(r.Context(), t, res, body.Query, body.Model)
+	citations := dReq.buildTreeWalkCitations(r.Context(), t, res, body.Query, body.Model)
 
 	resp := map[string]any{
 		"document_id": body.DocumentID,
diff --git a/internal/config/config.go b/internal/config/config.go
index 53d2950..72000ba 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -346,9 +346,9 @@ func applyEnvOverrides(c *Config) {
 		}
 	}
 	// Anthropic-compatible gateway overrides (e.g. GLM/Zhipu via
-	// https://api.z.ai/api/anthropic): base URL + model, so the
-	// anthropic driver can run a non-Anthropic model without a secret
-	// edit.
+	// https://api.z.ai/api/anthropic/v1 — the /v1 is required, the client
+	// posts to ${base}/messages): base URL + model, so the anthropic
+	// driver can run a non-Anthropic model without a secret edit.
 	if v := firstEnv("VLS_LLM_ANTHROPIC_BASE_URL", "VLE_LLM_ANTHROPIC_BASE_URL"); v != "" {
 		c.Engine.LLM.Anthropic.BaseURL = v
 	}
diff --git a/localapp/README.md b/localapp/README.md
new file mode 100644
index 0000000..94bc682
--- /dev/null
+++ b/localapp/README.md
@@ -0,0 +1,44 @@
+# Vectorless — local viewer
+
+A tiny, dependency-free local UI for the OSS `vectorless-engine`. Upload a PDF,
+watch it ingest into a structured tree, browse the section map, and ask
+questions that come back with **cited** answers (page range + verbatim quote) —
+answered by whatever model the engine is configured with (here: GLM-4.6 via
+z.ai's Anthropic-compatible gateway).
+
+This is the minimal slice of **HAL-188** (local dashboard). It is intentionally
+small: a single `index.html` + a stdlib Python proxy. No build step, no Node.
+
+## Why the proxy
+The `engine --local` binary emits **no CORS headers**, so a browser page can't
+call `http://localhost:7654` cross-origin. `serve.py` serves the page **and**
+reverse-proxies `/engine/*` to the engine, so every request is same-origin.
+
+## Run
+
+```bash
+# 1. Start the engine (from vectorless-engine/), local mode + your GLM key:
+cd ../vectorless-engine
+set -a; . ./.env; set +a          # GLM key + base_url (.../api/anthropic/v1) + glm-4.6
+export VLE_INGEST_MODE=minimal
+./bin/engine.exe --local           # listens on :7654
+
+# 2. Start the viewer (from the engine repo root, where step 1 left you):
+cd localapp
+python serve.py                    # http://localhost:7655
+```
+
+Then open **http://localhost:7655** and:
+1. Drop a PDF (e.g. a FinanceBench 10-K) onto **Upload**.
+2. Watch it move to **ready** in the **Documents** list; click it.
+3. Inspect the **Structure map** (section tree + page ranges).
+4. Type a question in **Ask** → get a cited answer with confidence, hops, and cost.
+
+## Config
+- `ENGINE_URL` (default `http://localhost:7654`) — where the engine listens.
+- `VIEWER_PORT` (default `7655`) — the viewer's port.
+
+## Endpoints it uses
+`GET /v1/health` · `GET /v1/documents` · `POST /v1/documents` (multipart) ·
+`GET /v1/documents/{id}` · `GET /v1/documents/{id}/tree` ·
+`POST /v1/answer/treewalk`.
diff --git a/localapp/index.html b/localapp/index.html
new file mode 100644
index 0000000..dc4197c
--- /dev/null
+++ b/localapp/index.html
@@ -0,0 +1,505 @@
+<!doctype html>
+<html lang="en">
+<head>
+<meta charset="utf-8" />
+<meta name="viewport" content="width=device-width, initial-scale=1" />
+<title>Vectorless · local</title>
+<link rel="icon" href="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24'%3E%3Crect width='24' height='24' rx='6' fill='%231456F0'/%3E%3Cpath d='M4 4 L12 20 L20 4' stroke='white' stroke-width='3' stroke-linecap='round' stroke-linejoin='round' fill='none'/%3E%3Ccircle cx='12' cy='20' r='2' fill='%23EA5EC1'/%3E%3C/svg%3E" />
+<link rel="preconnect" href="https://fonts.googleapis.com" />
+<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
+<link href="https://fonts.googleapis.com/css2?family=Geist:wght@300;400;500;600&family=Geist+Mono:wght@400;500&family=Instrument+Serif:ital@0;1&display=swap" rel="stylesheet" />
+<script src="/vendor/pdf.min.js"></script>
+<style>
+  :root{
+    --canvas:#FCFCFD; --ink:#0A0A0A; --secondary:#3F3F46; --muted:#71717A;
+    --hairline:#E5E7EB; --hairline2:#EFEFF1; --blue:#1456F0; --pink:#EA5EC1;
+    --panel:#FFFFFF; --ok:#16A34A; --warn:#D97706; --err:#DC2626; --rail:#FAFAFB;
+  }
+  *{box-sizing:border-box}
+  html,body{margin:0;height:100%}
+  body{background:var(--canvas);color:var(--ink);font-family:"Geist",system-ui,sans-serif;font-weight:300;
+    line-height:1.5;-webkit-font-smoothing:antialiased;font-size:14px;display:flex;flex-direction:column;height:100vh;overflow:hidden}
+  .eyebrow{font-family:"Geist Mono",monospace;font-size:10.5px;font-weight:500;letter-spacing:.16em;text-transform:uppercase;color:var(--muted)}
+  .grad{background:linear-gradient(92deg,var(--blue),var(--pink));-webkit-background-clip:text;background-clip:text;color:transparent}
+  .serif{font-family:"Instrument Serif",Georgia,serif;font-style:italic;font-weight:400}
+  a{color:var(--blue);text-decoration:none}
+
+  nav{display:flex;align-items:center;gap:11px;padding:13px 22px;border-bottom:1px solid var(--hairline);flex:0 0 auto}
+  .wordmark{font-weight:500;letter-spacing:-.02em;font-size:16px}
+  .nav-meta{margin-left:14px;font-size:12px;color:var(--muted)}
+  .nav-meta b{color:var(--secondary);font-weight:500}
+  .status{margin-left:auto;display:flex;align-items:center;gap:7px;font-size:12px;color:var(--muted)}
+  .dot{width:8px;height:8px;border-radius:50%;background:var(--muted);transition:background .3s}
+  .dot.up{background:var(--ok)} .dot.down{background:var(--err)}
+  .navbtn{display:flex;align-items:center;gap:5px;background:none;border:1px solid var(--hairline);border-radius:8px;
+    padding:5px 8px;cursor:pointer;color:var(--secondary);transition:border-color .12s,color .12s}
+  .navbtn:hover{border-color:var(--blue);color:var(--blue)}
+  .keydot{width:7px;height:7px;border-radius:50%;background:var(--err)}
+  .keydot.set{background:var(--ok)}
+  /* modal */
+  .modal-bg{position:fixed;inset:0;background:rgba(10,10,10,.35);display:none;align-items:center;justify-content:center;z-index:50}
+  .modal-bg.open{display:flex}
+  .modal{background:var(--panel);border:1px solid var(--hairline);border-radius:16px;width:440px;max-width:92vw;
+    box-shadow:0 20px 60px rgba(0,0,0,.18);overflow:hidden}
+  .modal .mhead{padding:18px 22px 14px;border-bottom:1px solid var(--hairline2)}
+  .modal .mhead h3{margin:0;font-size:15px;font-weight:500} .modal .mhead p{margin:5px 0 0;font-size:12.5px;color:var(--muted)}
+  .modal .mbody{padding:18px 22px;display:flex;flex-direction:column;gap:14px}
+  .field label{display:block;font-size:12px;font-weight:500;color:var(--secondary);margin-bottom:5px}
+  .field input,.field select{width:100%;font-family:inherit;font-size:13.5px;color:var(--ink);background:var(--canvas);
+    border:1px solid var(--hairline);border-radius:9px;padding:9px 11px}
+  .field input:focus,.field select:focus{outline:none;border-color:var(--blue);box-shadow:0 0 0 3px rgba(20,86,240,.08)}
+  .field .fhint{font-size:11px;color:var(--muted);margin-top:4px}
+  .modal .mfoot{padding:14px 22px;border-top:1px solid var(--hairline2);display:flex;gap:10px;justify-content:flex-end;align-items:center}
+  .modal .mfoot .spacer{margin-right:auto;font-size:12px}
+
+  .shell{display:grid;grid-template-columns:300px 1fr;flex:1 1 auto;min-height:0}
+  @media(max-width:860px){.shell{grid-template-columns:1fr}}
+
+  aside{border-right:1px solid var(--hairline);background:var(--rail);overflow-y:auto;padding:18px}
+  .sec-label{font-family:"Geist Mono",monospace;font-size:10px;font-weight:500;letter-spacing:.12em;text-transform:uppercase;
+    color:var(--muted);margin:0 0 11px;display:flex;align-items:center;gap:7px}
+  .sec-label .ct{margin-left:auto;color:var(--muted);font-weight:400}
+  .drop{display:block;border:1.5px dashed var(--hairline);border-radius:11px;padding:22px 14px;text-align:center;cursor:pointer;
+    transition:border-color .15s,background .15s;background:var(--panel)}
+  .drop:hover,.drop.over{border-color:var(--blue);background:rgba(20,86,240,.035)}
+  .drop .big{font-size:13px;color:var(--secondary)} .drop .small{font-size:11px;color:var(--muted);margin-top:4px}
+  input[type=file]{display:none}
+  #upmsg{font-size:12px;color:var(--muted);margin-top:9px;min-height:16px}
+  .docs{margin-top:22px}
+  .doc{display:flex;gap:9px;align-items:center;padding:9px;border:1px solid transparent;border-radius:9px;cursor:pointer;
+    transition:background .12s,border-color .12s}
+  .doc:hover{background:var(--panel);border-color:var(--hairline2)}
+  .doc.active{border-color:var(--blue);background:var(--panel);box-shadow:0 0 0 1px rgba(20,86,240,.12)}
+  .doc .ic{width:22px;height:22px;border-radius:6px;background:rgba(20,86,240,.08);display:grid;place-items:center;flex:0 0 auto}
+  .doc .t{font-size:13px;min-width:0;overflow:hidden;text-overflow:ellipsis;white-space:nowrap;color:var(--ink)}
+  .doc .st{margin-left:auto;flex:0 0 auto}
+  .doc .del{flex:0 0 auto;opacity:0;border:none;background:none;color:var(--muted);cursor:pointer;font-size:15px;padding:0 2px;line-height:1;transition:opacity .12s,color .12s}
+  .doc:hover .del{opacity:1} .doc .del:hover{color:var(--err)}
+  .badge{font-family:"Geist Mono",monospace;font-size:9.5px;font-weight:500;padding:2px 7px;border-radius:20px;text-transform:uppercase;letter-spacing:.04em}
+  .badge.ready{color:var(--ok);background:rgba(22,163,74,.1)} .badge.work{color:var(--warn);background:rgba(217,119,6,.1)} .badge.fail{color:var(--err);background:rgba(220,38,38,.1)}
+  .rail-empty{font-size:12.5px;color:var(--muted);padding:6px 2px}
+
+  main{overflow-y:auto;padding:28px 36px;min-width:0}
+  .wrap{max-width:1080px;margin:0 auto}
+
+  .welcome{padding:34px 0 24px;max-width:780px}
+  .welcome h1{font-weight:500;letter-spacing:-.025em;font-size:34px;line-height:1.12;margin:12px 0 0}
+  .welcome p{color:var(--secondary);font-size:16px;max-width:54ch;margin:14px 0 0}
+  .steps{display:flex;gap:14px;margin-top:30px;flex-wrap:wrap}
+  .step{flex:1 1 150px;border:1px solid var(--hairline);border-radius:12px;padding:16px;background:var(--panel)}
+  .step .n{font-family:"Geist Mono",monospace;font-size:11px;color:var(--blue);font-weight:500}
+  .step .h{font-size:13.5px;font-weight:500;margin:7px 0 3px} .step .d{font-size:12.5px;color:var(--muted)}
+
+  .doc-head{display:flex;align-items:baseline;gap:14px;flex-wrap:wrap;padding-bottom:4px}
+  .doc-head h1{font-weight:500;letter-spacing:-.02em;font-size:23px;margin:0}
+  .doc-head .facts{font-size:12.5px;color:var(--muted)} .doc-head .facts b{color:var(--secondary);font-weight:500}
+
+  .card{background:var(--panel);border:1px solid var(--hairline);border-radius:15px;margin-top:18px;overflow:hidden}
+  .card .cap{display:flex;align-items:center;gap:8px;padding:13px 18px;border-bottom:1px solid var(--hairline2)}
+  .card .cap .ttl{font-size:12.5px;font-weight:500} .card .cap .hint{margin-left:auto;font-size:11.5px;color:var(--muted)}
+  .card .body{padding:16px 18px}
+  textarea{width:100%;font-family:inherit;font-size:15px;font-weight:300;color:var(--ink);background:var(--canvas);
+    border:1px solid var(--hairline);border-radius:11px;padding:12px 14px;resize:vertical;min-height:64px;line-height:1.5}
+  textarea:focus{outline:none;border-color:var(--blue);box-shadow:0 0 0 3px rgba(20,86,240,.08)}
+  .ask-row{display:flex;gap:11px;align-items:center;margin-top:11px}
+  .btn{font-family:inherit;font-size:13.5px;font-weight:400;border-radius:10px;border:1px solid var(--ink);background:var(--ink);
+    color:#fff;padding:9px 18px;cursor:pointer;transition:opacity .15s,transform .05s}
+  .btn:hover{opacity:.88} .btn:active{transform:translateY(1px)} .btn:disabled{opacity:.35;cursor:not-allowed}
+  .askhint{font-size:12px;color:var(--muted)}
+  .examples{display:flex;gap:7px;flex-wrap:wrap;margin-top:12px}
+  .ex{font-size:12px;color:var(--secondary);background:var(--canvas);border:1px solid var(--hairline);border-radius:20px;
+    padding:5px 11px;cursor:pointer;transition:border-color .12s,color .12s}
+  .ex:hover{border-color:var(--blue);color:var(--blue)}
+
+  /* result */
+  #result:empty{display:none}
+  .res{margin-top:16px;animation:fade .25s ease}
+  @keyframes fade{from{opacity:0;transform:translateY(4px)}to{opacity:1;transform:none}}
+  .lbl{font-family:"Geist Mono",monospace;font-size:10px;letter-spacing:.08em;text-transform:uppercase;color:var(--muted);margin-bottom:8px}
+  .answer-text{font-size:17px;line-height:1.6;color:var(--ink);font-weight:400}
+  .chips{display:flex;gap:7px;flex-wrap:wrap;margin-top:14px}
+  .chip{font-family:"Geist Mono",monospace;font-size:11px;color:var(--secondary);background:var(--canvas);border:1px solid var(--hairline);border-radius:20px;padding:3px 9px}
+  .chip.good{color:var(--ok);border-color:rgba(22,163,74,.3)}
+
+  .split{display:grid;grid-template-columns:1fr 1fr;gap:16px;margin-top:16px}
+  @media(max-width:900px){.split{grid-template-columns:1fr}}
+
+  .cite{padding:10px 0;border-top:1px dashed var(--hairline);cursor:pointer;transition:background .1s}
+  .cite:first-of-type{border-top:none} .cite:hover{background:rgba(20,86,240,.02)}
+  .cite.sel{background:rgba(20,86,240,.04)}
+  .cite .pg{font-family:"Geist Mono",monospace;font-size:11px;color:var(--blue);font-weight:500;display:flex;align-items:center;gap:6px}
+  .cite .q{font-size:13px;color:var(--secondary);margin-top:3px;line-height:1.5} .cite .q::before{content:"“"} .cite .q::after{content:"”"}
+
+  .sec-item{padding:11px 0;border-top:1px solid var(--hairline2)} .sec-item:first-of-type{border-top:none}
+  .sec-item .h{display:flex;align-items:center;gap:8px}
+  .sec-item .title{font-size:13.5px;font-weight:500;color:var(--ink)}
+  .sec-item .pg{margin-left:auto;font-family:"Geist Mono",monospace;font-size:10px;color:var(--muted)}
+  .sec-item .snip{font-size:12.5px;color:var(--muted);margin-top:5px;line-height:1.5;max-height:60px;overflow:hidden}
+
+  /* pdf preview */
+  .preview .cap .nav{margin-left:auto;display:flex;align-items:center;gap:8px}
+  .pvbtn{border:1px solid var(--hairline);background:var(--panel);border-radius:7px;width:26px;height:24px;cursor:pointer;
+    color:var(--secondary);font-size:13px;line-height:1;display:grid;place-items:center;transition:border-color .12s}
+  .pvbtn:hover{border-color:var(--blue);color:var(--blue)} .pvbtn:disabled{opacity:.35;cursor:not-allowed}
+  .pvpage{font-family:"Geist Mono",monospace;font-size:11px;color:var(--muted)}
+  .pvwrap{background:#F4F4F5;display:grid;place-items:center;padding:14px;min-height:280px;max-height:560px;overflow:auto}
+  .pvwrap canvas{box-shadow:0 1px 8px rgba(0,0,0,.12);border-radius:3px;max-width:100%;height:auto}
+  .pvmsg{color:var(--muted);font-size:12.5px}
+
+  .tree{max-height:300px;overflow:auto;font-size:13px}
+  .node{display:flex;gap:8px;padding:3.5px 0;color:var(--secondary)} .node:hover{color:var(--ink)}
+  .node .pg{margin-left:auto;font-family:"Geist Mono",monospace;font-size:10px;color:var(--muted)} .node.d0{font-weight:500;color:var(--ink);margin-top:3px}
+
+  .spin{display:inline-block;width:13px;height:13px;border:2px solid var(--hairline);border-top-color:var(--blue);border-radius:50%;animation:sp .7s linear infinite;vertical-align:-2px;margin-right:7px}
+  @keyframes sp{to{transform:rotate(360deg)}}
+  .empty{color:var(--muted);font-size:12.5px;padding:6px 0} .err{color:var(--err);font-size:13px}
+</style>
+</head>
+<body>
+<nav>
+  <svg width="25" height="25" viewBox="0 0 24 24" fill="none" aria-label="Vectorless">
+    <rect width="24" height="24" rx="6" fill="#1456F0"/>
+    <path d="M4 4 L12 20 L20 4" stroke="#fff" stroke-width="3" stroke-linecap="round" stroke-linejoin="round" fill="none"/>
+    <circle cx="12" cy="20" r="2" fill="#EA5EC1"/>
+  </svg>
+  <span class="wordmark">Vectorless</span>
+  <span class="nav-meta">local engine · <b id="modelName">glm-4.6</b> · treewalk</span>
+  <button id="settingsBtn" class="navbtn" title="API key & model settings">
+    <svg width="15" height="15" viewBox="0 0 24 24" fill="none"><path d="M12 15a3 3 0 100-6 3 3 0 000 6z" stroke="currentColor" stroke-width="2"/><path d="M19.4 15a1.65 1.65 0 00.33 1.82l.06.06a2 2 0 11-2.83 2.83l-.06-.06a1.65 1.65 0 00-1.82-.33 1.65 1.65 0 00-1 1.51V21a2 2 0 11-4 0v-.09A1.65 1.65 0 008 19.4a1.65 1.65 0 00-1.82.33l-.06.06a2 2 0 11-2.83-2.83l.06-.06a1.65 1.65 0 00.33-1.82 1.65 1.65 0 00-1.51-1H2a2 2 0 110-4h.09A1.65 1.65 0 004.6 8a1.65 1.65 0 00-.33-1.82l-.06-.06a2 2 0 112.83-2.83l.06.06a1.65 1.65 0 001.82.33H9a1.65 1.65 0 001-1.51V2a2 2 0 114 0v.09a1.65 1.65 0 001 1.51 1.65 1.65 0 001.82-.33l.06-.06a2 2 0 112.83 2.83l-.06.06a1.65 1.65 0 00-.33 1.82V9a1.65 1.65 0 001.51 1H22a2 2 0 110 4h-.09a1.65 1.65 0 00-1.51 1z" stroke="currentColor" stroke-width="2" stroke-linejoin="round"/></svg>
+    <span id="keyStatus" class="keydot"></span>
+  </button>
+  <span class="status"><span id="hdot" class="dot"></span><span id="hstat">checking…</span></span>
+</nav>
+
+<div class="shell">
+  <aside>
+    <div class="sec-label">Upload</div>
+    <label class="drop" id="drop">
+      <div class="big">Drop a PDF, or <u>browse</u></div>
+      <div class="small">PDF · DOCX · HTML · MD · TXT</div>
+      <input type="file" id="file" accept=".pdf,.docx,.html,.htm,.md,.txt" />
+    </label>
+    <div id="upmsg"></div>
+    <div class="docs">
+      <div class="sec-label">Documents <span class="ct" id="docct"></span></div>
+      <div id="docs"><div class="rail-empty">Loading…</div></div>
+    </div>
+  </aside>
+
+  <main>
+    <div class="wrap">
+      <section id="welcome" class="welcome">
+        <div class="eyebrow">Reasoning-based retrieval</div>
+        <h1>Ask a document. Get a <span class="grad">cited answer</span>.</h1>
+        <p>Upload a PDF — the engine parses it into a structured tree and an LLM navigates that
+           structure to answer, <span class="serif">no chunking, no embeddings, no vectors</span>.</p>
+        <div class="steps">
+          <div class="step"><div class="n">01</div><div class="h">Upload</div><div class="d">Drop a PDF in the left rail.</div></div>
+          <div class="step"><div class="n">02</div><div class="h">Ingest</div><div class="d">Parsed into a section tree in seconds.</div></div>
+          <div class="step"><div class="n">03</div><div class="h">Ask</div><div class="d">Cited answers — page, quote & preview.</div></div>
+        </div>
+      </section>
+
+      <section id="workspace" style="display:none">
+        <div class="doc-head"><h1 id="wsTitle"></h1><span class="facts" id="wsFacts"></span></div>
+
+        <div class="card">
+          <div class="cap"><span class="ttl">Ask</span><span class="hint">answered via treewalk · ⌘/Ctrl+Enter</span></div>
+          <div class="body">
+            <textarea id="q" placeholder="e.g. What were the company's capital expenditures in fiscal year 2018?"></textarea>
+            <div class="ask-row"><button class="btn" id="ask">Ask</button><span class="askhint" id="askhint"></span></div>
+            <div class="examples" id="examples"></div>
+            <div id="result"></div>
+          </div>
+        </div>
+
+        <div class="card">
+          <div class="cap"><span class="ttl">Structure map</span><span class="hint" id="treeHint"></span></div>
+          <div class="body" style="padding-top:10px"><div id="tree"><div class="empty">Loading…</div></div></div>
+        </div>
+      </section>
+    </div>
+  </main>
+</div>
+
+<div class="modal-bg" id="settingsModal">
+  <div class="modal">
+    <div class="mhead">
+      <h3>Model & API key</h3>
+      <p>Your key is stored in this browser only and sent with each query as a request header — never persisted on the server.</p>
+    </div>
+    <div class="mbody">
+      <div class="field"><label>Provider</label>
+        <select id="setProvider">
+          <option value="anthropic">Anthropic-compatible (GLM / Z.ai, Claude)</option>
+          <option value="openai">OpenAI</option>
+          <option value="gemini">Gemini</option>
+        </select>
+      </div>
+      <div class="field"><label>API key</label>
+        <input id="setKey" type="password" placeholder="paste your API key" autocomplete="off" />
+        <div class="fhint">e.g. your GLM key from z.ai. Required to ask questions.</div>
+      </div>
+      <div class="field"><label>Base URL <span style="color:var(--muted)">(optional)</span></label>
+        <input id="setBase" type="text" placeholder="https://api.z.ai/api/anthropic/v1" />
+        <div class="fhint">Leave blank to use the server default. For GLM it must end in <code>/v1</code>.</div>
+      </div>
+      <div class="field"><label>Model <span style="color:var(--muted)">(optional)</span></label>
+        <input id="setModel" type="text" placeholder="glm-4.6" />
+      </div>
+    </div>
+    <div class="mfoot">
+      <span class="spacer" id="setStatus"></span>
+      <button class="btn ghost" id="setCancel">Cancel</button>
+      <button class="btn" id="setSave">Save</button>
+    </div>
+  </div>
+</div>
+
+<script>
+if(window.pdfjsLib) pdfjsLib.GlobalWorkerOptions.workerSrc = "/vendor/pdf.worker.min.js";
+
+// ---- BYOK settings (browser-local) ----
+const LS="vl_llm_settings";
+const DEFAULTS={provider:"anthropic",baseUrl:"https://api.z.ai/api/anthropic/v1",model:"glm-4.6",apiKey:""};
+function getSettings(){ try{ return {...DEFAULTS, ...JSON.parse(localStorage.getItem(LS)||"{}")}; }catch{ return {...DEFAULTS}; } }
+function saveSettings(s){ localStorage.setItem(LS, JSON.stringify(s)); refreshKeyStatus(); }
+function llmHeaders(){ const s=getSettings(); const h={}; if(s.apiKey){ h["X-LLM-Api-Key"]=s.apiKey;
+  if(s.provider) h["X-LLM-Provider"]=s.provider; if(s.baseUrl) h["X-LLM-Base-Url"]=s.baseUrl; if(s.model) h["X-LLM-Model"]=s.model; } return h; }
+function refreshKeyStatus(){ const s=getSettings(); const dot=document.getElementById("keyStatus");
+  dot.className="keydot"+(s.apiKey?" set":""); dot.title=s.apiKey?`key set · ${s.model||"default model"}`:"no API key — click to configure";
+  if(s.model) document.getElementById("modelName").textContent=s.model; }
+function openSettings(){ const s=getSettings();
+  document.getElementById("setProvider").value=s.provider; document.getElementById("setKey").value=s.apiKey;
+  document.getElementById("setBase").value=s.baseUrl===DEFAULTS.baseUrl?"":s.baseUrl;
+  document.getElementById("setModel").value=s.model===DEFAULTS.model?"":s.model;
+  document.getElementById("setStatus").textContent=""; document.getElementById("settingsModal").classList.add("open"); }
+function closeSettings(){ document.getElementById("settingsModal").classList.remove("open"); }
+document.getElementById("settingsBtn").onclick=openSettings;
+document.getElementById("setCancel").onclick=closeSettings;
+document.getElementById("settingsModal").onclick=e=>{ if(e.target.id==="settingsModal") closeSettings(); };
+document.getElementById("setSave").onclick=()=>{
+  saveSettings({ provider:document.getElementById("setProvider").value,
+    apiKey:document.getElementById("setKey").value.trim(),
+    baseUrl:document.getElementById("setBase").value.trim()||DEFAULTS.baseUrl,
+    model:document.getElementById("setModel").value.trim()||DEFAULTS.model });
+  closeSettings();
+};
+const E = p => "/engine" + p;
+let activeDoc=null, pollTimer=null, pdfDoc=null, pdfPage=1, pdfRange=[1,1];
+const EXAMPLES=["What were the capital expenditures in fiscal year 2018?","What was total net sales for the year?","What was total debt at year-end?","Summarize the company's business segments."];
+
+async function health(){
+  try{ const r=await fetch(E("/v1/health"));
+    document.getElementById("hdot").className="dot "+(r.ok?"up":"down");
+    document.getElementById("hstat").textContent=r.ok?"online · :7654":"error";
+  }catch{ document.getElementById("hdot").className="dot down"; document.getElementById("hstat").textContent="offline"; }
+}
+function badge(st){ /* status pill for the rail; the label is engine-supplied text, so it is HTML-escaped */ const c=st==="ready"?"ready":(st==="failed"?"fail":"work"); return `<span class="badge ${c}">${esc(st)}</span>`; }
+const fileSvg=`<svg width="11" height="11" viewBox="0 0 24 24" fill="none"><path d="M14 3v5h5M14 3H7a2 2 0 0 0-2 2v14a2 2 0 0 0 2 2h10a2 2 0 0 0 2-2V8l-5-5z" stroke="#1456F0" stroke-width="2" stroke-linejoin="round"/></svg>`;
+
+async function loadDocs(){
+  const box=document.getElementById("docs");
+  try{
+    const r=await fetch(E("/v1/documents?limit=100")); const d=await r.json(); const items=d.items||[];
+    document.getElementById("docct").textContent=items.length||"";
+    if(!items.length){ box.innerHTML=`<div class="rail-empty">No documents yet.<br>Upload one above.</div>`; return; }
+    box.innerHTML=items.map(it=>`
+      <div class="doc ${activeDoc&&activeDoc.id===it.id?"active":""}" data-id="${it.id}" data-title="${esc(it.title)}" data-status="${it.status}" data-ct="${esc(it.content_type||'')}">
+        <span class="ic">${fileSvg}</span><span class="t" title="${esc(it.title)}">${esc(it.title||it.id)}</span>
+        <span class="st">${badge(it.status)}</span><button class="del" title="Delete" data-del="${it.id}">×</button>
+      </div>`).join("");
+    box.querySelectorAll(".doc").forEach(el=>el.addEventListener("click",ev=>{ if(ev.target.dataset.del) return;
+      selectDoc({id:el.dataset.id,title:el.dataset.title,status:el.dataset.status,ct:el.dataset.ct}); }));
+    box.querySelectorAll(".del").forEach(b=>b.addEventListener("click",ev=>{ev.stopPropagation();delDoc(b.dataset.del);}));
+  }catch{ box.innerHTML=`<div class="err">Failed to load.</div>`; }
+}
+// Delete a document and refresh the rail; the workspace is reset only when the document is really gone (2xx or 404).
+async function delDoc(id){ let gone=false; try{ const r=await fetch(E(`/v1/documents/${encodeURIComponent(id)}`),{method:"DELETE"}); gone=r.ok||r.status===404; }catch{ /* network error — the refresh below shows the real state */ } if(gone&&activeDoc&&activeDoc.id===id){ activeDoc=null; document.getElementById("welcome").style.display=""; document.getElementById("workspace").style.display="none"; } loadDocs(); }
+
+async function selectDoc(doc){
+  activeDoc=doc; pdfDoc=null;
+  document.querySelectorAll(".doc").forEach(el=>el.classList.toggle("active",el.dataset.id===doc.id));
+  document.getElementById("welcome").style.display="none";
+  document.getElementById("workspace").style.display="";
+  document.getElementById("wsTitle").textContent=doc.title||doc.id;
+  document.getElementById("result").innerHTML="";
+  const ready=doc.status==="ready";
+  document.getElementById("ask").disabled=!ready;
+  document.getElementById("askhint").textContent=ready?"":`Document is “${doc.status}” — wait until ready.`;
+  document.getElementById("examples").innerHTML=ready?EXAMPLES.map(e=>`<span class="ex">${esc(e)}</span>`).join(""):"";
+  document.querySelectorAll(".ex").forEach(x=>x.onclick=()=>{document.getElementById("q").value=x.textContent;document.getElementById("q").focus();});
+  loadTree(doc.id);
+}
+
+async function loadTree(id){
+  const box=document.getElementById("tree"); box.innerHTML=`<div class="empty"><span class="spin"></span>Loading structure…</div>`;
+  try{
+    const r=await fetch(E(`/v1/documents/${id}/tree`)); const d=await r.json();
+    const secs=(d.sections||[]).filter(s=>s.title); const pages=secs.reduce((m,s)=>Math.max(m,s.page_end||0),0);
+    document.getElementById("wsFacts").innerHTML=`<b>${secs.length}</b> sections${pages?` · pp. 1–<b>${pages}</b>`:""}`;
+    document.getElementById("treeHint").textContent=`${secs.length} sections`;
+    if(!secs.length){ box.innerHTML=`<div class="empty">No sections.</div>`; return; }
+    box.innerHTML=`<div class="tree">`+secs.slice(0,400).map(s=>{
+      const indent=Math.min(s.depth||0,6)*15; const pg=s.page_start?`p${s.page_start}${s.page_end&&s.page_end!==s.page_start?"–"+s.page_end:""}`:"";
+      return `<div class="node d${Math.min(s.depth||0,1)}" style="padding-left:${indent}px"><span>${esc(s.title)}</span>${pg?`<span class="pg">${pg}</span>`:""}</div>`;
+    }).join("")+`</div>`;
+  }catch{ box.innerHTML=`<div class="err">Failed to load tree.</div>`; }
+}
+
+// upload
+const drop=document.getElementById("drop"), fileIn=document.getElementById("file");
+["dragenter","dragover"].forEach(ev=>drop.addEventListener(ev,e=>{e.preventDefault();drop.classList.add("over")}));
+["dragleave","drop"].forEach(ev=>drop.addEventListener(ev,e=>{e.preventDefault();drop.classList.remove("over")}));
+drop.addEventListener("drop",e=>{ if(e.dataTransfer.files[0]) upload(e.dataTransfer.files[0]); });
+fileIn.addEventListener("change",e=>{ if(e.target.files[0]) upload(e.target.files[0]); });
+async function upload(file){
+  const msg=document.getElementById("upmsg"); msg.innerHTML=`<span class="spin"></span>Uploading ${esc(file.name)}…`;
+  try{ const fd=new FormData(); fd.append("file",file);
+    const r=await fetch(E("/v1/documents"),{method:"POST",body:fd}); const d=await r.json();
+    if(!r.ok||!d.document_id){ msg.innerHTML=`<span class="err">Upload failed</span>`; return; }
+    msg.innerHTML=`<span class="spin"></span>Ingesting…`; pollIngest(d.document_id,file.name,msg);
+  }catch(e){ msg.innerHTML=`<span class="err">${esc(String(e))}</span>`; }
+}
+function pollIngest(id,name,msg){ clearTimeout(pollTimer);
+  const tick=async()=>{ try{
+      const r=await fetch(E(`/v1/documents/${id}`)); const d=await r.json();
+      if(d.status==="ready"){ msg.innerHTML=`<span style="color:var(--ok)">✓</span> ${esc(name)} ready.`; await loadDocs(); selectDoc({id,title:d.title||name,status:"ready",ct:d.content_type}); return; }
+      if(d.status==="failed"){ msg.innerHTML=`<span class="err">Ingest failed: ${esc(d.error_message||"")}</span>`; await loadDocs(); return; }
+      msg.innerHTML=`<span class="spin"></span>Ingesting — <b>${esc(d.status)}</b>`; pollTimer=setTimeout(tick,1400);
+    }catch{ pollTimer=setTimeout(tick,2200); } }; tick();
+}
+
+// ask
+document.getElementById("ask").addEventListener("click",ask);
+document.getElementById("q").addEventListener("keydown",e=>{ if((e.metaKey||e.ctrlKey)&&e.key==="Enter") ask(); });
+async function ask(){
+  if(!activeDoc) return; const q=document.getElementById("q").value.trim(); if(!q) return;
+  if(!getSettings().apiKey){ openSettings(); document.getElementById("setStatus").innerHTML='<span style="color:var(--warn)">Set an API key to ask questions.</span>'; return; }
+  const out=document.getElementById("result"), btn=document.getElementById("ask"); btn.disabled=true; pdfDoc=null;
+  out.innerHTML=`<div class="res card" style="margin-top:16px"><div class="body"><span class="spin"></span><span class="askhint">Navigating the document…</span></div></div>`;
+  const t0=performance.now();
+  try{
+    const r=await fetch(E("/v1/answer/treewalk"),{method:"POST",headers:{"Content-Type":"application/json",...llmHeaders()},body:JSON.stringify({document_id:activeDoc.id,query:q})});
+    const d=await r.json();
+    if(!r.ok){ out.innerHTML=`<div class="res card"><div class="body err">Error: ${esc(d.error||JSON.stringify(d))}</div></div>`; return; }
+    renderResult(d,Math.round(performance.now()-t0));
+  }catch(e){ out.innerHTML=`<div class="res card"><div class="body err">${esc(String(e))}</div></div>`; }
+  finally{ btn.disabled=false; }
+}
+
+function renderResult(d,ms){
+  const u=d.usage||{}; const conf=(typeof d.confidence==="number")?Math.round(d.confidence*100)+"%":"—";
+  const cost=(u.cost_usd!=null)?"$"+Number(u.cost_usd).toFixed(4):"—";
+  const chips=[`<span class="chip good">confidence ${conf}</span>`,`<span class="chip">${d.hops_taken??"?"} hops</span>`,
+    `<span class="chip">${u.total_tokens??"?"} tokens</span>`,`<span class="chip">${cost}</span>`,`<span class="chip">${d.elapsed_ms??ms} ms</span>`,
+    d.model?`<span class="chip">${esc(d.model)}</span>`:""].join("");
+  const cites=d.citations||[];
+  const citeHtml=cites.map((c,i)=>{ const ps=c.start_page,pe=c.end_page; const pg=ps?`pages ${ps}${pe&&pe!==ps?"–"+pe:""}`:"section";
+    return `<div class="cite ${i===0?"sel":""}" data-ps="${ps||0}" data-pe="${pe||ps||0}"><div class="pg">▸ ${esc(pg)}</div>${c.quote?`<div class="q">${esc(c.quote)}</div>`:""}</div>`;
+  }).join("")||`<div class="empty">No page citations.</div>`;
+
+  // collect unique section ids the engine relied on (citations first, then pages_read)
+  const secIds=[]; const seen=new Set();
+  (cites||[]).forEach(c=>(c.section_ids||[]).forEach(s=>{if(!seen.has(s)){seen.add(s);secIds.push(s);}}));
+  (d.pages_read||[]).forEach(p=>(p.section_ids||[]).forEach(s=>{if(!seen.has(s)){seen.add(s);secIds.push(s);}}));
+
+  document.getElementById("result").innerHTML=`
+    <div class="res">
+      <div class="card"><div class="body">
+        <div class="lbl">Answer</div><div class="answer-text">${esc(d.answer||"(no answer)")}</div>
+        <div class="chips">${chips}</div>
+      </div></div>
+      <div class="split">
+        <div class="card">
+          <div class="cap"><span class="ttl">Citations &amp; sections read</span></div>
+          <div class="body">
+            <div class="lbl">Cited pages</div>${citeHtml}
+            <div class="lbl" style="margin-top:16px">Sections the engine read <span style="color:var(--muted)">(${secIds.length})</span></div>
+            <div id="secList"><div class="empty"><span class="spin"></span>Loading sections…</div></div>
+          </div>
+        </div>
+        <div class="card preview">
+          <div class="cap"><span class="ttl">Source preview</span>
+            <span class="nav"><a id="pvOpen" class="pvpage" href="#" target="_blank" rel="noopener" style="margin-right:4px">open ↗</a><button class="pvbtn" id="pvPrev">‹</button><span class="pvpage" id="pvLabel">—</span><button class="pvbtn" id="pvNext">›</button></span>
+          </div>
+          <div class="pvwrap" id="pvWrap"><div class="pvmsg">Loading page…</div></div>
+        </div>
+      </div>
+    </div>`;
+
+  // wire citation click → preview that page
+  document.querySelectorAll("#result .cite").forEach(el=>el.addEventListener("click",()=>{
+    document.querySelectorAll("#result .cite").forEach(c=>c.classList.remove("sel")); el.classList.add("sel");
+    const ps=+el.dataset.ps||1, pe=+el.dataset.pe||ps; pdfRange=[ps,pe]; showPdfPage(ps);
+  }));
+  document.getElementById("pvPrev").onclick=()=>showPdfPage(pdfPage-1);
+  document.getElementById("pvNext").onclick=()=>showPdfPage(pdfPage+1);
+
+  loadSections(secIds);
+  const firstPs = cites[0] && cites[0].start_page ? cites[0].start_page : 1;
+  const firstPe = cites[0] && cites[0].end_page ? cites[0].end_page : firstPs;
+  pdfRange=[firstPs,firstPe];
+  loadPdf(activeDoc.id, firstPs);
+}
+
+async function loadSections(ids){
+  const box=document.getElementById("secList");
+  if(!ids.length){ box.innerHTML=`<div class="empty">The engine answered without pinning specific sections.</div>`; return; }
+  try{
+    const results=await Promise.all(ids.slice(0,8).map(id=>fetch(E(`/v1/sections/${id}`)).then(r=>r.ok?r.json():null).catch(()=>null)));
+    const ok=results.filter(Boolean);
+    if(!ok.length){ box.innerHTML=`<div class="empty">Section details unavailable.</div>`; return; }
+    box.innerHTML=ok.map(s=>{
+      const pg=s.page_start?`p${s.page_start}${s.page_end&&s.page_end!==s.page_start?"–"+s.page_end:""}`:"";
+      const snip=(s.summary&&s.summary.trim())?s.summary:cleanSnip(s.content);
+      return `<div class="sec-item"><div class="h"><span class="title">${esc(s.title||"Untitled section")}</span>${pg?`<span class="pg">${pg}</span>`:""}</div>
+        ${snip?`<div class="snip">${esc(snip)}</div>`:""}</div>`;
+    }).join("");
+  }catch{ box.innerHTML=`<div class="err">Failed to load sections.</div>`; }
+}
+function cleanSnip(t){ if(!t) return ""; return t.replace(/\s+/g," ").trim().slice(0,200)+(t.length>200?"…":""); }
+
+// pdf preview
+async function loadPdf(docId, page){
+  const wrap=document.getElementById("pvWrap");
+  const srcUrl=E(`/v1/documents/${docId}/source`);
+  const openLink=document.getElementById("pvOpen"); if(openLink) openLink.href=srcUrl;
+  const fallback=()=>{ wrap.innerHTML=`<div class="pvmsg">Inline preview didn’t load — <a href="${srcUrl}" target="_blank" rel="noopener">open the source ↗</a>.</div>`; };
+  if(!window.pdfjsLib){ fallback(); return; }
+  try{
+    // disableRange/Stream: the local proxy serves the whole body with a 200
+    // (no HTTP Range support), so force one plain GET instead of range/stream
+    // requests that would otherwise stall.
+    const task=pdfjsLib.getDocument({url:srcUrl,disableRange:true,disableStream:true});
+    const timeout=new Promise((_,rej)=>setTimeout(()=>rej(new Error("timeout")),9000));
+    pdfDoc=await Promise.race([task.promise,timeout]); showPdfPage(page||1);
+  }catch(e){ fallback(); }
+}
+async function showPdfPage(n){
+  if(!pdfDoc) return; const wrap=document.getElementById("pvWrap");
+  n=Math.max(1,Math.min(n,pdfDoc.numPages)); pdfPage=n;
+  document.getElementById("pvLabel").textContent=`page ${n} / ${pdfDoc.numPages}`;
+  document.getElementById("pvPrev").disabled=(n<=1); document.getElementById("pvNext").disabled=(n>=pdfDoc.numPages);
+  try{
+    const page=await pdfDoc.getPage(n);
+    const vp0=page.getViewport({scale:1}); const scale=Math.min(2,(wrap.clientWidth-28)/vp0.width)*1.4;
+    const vp=page.getViewport({scale:Math.max(1,scale)});
+    const canvas=document.createElement("canvas"); canvas.width=vp.width; canvas.height=vp.height;
+    await page.render({canvasContext:canvas.getContext("2d"),viewport:vp}).promise;
+    wrap.innerHTML=""; wrap.appendChild(canvas);
+  }catch(e){ wrap.innerHTML=`<div class="pvmsg">Could not render page ${n}.</div>`; }
+}
+
+function esc(s){return String(s??"").replace(/[&<>"']/g,c=>({'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'}[c]))}
+async function init(){ await loadDocs();
+  const p=new URLSearchParams(location.search); const want=p.get("doc");
+  if(want){ try{ const r=await fetch(E(`/v1/documents/${want}`)); if(r.ok){ const d=await r.json();
+    await selectDoc({id:want,title:d.title||want,status:d.status,ct:d.content_type});
+    const q=p.get("ask"); if(q){ document.getElementById("q").value=q; ask(); } } }catch{} }
+  if(new URLSearchParams(location.search).get("settings")) openSettings();
+}
+refreshKeyStatus(); health(); init(); setInterval(health,8000);
+</script>
+</body>
+</html>
diff --git a/localapp/serve.py b/localapp/serve.py
new file mode 100644
index 0000000..6eb330a
--- /dev/null
+++ b/localapp/serve.py
@@ -0,0 +1,144 @@
+#!/usr/bin/env python3
+"""
+Local viewer for the vectorless-engine.
+
+Serves the single-page viewer (index.html) AND reverse-proxies every
+request under /engine/* to the engine on :7654. Same-origin, so the
+browser never makes a cross-origin call — no CORS config needed on the
+engine (the OSS `engine --local` binary emits no CORS headers).
+
+    python serve.py                  # viewer on http://localhost:7655, engine assumed on :7654
+    VIEWER_PORT=8000 ENGINE_URL=http://localhost:7654 python serve.py
+
+This is the minimal local-app shell tracked as HAL-188.
+"""
+import os
+import sys
+import urllib.request
+import urllib.error
+from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
+
+HERE = os.path.dirname(os.path.abspath(__file__))
+ENGINE_URL = os.environ.get("ENGINE_URL", "http://localhost:7654").rstrip("/")
+PORT = int(os.environ.get("VIEWER_PORT", "7655"))
+# Bind host. Default localhost-only for local dev safety; set HOST=0.0.0.0 to
+# expose it (the all-in-one Docker image does this so the mapped port works).
+HOST = os.environ.get("HOST", "127.0.0.1")
+
+# Hop-by-hop / host headers we must not forward verbatim.
+_SKIP_REQ = {"host", "connection", "content-length", "accept-encoding"}
+_SKIP_RESP = {"transfer-encoding", "connection", "content-encoding", "content-length"}
+
+
+class Handler(BaseHTTPRequestHandler):
+    protocol_version = "HTTP/1.1"
+
+    # ---- static viewer ----
+    def _serve_index(self):
+        try:
+            with open(os.path.join(HERE, "index.html"), "rb") as f:
+                body = f.read()
+        except FileNotFoundError:
+            self.send_error(404, "index.html not found next to serve.py")
+            return
+        self.send_response(200)
+        self.send_header("Content-Type", "text/html; charset=utf-8")
+        self.send_header("Content-Length", str(len(body)))
+        self.send_header("Cache-Control", "no-store")
+        self.end_headers()
+        self.wfile.write(body)
+
+    # ---- reverse proxy to the engine ----
+    def _proxy(self, method):
+        target = ENGINE_URL + self.path[len("/engine"):]
+        length = int(self.headers.get("Content-Length", 0) or 0)
+        body = self.rfile.read(length) if length else None
+
+        req = urllib.request.Request(target, data=body, method=method)
+        for k, v in self.headers.items():
+            if k.lower() not in _SKIP_REQ:
+                req.add_header(k, v)
+
+        try:
+            resp = urllib.request.urlopen(req, timeout=300)
+            data = resp.read()
+            status = resp.status
+            headers = resp.getheaders()
+        except urllib.error.HTTPError as e:
+            data = e.read()
+            status = e.code
+            headers = list(e.headers.items())
+        except urllib.error.URLError as e:
+            msg = f'{{"error":"cannot reach engine at {ENGINE_URL}: {e.reason}"}}'.encode()
+            self.send_response(502)
+            self.send_header("Content-Type", "application/json")
+            self.send_header("Content-Length", str(len(msg)))
+            self.end_headers()
+            self.wfile.write(msg)
+            return
+
+        self.send_response(status)
+        sent_ct = False
+        for k, v in headers:
+            if k.lower() in _SKIP_RESP:
+                continue
+            if k.lower() == "content-type":
+                sent_ct = True
+            self.send_header(k, v)
+        if not sent_ct:
+            self.send_header("Content-Type", "application/json")
+        self.send_header("Content-Length", str(len(data)))
+        self.end_headers()
+        self.wfile.write(data)
+
+    _CT = {".html": "text/html; charset=utf-8", ".svg": "image/svg+xml",
+           ".css": "text/css", ".js": "text/javascript", ".ico": "image/x-icon",
+           ".png": "image/png"}
+
+    def _serve_static(self, path):
+        rel = path.lstrip("/") or "index.html"
+        # contain to this directory — no traversal
+        full = os.path.normpath(os.path.join(HERE, rel))
+        if not full.startswith(HERE) or not os.path.isfile(full):
+            self.send_error(404)
+            return
+        with open(full, "rb") as f:
+            body = f.read()
+        ext = os.path.splitext(full)[1].lower()
+        self.send_response(200)
+        self.send_header("Content-Type", self._CT.get(ext, "application/octet-stream"))
+        self.send_header("Content-Length", str(len(body)))
+        self.send_header("Cache-Control", "no-store")
+        self.end_headers()
+        self.wfile.write(body)
+
+    def do_GET(self):
+        path = self.path.split("?", 1)[0]
+        if self.path.startswith("/engine/"):
+            self._proxy("GET")
+        else:
+            self._serve_static(path)
+
+    def do_POST(self):
+        if self.path.startswith("/engine/"):
+            self._proxy("POST")
+        else:
+            self.send_error(404)
+
+    def do_DELETE(self):
+        if self.path.startswith("/engine/"):
+            self._proxy("DELETE")
+        else:
+            self.send_error(404)
+
+    def log_message(self, *a):  # quiet
+        pass
+
+
+if __name__ == "__main__":
+    print(f"Vectorless local viewer -> http://localhost:{PORT}  (bind {HOST}:{PORT})")
+    print(f"Proxying /engine/* -> {ENGINE_URL}")
+    try:
+        ThreadingHTTPServer((HOST, PORT), Handler).serve_forever()
+    except KeyboardInterrupt:
+        sys.exit(0)
diff --git a/pkg/config/config.go b/pkg/config/config.go
index 3a3dab1..12dfe0c 100644
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -386,8 +386,15 @@ type AnthropicBlock struct {
 	// BaseURL overrides the Anthropic API endpoint. Empty = official
 	// api.anthropic.com. Set this to point the Anthropic driver at any
 	// Anthropic-compatible gateway — e.g. GLM/Zhipu's
-	// https://api.z.ai/api/anthropic — so the same driver can drive a
+	// https://api.z.ai/api/anthropic/v1 — so the same driver can drive a
 	// non-Anthropic model that speaks the Messages API.
+	//
+	// IMPORTANT: the value must include the API version segment (.../v1).
+	// The underlying client posts to "${base_url}/messages" (its built-in
+	// default is https://api.anthropic.com/v1), so a base_url WITHOUT /v1
+	// resolves to .../anthropic/messages — which z.ai answers with an
+	// HTTP 200 body {"code":500,"msg":"404 NOT_FOUND"} and the engine then
+	// reports the opaque "anthropic: no response".
 	BaseURL string `yaml:"base_url"`
 }
 
@@ -934,7 +941,8 @@ func applyEnvOverrides(c *Config) {
 	}
 	// Anthropic-driver overrides. These let an operator point the
 	// anthropic driver at an Anthropic-compatible gateway (e.g. GLM via
-	// https://api.z.ai/api/anthropic) without baking the values into the
+	// https://api.z.ai/api/anthropic/v1 — the /v1 is required; see
+	// AnthropicBlock.BaseURL) without baking the values into the
 	// config file or secret.
 	if v := os.Getenv("VLE_LLM_ANTHROPIC_API_KEY"); v != "" {
 		c.LLM.Anthropic.APIKey = v
diff --git a/pkg/ingest/ingest.go b/pkg/ingest/ingest.go
index dc189c7..373affe 100644
--- a/pkg/ingest/ingest.go
+++ b/pkg/ingest/ingest.go
@@ -540,7 +540,7 @@ func runParallelStages(ctx context.Context, summarizeFn, hydeFn func(context.Con
 }
 
 func (p *Pipeline) parse(ctx context.Context, parsers *parser.Registry, pl Payload) (*parser.ParsedDoc, error) {
-	rc, _, err := p.Storage.Get(ctx, pl.SourceRef)
+	rc, _, err := getSourceWithRetry(ctx, p.Storage, pl.SourceRef)
 	if err != nil {
 		return nil, fmt.Errorf("fetch source: %w", err)
 	}
@@ -548,6 +548,34 @@ func (p *Pipeline) parse(ctx context.Context, parsers *parser.Registry, pl Paylo
 	return parsers.Parse(ctx, pl.ContentType, pl.Filename, rc)
 }
 
+// getSourceWithRetry fetches a freshly-uploaded object, tolerating the
+// brief window where the background ingest job (enqueued right after the
+// upload handler's Storage.Put) outraces the source bytes becoming
+// visible. Storage.Put now fsyncs, so this is belt-and-suspenders for
+// slower or eventually-consistent backends. Only storage.ErrNotFound is
+// retried: up to `attempts` calls to s.Get, pausing 150ms, 300ms, ...
+// between them. Any other error returns immediately, and a cancelled
+// ctx aborts the pause with ctx.Err().
+func getSourceWithRetry(ctx context.Context, s storage.Storage, key string) (io.ReadCloser, storage.Metadata, error) {
+	const attempts = 6
+	for i := 1; ; i++ {
+		rc, meta, err := s.Get(ctx, key)
+		if err == nil {
+			return rc, meta, nil
+		}
+		// Give up without sleeping once the attempt budget is spent: a
+		// trailing backoff would only delay reporting the failure.
+		if i == attempts || !errors.Is(err, storage.ErrNotFound) {
+			return nil, storage.Metadata{}, err
+		}
+		select {
+		case <-ctx.Done():
+			return nil, storage.Metadata{}, ctx.Err()
+		case <-time.After(time.Duration(i) * 150 * time.Millisecond):
+		}
+	}
+}
+
 // runMinimal is the fast/minimal ingest path: parse → build tree →
 // persist → ready. It does ZERO LLM work — no summarize, no HyDE, no
 // multi-axis summaries, no TOC build — and parses with table extraction
diff --git a/pkg/storage/local.go b/pkg/storage/local.go
index 440c767..147fd0d 100644
--- a/pkg/storage/local.go
+++ b/pkg/storage/local.go
@@ -39,9 +39,22 @@ func (l *Local) Put(ctx context.Context, key string, r io.Reader, _ Metadata) er
 	if err != nil {
 		return err
 	}
-	defer func() { _ = f.Close() }() // best-effort close
-	_, err = io.Copy(f, r)
-	return err
+	if _, err := io.Copy(f, r); err != nil {
+		_ = f.Close()
+		return err
+	}
+	// fsync before returning. Ingest enqueues the background parse job
+	// immediately after Put returns; the worker may pick it up within
+	// microseconds and Stat this exact path. Without the sync the bytes
+	// (and on Windows the directory entry) can lag behind, so the worker
+	// races the write and fails with ErrNotFound on a file that is in
+	// fact being written. Syncing here makes the object durably visible
+	// before the caller proceeds to enqueue.
+	if err := f.Sync(); err != nil {
+		_ = f.Close()
+		return err
+	}
+	return f.Close()
 }
 
 func (l *Local) Get(ctx context.Context, key string) (io.ReadCloser, Metadata, error) {