diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..7da8c73 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,5 @@ +# Shell scripts and the all-in-one entrypoint MUST stay LF — CRLF breaks the +# shebang inside the Linux container ("bad interpreter: /usr/bin/env bash^M"). +*.sh text eol=lf +deploy/allinone/entrypoint.sh text eol=lf +localapp/serve.py text eol=lf diff --git a/.github/workflows/docker-allinone.yml b/.github/workflows/docker-allinone.yml new file mode 100644 index 0000000..29c3839 --- /dev/null +++ b/.github/workflows/docker-allinone.yml @@ -0,0 +1,74 @@ +name: docker-allinone + +# Build and publish the ALL-IN-ONE image (engine + bundled Postgres + web UI) +# so anyone can `docker run` Vectorless with just an LLM key. +# +# Publishes to Docker Hub AND GitHub Container Registry: +# docker.io/<DOCKERHUB_USERNAME>/vectorless:latest|sha-<short-sha>|vX.Y.Z +# ghcr.io/hallelx2/vectorless:latest|sha-<short-sha>|vX.Y.Z +# +# Requires two repo secrets for the Docker Hub push: +# DOCKERHUB_USERNAME — your Docker Hub account/namespace +# DOCKERHUB_TOKEN — a Docker Hub access token with Read/Write/Delete scope +# (GHCR uses the built-in GITHUB_TOKEN — no extra secret.) 
+ +on: + workflow_dispatch: {} # run on demand from the Actions tab / gh CLI + push: + branches: [main] # publish :latest on every push to the default branch + tags: ["v*.*.*"] + +permissions: + contents: read + packages: write # push to ghcr.io + +jobs: + publish: + name: build + push all-in-one + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Log in to ghcr.io + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract tags + labels + id: meta + uses: docker/metadata-action@v5 + with: + images: | + docker.io/${{ secrets.DOCKERHUB_USERNAME }}/vectorless + ghcr.io/${{ github.repository_owner }}/vectorless + tags: | + type=raw,value=latest,enable={{is_default_branch}} + type=raw,value=latest,enable=${{ github.event_name == 'workflow_dispatch' }} + type=ref,event=tag + type=sha,prefix=sha-,format=short + + - name: Build + push + uses: docker/build-push-action@v6 + with: + context: . + file: ./Dockerfile.allinone + platforms: linux/amd64 + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + build-args: | + VERSION=${{ github.ref_name }} + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/Dockerfile.allinone b/Dockerfile.allinone new file mode 100644 index 0000000..21c82f3 --- /dev/null +++ b/Dockerfile.allinone @@ -0,0 +1,69 @@ +# ── All-in-one image: engine + bundled Postgres + viewer UI ────────── +# +# One `docker run` gives a fully working Vectorless: the retrieval engine, +# a Postgres instance bundled in the same container, and the local web UI. +# The only thing the user supplies is an LLM provider key. 
+# +# docker run -p 8080:8080 -p 7654:7654 \ +# -e VLE_LLM_ANTHROPIC_API_KEY= \ +# hallelx2/vectorless:latest +# # → UI: http://localhost:8080 +# # → API: http://localhost:7654 +# +# Context: vectorless-engine/ directory. + +# ── Build stage ────────────────────────────────────────────────────── +FROM golang:1.25-alpine AS build +RUN apk add --no-cache ca-certificates +WORKDIR /src +COPY go.mod go.sum ./ +RUN go mod download +COPY cmd/ ./cmd/ +COPY pkg/ ./pkg/ +COPY internal/ ./internal/ +ARG VERSION=dev +RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 \ + go build -trimpath -ldflags="-s -w -X main.version=${VERSION}" \ + -o /bin/engine ./cmd/engine + +# ── Runtime stage: Postgres base + python + engine + viewer ────────── +FROM postgres:16-bookworm + +RUN apt-get update \ + && apt-get install -y --no-install-recommends python3 ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +COPY --from=build /bin/engine /usr/local/bin/engine +COPY localapp/ /opt/vectorless-app/ +COPY deploy/allinone/entrypoint.sh /usr/local/bin/vl-entrypoint.sh +RUN chmod +x /usr/local/bin/vl-entrypoint.sh + +# Bundled Postgres credentials — must match engine --local's expected DSN +# (postgres://vectorless:vectorless@localhost:5432/vectorless). +ENV POSTGRES_USER=vectorless \ + POSTGRES_PASSWORD=vectorless \ + POSTGRES_DB=vectorless + +# Engine defaults: local mode, minimal ingest (fast, queryable in seconds), +# document bytes under /data (mount a volume here to persist), and GLM via +# z.ai's Anthropic-compatible gateway out of the box. Override any of these +# with -e at runtime; the user still supplies VLE_LLM_ANTHROPIC_API_KEY. 
+ENV VLE_INGEST_MODE=minimal \ + VLE_STORAGE_LOCAL_ROOT=/data/documents \ + VLE_LLM_DRIVER=anthropic \ + VLE_LLM_ANTHROPIC_BASE_URL=https://api.z.ai/api/anthropic/v1 \ + VLE_LLM_ANTHROPIC_MODEL=glm-4.6 \ + VIEWER_PORT=8080 \ + ENGINE_URL=http://localhost:7654 \ + HOST=0.0.0.0 + +EXPOSE 8080 7654 +VOLUME ["/data", "/var/lib/postgresql/data"] + +ENTRYPOINT ["/usr/local/bin/vl-entrypoint.sh"] + +LABEL org.opencontainers.image.title="vectorless (all-in-one)" +LABEL org.opencontainers.image.description="Vectorless retrieval engine + bundled Postgres + web UI in one container. Reasoning-based document retrieval — no chunking, no embeddings, no vector DB." +LABEL org.opencontainers.image.source="https://github.com/hallelx2/vectorless-engine" +LABEL org.opencontainers.image.licenses="Apache-2.0" +LABEL org.opencontainers.image.vendor="Vectorless" diff --git a/cmd/engine/main.go b/cmd/engine/main.go index 73d4e1a..3d61d21 100644 --- a/cmd/engine/main.go +++ b/cmd/engine/main.go @@ -105,7 +105,16 @@ func run() error { llmClient, err := buildLLM(cfg.LLM) if err != nil { - return fmt.Errorf("init llm: %w", err) + // A missing provider key is non-fatal in local mode: the bundled UI + // (and any caller) can supply credentials per request via X-LLM-* + // headers (BYOK), so boot without a shared client and let those + // requests build their own. Any other init error is still fatal. + if config.LocalModeEnabled() && llmKeyMissing(cfg.LLM) { + logger.Warn("no LLM provider key configured — queries require a per-request key (BYOK via X-LLM-Api-Key), or set VLE_LLM_ANTHROPIC_API_KEY") + llmClient = nil + } else { + return fmt.Errorf("init llm: %w", err) + } } strategy := buildStrategy(cfg.Retrieval, llmClient, store) @@ -231,12 +240,16 @@ func run() error { // (gated by retrieval.treewalk.enabled), even on a deployment // using chunked-tree as its default selection path. 
var treeWalkStrategy *retrieval.TreeWalkStrategy
-	if cfg.Retrieval.TreeWalk.Enabled && llmClient != nil {
+	if cfg.Retrieval.TreeWalk.Enabled {
+		// Built even when llmClient is nil (no server key): the per-request
+		// BYOK path sets the strategy's client from X-LLM-Api-Key headers,
+		// so the endpoint stays available for callers that bring their own key.
 		treeWalkStrategy = buildTreeWalkStrategy(cfg.Retrieval, llmClient, store)
 		logger.Info("retrieval: treewalk answer endpoint enabled",
 			"max_hops", treeWalkStrategy.MaxHops,
 			"page_content_limit", treeWalkStrategy.PageContentLimit,
 			"model_override", cfg.Retrieval.TreeWalk.Model,
+			"server_key", llmClient != nil,
 		)
 	}
 
@@ -260,6 +273,9 @@ func run() error {
 		Abstain:          cfg.Retrieval.Abstain,
 		TreeWalkStrategy: treeWalkStrategy,
 		TreeWalk:         cfg.Retrieval.TreeWalk,
+		BuildLLM: func(provider, apiKey, baseURL, model string) (llmgate.Client, error) {
+			return buildLLMFrom(cfg.LLM, provider, apiKey, baseURL, model)
+		},
 	}
 
 	srv := &http.Server{
@@ -399,6 +415,68 @@ func buildLLM(c config.LLMConfig) (llmgate.Client, error) {
 	}
 }
 
+// llmKeyMissing reports whether the configured provider has no API key.
+// Used to keep local-mode boot non-fatal so per-request BYOK can work.
+func llmKeyMissing(c config.LLMConfig) bool {
+	switch c.Driver {
+	case "anthropic":
+		return c.Anthropic.APIKey == ""
+	case "openai":
+		return c.OpenAI.APIKey == ""
+	case "gemini":
+		return c.Gemini.APIKey == ""
+	}
+	return false
+}
+
+// buildLLMFrom constructs an llmgate client from caller-supplied
+// credentials (BYOK), inheriting the server's configured provider, base
+// URL, and model whenever a field is left empty. This backs the
+// per-request X-LLM-* headers so a user of the bundled UI can paste only
+// their API key and have everything else default to the engine's config.
+// Only the anthropic branch applies baseURL; the openai and gemini
+// branches ignore it. An unknown provider yields an error.
+func buildLLMFrom(c config.LLMConfig, provider, apiKey, baseURL, model string) (llmgate.Client, error) {
+	if provider == "" {
+		provider = c.Driver
+	}
+	switch provider {
+	case "anthropic":
+		if model == "" {
+			model = c.Anthropic.Model
+		}
+		if baseURL == "" {
+			baseURL = c.Anthropic.BaseURL
+		}
+		return anthropic.New(anthropic.Config{
+			APIKey:         apiKey,
+			Model:          model,
+			ReasoningModel: c.Anthropic.ReasoningModel,
+			BaseURL:        baseURL,
+		})
+	case "openai":
+		if model == "" {
+			model = c.OpenAI.Model
+		}
+		return openai.New(openai.Config{
+			APIKey:         apiKey,
+			Model:          model,
+			ReasoningModel: c.OpenAI.ReasoningModel,
+		})
+	case "gemini":
+		if model == "" {
+			model = c.Gemini.Model
+		}
+		return gemini.New(gemini.Config{
+			APIKey:         apiKey,
+			Model:          model,
+			ReasoningModel: c.Gemini.ReasoningModel,
+		})
+	default:
+		return nil, fmt.Errorf("unknown llm provider: %s", provider)
+	}
+}
+
 func buildStrategy(c config.RetrievalConfig, client llmgate.Client, store storage.Storage) retrieval.Strategy {
 	switch c.Strategy {
 	case "single-pass":
diff --git a/deploy/allinone/entrypoint.sh b/deploy/allinone/entrypoint.sh
new file mode 100644
index 0000000..d5669ee
--- /dev/null
+++ b/deploy/allinone/entrypoint.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+# All-in-one entrypoint: Postgres + Vectorless engine + the local viewer UI,
+# all in one container. Postgres is bundled so `docker run` needs no external
+# services — the user only supplies an LLM provider key.
+set -euo pipefail
+
+PGUSER_="${POSTGRES_USER:-vectorless}"
+PGDB_="${POSTGRES_DB:-vectorless}"
+
+echo "[vectorless] starting bundled Postgres…"
+# The official postgres entrypoint handles first-run initdb (using the
+# POSTGRES_* env vars) and then execs postgres.
Run it in the background so we +# can start the engine + UI alongside it in the same container. +docker-entrypoint.sh postgres & + +echo "[vectorless] waiting for Postgres to accept connections…" +until pg_isready -h localhost -U "$PGUSER_" -d "$PGDB_" >/dev/null 2>&1; do + sleep 1 +done +echo "[vectorless] Postgres ready." + +# Start the viewer UI (serves the single-page app + same-origin proxy to the +# engine). Backgrounded; the engine is the container's main process. +if [ -f /opt/vectorless-app/serve.py ]; then + echo "[vectorless] starting viewer UI on :${VIEWER_PORT:-8080} → ${ENGINE_URL:-http://localhost:7654}" + PYTHONIOENCODING=utf-8 python3 /opt/vectorless-app/serve.py & +fi + +if [ -z "${VLE_LLM_ANTHROPIC_API_KEY:-}" ] && [ -z "${VLE_LLM_OPENAI_API_KEY:-}" ] && [ -z "${VLE_LLM_GEMINI_API_KEY:-}" ]; then + echo "[vectorless] WARNING: no LLM provider key set. Ingestion will work, but" + echo "[vectorless] queries need e.g. -e VLE_LLM_ANTHROPIC_API_KEY=" +fi + +echo "[vectorless] starting engine (local mode) on :7654 …" +# exec so the engine becomes PID 1's foreground process and receives signals. +exec engine --local diff --git a/internal/api/server.go b/internal/api/server.go index c34fc14..d9eadf2 100644 --- a/internal/api/server.go +++ b/internal/api/server.go @@ -59,6 +59,13 @@ type Deps struct { // LLMModel is the default model name. Per-request overrides win. LLMModel string + // BuildLLM constructs a per-request llmgate client from caller-supplied + // credentials (BYOK), inheriting server defaults for any empty field. + // Wired in main.go. When set, callers can pass their own key/base_url/ + // model via X-LLM-* request headers; nil disables per-request keys and + // handlers fall back to the shared LLM client. See resolveLLM. + BuildLLM func(provider, apiKey, baseURL, model string) (llmgate.Client, error) + // AnswerSpan / Answer hold the relevant config blocks. Default // values (AnswerSpan disabled, Answer.MaxSections=5) are safe. 
AnswerSpan config.AnswerSpanBlock
@@ -140,6 +147,7 @@ func Router(d Deps) http.Handler {
 			r.Get("/{id}", d.handleGetDocument)
 			r.Delete("/{id}", d.handleDeleteDocument)
 			r.Get("/{id}/tree", d.handleGetTree)
+			r.Get("/{id}/source", d.handleGetSource)
 		})
 
 		r.Get("/sections/{id}", d.handleGetSection)
@@ -364,6 +372,54 @@ func (d Deps) handleDeleteDocument(w http.ResponseWriter, r *http.Request) {
 	w.WriteHeader(http.StatusNoContent)
 }
 
+// handleGetSource streams the original uploaded bytes for a document.
+// Useful for clients that want to render the source (e.g. a PDF page
+// preview in a viewer) without a second storage system. Served inline
+// with the document's content type.
+func (d Deps) handleGetSource(w http.ResponseWriter, r *http.Request) {
+	id := tree.DocumentID(chi.URLParam(r, "id"))
+	doc, err := d.DB.GetDocument(r.Context(), id, standaloneOrgID, "")
+	if err != nil {
+		if errors.Is(err, db.ErrNotFound) {
+			writeErr(w, http.StatusNotFound, "document not found")
+			return
+		}
+		writeErr(w, http.StatusInternalServerError, err.Error())
+		return
+	}
+	if doc.SourceRef == "" {
+		writeErr(w, http.StatusNotFound, "document has no stored source")
+		return
+	}
+	rc, meta, err := d.Storage.Get(r.Context(), doc.SourceRef)
+	if err != nil {
+		if errors.Is(err, storage.ErrNotFound) {
+			writeErr(w, http.StatusNotFound, "source object not found")
+			return
+		}
+		writeErr(w, http.StatusInternalServerError, err.Error())
+		return
+	}
+	defer func() { _ = rc.Close() }()
+
+	ct := doc.ContentType
+	if ct == "" {
+		ct = "application/octet-stream"
+	}
+	w.Header().Set("Content-Type", ct)
+	// The body is uploaded (untrusted) content rendered inline, so forbid
+	// MIME sniffing: the browser must honour the Content-Type set above
+	// instead of guessing a more dangerous type from the bytes.
+	w.Header().Set("X-Content-Type-Options", "nosniff")
+	if meta.Size > 0 {
+		w.Header().Set("Content-Length", strconv.FormatInt(meta.Size, 10))
+	}
+	w.Header().Set("Content-Disposition", "inline")
+	w.Header().Set("Cache-Control", "private, max-age=300")
+	w.WriteHeader(http.StatusOK)
+	_, _ = io.Copy(w, rc)
+}
+
 func (d Deps) handleGetTree(w http.ResponseWriter, r *http.Request) {
 	id := tree.DocumentID(chi.URLParam(r, "id"))
 	t, err := 
d.DB.LoadTree(r.Context(), id, standaloneOrgID, "") diff --git a/internal/api/treewalk.go b/internal/api/treewalk.go index 6bd895a..9337db2 100644 --- a/internal/api/treewalk.go +++ b/internal/api/treewalk.go @@ -78,8 +78,32 @@ type treeWalkAnswerRequest struct { // "stream"?: false, "reasoning"?: false } // // Response: see treeWalkAnswerResponse below. +// resolveLLM picks the llmgate client for this request. When the caller +// supplies BYOK credentials via the X-LLM-Api-Key header (optionally +// X-LLM-Provider / X-LLM-Base-Url / X-LLM-Model) and a BuildLLM factory is +// wired, it builds a per-request client inheriting server defaults for any +// empty field; otherwise it returns the shared client. The returned model +// string is the header-supplied model override (may be ""). +func (d Deps) resolveLLM(r *http.Request) (llmgate.Client, string, error) { + model := r.Header.Get("X-LLM-Model") + key := r.Header.Get("X-LLM-Api-Key") + if key == "" || d.BuildLLM == nil { + return d.LLM, model, nil + } + c, err := d.BuildLLM( + r.Header.Get("X-LLM-Provider"), + key, + r.Header.Get("X-LLM-Base-Url"), + model, + ) + if err != nil { + return nil, model, err + } + return c, model, nil +} + func (d Deps) handleAnswerTreeWalk(w http.ResponseWriter, r *http.Request) { - if d.LLM == nil { + if d.LLM == nil && d.BuildLLM == nil { writeErr(w, http.StatusNotImplemented, "answer/treewalk endpoint requires an LLM client") return } @@ -126,9 +150,29 @@ func (d Deps) handleAnswerTreeWalk(w http.ResponseWriter, r *http.Request) { if body.MaxPagesPerFetch > 0 { perReq.PageContentLimit = body.MaxPagesPerFetch } + // BYOK: if the caller supplies their own LLM credentials via X-LLM-* + // headers, build a per-request client and run BOTH the navigation loop + // and citation span-extraction through it. dReq is a value copy of Deps + // with the per-request client swapped in — Deps is passed by value so + // this never mutates the shared instance other goroutines read. 
+ client, hdrModel, err := d.resolveLLM(r) + if err != nil { + writeErr(w, http.StatusBadRequest, "invalid LLM credentials: "+err.Error()) + return + } + if client == nil { + writeErr(w, http.StatusBadRequest, "no LLM credentials: configure a server key or send an X-LLM-Api-Key header (BYOK)") + return + } + perReq.LLM = client + dReq := d + dReq.LLM = client + if body.Model == "" { + body.Model = hdrModel + } + // Per-request model override falls through to budget.ModelName // the same way every other handler does. - budget := retrieval.ContextBudget{ModelName: body.Model} if budget.ModelName == "" { budget.ModelName = d.LLMModel @@ -139,7 +183,7 @@ func (d Deps) handleAnswerTreeWalk(w http.ResponseWriter, r *http.Request) { // Stream variant: hijack the response writer for SSE and emit // one event per tool call. if body.Stream { - d.serveAnswerTreeWalkStream(w, r, &perReq, t, body, budget, started) + dReq.serveAnswerTreeWalkStream(w, r, &perReq, t, body, budget, started) return } @@ -164,7 +208,7 @@ func (d Deps) handleAnswerTreeWalk(w http.ResponseWriter, r *http.Request) { return } - citations := d.buildTreeWalkCitations(r.Context(), t, res, body.Query, body.Model) + citations := dReq.buildTreeWalkCitations(r.Context(), t, res, body.Query, body.Model) resp := map[string]any{ "document_id": body.DocumentID, diff --git a/internal/config/config.go b/internal/config/config.go index 53d2950..72000ba 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -346,9 +346,9 @@ func applyEnvOverrides(c *Config) { } } // Anthropic-compatible gateway overrides (e.g. GLM/Zhipu via - // https://api.z.ai/api/anthropic): base URL + model, so the - // anthropic driver can run a non-Anthropic model without a secret - // edit. + // https://api.z.ai/api/anthropic/v1 — the /v1 is required, the client + // posts to ${base}/messages): base URL + model, so the anthropic + // driver can run a non-Anthropic model without a secret edit. 
if v := firstEnv("VLS_LLM_ANTHROPIC_BASE_URL", "VLE_LLM_ANTHROPIC_BASE_URL"); v != "" { c.Engine.LLM.Anthropic.BaseURL = v } diff --git a/localapp/README.md b/localapp/README.md new file mode 100644 index 0000000..94bc682 --- /dev/null +++ b/localapp/README.md @@ -0,0 +1,44 @@ +# Vectorless — local viewer + +A tiny, dependency-free local UI for the OSS `vectorless-engine`. Upload a PDF, +watch it ingest into a structured tree, browse the section map, and ask +questions that come back with **cited** answers (page range + verbatim quote) — +answered by whatever model the engine is configured with (here: GLM-4.6 via +z.ai's Anthropic-compatible gateway). + +This is the minimal slice of **HAL-188** (local dashboard). It is intentionally +small: a single `index.html` + a stdlib Python proxy. No build step, no Node. + +## Why the proxy +The `engine --local` binary emits **no CORS headers**, so a browser page can't +call `http://localhost:7654` cross-origin. `serve.py` serves the page **and** +reverse-proxies `/engine/*` to the engine, so every request is same-origin. + +## Run + +```bash +# 1. Start the engine (from the repo root, vectorless-engine/), local mode + your GLM key: +cd .. +set -a; . ./.env; set +a # GLM key + base_url (.../api/anthropic/v1) + glm-4.6 +export VLE_INGEST_MODE=minimal +./bin/engine.exe --local # listens on :7654 + +# 2. Start the viewer (from this folder, localapp/): +cd localapp +python serve.py # http://localhost:7655 +``` + +Then open **http://localhost:7655** and: +1. Drop a PDF (e.g. a FinanceBench 10-K) onto **Upload**. +2. Watch it move to **ready** in the **Documents** list; click it. +3. Inspect the **Structure map** (section tree + page ranges). +4. Type a question in **Ask** → get a cited answer with confidence, hops, and cost. + +## Config +- `ENGINE_URL` (default `http://localhost:7654`) — where the engine listens. +- `VIEWER_PORT` (default `7655`) — the viewer's port. 
+ +## Endpoints it uses +`GET /v1/health` · `GET /v1/documents` · `POST /v1/documents` (multipart) · +`GET /v1/documents/{id}` · `GET /v1/documents/{id}/tree` · +`POST /v1/answer/treewalk`. diff --git a/localapp/index.html b/localapp/index.html new file mode 100644 index 0000000..dc4197c --- /dev/null +++ b/localapp/index.html @@ -0,0 +1,505 @@ + + + + + +Vectorless · local + + + + + + + + + + +
+ + +
+
+
+
Reasoning-based retrieval
+

Ask a document. Get a cited answer.

+

Upload a PDF — the engine parses it into a structured tree and an LLM navigates that + structure to answer, no chunking, no embeddings, no vectors.

+
+
01
Upload
Drop a PDF in the left rail.
+
02
Ingest
Parsed into a section tree in seconds.
+
03
Ask
Cited answers — page, quote & preview.
+
+
+ + +
+
+
+ + + + + + diff --git a/localapp/serve.py b/localapp/serve.py new file mode 100644 index 0000000..6eb330a --- /dev/null +++ b/localapp/serve.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python3 +""" +Local viewer for the vectorless-engine. + +Serves the single-page viewer (index.html) AND reverse-proxies every +request under /engine/* to the engine on :7654. Same-origin, so the +browser never makes a cross-origin call — no CORS config needed on the +engine (the OSS `engine --local` binary emits no CORS headers). + + python serve.py # viewer on http://localhost:7655, engine assumed on :7654 + VIEWER_PORT=8000 ENGINE_URL=http://localhost:7654 python serve.py + +This is the minimal local-app shell tracked as HAL-188. +""" +import os +import sys +import urllib.request +import urllib.error +from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer + +HERE = os.path.dirname(os.path.abspath(__file__)) +ENGINE_URL = os.environ.get("ENGINE_URL", "http://localhost:7654").rstrip("/") +PORT = int(os.environ.get("VIEWER_PORT", "7655")) +# Bind host. Default localhost-only for local dev safety; set HOST=0.0.0.0 to +# expose it (the all-in-one Docker image does this so the mapped port works). +HOST = os.environ.get("HOST", "127.0.0.1") + +# Hop-by-hop / host headers we must not forward verbatim. 
+_SKIP_REQ = {"host", "connection", "content-length", "accept-encoding"} +_SKIP_RESP = {"transfer-encoding", "connection", "content-encoding", "content-length"} + + +class Handler(BaseHTTPRequestHandler): + protocol_version = "HTTP/1.1" + + # ---- static viewer ---- + def _serve_index(self): + try: + with open(os.path.join(HERE, "index.html"), "rb") as f: + body = f.read() + except FileNotFoundError: + self.send_error(404, "index.html not found next to serve.py") + return + self.send_response(200) + self.send_header("Content-Type", "text/html; charset=utf-8") + self.send_header("Content-Length", str(len(body))) + self.send_header("Cache-Control", "no-store") + self.end_headers() + self.wfile.write(body) + + # ---- reverse proxy to the engine ---- + def _proxy(self, method): + target = ENGINE_URL + self.path[len("/engine"):] + length = int(self.headers.get("Content-Length", 0) or 0) + body = self.rfile.read(length) if length else None + + req = urllib.request.Request(target, data=body, method=method) + for k, v in self.headers.items(): + if k.lower() not in _SKIP_REQ: + req.add_header(k, v) + + try: + resp = urllib.request.urlopen(req, timeout=300) + data = resp.read() + status = resp.status + headers = resp.getheaders() + except urllib.error.HTTPError as e: + data = e.read() + status = e.code + headers = list(e.headers.items()) + except urllib.error.URLError as e: + msg = f'{{"error":"cannot reach engine at {ENGINE_URL}: {e.reason}"}}'.encode() + self.send_response(502) + self.send_header("Content-Type", "application/json") + self.send_header("Content-Length", str(len(msg))) + self.end_headers() + self.wfile.write(msg) + return + + self.send_response(status) + sent_ct = False + for k, v in headers: + if k.lower() in _SKIP_RESP: + continue + if k.lower() == "content-type": + sent_ct = True + self.send_header(k, v) + if not sent_ct: + self.send_header("Content-Type", "application/json") + self.send_header("Content-Length", str(len(data))) + self.end_headers() + 
self.wfile.write(data)
+
+    _CT = {".html": "text/html; charset=utf-8", ".svg": "image/svg+xml",
+           ".css": "text/css", ".js": "text/javascript", ".ico": "image/x-icon",
+           ".png": "image/png"}
+
+    def _serve_static(self, path):
+        rel = path.lstrip("/") or "index.html"
+        # contain to this directory: match HERE plus a separator so a sibling dir sharing the prefix is rejected
+        full = os.path.normpath(os.path.join(HERE, rel))
+        if not full.startswith(HERE + os.sep) or not os.path.isfile(full):
+            self.send_error(404)
+            return
+        with open(full, "rb") as f:
+            body = f.read()
+        ext = os.path.splitext(full)[1].lower()
+        self.send_response(200)
+        self.send_header("Content-Type", self._CT.get(ext, "application/octet-stream"))
+        self.send_header("Content-Length", str(len(body)))
+        self.send_header("Cache-Control", "no-store")
+        self.end_headers()
+        self.wfile.write(body)
+
+    def do_GET(self):
+        path = self.path.split("?", 1)[0]
+        if self.path.startswith("/engine/"):
+            self._proxy("GET")
+        else:
+            self._serve_static(path)
+
+    def do_POST(self):
+        if self.path.startswith("/engine/"):
+            self._proxy("POST")
+        else:
+            self.send_error(404)
+
+    def do_DELETE(self):
+        if self.path.startswith("/engine/"):
+            self._proxy("DELETE")
+        else:
+            self.send_error(404)
+
+    def log_message(self, *a):  # quiet
+        pass
+
+
+if __name__ == "__main__":
+    print(f"Vectorless local viewer -> http://localhost:{PORT} (bind {HOST}:{PORT})")
+    print(f"Proxying /engine/* -> {ENGINE_URL}")
+    try:
+        ThreadingHTTPServer((HOST, PORT), Handler).serve_forever()
+    except KeyboardInterrupt:
+        sys.exit(0)
diff --git a/pkg/config/config.go b/pkg/config/config.go
index 3a3dab1..12dfe0c 100644
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -386,8 +386,15 @@ type AnthropicBlock struct {
 	// BaseURL overrides the Anthropic API endpoint. Empty = official
 	// api.anthropic.com. Set this to point the Anthropic driver at any
 	// Anthropic-compatible gateway — e.g. 
GLM/Zhipu's - // https://api.z.ai/api/anthropic — so the same driver can drive a + // https://api.z.ai/api/anthropic/v1 — so the same driver can drive a // non-Anthropic model that speaks the Messages API. + // + // IMPORTANT: the value must include the API version segment (.../v1). + // The underlying client posts to "${base_url}/messages" (its built-in + // default is https://api.anthropic.com/v1), so a base_url WITHOUT /v1 + // resolves to .../anthropic/messages — which z.ai answers with an + // HTTP 200 body {"code":500,"msg":"404 NOT_FOUND"} and the engine then + // reports the opaque "anthropic: no response". BaseURL string `yaml:"base_url"` } @@ -934,7 +941,8 @@ func applyEnvOverrides(c *Config) { } // Anthropic-driver overrides. These let an operator point the // anthropic driver at an Anthropic-compatible gateway (e.g. GLM via - // https://api.z.ai/api/anthropic) without baking the values into the + // https://api.z.ai/api/anthropic/v1 — the /v1 is required; see + // AnthropicBlock.BaseURL) without baking the values into the // config file or secret. 
if v := os.Getenv("VLE_LLM_ANTHROPIC_API_KEY"); v != "" {
 		c.LLM.Anthropic.APIKey = v
diff --git a/pkg/ingest/ingest.go b/pkg/ingest/ingest.go
index dc189c7..373affe 100644
--- a/pkg/ingest/ingest.go
+++ b/pkg/ingest/ingest.go
@@ -540,7 +540,7 @@ func runParallelStages(ctx context.Context, summarizeFn, hydeFn func(context.Con
 }
 
 func (p *Pipeline) parse(ctx context.Context, parsers *parser.Registry, pl Payload) (*parser.ParsedDoc, error) {
-	rc, _, err := p.Storage.Get(ctx, pl.SourceRef)
+	rc, _, err := getSourceWithRetry(ctx, p.Storage, pl.SourceRef)
 	if err != nil {
 		return nil, fmt.Errorf("fetch source: %w", err)
 	}
@@ -548,6 +548,42 @@ func (p *Pipeline) parse(ctx context.Context, parsers *parser.Registry, pl Paylo
 	return parsers.Parse(ctx, pl.ContentType, pl.Filename, rc)
 }
 
+// getSourceWithRetry fetches a freshly-uploaded object, tolerating the
+// brief window where the background ingest job (enqueued right after the
+// upload handler's Storage.Put) outraces the source bytes becoming
+// visible. Storage.Put now fsyncs, so this is belt-and-suspenders for
+// slower or eventually-consistent backends: a transient ErrNotFound is
+// retried with short backoff rather than failing the whole document.
+// Any non-ErrNotFound error returns immediately.
+//
+// It makes at most 6 attempts, waiting 150ms, 300ms, ... between them
+// (never after the last one), and returns ctx.Err() if ctx is cancelled
+// while waiting. When every attempt misses, the last ErrNotFound is
+// returned.
+func getSourceWithRetry(ctx context.Context, s storage.Storage, key string) (io.ReadCloser, storage.Metadata, error) {
+	const attempts = 6
+	var lastErr error
+	for i := 0; i < attempts; i++ {
+		rc, meta, err := s.Get(ctx, key)
+		if err == nil {
+			return rc, meta, nil
+		}
+		if !errors.Is(err, storage.ErrNotFound) {
+			return nil, storage.Metadata{}, err
+		}
+		lastErr = err
+		if i == attempts-1 {
+			break // retries exhausted: don't back off just to fail
+		}
+		select {
+		case <-ctx.Done():
+			return nil, storage.Metadata{}, ctx.Err()
+		case <-time.After(time.Duration(i+1) * 150 * time.Millisecond):
+		}
+	}
+	return nil, storage.Metadata{}, lastErr
+}
+
 // runMinimal is the fast/minimal ingest path: parse → build tree →
 // persist → ready. 
It does ZERO LLM work — no summarize, no HyDE, no // multi-axis summaries, no TOC build — and parses with table extraction diff --git a/pkg/storage/local.go b/pkg/storage/local.go index 440c767..147fd0d 100644 --- a/pkg/storage/local.go +++ b/pkg/storage/local.go @@ -39,9 +39,22 @@ func (l *Local) Put(ctx context.Context, key string, r io.Reader, _ Metadata) er if err != nil { return err } - defer func() { _ = f.Close() }() // best-effort close - _, err = io.Copy(f, r) - return err + if _, err := io.Copy(f, r); err != nil { + _ = f.Close() + return err + } + // fsync before returning. Ingest enqueues the background parse job + // immediately after Put returns; the worker may pick it up within + // microseconds and Stat this exact path. Without the sync the bytes + // (and on Windows the directory entry) can lag behind, so the worker + // races the write and fails with ErrNotFound on a file that is in + // fact being written. Syncing here makes the object durably visible + // before the caller proceeds to enqueue. + if err := f.Sync(); err != nil { + _ = f.Close() + return err + } + return f.Close() } func (l *Local) Get(ctx context.Context, key string) (io.ReadCloser, Metadata, error) {