hallelx2 · hallelx2 · Jun 19, 2026 · Jun 17, 2026 · Jun 18, 2026 · Jun 19, 2026
diff --git a/.gitattributes b/.gitattributes
@@ -0,0 +1,5 @@
+# Files that run inside the Linux container MUST stay LF. For shell scripts,
+# CRLF breaks the shebang ("bad interpreter: /usr/bin/env bash^M").
+*.sh        text eol=lf
+deploy/allinone/entrypoint.sh text eol=lf
+localapp/serve.py             text eol=lf
diff --git a/.github/workflows/docker-allinone.yml b/.github/workflows/docker-allinone.yml
@@ -0,0 +1,74 @@
+name: docker-allinone
+
+# Build and publish the ALL-IN-ONE image (engine + bundled Postgres + web UI)
+# so anyone can `docker run` Vectorless with just an LLM key.
+#
+# Publishes to Docker Hub AND GitHub Container Registry:
+#   docker.io/<DOCKERHUB_USERNAME>/vectorless:latest|sha-<short>|vX.Y.Z
+#   ghcr.io/<repository owner>/vectorless:latest|sha-<short>|vX.Y.Z
+#
+# Requires two repo secrets for the Docker Hub push:
+#   DOCKERHUB_USERNAME — your Docker Hub account/namespace
+#   DOCKERHUB_TOKEN    — a Docker Hub access token with Read/Write/Delete scope
+# (GHCR uses the built-in GITHUB_TOKEN — no extra secret.)
+
+on:
+  workflow_dispatch: {}        # run on demand from the Actions tab / gh CLI
+  push:
+    branches: [main]           # publish :latest on every push to the default branch
+    tags: ["v*.*.*"]           # publish the matching vX.Y.Z image tag on release tags
+
+permissions:
+  contents: read
+  packages: write              # push to ghcr.io
+
+jobs:
+  publish:
+    name: build + push all-in-one
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+      - name: Log in to ghcr.io
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract tags + labels
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          # The same tag set is applied to both registries listed here.
+          images: |
+            docker.io/${{ secrets.DOCKERHUB_USERNAME }}/vectorless
+            ghcr.io/${{ github.repository_owner }}/vectorless
+          # Tag rules, in order: `latest` for default-branch builds, `latest`
+          # for manual (workflow_dispatch) runs, the git tag name for tag
+          # pushes, and `sha-<short>` for every build.
+          tags: |
+            type=raw,value=latest,enable={{is_default_branch}}
+            type=raw,value=latest,enable=${{ github.event_name == 'workflow_dispatch' }}
+            type=ref,event=tag
+            type=sha,prefix=sha-,format=short
+
+      - name: Build + push
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: ./Dockerfile.allinone
+          platforms: linux/amd64
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          # VERSION feeds the Dockerfile's `ARG VERSION`, which is stamped into
+          # the engine binary as main.version. It is the ref name that
+          # triggered the run, so branch builds report the branch name.
+          build-args: |
+            VERSION=${{ github.ref_name }}
+          # Read and write the build layer cache through the `gha` backend.
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
diff --git a/Dockerfile.allinone b/Dockerfile.allinone
@@ -0,0 +1,69 @@
+# ── All-in-one image: engine + bundled Postgres + viewer UI ──────────
+#
+# One `docker run` gives a fully working Vectorless: the retrieval engine,
+# a Postgres instance bundled in the same container, and the local web UI.
+# The only thing the user supplies is an LLM provider key.
+#
+#   docker run -p 8080:8080 -p 7654:7654 \
+#     -e VLE_LLM_ANTHROPIC_API_KEY=<your GLM key> \
+#     hallelx2/vectorless:latest
+#   # → UI:  http://localhost:8080
+#   # → API: http://localhost:7654
+#
+# Context: vectorless-engine/ directory.
+
+# ── Build stage ──────────────────────────────────────────────────────
+# Compiles a static (CGO-free), stripped engine binary.
+#
+# GOARCH is deliberately left unset: the Go toolchain then targets the
+# platform this stage runs on, which is the same platform the runtime stage
+# below is pulled for. Hard-coding amd64 would place an amd64 binary inside
+# an arm64 image whenever the image is built for (or on) arm64.
+FROM golang:1.25-alpine AS build
+RUN apk add --no-cache ca-certificates
+WORKDIR /src
+# Copy the module files first so the dependency download layer stays cached
+# until go.mod / go.sum change.
+COPY go.mod go.sum ./
+RUN go mod download
+COPY cmd/      ./cmd/
+COPY pkg/      ./pkg/
+COPY internal/ ./internal/
+# VERSION is stamped into main.version at link time; defaults to "dev".
+ARG VERSION=dev
+RUN CGO_ENABLED=0 GOOS=linux \
+    go build -trimpath -ldflags="-s -w -X main.version=${VERSION}" \
+      -o /bin/engine ./cmd/engine
+
+# ── Runtime stage: Postgres base + python + engine + viewer ──────────
+FROM postgres:16-bookworm
+
+# python3 runs the viewer (serve.py, launched by the entrypoint).
+# NOTE(review): ca-certificates is presumably for the engine's outbound HTTPS
+# calls to the LLM provider — confirm.
+RUN apt-get update \
+ && apt-get install -y --no-install-recommends python3 ca-certificates \
+ && rm -rf /var/lib/apt/lists/*
+
+# Engine binary from the build stage, the viewer app, and the entrypoint
+# script that launches Postgres, the viewer, and the engine.
+COPY --from=build /bin/engine /usr/local/bin/engine
+COPY localapp/ /opt/vectorless-app/
+COPY deploy/allinone/entrypoint.sh /usr/local/bin/vl-entrypoint.sh
+RUN chmod +x /usr/local/bin/vl-entrypoint.sh
+
+# Bundled Postgres credentials — must match engine --local's expected DSN
+# (postgres://vectorless:vectorless@localhost:5432/vectorless).
+ENV POSTGRES_USER=vectorless \
+    POSTGRES_PASSWORD=vectorless \
+    POSTGRES_DB=vectorless
+
+# Engine defaults: local mode, minimal ingest (fast, queryable in seconds),
+# document bytes under /data (mount a volume here to persist), and GLM via
+# z.ai's Anthropic-compatible gateway out of the box. Override any of these
+# with -e at runtime; the user still supplies VLE_LLM_ANTHROPIC_API_KEY.
+ENV VLE_INGEST_MODE=minimal \
+    VLE_STORAGE_LOCAL_ROOT=/data/documents \
+    VLE_LLM_DRIVER=anthropic \
+    VLE_LLM_ANTHROPIC_BASE_URL=https://api.z.ai/api/anthropic/v1 \
+    VLE_LLM_ANTHROPIC_MODEL=glm-4.6 \
+    VIEWER_PORT=8080 \
+    ENGINE_URL=http://localhost:7654 \
+    HOST=0.0.0.0
+
+# 8080 = viewer UI (VIEWER_PORT), 7654 = engine API (ENGINE_URL).
+EXPOSE 8080 7654
+# /data holds the document bytes (VLE_STORAGE_LOCAL_ROOT lives under it).
+# NOTE(review): /var/lib/postgresql/data is presumably the base image's
+# Postgres data directory — confirm.
+VOLUME ["/data", "/var/lib/postgresql/data"]
+
+ENTRYPOINT ["/usr/local/bin/vl-entrypoint.sh"]
+
+LABEL org.opencontainers.image.title="vectorless (all-in-one)"
+LABEL org.opencontainers.image.description="Vectorless retrieval engine + bundled Postgres + web UI in one container. Reasoning-based document retrieval — no chunking, no embeddings, no vector DB."
+LABEL org.opencontainers.image.source="https://github.com/hallelx2/vectorless-engine"
+LABEL org.opencontainers.image.licenses="Apache-2.0"
+LABEL org.opencontainers.image.vendor="Vectorless"
diff --git a/cmd/engine/main.go b/cmd/engine/main.go
@@ -105,7 +105,16 @@ func run() error {
 
 	llmClient, err := buildLLM(cfg.LLM)
 	if err != nil {
-		return fmt.Errorf("init llm: %w", err)
+		// A missing provider key is non-fatal in local mode: the bundled UI
+		// (and any caller) can supply credentials per request via X-LLM-*
+		// headers (BYOK), so boot without a shared client and let those
+		// requests build their own. Any other init error is still fatal.
+		if config.LocalModeEnabled() && llmKeyMissing(cfg.LLM) {
+			logger.Warn("no LLM provider key configured — queries require a per-request key (BYOK via X-LLM-Api-Key), or set VLE_LLM_ANTHROPIC_API_KEY")
+			llmClient = nil
+		} else {
+			return fmt.Errorf("init llm: %w", err)
+		}
 	}
 	strategy := buildStrategy(cfg.Retrieval, llmClient, store)
 
@@ -231,12 +240,16 @@ func run() error {
 	// (gated by retrieval.treewalk.enabled), even on a deployment
 	// using chunked-tree as its default selection path.
 	var treeWalkStrategy *retrieval.TreeWalkStrategy
-	if cfg.Retrieval.TreeWalk.Enabled && llmClient != nil {
+	if cfg.Retrieval.TreeWalk.Enabled {
+		// Built even when llmClient is nil (no server key): the per-request
+		// BYOK path sets the strategy's client from X-LLM-Api-Key headers,
+		// so the endpoint stays available for callers that bring their own key.
 		treeWalkStrategy = buildTreeWalkStrategy(cfg.Retrieval, llmClient, store)
 		logger.Info("retrieval: treewalk answer endpoint enabled",
 			"max_hops", treeWalkStrategy.MaxHops,
 			"page_content_limit", treeWalkStrategy.PageContentLimit,
 			"model_override", cfg.Retrieval.TreeWalk.Model,
+			"server_key", llmClient != nil,
 		)
 	}
 
@@ -260,6 +273,9 @@ func run() error {
 		Abstain:          cfg.Retrieval.Abstain,
 		TreeWalkStrategy: treeWalkStrategy,
 		TreeWalk:         cfg.Retrieval.TreeWalk,
+		BuildLLM: func(provider, apiKey, baseURL, model string) (llmgate.Client, error) {
+			return buildLLMFrom(cfg.LLM, provider, apiKey, baseURL, model)
+		},
 	}
 
 	srv := &http.Server{
@@ -399,6 +415,66 @@ func buildLLM(c config.LLMConfig) (llmgate.Client, error) {
 	}
 }
 
+// llmKeyMissing reports whether the configured provider has no API key.
+// It keeps local-mode boot non-fatal so per-request BYOK can work: in local
+// mode, run() tolerates a failed LLM init only when this returns true.
+// Drivers other than anthropic, openai, and gemini always report false, so
+// their init errors stay fatal.
+func llmKeyMissing(c config.LLMConfig) bool {
+	switch c.Driver {
+	case "anthropic":
+		return c.Anthropic.APIKey == ""
+	case "openai":
+		return c.OpenAI.APIKey == ""
+	case "gemini":
+		return c.Gemini.APIKey == ""
+	}
+	return false
+}
+
+// buildLLMFrom constructs an llmgate client from caller-supplied
+// credentials (BYOK), inheriting the server's configured provider, base
+// URL, and model whenever a field is left empty. This backs the
+// per-request X-LLM-* headers so a user of the bundled UI can paste only
+// their API key and have everything else default to the engine's config.
+//
+// An empty provider falls back to c.Driver and an empty model falls back
+// to the chosen provider's configured model. baseURL is honoured only for
+// the anthropic provider; the openai and gemini branches ignore it. The
+// reasoning model always comes from the server config. apiKey is passed
+// through to the provider constructor unchanged.
+//
+// It returns the provider constructor's result, or an error when the
+// resolved provider is not anthropic, openai, or gemini.
+func buildLLMFrom(c config.LLMConfig, provider, apiKey, baseURL, model string) (llmgate.Client, error) {
+	if provider == "" {
+		provider = c.Driver
+	}
+	switch provider {
+	case "anthropic":
+		if model == "" {
+			model = c.Anthropic.Model
+		}
+		if baseURL == "" {
+			baseURL = c.Anthropic.BaseURL
+		}
+		return anthropic.New(anthropic.Config{
+			APIKey:         apiKey,
+			Model:          model,
+			ReasoningModel: c.Anthropic.ReasoningModel,
+			BaseURL:        baseURL,
+		})
+	case "openai":
+		if model == "" {
+			model = c.OpenAI.Model
+		}
+		return openai.New(openai.Config{
+			APIKey:         apiKey,
+			Model:          model,
+			ReasoningModel: c.OpenAI.ReasoningModel,
+		})
+	case "gemini":
+		if model == "" {
+			model = c.Gemini.Model
+		}
+		return gemini.New(gemini.Config{
+			APIKey:         apiKey,
+			Model:          model,
+			ReasoningModel: c.Gemini.ReasoningModel,
+		})
+	default:
+		return nil, fmt.Errorf("unknown llm provider: %s", provider)
+	}
+}
+
 func buildStrategy(c config.RetrievalConfig, client llmgate.Client, store storage.Storage) retrieval.Strategy {
 	switch c.Strategy {
 	case "single-pass":

diff --git a/deploy/allinone/entrypoint.sh b/deploy/allinone/entrypoint.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+# All-in-one entrypoint: Postgres + Vectorless engine + the local viewer UI,
+# all in one container. Postgres is bundled so `docker run` needs no external
+# services — the user only supplies an LLM provider key.
+#
+# Startup order: Postgres (background) → wait until it accepts connections →
+# viewer UI (background, only if serve.py is present) → engine (exec'd, so it
+# replaces this shell).
+set -euo pipefail
+
+PGUSER_="${POSTGRES_USER:-vectorless}"
+PGDB_="${POSTGRES_DB:-vectorless}"
+
+echo "[vectorless] starting bundled Postgres…"
+# The official postgres entrypoint handles first-run initdb (using the
+# POSTGRES_* env vars) and then execs postgres. Run it in the background so we
+# can start the engine + UI alongside it in the same container. Keep its PID so
+# the wait loop below can tell "still starting" apart from "already dead".
+docker-entrypoint.sh postgres &
+PGPID_=$!
+
+echo "[vectorless] waiting for Postgres to accept connections…"
+until pg_isready -h localhost -U "$PGUSER_" -d "$PGDB_" >/dev/null 2>&1; do
+  # Fail fast instead of polling forever when Postgres exited during startup
+  # (its own output, already on the container log, explains why).
+  if ! kill -0 "$PGPID_" 2>/dev/null; then
+    echo "[vectorless] ERROR: bundled Postgres exited before becoming ready." >&2
+    exit 1
+  fi
+  sleep 1
+done
+echo "[vectorless] Postgres ready."
+
+# Start the viewer UI (serves the single-page app + same-origin proxy to the
+# engine). Backgrounded; the engine is the container's main process.
+if [ -f /opt/vectorless-app/serve.py ]; then
+  echo "[vectorless] starting viewer UI on :${VIEWER_PORT:-8080} → ${ENGINE_URL:-http://localhost:7654}"
+  PYTHONIOENCODING=utf-8 python3 /opt/vectorless-app/serve.py &
+fi
+
+# Warn (but do not abort) when no provider key is configured.
+if [ -z "${VLE_LLM_ANTHROPIC_API_KEY:-}" ] && [ -z "${VLE_LLM_OPENAI_API_KEY:-}" ] && [ -z "${VLE_LLM_GEMINI_API_KEY:-}" ]; then
+  echo "[vectorless] WARNING: no LLM provider key set. Ingestion will work, but"
+  echo "[vectorless]          queries need e.g. -e VLE_LLM_ANTHROPIC_API_KEY=<your GLM key>"
+fi
+
+echo "[vectorless] starting engine (local mode) on :7654 …"
+# exec replaces this shell with the engine, so the engine takes over as the
+# container's foreground process and receives signals directly.
+exec engine --local
diff --git a/internal/api/server.go b/internal/api/server.go
@@ -59,6 +59,13 @@ type Deps struct {
 	// LLMModel is the default model name. Per-request overrides win.
 	LLMModel string
 
+	// BuildLLM constructs a per-request llmgate client from caller-supplied
+	// credentials (BYOK), inheriting server defaults for any empty field.
+	// Wired in main.go. When set, callers can pass their own key/base_url/
+	// model via X-LLM-* request headers; nil disables per-request keys and
+	// handlers fall back to the shared LLM client. See resolveLLM.
+	BuildLLM func(provider, apiKey, baseURL, model string) (llmgate.Client, error)
+
 	// AnswerSpan / Answer hold the relevant config blocks. Default
 	// values (AnswerSpan disabled, Answer.MaxSections=5) are safe.
 	AnswerSpan config.AnswerSpanBlock
@@ -140,6 +147,7 @@ func Router(d Deps) http.Handler {
 			r.Get("/{id}", d.handleGetDocument)
 			r.Delete("/{id}", d.handleDeleteDocument)
 			r.Get("/{id}/tree", d.handleGetTree)
+			r.Get("/{id}/source", d.handleGetSource)
 		})
 
 		r.Get("/sections/{id}", d.handleGetSection)
@@ -364,6 +372,50 @@ func (d Deps) handleDeleteDocument(w http.ResponseWriter, r *http.Request) {
 	w.WriteHeader(http.StatusNoContent)
 }
 
+// handleGetSource streams the original uploaded bytes for a document so a
+// client can render the source itself (e.g. a PDF page preview in a viewer)
+// without a second storage system.
+//
+// Responses:
+//   - 200: the stored object, served inline with the document's recorded
+//     content type (application/octet-stream when none is recorded).
+//   - 404: the document is unknown, it has no SourceRef, or the referenced
+//     object is missing from storage.
+//   - 500: any other database or storage failure.
+func (d Deps) handleGetSource(w http.ResponseWriter, r *http.Request) {
+	id := tree.DocumentID(chi.URLParam(r, "id"))
+	doc, err := d.DB.GetDocument(r.Context(), id, standaloneOrgID, "")
+	if err != nil {
+		if errors.Is(err, db.ErrNotFound) {
+			writeErr(w, http.StatusNotFound, "document not found")
+			return
+		}
+		writeErr(w, http.StatusInternalServerError, err.Error())
+		return
+	}
+	if doc.SourceRef == "" {
+		writeErr(w, http.StatusNotFound, "document has no stored source")
+		return
+	}
+	rc, meta, err := d.Storage.Get(r.Context(), doc.SourceRef)
+	if err != nil {
+		if errors.Is(err, storage.ErrNotFound) {
+			writeErr(w, http.StatusNotFound, "source object not found")
+			return
+		}
+		writeErr(w, http.StatusInternalServerError, err.Error())
+		return
+	}
+	defer func() { _ = rc.Close() }()
+
+	ct := doc.ContentType
+	if ct == "" {
+		ct = "application/octet-stream"
+	}
+	w.Header().Set("Content-Type", ct)
+	// The bytes are caller-uploaded and rendered inline, so forbid MIME
+	// sniffing: the browser must honour the declared type instead of
+	// reinterpreting the payload as HTML or script.
+	w.Header().Set("X-Content-Type-Options", "nosniff")
+	// Only advertise a length the storage layer actually reported.
+	if meta.Size > 0 {
+		w.Header().Set("Content-Length", strconv.FormatInt(meta.Size, 10))
+	}
+	w.Header().Set("Content-Disposition", "inline")
+	w.Header().Set("Cache-Control", "private, max-age=300")
+	w.WriteHeader(http.StatusOK)
+	// The status is already committed, so a mid-stream copy failure (for
+	// example the client disconnecting) can no longer be reported.
+	_, _ = io.Copy(w, rc)
+}
+
 func (d Deps) handleGetTree(w http.ResponseWriter, r *http.Request) {
 	id := tree.DocumentID(chi.URLParam(r, "id"))
 	t, err := d.DB.LoadTree(r.Context(), id, standaloneOrgID, "")