Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Shell scripts, the all-in-one entrypoint, and the viewer's serve.py MUST stay
# LF — CRLF breaks the shebang inside the Linux container
# ("bad interpreter: /usr/bin/env bash^M").
*.sh text eol=lf
deploy/allinone/entrypoint.sh text eol=lf
localapp/serve.py text eol=lf
74 changes: 74 additions & 0 deletions .github/workflows/docker-allinone.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
name: docker-allinone

# Build and publish the ALL-IN-ONE image (engine + bundled Postgres + web UI)
# so anyone can `docker run` Vectorless with just an LLM key.
#
# Publishes to Docker Hub AND GitHub Container Registry:
# docker.io/<DOCKERHUB_USERNAME>/vectorless:latest|sha-<short>|vX.Y.Z
# ghcr.io/hallelx2/vectorless:latest|sha-<short>|vX.Y.Z
#
# Requires two repo secrets for the Docker Hub push:
# DOCKERHUB_USERNAME — your Docker Hub account/namespace
# DOCKERHUB_TOKEN — a Docker Hub access token with Read/Write/Delete scope
# (GHCR uses the built-in GITHUB_TOKEN — no extra secret.)

on:
workflow_dispatch: {} # run on demand from the Actions tab / gh CLI
push:
branches: [main] # publish :latest on every push to the default branch
tags: ["v*.*.*"]

permissions:
contents: read
packages: write # push to ghcr.io

jobs:
publish:
name: build + push all-in-one
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

# Authenticates with the DOCKERHUB_USERNAME / DOCKERHUB_TOKEN repo secrets
# described in the header of this file.
- name: Log in to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}

- name: Log in to ghcr.io
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

# Computes one tag list that is applied to both image names:
#   - `latest` when building the default branch, or on a manual
#     workflow_dispatch run
#   - the git tag name on tag pushes (the vX.Y.Z form from the header)
#   - `sha-<short commit>` on every run
# The result is consumed by the build step through steps.meta.outputs.
- name: Extract tags + labels
id: meta
uses: docker/metadata-action@v5
with:
images: |
docker.io/${{ secrets.DOCKERHUB_USERNAME }}/vectorless
ghcr.io/${{ github.repository_owner }}/vectorless
tags: |
type=raw,value=latest,enable={{is_default_branch}}
type=raw,value=latest,enable=${{ github.event_name == 'workflow_dispatch' }}
type=ref,event=tag
type=sha,prefix=sha-,format=short

# Builds Dockerfile.allinone for linux/amd64 only (the Dockerfile's build
# stage hard-codes GOARCH=amd64) and pushes every tag computed above.
# VERSION is passed to the Dockerfile's `ARG VERSION`; github.ref_name is
# the branch or tag name that triggered the run, so branch builds are
# stamped with the branch name rather than a semantic version.
- name: Build + push
uses: docker/build-push-action@v6
with:
context: .
file: ./Dockerfile.allinone
platforms: linux/amd64
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
build-args: |
VERSION=${{ github.ref_name }}
cache-from: type=gha
cache-to: type=gha,mode=max
69 changes: 69 additions & 0 deletions Dockerfile.allinone
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# ── All-in-one image: engine + bundled Postgres + viewer UI ──────────
#
# One `docker run` gives a fully working Vectorless: the retrieval engine,
# a Postgres instance bundled in the same container, and the local web UI.
# The only thing the user supplies is an LLM provider key.
#
# docker run -p 8080:8080 -p 7654:7654 \
# -e VLE_LLM_ANTHROPIC_API_KEY=<your GLM key> \
# hallelx2/vectorless:latest
# # → UI: http://localhost:8080
# # → API: http://localhost:7654
#
# Context: vectorless-engine/ directory.

# ── Build stage ──────────────────────────────────────────────────────
FROM golang:1.25-alpine AS build
RUN apk add --no-cache ca-certificates
WORKDIR /src
# Copy only the module manifests first and download dependencies, so this
# layer can be reused from cache when only source files change.
COPY go.mod go.sum ./
RUN go mod download
COPY cmd/ ./cmd/
COPY pkg/ ./pkg/
COPY internal/ ./internal/
# VERSION can be supplied with --build-arg (defaults to "dev"); it is stamped
# into main.version at link time by the -X flag below.
ARG VERSION=dev
# CGO is disabled so the binary does not link against the Alpine build
# image's libc; it is copied into a Debian-based (bookworm) runtime stage.
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 \
go build -trimpath -ldflags="-s -w -X main.version=${VERSION}" \
-o /bin/engine ./cmd/engine

# ── Runtime stage: Postgres base + python + engine + viewer ──────────
FROM postgres:16-bookworm

# python3 runs the viewer (localapp/serve.py). ca-certificates is presumably
# for outbound HTTPS to the LLM endpoint (the default base URL below is https).
RUN apt-get update \
&& apt-get install -y --no-install-recommends python3 ca-certificates \
&& rm -rf /var/lib/apt/lists/*

COPY --from=build /bin/engine /usr/local/bin/engine
COPY localapp/ /opt/vectorless-app/
COPY deploy/allinone/entrypoint.sh /usr/local/bin/vl-entrypoint.sh
RUN chmod +x /usr/local/bin/vl-entrypoint.sh

# Bundled Postgres credentials — must match engine --local's expected DSN
# (postgres://vectorless:vectorless@localhost:5432/vectorless).
ENV POSTGRES_USER=vectorless \
POSTGRES_PASSWORD=vectorless \
POSTGRES_DB=vectorless

# Engine defaults: local mode, minimal ingest (fast, queryable in seconds),
# document bytes under /data (mount a volume here to persist), and GLM via
# z.ai's Anthropic-compatible gateway out of the box. Override any of these
# with -e at runtime; the user still supplies VLE_LLM_ANTHROPIC_API_KEY.
ENV VLE_INGEST_MODE=minimal \
VLE_STORAGE_LOCAL_ROOT=/data/documents \
VLE_LLM_DRIVER=anthropic \
VLE_LLM_ANTHROPIC_BASE_URL=https://api.z.ai/api/anthropic/v1 \
VLE_LLM_ANTHROPIC_MODEL=glm-4.6 \
VIEWER_PORT=8080 \
ENGINE_URL=http://localhost:7654 \
HOST=0.0.0.0

# 8080 = viewer UI, 7654 = engine API (see the docker run example above).
EXPOSE 8080 7654
VOLUME ["/data", "/var/lib/postgresql/data"]

# Overrides the postgres base image's entrypoint. The script launches
# Postgres itself through docker-entrypoint.sh, then the viewer and engine.
ENTRYPOINT ["/usr/local/bin/vl-entrypoint.sh"]

LABEL org.opencontainers.image.title="vectorless (all-in-one)"
LABEL org.opencontainers.image.description="Vectorless retrieval engine + bundled Postgres + web UI in one container. Reasoning-based document retrieval — no chunking, no embeddings, no vector DB."
LABEL org.opencontainers.image.source="https://github.com/hallelx2/vectorless-engine"
LABEL org.opencontainers.image.licenses="Apache-2.0"
LABEL org.opencontainers.image.vendor="Vectorless"
80 changes: 78 additions & 2 deletions cmd/engine/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,16 @@ func run() error {

llmClient, err := buildLLM(cfg.LLM)
if err != nil {
return fmt.Errorf("init llm: %w", err)
// A missing provider key is non-fatal in local mode: the bundled UI
// (and any caller) can supply credentials per request via X-LLM-*
// headers (BYOK), so boot without a shared client and let those
// requests build their own. Any other init error is still fatal.
if config.LocalModeEnabled() && llmKeyMissing(cfg.LLM) {
logger.Warn("no LLM provider key configured — queries require a per-request key (BYOK via X-LLM-Api-Key), or set VLE_LLM_ANTHROPIC_API_KEY")
llmClient = nil
} else {
return fmt.Errorf("init llm: %w", err)
}
}
strategy := buildStrategy(cfg.Retrieval, llmClient, store)

Expand Down Expand Up @@ -231,12 +240,16 @@ func run() error {
// (gated by retrieval.treewalk.enabled), even on a deployment
// using chunked-tree as its default selection path.
var treeWalkStrategy *retrieval.TreeWalkStrategy
if cfg.Retrieval.TreeWalk.Enabled && llmClient != nil {
if cfg.Retrieval.TreeWalk.Enabled {
// Built even when llmClient is nil (no server key): the per-request
// BYOK path sets the strategy's client from X-LLM-Api-Key headers,
// so the endpoint stays available for callers that bring their own key.
treeWalkStrategy = buildTreeWalkStrategy(cfg.Retrieval, llmClient, store)
logger.Info("retrieval: treewalk answer endpoint enabled",
"max_hops", treeWalkStrategy.MaxHops,
"page_content_limit", treeWalkStrategy.PageContentLimit,
"model_override", cfg.Retrieval.TreeWalk.Model,
"server_key", llmClient != nil,
)
}

Expand All @@ -260,6 +273,9 @@ func run() error {
Abstain: cfg.Retrieval.Abstain,
TreeWalkStrategy: treeWalkStrategy,
TreeWalk: cfg.Retrieval.TreeWalk,
BuildLLM: func(provider, apiKey, baseURL, model string) (llmgate.Client, error) {
return buildLLMFrom(cfg.LLM, provider, apiKey, baseURL, model)
},
}

srv := &http.Server{
Expand Down Expand Up @@ -399,6 +415,66 @@ func buildLLM(c config.LLMConfig) (llmgate.Client, error) {
}
}

// buildLLMFrom (defined below, after llmKeyMissing) constructs an llmgate
// client from caller-supplied credentials (BYOK), inheriting the server's
// configured provider, base URL, and model whenever a field is left empty.
// This backs the per-request X-LLM-* headers so a user of the bundled UI can
// paste only their API key and have everything else default to the engine's
// config. Note: the base URL override is currently applied only by the
// anthropic provider; the openai and gemini branches do not use it.

// llmKeyMissing reports whether the configured provider has no API key.
// Used to keep local-mode boot non-fatal so per-request BYOK can work.
func llmKeyMissing(c config.LLMConfig) bool {
switch c.Driver {
case "anthropic":
return c.Anthropic.APIKey == ""
case "openai":

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (bug_risk): OpenAI and Gemini branches ignore the baseURL argument, which may surprise callers of BuildLLM.

BuildLLM passes baseURL to buildLLMFrom, but the openai and gemini branches ignore it. Calls that set X-LLM-Base-Url for these providers will have no effect. Please either plumb baseURL into the OpenAI/Gemini client configs (as with Anthropic) or make it explicit in the API/docs that this parameter only applies to Anthropic.

Suggested implementation:

	case "openai":
		if model == "" {
			model = c.OpenAI.Model
		}
		if baseURL == "" {
			baseURL = c.OpenAI.BaseURL
		}
		return openai.New(openai.Config{
			APIKey:         apiKey,
			Model:          model,
			ReasoningModel: c.OpenAI.ReasoningModel,
			BaseURL:        baseURL,
		})
	case "gemini":
		if model == "" {
			model = c.Gemini.Model
		}
		if baseURL == "" {
			baseURL = c.Gemini.BaseURL
		}

To fully wire baseURL for the Gemini provider, ensure the gemini case returns a client configured with the BaseURL field, mirroring the Anthropic/OpenAI patterns. For example, the body should look like:

	case "gemini":
		if model == "" {
			model = c.Gemini.Model
		}
		if baseURL == "" {
			baseURL = c.Gemini.BaseURL
		}
		return gemini.New(gemini.Config{
			APIKey:         apiKey,
			Model:          model,
			ReasoningModel: c.Gemini.ReasoningModel,
			BaseURL:        baseURL,
		})

This assumes openai.Config and gemini.Config both support a BaseURL field; if they use a different field name, adjust accordingly.

return c.OpenAI.APIKey == ""
case "gemini":
return c.Gemini.APIKey == ""
}
return false
}

// buildLLMFrom builds an llmgate client from per-request (BYOK) credentials.
//
// Any of provider, baseURL and model may be empty, in which case the value
// from the server's LLM config c is used instead: provider falls back to
// c.Driver, and model/baseURL fall back to the selected provider's own
// settings. apiKey is passed through as given; ReasoningModel always comes
// from the server config.
//
// baseURL is only applied for the "anthropic" provider. The "openai" and
// "gemini" branches do not pass it to their client config, so a caller-supplied
// base URL has no effect there.
//
// An unrecognised provider yields an "unknown llm provider" error.
func buildLLMFrom(c config.LLMConfig, provider, apiKey, baseURL, model string) (llmgate.Client, error) {
	// orDefault returns override unless it is empty, in which case the
	// server-configured fallback wins.
	orDefault := func(override, fallback string) string {
		if override != "" {
			return override
		}
		return fallback
	}

	driver := orDefault(provider, c.Driver)
	switch driver {
	case "anthropic":
		cfg := anthropic.Config{
			APIKey:         apiKey,
			Model:          orDefault(model, c.Anthropic.Model),
			ReasoningModel: c.Anthropic.ReasoningModel,
			BaseURL:        orDefault(baseURL, c.Anthropic.BaseURL),
		}
		return anthropic.New(cfg)
	case "openai":
		cfg := openai.Config{
			APIKey:         apiKey,
			Model:          orDefault(model, c.OpenAI.Model),
			ReasoningModel: c.OpenAI.ReasoningModel,
		}
		return openai.New(cfg)
	case "gemini":
		cfg := gemini.Config{
			APIKey:         apiKey,
			Model:          orDefault(model, c.Gemini.Model),
			ReasoningModel: c.Gemini.ReasoningModel,
		}
		return gemini.New(cfg)
	}
	return nil, fmt.Errorf("unknown llm provider: %s", driver)
}

func buildStrategy(c config.RetrievalConfig, client llmgate.Client, store storage.Storage) retrieval.Strategy {
switch c.Strategy {
case "single-pass":
Expand Down
36 changes: 36 additions & 0 deletions deploy/allinone/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/usr/bin/env bash
# All-in-one entrypoint: Postgres + Vectorless engine + the local viewer UI,
# all in one container. Postgres is bundled so `docker run` needs no external
# services — the user only supplies an LLM provider key.
#
# Startup order:
#   1. Postgres (background, via the official image's docker-entrypoint.sh)
#   2. wait until Postgres accepts TCP connections on localhost
#   3. viewer UI (background, only if serve.py is present)
#   4. engine (exec'd, replaces this shell)
set -euo pipefail

PGUSER_="${POSTGRES_USER:-vectorless}"
PGDB_="${POSTGRES_DB:-vectorless}"

echo "[vectorless] starting bundled Postgres…"
# The official postgres entrypoint handles first-run initdb (using the
# POSTGRES_* env vars) and then execs postgres. Run it in the background so we
# can start the engine + UI alongside it in the same container.
docker-entrypoint.sh postgres &
# Remember the background PID so the wait loop can tell "still starting"
# apart from "already dead".
PG_PID=$!

echo "[vectorless] waiting for Postgres to accept connections…"
until pg_isready -h localhost -U "$PGUSER_" -d "$PGDB_" >/dev/null 2>&1; do
  # If the Postgres process has exited (failed initdb, unwritable data
  # directory, bad configuration, …) it will never become ready. Fail fast
  # with a diagnostic instead of looping forever.
  if ! kill -0 "$PG_PID" 2>/dev/null; then
    echo "[vectorless] ERROR: bundled Postgres exited before accepting connections." >&2
    exit 1
  fi
  sleep 1
done
echo "[vectorless] Postgres ready."

# Start the viewer UI (serves the single-page app + same-origin proxy to the
# engine). Backgrounded; the engine is the container's main process.
if [ -f /opt/vectorless-app/serve.py ]; then
  echo "[vectorless] starting viewer UI on :${VIEWER_PORT:-8080} → ${ENGINE_URL:-http://localhost:7654}"
  PYTHONIOENCODING=utf-8 python3 /opt/vectorless-app/serve.py &
fi

# Warn (but do not fail) when no provider key is configured for any of the
# supported providers.
if [ -z "${VLE_LLM_ANTHROPIC_API_KEY:-}" ] && [ -z "${VLE_LLM_OPENAI_API_KEY:-}" ] && [ -z "${VLE_LLM_GEMINI_API_KEY:-}" ]; then
  echo "[vectorless] WARNING: no LLM provider key set. Ingestion will work, but"
  echo "[vectorless] queries need e.g. -e VLE_LLM_ANTHROPIC_API_KEY=<your GLM key>"
fi

echo "[vectorless] starting engine (local mode) on :7654 …"
# exec replaces this shell with the engine, so the engine takes over the
# script's PID (PID 1 when this script is the container entrypoint) and
# receives container signals directly.
exec engine --local
52 changes: 52 additions & 0 deletions internal/api/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,13 @@ type Deps struct {
// LLMModel is the default model name. Per-request overrides win.
LLMModel string

// BuildLLM constructs a per-request llmgate client from caller-supplied
// credentials (BYOK), inheriting server defaults for any empty field.
// Wired in main.go. When set, callers can pass their own key/base_url/
// model via X-LLM-* request headers; nil disables per-request keys and
// handlers fall back to the shared LLM client. See resolveLLM.
BuildLLM func(provider, apiKey, baseURL, model string) (llmgate.Client, error)

// AnswerSpan / Answer hold the relevant config blocks. Default
// values (AnswerSpan disabled, Answer.MaxSections=5) are safe.
AnswerSpan config.AnswerSpanBlock
Expand Down Expand Up @@ -140,6 +147,7 @@ func Router(d Deps) http.Handler {
r.Get("/{id}", d.handleGetDocument)
r.Delete("/{id}", d.handleDeleteDocument)
r.Get("/{id}/tree", d.handleGetTree)
r.Get("/{id}/source", d.handleGetSource)
})

r.Get("/sections/{id}", d.handleGetSection)
Expand Down Expand Up @@ -364,6 +372,50 @@ func (d Deps) handleDeleteDocument(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusNoContent)
}

// handleGetSource streams the original uploaded bytes for a document.
// Useful for clients that want to render the source (e.g. a PDF page
// preview in a viewer) without a second storage system. Served inline
// with the document's content type.
//
// Responses:
//   - 404 when the document, its SourceRef, or the stored object is missing
//   - 500 for any other database or storage error
//   - 200 with the raw bytes otherwise
//
// The body is caller-uploaded content served inline, so the response sets
// X-Content-Type-Options: nosniff. This makes browsers honour the declared
// Content-Type instead of guessing a different (possibly active) type from
// the bytes.
func (d Deps) handleGetSource(w http.ResponseWriter, r *http.Request) {
	ctx := r.Context()
	id := tree.DocumentID(chi.URLParam(r, "id"))

	doc, err := d.DB.GetDocument(ctx, id, standaloneOrgID, "")
	if err != nil {
		if errors.Is(err, db.ErrNotFound) {
			writeErr(w, http.StatusNotFound, "document not found")
			return
		}
		writeErr(w, http.StatusInternalServerError, err.Error())
		return
	}
	if doc.SourceRef == "" {
		writeErr(w, http.StatusNotFound, "document has no stored source")
		return
	}

	rc, meta, err := d.Storage.Get(ctx, doc.SourceRef)
	if err != nil {
		if errors.Is(err, storage.ErrNotFound) {
			writeErr(w, http.StatusNotFound, "source object not found")
			return
		}
		writeErr(w, http.StatusInternalServerError, err.Error())
		return
	}
	// Close errors on a read-only stream carry nothing actionable here.
	defer func() { _ = rc.Close() }()

	// Fall back to a generic binary type when none was recorded.
	ct := doc.ContentType
	if ct == "" {
		ct = "application/octet-stream"
	}

	hdr := w.Header()
	hdr.Set("Content-Type", ct)
	// Untrusted bytes served inline: forbid MIME sniffing.
	hdr.Set("X-Content-Type-Options", "nosniff")
	// Only advertise a length when storage reports a positive size.
	if meta.Size > 0 {
		hdr.Set("Content-Length", strconv.FormatInt(meta.Size, 10))
	}
	hdr.Set("Content-Disposition", "inline")
	hdr.Set("Cache-Control", "private, max-age=300")
	w.WriteHeader(http.StatusOK)

	// Headers are already sent, so a mid-stream copy failure cannot be
	// reported to the client; the error is intentionally dropped.
	_, _ = io.Copy(w, rc)
}

func (d Deps) handleGetTree(w http.ResponseWriter, r *http.Request) {
id := tree.DocumentID(chi.URLParam(r, "id"))
t, err := d.DB.LoadTree(r.Context(), id, standaloneOrgID, "")
Expand Down
Loading
Loading