Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions cmd/engine/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,14 @@ func run() error {
if err != nil {
return fmt.Errorf("init storage: %w", err)
}
// Log the resolved storage location at boot. For the local driver this is
// the absolute root every source/section object is read from and written
// to — the single most useful fact when diagnosing an "object not found",
// and the place to point a Windows Defender exclusion so antivirus scans of
// freshly-written PDFs don't transiently hide them from the ingest worker.
if local, ok := store.(*storage.Local); ok {
logger.Info("storage: local root resolved", "root", local.Root())
}

q, err := buildQueue(cfg.Queue, cfg.Database.URL)
if err != nil {
Expand Down
55 changes: 47 additions & 8 deletions pkg/ingest/ingest.go
Original file line number Diff line number Diff line change
Expand Up @@ -566,10 +566,44 @@ func runParallelStages(ctx context.Context, summarizeFn, hydeFn func(context.Con
func (p *Pipeline) parse(ctx context.Context, parsers *parser.Registry, pl Payload) (*parser.ParsedDoc, error) {
rc, _, err := getSourceWithRetry(ctx, p.Storage, pl.SourceRef)
if err != nil {
// A not-yet-visible source is transient (the upload's fsync'd bytes can
// briefly read back as ErrNotFound under heavy concurrent ingestion, or
// while antivirus scans a freshly-written file), so it is returned as an
// ordinary, retryable error — never permanent.
return nil, fmt.Errorf("fetch source: %w", err)
}
defer func() { _ = rc.Close() }() // best-effort close
return parsers.Parse(ctx, pl.ContentType, pl.Filename, rc)

parsed, perr := parsers.Parse(ctx, pl.ContentType, pl.Filename, rc)
if perr != nil {
// Classify the failure so the queue stops wasting retries on inputs
// that can never succeed. A parse TIMEOUT or a context cancellation is
// transient — the same document may parse on a less-loaded attempt — so
// it stays an ordinary (retryable) error. Everything else from the
// parser is deterministic on these bytes (encrypted PDF that won't open
// with the empty password, a backend-rejected/malformed structure, a
// scanned image with no extractable text): retrying only re-derives the
// same failure, so mark it permanent and dead-letter it immediately.
if isTransientParseErr(perr) {
return nil, perr
}
return nil, queue.Permanent(perr)
}
return parsed, nil
}

// isTransientParseErr reports whether a parser error is worth retrying. Parse
// timeouts and context cancellations are load-dependent (a complex document may
// finish within budget on a quieter attempt), so they are transient. A genuine
// structural rejection is not.
func isTransientParseErr(err error) bool {
if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) {
return true
}
// The PDF parser's own deadline wrapper formats a plain (unwrapped) message
// when its internal timer fires, so match it textually as a fallback.
msg := err.Error()
Comment on lines +599 to +605

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (bug_risk): Reliance on parser error message substrings for transient classification is brittle.

These string checks make transient vs permanent classification dependent on exact error wording and capitalization, so a small message change could silently reclassify timeouts as permanent.

Prefer a more robust signal, e.g. typed/sentinel errors from the parser with errors.Is/errors.As. If that’s not available, at least normalize the message (e.g. strings.ToLower) and/or rely on a structured marker rather than free text, and ensure the source of these strings is clearly constrained.

Suggested implementation:

 // isTransientParseErr reports whether a parser error is worth retrying.
 // Parse timeouts and context cancellations are load-dependent (a complex
 // document may finish within budget on a quieter attempt), so they are
 // treated as transient. A genuine structural rejection is not.
 //
 // The fallback string matching below is constrained to the parser's own
 // deadline wrapper messages, which are kept stable and lowercase; we
 // normalize the error text before matching to avoid brittleness from
 // casing or minor wording changes.
func isTransientParseErr(err error) bool {
	if err == nil {
		return false
	}

	if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) {
		return true
	}

	// The PDF parser's own deadline wrapper formats a plain (unwrapped) message
	// when its internal timer fires; normalize before matching to reduce
	// dependence on exact wording/capitalization.
	msg := strings.ToLower(err.Error())
	return strings.Contains(msg, "parse exceeded") || strings.Contains(msg, "parse cancelled")
}

If the underlying parser can expose typed/sentinel errors for deadline and cancellation conditions, prefer to:

  1. Define exported sentinel errors (e.g. var ErrParseDeadlineExceeded = errors.New("...")) or custom types in the parser package.
  2. Replace the string matching block with errors.Is(err, parser.ErrParseDeadlineExceeded) / errors.Is(err, parser.ErrParseCancelled) checks.

return strings.Contains(msg, "parse exceeded") || strings.Contains(msg, "parse cancelled")
}

// getSourceWithRetry fetches a freshly-uploaded object, tolerating the
Expand Down Expand Up @@ -1035,13 +1069,18 @@ func (p *Pipeline) fail(ctx context.Context, store docPersister, id tree.Documen
failCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()

// If the queue will retry this job, the failure is (so far) transient —
// under heavy concurrent ingestion, parse can hit a parse-timeout or a
// not-yet-visible source and recover on a later attempt. Surfacing
// "failed" now would tell a polling client the document is dead when it
// isn't, so keep it in "parsing" and let the retry run. Only the final
// attempt produces a terminal "failed".
if !isLastAttempt(ctx) {
// A permanent failure (encrypted/malformed/no-text document) will NOT be
// retried by the queue — the worker cancels the job — so it is terminal
// right now regardless of which attempt we're on. Marking it failed
// immediately gives the caller the real reason instead of leaving the
// document wedged in "parsing" forever waiting for a retry that never runs.
if !queue.IsPermanent(cause) && !isLastAttempt(ctx) {
// Otherwise, if the queue will retry this job, the failure is (so far)
// transient — under heavy concurrent ingestion, parse can hit a
// parse-timeout or a not-yet-visible source and recover on a later
// attempt. Surfacing "failed" now would tell a polling client the
// document is dead when it isn't, so keep it in "parsing" and let the
// retry run. Only the final attempt produces a terminal "failed".
p.Logger.Warn("ingest: transient failure, will retry",
"document_id", string(id), "stage", stage, "cause", cause.Error())
if err := store.SetDocumentStatus(failCtx, id, db.StatusParsing, ""); err != nil {
Expand Down
38 changes: 38 additions & 0 deletions pkg/ingest/parse_classify_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package ingest

import (
"context"
"errors"
"fmt"
"testing"
)

// The classifier decides whether a parser failure is worth retrying. Getting
// this wrong is expensive in both directions: marking a transient timeout
// permanent dead-letters a document that would have parsed on a quieter
// attempt; marking a deterministic rejection transient burns the whole retry
// budget (and, as seen in HAL-321, interleaves confusing "object not found"
// errors into the job history).
func TestIsTransientParseErr(t *testing.T) {
cases := []struct {
name string
err error
transient bool
}{
{"deadline exceeded", context.DeadlineExceeded, true},
{"canceled", context.Canceled, true},
{"wrapped deadline", fmt.Errorf("pdf: parse cancelled: %w", context.DeadlineExceeded), true},
{"pdf timeout message", errors.New("pdf: parse exceeded 5m0s — document too complex or malformed"), true},
{"pdf cancelled message", errors.New("pdf: parse cancelled: context deadline exceeded"), true},
{"encrypted", errors.New("pdf: open: encrypted and could not be unlocked with empty password: x"), false},
{"backend rejected", errors.New("pdf: open: ledongthuc/pdf backend rejected the document: malformed PDF: 256-bit encryption key"), false},
{"no extractable text", errors.New("pdf: parsed but no extractable text — the document may be a scanned image"), false},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
if got := isTransientParseErr(tc.err); got != tc.transient {
t.Fatalf("isTransientParseErr(%q) = %v, want %v", tc.err, got, tc.transient)
}
})
}
}
47 changes: 47 additions & 0 deletions pkg/queue/permanent_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package queue

import (
"errors"
"fmt"
"testing"
)

func TestPermanentWrapsAndUnwraps(t *testing.T) {
base := errors.New("encrypted PDF")
perm := Permanent(base)

if !IsPermanent(perm) {
t.Fatal("IsPermanent should be true for a wrapped permanent error")
}
if !errors.Is(perm, base) {
t.Fatal("errors.Is should still match the wrapped cause")
}
if perm.Error() != base.Error() {
t.Fatalf("Error() = %q, want %q", perm.Error(), base.Error())
}
}

func TestPermanentDetectedThroughWrapping(t *testing.T) {
// A permanent error wrapped again with fmt.Errorf("%w") must still be
// detectable — the ingest pipeline returns queue.Permanent(...) which a
// caller may decorate with stage context.
wrapped := fmt.Errorf("parse: %w", Permanent(errors.New("malformed")))
if !IsPermanent(wrapped) {
Comment on lines +24 to +29

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (testing): Consider adding a test that verifies river workers translate Permanent errors into JobCancel and stop retries

Current tests cover Permanent and IsPermanent wrapping semantics, but not the new River worker behavior that turns a Permanent error into river.JobCancel and stops retries. Please add a focused test in the queue package that constructs an envelopeWorker with a handler returning queue.Permanent(errors.New("malformed")) and asserts that Work yields river.JobCancel (or that River marks the job as canceled/dead-lettered). This ensures the higher-level retry policy actually honors Permanent errors, not just their detection in isolation.

Suggested implementation:

import (
	"context"
	"errors"
	"fmt"
	"testing"

	"github.com/riverqueue/river"
)
func TestPermanentDetectedThroughWrapping(t *testing.T) {
	// A permanent error wrapped again with fmt.Errorf("%w") must still be
	// detectable — the ingest pipeline returns queue.Permanent(...) which a
	// caller may decorate with stage context.
	wrapped := fmt.Errorf("parse: %w", Permanent(errors.New("malformed")))
	if !IsPermanent(wrapped) {
		t.Fatal("IsPermanent should see through an outer fmt.Errorf wrap")
	}
}

func TestPermanentErrorCancelsRiverJob(t *testing.T) {
	t.Parallel()

	// Construct a worker whose handler always returns a Permanent error.
	//
	// The expectation is that the queue worker layer will translate this into a
	// river.JobCancel so that River marks the job as canceled and does not
	// retry it.
	worker := &envelopeWorker{
		handler: func(ctx context.Context, env *Envelope) error {
			return Permanent(errors.New("malformed"))
		},
	}

	ctx := context.Background()

	// Use a minimal job instance; adapt fields as needed for your codebase.
	job := &river.Job{}

	err := worker.Work(ctx, job)
	if err == nil {
		t.Fatalf("Work() error = nil, want river.JobCancel")
	}

	var jobCancel river.JobCancel
	if !errors.As(err, &jobCancel) {
		t.Fatalf("Work() error = %T (%v), want river.JobCancel", err, err)
	}
}

The suggested test assumes:

  1. There is an envelopeWorker type in the queue package with a field handler compatible with func(context.Context, *Envelope) error and a method Work(ctx context.Context, job *river.Job) error.
  2. There is an Envelope type in the same package.
  3. Work returns a river.JobCancel error when the handler returns a Permanent error.

To fully integrate this test, you may need to:

  1. Adjust the construction of envelopeWorker (field names, constructor function, or handler signature) to match the real implementation. For example, if you normally construct workers via a constructor like newEnvelopeWorker(handler Handler), use that instead of struct literal initialization.
  2. Provide any required fields on river.Job (e.g., Queue, Kind, or IDs) so that Work runs without panicking in your environment.
  3. If your worker returns a wrapped river.JobCancel (e.g., with fmt.Errorf("...: %w", river.JobCancel{...})), keep the errors.As assertion as-is; otherwise, if you return a sentinel error, change the assertion to compare directly with that sentinel.
  4. If your worker uses a different method name than Work (e.g., Handle, Process), update the test accordingly.

t.Fatal("IsPermanent should see through an outer fmt.Errorf wrap")
}
}

func TestPermanentOfNilIsNil(t *testing.T) {
if Permanent(nil) != nil {
t.Fatal("Permanent(nil) must be nil so success paths stay clean")
}
if IsPermanent(nil) {
t.Fatal("IsPermanent(nil) must be false")
}
}

func TestOrdinaryErrorIsNotPermanent(t *testing.T) {
if IsPermanent(errors.New("storage: object not found")) {
t.Fatal("a plain (transient) error must not be classified permanent")
}
}
38 changes: 38 additions & 0 deletions pkg/queue/queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,41 @@ type Queue interface {
// ErrUnknownKind is returned by Queue implementations when a job with no
// registered handler is received.
var ErrUnknownKind = errors.New("queue: no handler registered for job kind")

// PermanentError marks a handler failure as NON-retryable: the input is
// deterministically bad (e.g. an encrypted or malformed document the parser
// can't open, or content with no extractable text), so re-running the job can
// only fail the same way. A queue backend that understands retries MUST stop
// retrying a job whose handler returns a PermanentError and dead-letter it
// immediately, instead of burning the full retry budget on an outcome that
// will never change.
//
// Transient failures (a not-yet-visible source under heavy concurrent
// ingestion, a parse timeout under load, a flaky network call) are the
// opposite: they are returned as ordinary errors so the queue retries them.
type PermanentError struct{ Err error }

func (e *PermanentError) Error() string {
if e.Err == nil {
return "permanent failure"
}
return e.Err.Error()
}

func (e *PermanentError) Unwrap() error { return e.Err }

// Permanent wraps err so the queue treats the failure as non-retryable. It
// returns nil for a nil err so `return queue.Permanent(doThing())` stays
// correct on the success path.
func Permanent(err error) error {
if err == nil {
return nil
}
return &PermanentError{Err: err}
}

// IsPermanent reports whether err (or anything it wraps) is a PermanentError.
func IsPermanent(err error) bool {
var pe *PermanentError
return errors.As(err, &pe)
}
10 changes: 9 additions & 1 deletion pkg/queue/river.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,12 +80,20 @@ func (w *envelopeWorker) Work(ctx context.Context, job *river.Job[envelopeArgs])
if job.JobRow != nil {
attempt, maxAttempts = job.Attempt, job.MaxAttempts
}
return h(ctx, Job{
err := h(ctx, Job{
Kind: job.Args.DomainKind,
Payload: job.Args.Payload,
Attempt: attempt,
MaxAttempts: maxAttempts,
})
// A PermanentError means the input is deterministically bad — retrying it
// can only waste the remaining attempts (and, worse, interleave confusing
// transient errors into the job's history). river.JobCancel stops all
// further retries and dead-letters the job now, preserving the real cause.
if IsPermanent(err) {
return river.JobCancel(err)
}
return err
}

// NewRiver constructs a new River-backed Queue. It opens its own pgxpool
Expand Down
28 changes: 25 additions & 3 deletions pkg/storage/local.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,31 @@ type Local struct {

// NewLocal returns a Local storage rooted at dir. The directory is created
// if it does not exist.
//
// dir is resolved to an ABSOLUTE path up front. A relative root (the default
// is "./data/documents") is otherwise resolved against the process's current
// working directory on every call — so if the engine is ever relaunched from a
// different directory while the queue still holds jobs that reference earlier
// uploads (River persists jobs in Postgres across restarts), the worker would
// look under a different root than the one the bytes were written to and fail
// with "object not found" on a file that is in fact on disk. Pinning the root
// to an absolute path at construction makes it stable for the process lifetime.
func NewLocal(dir string) (*Local, error) {
if err := os.MkdirAll(dir, 0o755); err != nil {
abs, err := filepath.Abs(dir)
if err != nil {
return nil, fmt.Errorf("resolve storage root %q: %w", dir, err)
}
if err := os.MkdirAll(abs, 0o755); err != nil {
return nil, fmt.Errorf("create storage root: %w", err)
}
return &Local{root: dir}, nil
return &Local{root: abs}, nil
}

// Root returns the absolute filesystem path the storage is rooted at. Exposed
// so the engine can log the resolved root at boot (the single most useful fact
// when diagnosing an "object not found" on a file that appears to be on disk).
func (l *Local) Root() string { return l.root }

func (l *Local) path(key string) string {
// Keys may include slashes; treat them as path separators.
return filepath.Join(l.root, filepath.FromSlash(key))
Expand Down Expand Up @@ -62,7 +80,11 @@ func (l *Local) Get(ctx context.Context, key string) (io.ReadCloser, Metadata, e
info, err := os.Stat(full)
if err != nil {
if errors.Is(err, os.ErrNotExist) {
return nil, Metadata{}, ErrNotFound
// Wrap ErrNotFound (errors.Is still matches) but carry the resolved
// absolute path so the failure is self-diagnosing: a caller seeing
// this in a log can immediately tell whether it looked in the wrong
// root vs. the bytes genuinely being absent.
return nil, Metadata{}, fmt.Errorf("%w: %s", ErrNotFound, full)
Comment on lines 82 to +87

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🚨 suggestion (security): Including the absolute path in the ErrNotFound wrapper may leak filesystem layout to callers/logs.

This improves diagnostics but also exposes the exact filesystem path wherever the error is surfaced. If errors can reach external boundaries (API responses, shared logs, multi-tenant contexts), this may leak host layout.

Consider either restricting the full path to internal logs (and using a more generic error message outward), or storing full in a non-string field (e.g., a custom error type) so it’s available for logging without being embedded directly in Error().

Suggested implementation:

type notFoundError struct {
	// path holds the absolute filesystem path that was probed. It is not
	// included in Error() to avoid leaking filesystem layout across process
	// boundaries, but is available to internal callers via errors.As.
	path string
}

func (e notFoundError) Error() string {
	// Preserve the existing outward-facing message while keeping the path
	// available for internal diagnostics.
	return ErrNotFound.Error()
}

func (e notFoundError) Unwrap() error {
	// Maintain errors.Is(err, ErrNotFound) semantics.
	return ErrNotFound
}

// Root returns the absolute filesystem path the storage is rooted at. Exposed
// so the engine can log the resolved root at boot (the single most useful fact
// when diagnosing an "object not found" on a file that appears to be on disk).
func (l *Local) Root() string { return l.root }
			// Wrap ErrNotFound (errors.Is still matches) but carry the resolved
			// absolute path in a structured error so it can be logged internally
			// without leaking filesystem layout via Error().
			return nil, Metadata{}, notFoundError{path: full}

To fully benefit from this change, any logging or diagnostics that previously relied on the error string to see the path should be updated to use errors.As to extract the notFoundError:

  1. Where you log these errors, add something like:
    var nfErr notFoundError
    if errors.As(err, &nfErr) {
        logger.With("path", nfErr.path).Warn("object not found")
    }
  2. Ensure that ErrNotFound continues to carry whatever user-facing message you want for external boundaries, since notFoundError.Error() now defers to ErrNotFound.Error().

}
return nil, Metadata{}, err
}
Expand Down
76 changes: 76 additions & 0 deletions pkg/storage/local_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package storage

import (
"bytes"
"context"
"errors"
"io"
"os"
"path/filepath"
"strings"
"testing"
)

func TestLocalResolvesRootToAbsolute(t *testing.T) {
// A relative root must be pinned to an absolute path so the worker reads
// from the same place regardless of the process's working directory at
// read time (the HAL-321 "object not found on a file that is on disk" bug).
dir := t.TempDir()
rel, err := filepath.Rel(mustGetwd(t), dir)
if err != nil {
t.Skipf("cannot relativise temp dir: %v", err)
}
l, err := NewLocal(rel)
if err != nil {
t.Fatalf("NewLocal: %v", err)
}
if !filepath.IsAbs(l.Root()) {
t.Fatalf("Root() = %q, want absolute", l.Root())
}
}

func TestLocalGetNotFoundCarriesPath(t *testing.T) {
l, err := NewLocal(t.TempDir())
if err != nil {
t.Fatalf("NewLocal: %v", err)
}
_, _, err = l.Get(context.Background(), "documents/missing/source.pdf")
if !errors.Is(err, ErrNotFound) {
t.Fatalf("err = %v, want ErrNotFound", err)
}
// The error must name the resolved path so an operator can immediately see
Comment on lines +32 to +41

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (testing): Strengthen the not-found error test by asserting the full absolute path (or at least that the path is absolute)

Since the change is about surfacing the resolved absolute path, this test should assert on that behavior rather than just the filename. For example, check that the error string includes l.Root() (or filepath.Join(l.Root(), ...)), or at least that the path in the message is absolute (e.g., using filepath.IsAbs). This will better protect against regressions where the filename remains but the absolute root is dropped.

Suggested implementation:

func TestLocalGetNotFoundCarriesPath(t *testing.T) {
	l, err := NewLocal(t.TempDir())
	if err != nil {
		t.Fatalf("NewLocal: %v", err)
	}

	_, _, err = l.Get(context.Background(), "documents/missing/source.pdf")
	if !errors.Is(err, ErrNotFound) {
		t.Fatalf("err = %v, want ErrNotFound", err)
	}

	msg := err.Error()

	// The error must name the resolved path so an operator can immediately see
	// WHERE the worker looked.
	if !strings.Contains(msg, "source.pdf") {
		t.Fatalf("not-found error %q should include the filename", msg)
	}

	// The error should surface the resolved absolute path rooted at l.Root().
	wantPath := filepath.Join(l.Root(), "documents", "missing", "source.pdf")
	if !strings.Contains(msg, wantPath) {
		t.Fatalf("not-found error %q should include resolved absolute path %q", msg, wantPath)
	}

	// Sanity check that the expected path is absolute; if this ever fails, the
	// test itself is misconfigured.
	if !filepath.IsAbs(wantPath) {
		t.Fatalf("test bug: wantPath %q must be absolute", wantPath)
	}
}

To compile, ensure local_test.go imports the packages used in this test:

  1. context
  2. errors
  3. strings
  4. path/filepath (likely already present, as shown in your snippet)

If any of these are missing from the import block, add them accordingly, respecting the existing grouping and style in the file.

// WHERE the worker looked.
if !strings.Contains(err.Error(), "source.pdf") {
t.Fatalf("not-found error %q should include the resolved path", err)
}
Comment on lines +41 to +45

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Assert the full resolved path, not just filename suffix.

The current check can pass without validating the key contract (resolved absolute path in the error). Assert against the expected full path built from l.Root() and key.

Suggested test hardening
 	// The error must name the resolved path so an operator can immediately see
 	// WHERE the worker looked.
-	if !strings.Contains(err.Error(), "source.pdf") {
+	wantPath := filepath.Join(l.Root(), filepath.FromSlash("documents/missing/source.pdf"))
+	if !strings.Contains(err.Error(), wantPath) {
 		t.Fatalf("not-found error %q should include the resolved path", err)
 	}
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
// The error must name the resolved path so an operator can immediately see
// WHERE the worker looked.
if !strings.Contains(err.Error(), "source.pdf") {
t.Fatalf("not-found error %q should include the resolved path", err)
}
// The error must name the resolved path so an operator can immediately see
// WHERE the worker looked.
wantPath := filepath.Join(l.Root(), filepath.FromSlash("documents/missing/source.pdf"))
if !strings.Contains(err.Error(), wantPath) {
t.Fatalf("not-found error %q should include the resolved path", err)
}
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@pkg/storage/local_test.go` around lines 41 - 45, The test assertion in the
error message validation only checks if the error contains the filename suffix
"source.pdf", but it should validate the full resolved absolute path to ensure
the contract is properly tested. Construct the expected full resolved path by
combining l.Root() with the key, then update the assertion to check that
err.Error() contains this complete path instead of just the filename suffix
"source.pdf". This ensures the test properly validates that the error message
includes the resolved absolute path that an operator needs to see.

}

func TestLocalPutThenGetRoundTrip(t *testing.T) {
l, err := NewLocal(t.TempDir())
if err != nil {
t.Fatalf("NewLocal: %v", err)
}
ctx := context.Background()
want := []byte("hello vectorless")
if err := l.Put(ctx, "documents/doc1/source.pdf", bytes.NewReader(want), Metadata{}); err != nil {
t.Fatalf("Put: %v", err)
}
rc, _, err := l.Get(ctx, "documents/doc1/source.pdf")
if err != nil {
t.Fatalf("Get: %v", err)
}
defer func() { _ = rc.Close() }()
got, _ := io.ReadAll(rc)
if !bytes.Equal(got, want) {
t.Fatalf("round-trip = %q, want %q", got, want)
}
}

func mustGetwd(t *testing.T) string {
t.Helper()
wd, err := os.Getwd()
if err != nil {
t.Fatalf("getwd: %v", err)
}
return wd
}
Loading