Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions internal/httputil/conditional.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,23 @@ func ConditionalOptions(r *http.Request) []client.RequestOption {
return opts
}

// RangeOptions extracts only the Range/If-Range options from r, for callers
// that evaluate If-Match/If-None-Match separately (e.g. via CheckConditionals)
// and so must not double-handle them by forwarding the full set to Open.
// If-Range is only meaningful alongside Range, so it is dropped when Range is
// absent.
func RangeOptions(r *http.Request) []client.RequestOption {
v := r.Header.Get("Range")
if v == "" {
return nil
}
opts := []client.RequestOption{func(o *client.RequestOptions) { o.Range = v }}
if ir := r.Header.Get("If-Range"); ir != "" {
opts = append(opts, client.IfRange(ir))
}
return opts
}

// CheckConditionals evaluates RFC 7232 If-Match and If-None-Match precondition
// headers on r against etag. It returns 0 when all preconditions pass,
// otherwise the HTTP status the caller should send: 412 Precondition Failed for
Expand Down
104 changes: 79 additions & 25 deletions internal/strategy/git/snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,24 @@ func (s *Strategy) handleSnapshotRequest(w http.ResponseWriter, r *http.Request,
}
s.maybeBackgroundFetch(repo)

reader, headers, err := s.cache.Open(ctx, cacheKey)
// Forward only Range/If-Range here; If-Match/If-None-Match are evaluated
// against the served body below via CheckConditionals.
reader, headers, err := s.cache.Open(ctx, cacheKey, httputil.RangeOptions(r)...)
if errors.Is(err, cache.ErrRangeNotSatisfiable) {
// A failed If-Match (412) or satisfied If-None-Match (304) takes
// precedence over an unsatisfiable range (RFC 7232 §3, RFC 7233 §3.1).
if status := httputil.CheckConditionals(r, headers.Get(cache.ETagKey)); status != 0 {
w.WriteHeader(status)
return
}
w.Header().Set("Content-Type", "application/zstd")
w.Header().Set("Accept-Ranges", "bytes")
if cr := headers.Get("Content-Range"); cr != "" {
w.Header().Set("Content-Range", cr)
}
w.WriteHeader(http.StatusRequestedRangeNotSatisfiable)
return
}
if err != nil && !errors.Is(err, os.ErrNotExist) {
logger.ErrorContext(ctx, "Failed to open snapshot from cache", "upstream", upstreamURL, "error", err)
http.Error(w, "Internal server error", http.StatusInternalServerError)
Expand Down Expand Up @@ -517,23 +534,42 @@ func (s *Strategy) handleBundleRequest(w http.ResponseWriter, r *http.Request, h
}

func (s *Strategy) serveSnapshotWithBundle(ctx context.Context, w http.ResponseWriter, r *http.Request, reader io.ReadCloser, headers http.Header, repo *gitclone.Repository, upstreamURL, repoName string, start time.Time) error {
snapshotCommit := headers.Get("X-Cachew-Snapshot-Commit")
mirrorHead := s.getMirrorHead(ctx, repo)

// Forward the snapshot commit to the client so it knows whether the
// snapshot is fresh (no bundle URL = already at HEAD, skip freshen).
if snapshotCommit != "" {
w.Header().Set("X-Cachew-Snapshot-Commit", snapshotCommit)
// If-Match/If-None-Match are evaluated before serving any body: a satisfied
// If-None-Match (304) or a failed If-Match (412) takes precedence over a
// range response, so revalidating clients are not handed a 206 they would
// have to discard (RFC 7232 §3, RFC 7233 §3.1).
if status := httputil.CheckConditionals(r, headers.Get(cache.ETagKey)); status != 0 {
w.WriteHeader(status)
Comment on lines +541 to +542

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Preserve metadata when returning 304

When a GET or ranged GET includes a matching If-None-Match, this early return writes the 304 before applySnapshotCacheHeaders or setSnapshotMetadataHeaders run. The previous GET path populated the ETag and X-Cachew-* headers before the conditional status, and stale snapshots still need X-Cachew-Bundle-Url so a client revalidating a cached tarball can apply the delta instead of treating that cached snapshot as current.

Useful? React with 👍 / 👎.

s.metrics.recordSnapshotServe(ctx, "cache", repoName, 0, time.Since(start))
if span := trace.SpanFromContext(ctx); span.SpanContext().IsValid() {
span.SetAttributes(attribute.String("cachew.source", "cache"), attribute.Int64("cachew.bytes", 0))
}
return nil
}

if snapshotCommit != "" && mirrorHead != "" && snapshotCommit != mirrorHead {
repoPath, err := gitclone.RepoPathFromURL(upstreamURL)
if err == nil {
bundleURL := fmt.Sprintf("/git/%s/snapshot.bundle?base=%s", repoPath, snapshotCommit)
w.Header().Set("X-Cachew-Bundle-Url", bundleURL)
// A satisfied byte range is served as-is. Bundle negotiation applies only to
// whole-snapshot downloads, so a partial read (e.g. a client.ParallelGet
// chunk) skips it and returns 206 directly.
if cr := headers.Get("Content-Range"); cr != "" {

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Check validators before serving snapshot ranges

When a client sends a satisfiable Range together with a matching If-None-Match (or a stale If-Match), handleSnapshotRequest now opens the cache with RangeOptions, so those validators are not evaluated by the cache; this Content-Range branch then returns before the CheckConditionals call below. That changes conditional range requests from the expected 304/412 into a 206 body response, so revalidating clients redownload chunks and failed If-Match preconditions are not enforced.

Useful? React with 👍 / 👎.

applySnapshotCacheHeaders(w, headers)
w.Header().Set("Accept-Ranges", "bytes")
w.Header().Set("Content-Range", cr)
// Ranged clients (client.ParallelGet) read the freshen metadata from the
// discovery chunk, so it must be present on partial responses too.
s.setSnapshotMetadataHeaders(ctx, w, headers, repo, upstreamURL)
w.WriteHeader(http.StatusPartialContent)
n, err := io.Copy(w, reader)
s.metrics.recordSnapshotServe(ctx, "cache_range", repoName, n, time.Since(start))
if span := trace.SpanFromContext(ctx); span.SpanContext().IsValid() {
span.SetAttributes(attribute.String("cachew.source", "cache_range"), attribute.Int64("cachew.bytes", n))
}
return errors.Wrap(err, "stream snapshot range")
}

snapshotCommit, bundleURL := s.setSnapshotMetadataHeaders(ctx, w, headers, repo, upstreamURL)

// Proactively generate and cache the bundle so any pod can serve it.
// Proactively generate and cache the advertised bundle so any pod can serve it.
if bundleURL != "" {
go func() {
bgCtx := context.WithoutCancel(ctx)
logger := logging.FromContext(bgCtx)
Expand All @@ -550,25 +586,43 @@ func (s *Strategy) serveSnapshotWithBundle(ctx context.Context, w http.ResponseW
}

applySnapshotCacheHeaders(w, headers)
w.Header().Set("Accept-Ranges", "bytes")

// Honour conditional GETs against the advertised ETag. ServeContent does this
// natively for *os.File readers, but cache backends returning non-file readers
// (S3, memory, remote) fall through to io.Copy, so revalidate explicitly to
// avoid streaming the full snapshot when the client already has it.
var n int64
var err error
if status := httputil.CheckConditionals(r, headers.Get(cache.ETagKey)); status != 0 {
w.WriteHeader(status)
} else {
n, err = serveReaderFast(w, r, reader)
}
n, err := serveReaderFast(w, r, reader)
s.metrics.recordSnapshotServe(ctx, "cache", repoName, n, time.Since(start))
if span := trace.SpanFromContext(ctx); span.SpanContext().IsValid() {
span.SetAttributes(attribute.String("cachew.source", "cache"), attribute.Int64("cachew.bytes", n))
}
return errors.Wrap(err, "stream snapshot")
}

// setSnapshotMetadataHeaders advertises the snapshot's commit and, when the
// snapshot trails the mirror's HEAD, the delta-bundle URL clients use to
// fast-forward. Shared by the full and ranged serve paths so ranged clients
// (client.ParallelGet) receive the same freshen metadata on the discovery
// chunk. It returns the snapshot commit and the bundle URL it set (empty when
// the snapshot is already at HEAD), so callers can decide whether to
// pre-generate the bundle.
func (s *Strategy) setSnapshotMetadataHeaders(ctx context.Context, w http.ResponseWriter, headers http.Header, repo *gitclone.Repository, upstreamURL string) (snapshotCommit, bundleURL string) {
snapshotCommit = headers.Get("X-Cachew-Snapshot-Commit")
if snapshotCommit == "" {
return "", ""
}
w.Header().Set("X-Cachew-Snapshot-Commit", snapshotCommit)

mirrorHead := s.getMirrorHead(ctx, repo)
if mirrorHead == "" || snapshotCommit == mirrorHead {
return snapshotCommit, ""
}
repoPath, err := gitclone.RepoPathFromURL(upstreamURL)
if err != nil {
return snapshotCommit, ""
}
bundleURL = fmt.Sprintf("/git/%s/snapshot.bundle?base=%s", repoPath, snapshotCommit)
w.Header().Set("X-Cachew-Bundle-Url", bundleURL)
return snapshotCommit, bundleURL
}

// applySnapshotCacheHeaders forwards the cached snapshot's validators so clients
// can revalidate (ETag) and size the transfer (Content-Length). Content-Type is
// fixed for snapshots regardless of what the cache backend recorded.
Expand Down
106 changes: 106 additions & 0 deletions internal/strategy/git/snapshot_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"testing"
"time"
Expand Down Expand Up @@ -930,3 +931,108 @@ func TestSnapshotRemoteURLUsesUpstreamURL(t *testing.T) {
assert.NoError(t, err, string(output))
assert.Equal(t, upstreamURL+"\n", string(output))
}

func TestSnapshotGetHonorsRange(t *testing.T) {
if _, err := exec.LookPath("git"); err != nil {
t.Skip("git not found in PATH")
}

_, ctx := logging.Configure(context.Background(), logging.Config{})
tmpDir := t.TempDir()
mirrorRoot := filepath.Join(tmpDir, "mirrors")
upstreamURL := "https://github.com/org/repo"
mirrorPath := filepath.Join(mirrorRoot, "github.com", "org", "repo")
createTestMirrorRepo(t, mirrorPath)

// The memory cache returns a non-*os.File reader, so range handling must come
// from the strategy forwarding Range to Open rather than http.ServeContent.
memCache, err := cache.NewMemory(ctx, cache.MemoryConfig{MaxTTL: time.Hour})
assert.NoError(t, err)
mux := newTestMux()

cm := gitclone.NewManagerProvider(ctx, gitclone.Config{MirrorRoot: mirrorRoot}, nil)
s, err := git.New(ctx, git.Config{}, newTestScheduler(ctx, t), memCache, mux, cm, func() (*githubapp.TokenManager, error) { return nil, nil }) //nolint:nilnil
assert.NoError(t, err)

manager, err := cm()
assert.NoError(t, err)
repo, err := manager.GetOrCreate(ctx, upstreamURL)
assert.NoError(t, err)

waitForReady(t, s)
err = s.GenerateAndUploadSnapshot(ctx, repo)
assert.NoError(t, err)

handler := mux.handlers["GET /git/{host}/{path...}"]
assert.NotZero(t, handler)

get := func(rangeHeader string) *httptest.ResponseRecorder {
req := httptest.NewRequest(http.MethodGet, "/git/github.com/org/repo/snapshot.tar.zst", nil)
req = req.WithContext(ctx)
req.SetPathValue("host", "github.com")
req.SetPathValue("path", "org/repo/snapshot.tar.zst")
if rangeHeader != "" {
req.Header.Set("Range", rangeHeader)
}
w := httptest.NewRecorder()
handler.ServeHTTP(w, req)
return w
}

// Full GET advertises range support and yields the whole body.
full := get("")
assert.Equal(t, 200, full.Code)
assert.Equal(t, "bytes", full.Header().Get("Accept-Ranges"))
body := full.Body.Bytes()
assert.True(t, len(body) > 4, "snapshot body should be larger than the test range")
commit := full.Header().Get("X-Cachew-Snapshot-Commit")
assert.NotZero(t, commit, "full GET should advertise the snapshot commit")

// A satisfiable range returns 206 with the matching bytes and Content-Range.
partial := get("bytes=0-3")
assert.Equal(t, http.StatusPartialContent, partial.Code)
assert.Equal(t, "bytes", partial.Header().Get("Accept-Ranges"))
assert.Equal(t, "bytes 0-3/"+strconv.Itoa(len(body)), partial.Header().Get("Content-Range"))
assert.Equal(t, body[:4], partial.Body.Bytes())
// Ranged clients must receive the same freshen metadata as a full GET so
// they can apply a delta bundle after a parallel download.
assert.Equal(t, commit, partial.Header().Get("X-Cachew-Snapshot-Commit"))

// A range beyond the object is not satisfiable.
tooBig := get("bytes=" + strconv.Itoa(len(body)+10) + "-" + strconv.Itoa(len(body)+20))
assert.Equal(t, http.StatusRequestedRangeNotSatisfiable, tooBig.Code)
assert.Equal(t, "bytes */"+strconv.Itoa(len(body)), tooBig.Header().Get("Content-Range"))

etag := full.Header().Get("ETag")
assert.NotZero(t, etag, "snapshot should advertise an ETag")

getCond := func(rangeHeader string, condHeaders map[string]string) *httptest.ResponseRecorder {
req := httptest.NewRequest(http.MethodGet, "/git/github.com/org/repo/snapshot.tar.zst", nil)
req = req.WithContext(ctx)
req.SetPathValue("host", "github.com")
req.SetPathValue("path", "org/repo/snapshot.tar.zst")
req.Header.Set("Range", rangeHeader)
for k, v := range condHeaders {
req.Header.Set(k, v)
}
w := httptest.NewRecorder()
handler.ServeHTTP(w, req)
return w
}

// Conditional validators take precedence over the range: a matching
// If-None-Match revalidates to 304 and a stale If-Match fails with 412,
// rather than returning a 206 body the client would discard.
notModified := getCond("bytes=0-3", map[string]string{"If-None-Match": etag})
assert.Equal(t, http.StatusNotModified, notModified.Code)
assert.Equal(t, 0, notModified.Body.Len())

preconditionFailed := getCond("bytes=0-3", map[string]string{"If-Match": `"stale-etag"`})
assert.Equal(t, http.StatusPreconditionFailed, preconditionFailed.Code)

// An unsatisfiable range with a stale If-Match is a precondition failure
// (412), not a 416.
beyond := strconv.Itoa(len(body)+10) + "-" + strconv.Itoa(len(body)+20)
rangeWithStaleIfMatch := getCond("bytes="+beyond, map[string]string{"If-Match": `"stale-etag"`})
assert.Equal(t, http.StatusPreconditionFailed, rangeWithStaleIfMatch.Code)
}