-
Notifications
You must be signed in to change notification settings - Fork 1
perf: pre-size reads and skip redundant copy to cut sync memory #9
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
1505af6
perf: pre-size reads and skip redundant copy to cut sync memory
9cd04f5
review: address Copilot — overflow guard, reuse readAll in tarGz, doc…
8cf4c4a
review: close resp.Body on non-success, clarify bytesConsumer is inte…
117d2aa
review: drop no-copy fast-path, drain bodies before close
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Some comments aren't visible on the classic Files Changed page.
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,62 @@ | ||
| package keepcurrent | ||
|
|
||
| import ( | ||
| "bytes" | ||
| "io" | ||
| ) | ||
|
|
||
| // maxPreAlloc caps how much readAll will allocate up front from a reader's | ||
| // self-reported size. It guards against a bogus or hostile size (e.g. a wildly | ||
| // inflated HTTP Content-Length) turning into a giant make() that panics or | ||
| // OOMs; anything larger falls back to io.ReadAll, which grows against the bytes | ||
| // actually delivered. The bound sits comfortably above the payloads keepcurrent | ||
| // syncs in practice (a MaxMind database is well under 100MB). | ||
| const maxPreAlloc = 256 << 20 // 256 MiB | ||
|
|
||
| // readAll reads r to EOF into a single buffer. It differs from io.ReadAll only | ||
| // in that, when r can report how many bytes remain, it allocates that buffer up | ||
| // front. io.ReadAll grows its buffer by repeatedly appending and reallocating, | ||
| // so reading an N-byte payload churns through a sequence of ever-larger backing | ||
| // arrays (N/2, 3N/4, N, ...) that all become garbage. For the multi-megabyte | ||
| // payloads keepcurrent is built to sync (e.g. a ~75MB MaxMind database) that | ||
| // transient churn dominates memory on small hosts. Pre-sizing turns the read | ||
| // into a single allocation. | ||
| func readAll(r io.Reader) ([]byte, error) { | ||
| if n, ok := knownSize(r); ok && n >= 0 && n <= maxPreAlloc { | ||
| // +bytes.MinRead leaves room for the final zero-byte read that signals | ||
| // EOF, so an exactly-sized payload never forces ReadFrom to reallocate. | ||
| buf := bytes.NewBuffer(make([]byte, 0, int(n)+bytes.MinRead)) | ||
| _, err := buf.ReadFrom(r) | ||
| return buf.Bytes(), err | ||
| } | ||
| return io.ReadAll(r) | ||
| } | ||
|
|
||
| // knownSize reports the number of bytes remaining in r when r can tell us. It | ||
| // recognises the sized readers keepcurrent constructs internally (sizedReadCloser) | ||
| // as well as the standard in-memory readers (*bytes.Reader, *bytes.Buffer, | ||
| // *strings.Reader) whose Len() reports the unread remainder. | ||
| func knownSize(r io.Reader) (int64, bool) { | ||
| switch v := r.(type) { | ||
| case interface{ size() int64 }: | ||
| return v.size(), true | ||
| case interface{ Len() int }: | ||
| return int64(v.Len()), true | ||
| } | ||
| return 0, false | ||
| } | ||
|
|
||
| // sizedReadCloser couples a ReadCloser with the total number of bytes it will | ||
| // yield, so readAll can pre-size its buffer. keepcurrent wraps HTTP bodies | ||
| // (Content-Length) and extracted archive entries in this. | ||
| type sizedReadCloser struct { | ||
| io.ReadCloser | ||
| n int64 | ||
| } | ||
|
|
||
| func (s sizedReadCloser) size() int64 { return s.n } | ||
|
|
||
| // bytesReadCloser wraps an in-memory payload in a size-aware ReadCloser. | ||
| func bytesReadCloser(b []byte) io.ReadCloser { | ||
| return sizedReadCloser{ReadCloser: io.NopCloser(bytes.NewReader(b)), n: int64(len(b))} | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,73 @@ | ||
| package keepcurrent | ||
|
|
||
| import ( | ||
| "bytes" | ||
| "io" | ||
| "testing" | ||
|
|
||
| "github.com/stretchr/testify/assert" | ||
| "github.com/stretchr/testify/require" | ||
| ) | ||
|
|
||
| // unsizedReader hides any size its wrapped reader might otherwise expose (it | ||
| // implements only Read), forcing readAll down the io.ReadAll fallback path. | ||
| type unsizedReader struct{ r io.Reader } | ||
|
|
||
| func (u *unsizedReader) Read(p []byte) (int, error) { return u.r.Read(p) } | ||
|
|
||
| func TestReadAllPreSizesKnownReaders(t *testing.T) { | ||
| payload := bytes.Repeat([]byte("x"), 1<<20) // 1 MiB | ||
|
|
||
| // *bytes.Reader reports Len(), so readAll should allocate exactly once and | ||
| // not overshoot the way io.ReadAll's doubling does. | ||
| got, err := readAll(bytes.NewReader(payload)) | ||
| require.NoError(t, err) | ||
| assert.Equal(t, payload, got) | ||
| assert.Equalf(t, len(payload)+bytes.MinRead, cap(got), | ||
| "buffer for a sized reader should be pre-allocated to the payload size, not grown") | ||
|
|
||
| // sizedReadCloser (what the web/tar.gz sources return) is also recognised. | ||
| got, err = readAll(bytesReadCloser(payload)) | ||
| require.NoError(t, err) | ||
| assert.Equal(t, payload, got) | ||
| assert.Equal(t, len(payload)+bytes.MinRead, cap(got)) | ||
| } | ||
|
|
||
| func TestReadAllFallsBackForUnsizedReaders(t *testing.T) { | ||
| payload := bytes.Repeat([]byte("y"), 4096) | ||
| // An opaque reader exposes no size; readAll must still return the full data. | ||
| got, err := readAll(&unsizedReader{bytes.NewReader(payload)}) | ||
| require.NoError(t, err) | ||
| assert.Equal(t, payload, got) | ||
| } | ||
|
|
||
| func TestReadAllFallsBackWhenSizeExceedsCap(t *testing.T) { | ||
| // A reader that reports a huge size (e.g. a bogus/hostile Content-Length) | ||
| // but only delivers a small payload. readAll must not attempt the giant | ||
| // pre-allocation; it should fall back to io.ReadAll and still return the | ||
| // real bytes. | ||
| payload := bytes.Repeat([]byte("z"), 1024) | ||
| r := sizedReadCloser{ReadCloser: io.NopCloser(bytes.NewReader(payload)), n: maxPreAlloc + 1} | ||
|
|
||
| n, ok := knownSize(r) | ||
| require.True(t, ok) | ||
| require.Greater(t, n, int64(maxPreAlloc)) | ||
|
|
||
| got, err := readAll(r) | ||
| require.NoError(t, err) | ||
| assert.Equal(t, payload, got) | ||
| assert.LessOrEqual(t, cap(got), maxPreAlloc, "must not pre-allocate the reported (bogus) size") | ||
| } | ||
|
|
||
| func TestKnownSize(t *testing.T) { | ||
| n, ok := knownSize(bytes.NewReader(make([]byte, 42))) | ||
| assert.True(t, ok) | ||
| assert.EqualValues(t, 42, n) | ||
|
|
||
| n, ok = knownSize(bytesReadCloser(make([]byte, 7))) | ||
| assert.True(t, ok) | ||
| assert.EqualValues(t, 7, n) | ||
|
|
||
| _, ok = knownSize(&unsizedReader{}) | ||
| assert.False(t, ok) | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.