From 3efb095b0345a16f3478003d88a0b633830b2dc1 Mon Sep 17 00:00:00 2001 From: Kristofer Karlsson Date: Thu, 18 Jun 2026 10:08:16 +0200 Subject: [PATCH 1/8] Documentation/technical: add paint-down-to-common doc Add a technical document describing the paint_down_to_common() algorithm used for merge-base computation, covering the paint walk, generation number regions, and termination conditions. Signed-off-by: Kristofer Karlsson --- Documentation/Makefile | 1 + Documentation/technical/meson.build | 1 + .../technical/paint-down-to-common.adoc | 135 ++++++++++++++++++ commit-reach.c | 6 +- 4 files changed, 142 insertions(+), 1 deletion(-) create mode 100644 Documentation/technical/paint-down-to-common.adoc diff --git a/Documentation/Makefile b/Documentation/Makefile index 2699f0b24af192..f8dea4b3953250 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -129,6 +129,7 @@ TECH_DOCS += technical/long-running-process-protocol TECH_DOCS += technical/multi-pack-index TECH_DOCS += technical/packfile-uri TECH_DOCS += technical/pack-heuristics +TECH_DOCS += technical/paint-down-to-common TECH_DOCS += technical/parallel-checkout TECH_DOCS += technical/partial-clone TECH_DOCS += technical/platform-support diff --git a/Documentation/technical/meson.build b/Documentation/technical/meson.build index ec07088c57617f..9ce11d5e484d9c 100644 --- a/Documentation/technical/meson.build +++ b/Documentation/technical/meson.build @@ -18,6 +18,7 @@ articles = [ 'multi-pack-index.adoc', 'packfile-uri.adoc', 'pack-heuristics.adoc', + 'paint-down-to-common.adoc', 'parallel-checkout.adoc', 'partial-clone.adoc', 'platform-support.adoc', diff --git a/Documentation/technical/paint-down-to-common.adoc b/Documentation/technical/paint-down-to-common.adoc new file mode 100644 index 00000000000000..a4dfcba038dd4d --- /dev/null +++ b/Documentation/technical/paint-down-to-common.adoc @@ -0,0 +1,135 @@ +Merge-Base Computation and paint_down_to_common() 
+================================================== + +The function `paint_down_to_common()` in `commit-reach.c` computes merge +bases by walking the commit graph backwards from two sets of tips and +finding where their ancestry meets. + +Use cases +--------- + +Computing merge bases is used in two different ways: + + 1. *Finding all merge bases* (`merge-base --all`, `merge-tree`, + `merge`, `rebase`). A merge base is a common ancestor that is + not itself an ancestor of another common ancestor. + + 2. *Ancestry checks* (`in_merge_bases`, used by `merge-base + --is-ancestor`, `branch -d`, `fetch`). These ask: "is commit A + an ancestor of commit B?" If a common ancestor equals one of the + inputs, that input is necessarily the only merge base -- no other + common ancestor can be both as recent and not an ancestor of it. + +Both use cases share the same algorithm and implementation. + +Algorithm +--------- + +Given a commit `one` and a set of commits `twos[]`, the walk paints +commits with two colors: + + - PARENT1: reachable from `one` + - PARENT2: reachable from any commit in `twos[]` + +The walk uses a priority queue ordered by generation number (falling +back to commit date when generation numbers are unavailable). Each +step dequeues the highest-priority commit (this is when we say a +commit is "visited") and propagates its paint flags to its parents, +enqueuing them if they gained new flags. When a commit receives +both PARENT1 and PARENT2, it is a merge-base candidate. A candidate +gains the STALE flag so its ancestors propagate staleness -- any +deeper common ancestor is necessarily redundant. + +INFINITY and finite generation regions +-------------------------------------- + +The commit-graph stores a generation number for each commit. Commits +not in the commit-graph have generation `GENERATION_NUMBER_INFINITY`. The +graph is closed under reachability: if a commit is in the graph, all +its ancestors are too. 
This partitions the commit graph into two regions: + +.... + +---------------------------------------+ + | INFINITY region | + | generation = INFINITY | + | queue order: heuristic (commit date) | + +---------------------------------------+ + | + v + +---------------------------------------+ + | Finite region | + | generation = finite | + | queue order: topological | + +---------------------------------------+ +.... + +When the commit-graph is enabled, the INFINITY region is typically +very small -- it only contains commits added since the last +commit-graph refresh. + +All reachable INFINITY-generation commits are visited before any +finite-generation commit, because INFINITY is larger than any finite +value. Once the walk crosses into the finite region, it stays there. + +In the finite region, generation ordering guarantees topological +traversal: children are always visited before their parents. This +means that paint on already-visited commits is final -- no future +traversal step can add paint to them. + +In the INFINITY region, commit-date ordering can violate this: a +parent with a later date can be visited before a child with an earlier +date. Paint flags are therefore NOT final at visit time, and a +commit visited with only one side's paint may later gain the other. + +Paint flags are only added, never removed. Since each flag can be set +at most once per commit, the number of times a commit can be +re-enqueued is bounded by the number of flag transitions. + +Termination +----------- + +The walk uses a `nonstale_queue` wrapper around `prio_queue` that +tracks `max_nonstale`: the lowest-priority non-stale commit enqueued +so far. Once that commit is dequeued, every remaining entry is known +to be STALE and the loop terminates. Specifically, the main loop +ends when one of the following conditions holds: + + 1. The queue is empty. + 2. `max_nonstale` has been dequeued, meaning the queue only contains + STALE entries. + 3. 
Generation cutoff: the dequeued commit's generation is below + a caller-supplied `min_generation` threshold. + 4. Single result: the caller only needs one merge base, one has + been found, and the walk has entered the finite-generation + region. + +Stale entry condition +~~~~~~~~~~~~~~~~~~~~~ +Once all queued entries are stale, no new merge-base candidates can +be discovered -- that requires at least one non-stale commit from +each side meeting. Continuing the walk could still invalidate +existing candidates by proving one is an ancestor of another, but +`remove_redundant()` handles that as a post-processing step, so it +is safe to exit early. + +Generation cutoff +~~~~~~~~~~~~~~~~~ +Some callers (notably `remove_redundant()`) supply a `min_generation` +threshold -- the minimum generation of the input commits. No merge +base can have a generation below this threshold, so the walk +terminates as soon as it dequeues such a commit. + +Single result +~~~~~~~~~~~~~ +When only one merge base is needed and the walk is in the +finite-generation region, the first candidate found is necessarily +the highest-generation common ancestor. No remaining commit in the +queue can be a descendant of this candidate (generation ordering +guarantees children are visited first), so it cannot be redundant +and the walk can stop immediately. + +Related documentation +--------------------- + + - `Documentation/technical/commit-graph.adoc` -- generation numbers + and the reachability closure property. diff --git a/commit-reach.c b/commit-reach.c index 5df471a313cf6b..a9483759e06c59 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -96,7 +96,11 @@ static struct commit *nonstale_queue_get_dedup(struct nonstale_queue *queue) return commit; } -/* all input commits in one and twos[] must have been parsed! */ +/* + * See Documentation/technical/paint-down-to-common.adoc + * + * All input commits in one and twos[] must have been parsed! 
+ */ static int paint_down_to_common(struct repository *r, struct commit *one, int n, struct commit **twos, From 1a0154b406343ba7e43bc60da45c4a5deadc7e30 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Sun, 14 Jun 2026 13:03:32 +0200 Subject: [PATCH 2/8] t6600: add test cases for side-exhaustion edge cases Add test cases to t6600-test-reach.sh that exercise edge cases in the side-exhaustion optimization for paint_down_to_common(): - in_merge_bases_many:self: commit is both A and one of the X inputs - get_merge_bases_many:duplicate-twos: duplicate entries in X list - get_merge_bases_many:pending-stale: STALE transition on an already-painted commit (ps-* diamond topology) - get_merge_bases_many:infinity-both-sides: both tips outside the commit-graph with non-monotonic dates (pi-* topology) Signed-off-by: Elijah Newren Signed-off-by: Kristofer Karlsson --- t/t6600-test-reach.sh | 111 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh index b5b314e57068f9..c2e091aad11a6d 100755 --- a/t/t6600-test-reach.sh +++ b/t/t6600-test-reach.sh @@ -49,6 +49,62 @@ test_expect_success 'setup' ' git tag -a -m "$x-$i" tag-$x-$i commit-$x-$i || return 1 done done && + + # Build a small side topology to exercise the (PARENT1|PARENT2) -> + # (PARENT1|PARENT2|STALE) transition in paint_down_to_common(); the + # 10x10 grid above does not exercise it because no merge-base candidate + # there is a descendant of another, so STALE never reaches a + # still-pending candidate. + # + # ps-X + # /|\ + # / | \ + # ps-Z ps-B ps-W + # | / \ | + # | / \ | + # |/ \| + # ps-T1 ps-T2 + # + # where ps-T1=merge(ps-Z,ps-B), ps-T2=merge(ps-W,ps-B), so + # merge-base(ps-T1,ps-T2) = ps-B. During the walk, ps-X transitions + # to (PARENT1|PARENT2) via ps-Z and ps-W before ps-B is dequeued; + # then the STALE-walk from ps-B transitions ps-X to + # (PARENT1|PARENT2|STALE). 
+ git checkout --orphan ps-orphan && + test_commit ps-X && + git checkout -b ps-B-br ps-X && test_commit ps-B && + git checkout -b ps-Z-br ps-X && test_commit ps-Z && + git checkout -b ps-W-br ps-X && test_commit ps-W && + git checkout -b ps-T1 ps-Z && + git merge --no-ff -m ps-T1 ps-B && + git checkout -b ps-T2 ps-W && + git merge --no-ff -m ps-T2 ps-B && + + # Build a side topology that lives entirely outside the half + # commit-graph and has non-monotonic commit dates, to exercise the + # INFINITY-gate in paint_down_to_common. With both tips outside + # the graph, generation is INFINITY and the queue falls back to + # commit-date order, which here is non-monotonic. + # + # pi-X (date 500, PARENT1 tip) --> pi-P, pi-D + # pi-D (date 480) --> pi-C + # pi-C (date 200) --> pi-B + # pi-B (date 100, PARENT2 tip) --> pi-P + # pi-P (date 450, root) + # + # merge-base(pi-X, pi-B) = pi-B (it is an ancestor of pi-X and is + # itself one of the queried tips). + git checkout --orphan pi-orphan && + test_commit --date "@450 +0000" pi-P && + test_commit --date "@100 +0000" pi-B && + test_commit --date "@200 +0000" pi-C && + test_commit --date "@480 +0000" pi-D && + GIT_AUTHOR_DATE="@500 +0000" GIT_COMMITTER_DATE="@500 +0000" \ + git commit-tree -p pi-D -p pi-P -m pi-X pi-D^{tree} >pi-X-oid && + pi_x="$(cat pi-X-oid)" && + git branch -f pi-X-br "$pi_x" && + git tag pi-X "$pi_x" && + git commit-graph write --reachable && mv .git/objects/info/commit-graph commit-graph-full && chmod u+w commit-graph-full && @@ -146,6 +202,16 @@ test_expect_success 'in_merge_bases_many:miss-heuristic' ' test_all_modes in_merge_bases_many ' +test_expect_success 'in_merge_bases_many:self' ' + cat >input <<-\EOF && + A:commit-6-8 + X:commit-5-9 + X:commit-6-8 + EOF + echo "in_merge_bases_many(A,X):1" >expect && + test_all_modes in_merge_bases_many +' + test_expect_success 'is_descendant_of:hit' ' cat >input <<-\EOF && A:commit-5-7 @@ -183,6 +249,51 @@ test_expect_success 'get_merge_bases_many' ' 
test_all_modes get_merge_bases_many ' +test_expect_success 'get_merge_bases_many:duplicate-twos' ' + cat >input <<-\EOF && + A:commit-5-7 + X:commit-4-8 + X:commit-4-8 + X:commit-6-6 + X:commit-6-6 + X:commit-8-3 + EOF + { + echo "get_merge_bases_many(A,X):" && + git rev-parse commit-5-6 \ + commit-4-7 | sort + } >expect && + test_all_modes get_merge_bases_many +' + +test_expect_success 'get_merge_bases_many:pending-stale' ' + # Exercises the (PARENT1|PARENT2) -> (...|STALE) transition path in + # paint_down_to_common(). See the topology comment in the setup test. + cat >input <<-\EOF && + A:ps-T1 + X:ps-T2 + EOF + { + echo "get_merge_bases_many(A,X):" && + git rev-parse ps-B + } >expect && + test_all_modes get_merge_bases_many +' + +test_expect_success 'get_merge_bases_many:infinity-both-sides' ' + # Exercises the push-time INFINITY-gate in paint_down_to_common(). See + # the pi-* topology comment in the setup test. + cat >input <<-\EOF && + A:pi-X + X:pi-B + EOF + { + echo "get_merge_bases_many(A,X):" && + git rev-parse pi-B + } >expect && + test_all_modes get_merge_bases_many +' + test_expect_success 'reduce_heads' ' cat >input <<-\EOF && X:commit-1-10 From 017bf156c57ceb2df33b316d4537baf3a72672fd Mon Sep 17 00:00:00 2001 From: Kristofer Karlsson Date: Fri, 12 Jun 2026 15:33:49 +0200 Subject: [PATCH 3/8] t6099, t6600: add side-exhaustion regression tests Add t6099 to test the case where multiple merge-base candidates exist and one is an ancestor of another. This exercises the side-exhaustion optimization in paint_down_to_common together with the remove_redundant safety net in get_merge_bases_many_0. Add a mixed finite/INFINITY test to t6600 where one tip is outside the commit-graph (INFINITY generation) and the other is inside. This exercises the region transition: the walk starts in the INFINITY region where side-exhaustion is disabled, then crosses into the finite region where it can fire. 
Signed-off-by: Kristofer Karlsson --- t/meson.build | 1 + t/t6099-merge-base-side-exhaustion.sh | 82 +++++++++++++++++++++++++++ t/t6600-test-reach.sh | 25 ++++++++ 3 files changed, 108 insertions(+) create mode 100755 t/t6099-merge-base-side-exhaustion.sh diff --git a/t/meson.build b/t/meson.build index 3219264fe7d497..ee6ebdffb92840 100644 --- a/t/meson.build +++ b/t/meson.build @@ -786,6 +786,7 @@ integration_tests = [ 't6041-bisect-submodule.sh', 't6050-replace.sh', 't6060-merge-index.sh', + 't6099-merge-base-side-exhaustion.sh', 't6100-rev-list-in-order.sh', 't6101-rev-parse-parents.sh', 't6102-rev-list-unexpected-objects.sh', diff --git a/t/t6099-merge-base-side-exhaustion.sh b/t/t6099-merge-base-side-exhaustion.sh new file mode 100755 index 00000000000000..4f1e0d50efbf38 --- /dev/null +++ b/t/t6099-merge-base-side-exhaustion.sh @@ -0,0 +1,82 @@ +#!/bin/sh + +test_description='merge-base with ancestor among merge-base candidates + +Test that merge-base --all correctly handles cases where +multiple merge-base candidates exist and one is an ancestor +of another. The side-exhaustion optimization in +paint_down_to_common may exit before STALE propagation +removes the ancestor, but remove_redundant catches it. + +Graph shape (parents are below children): + + A ----------- X + |\ /| + | B---------/ | + | | | + e2 \ f2 + | | | + e1 d1 f1 + \ | / + \ | / + \| / + C + +A and X are the two tips. +B and C are both reachable from A and X. +B reaches C through d1. +Only B should appear in merge-base --all output. +' + +GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main +export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME + +TEST_PASSES_SANITIZE_LEAK=true +. 
./test-lib.sh + +test_expect_success 'setup ancestor merge-base candidate' ' + test_commit C && + + git checkout -b d-chain HEAD && + test_commit d1 && + test_commit B && + + git checkout -b e-path C && + test_commit e1 && + test_commit e2 && + + git checkout -b f-path C && + test_commit f1 && + test_commit f2 && + + git checkout -b branch-A e-path && + test_merge A B && + + git checkout -b branch-X f-path && + test_merge X B && + + git commit-graph write --reachable +' + +test_expect_success 'merge-base --all excludes ancestor candidate' ' + git rev-parse B >expected && + git merge-base --all A X >actual && + test_cmp expected actual +' + +test_expect_success 'merge-base (single) finds shallowest' ' + git rev-parse B >expected && + git merge-base A X >actual && + test_cmp expected actual +' + +# Without commit-graph: generation numbers are INFINITY, +# side-exhaustion optimization does not fire. +test_expect_success 'merge-base --all without commit-graph' ' + rm -f .git/objects/info/commit-graph && + git rev-parse B >expected && + git merge-base --all A X >actual && + test_cmp expected actual +' + +test_done diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh index c2e091aad11a6d..4b771b4c58748a 100755 --- a/t/t6600-test-reach.sh +++ b/t/t6600-test-reach.sh @@ -294,6 +294,31 @@ test_expect_success 'get_merge_bases_many:infinity-both-sides' ' test_all_modes get_merge_bases_many ' +test_expect_success 'setup mixed finite/INFINITY topology' ' + # Create a commit outside all saved commit-graph files so it always + # has INFINITY generation, while its parent (ps-X) is in the graph + # with a finite generation. Use the ps-* orphan topology so we do + # not pollute the grid-based rev-list tests. 
+ git checkout ps-X && + test_env GIT_TEST_COMMIT_GRAPH= test_commit pm-INF +' + +test_expect_success 'get_merge_bases_many:mixed-finite-infinity' ' + # One tip (pm-INF) is outside the commit-graph with INFINITY + # generation; the other (ps-B) is in the graph with finite + # generation. The walk starts in the INFINITY region and crosses + # into the finite region where side-exhaustion can fire. + cat >input <<-\EOF && + A:pm-INF + X:ps-B + EOF + { + echo "get_merge_bases_many(A,X):" && + git rev-parse ps-X + } >expect && + test_all_modes get_merge_bases_many +' + test_expect_success 'reduce_heads' ' cat >input <<-\EOF && X:commit-1-10 From df3b090a2bf43737cb9b6ac9b4e769a658250bb5 Mon Sep 17 00:00:00 2001 From: Kristofer Karlsson Date: Wed, 24 Jun 2026 20:49:00 +0200 Subject: [PATCH 4/8] commit-reach: add trace2 instrumentation to paint_down_to_common() Add a step counter and trace2_data_intmax() call so that the number of commits visited during the paint walk is observable via GIT_TRACE2_EVENT. This provides a way to measure the impact of future optimizations without relying on wall-clock benchmarks alone. 
Signed-off-by: Kristofer Karlsson --- commit-reach.c | 5 ++++ t/t6600-test-reach.sh | 53 ++++++++++++++++++++++++++++++------------- 2 files changed, 42 insertions(+), 16 deletions(-) diff --git a/commit-reach.c b/commit-reach.c index a9483759e06c59..f6a438550b7902 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -11,6 +11,7 @@ #include "tag.h" #include "commit-reach.h" #include "ewah/ewok.h" +#include "trace2.h" /* Remember to update object flag allocation in object.h */ #define PARENT1 (1u<<16) @@ -112,6 +113,7 @@ static int paint_down_to_common(struct repository *r, { compare_commits_by_gen_then_commit_date } }; int i; + int steps = 0; timestamp_t last_gen = GENERATION_NUMBER_INFINITY; struct commit_list **tail = result; @@ -135,6 +137,7 @@ static int paint_down_to_common(struct repository *r, struct commit_list *parents; int flags; timestamp_t generation = commit_graph_generation(commit); + steps++; if (min_generation && generation > last_gen) BUG("bad generation skip %"PRItime" > %"PRItime" at %s", @@ -190,6 +193,8 @@ static int paint_down_to_common(struct repository *r, } clear_nonstale_queue(&queue); + trace2_data_intmax("paint_down_to_common", r, + "steps", steps); commit_list_sort_by_date(result); return 0; } diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh index 4b771b4c58748a..b3a31b80acdf66 100755 --- a/t/t6600-test-reach.sh +++ b/t/t6600-test-reach.sh @@ -118,24 +118,34 @@ test_expect_success 'setup' ' ' run_all_modes () { - test_when_finished rm -rf .git/objects/info/commit-graph && - "$@" <input >actual && - test_cmp expect actual && - cp commit-graph-full .git/objects/info/commit-graph && - "$@" <input >actual && - test_cmp expect actual && - cp commit-graph-half .git/objects/info/commit-graph && - "$@" <input >actual && - test_cmp expect actual && - cp commit-graph-no-gdat .git/objects/info/commit-graph && - "$@" <input >actual && - test_cmp expect actual + graph=.git/objects/info/commit-graph && + test_when_finished rm -rf "$graph" "${graph}s" && + rm -f 
trace-mode-*.txt && + + for mode in none full half no-gdat + do + rm -rf "$graph" "${graph}s" && + cp "commit-graph-${mode}" "$graph" 2>/dev/null || + true && + GIT_TRACE2_EVENT="$(pwd)/trace-mode-${mode}.txt" \ + "$@" <input >actual && + test_cmp expect actual || return 1 + done } test_all_modes () { run_all_modes test-tool reach "$@" } +test_paint_down_steps () { + for mode in none full half no-gdat + do + test_trace2_data paint_down_to_common steps "$1" \ + <"trace-mode-${mode}.txt" || return 1 + shift + done +} + test_expect_success 'ref_newer:miss' ' cat >input <<-\EOF && A:commit-5-7 @@ -209,7 +219,8 @@ test_expect_success 'in_merge_bases_many:self' ' X:commit-6-8 EOF echo "in_merge_bases_many(A,X):1" >expect && - test_all_modes in_merge_bases_many + test_all_modes in_merge_bases_many && + test_paint_down_steps 45 2 25 3 ' test_expect_success 'is_descendant_of:hit' ' @@ -277,7 +288,8 @@ test_expect_success 'get_merge_bases_many:pending-stale' ' echo "get_merge_bases_many(A,X):" && git rev-parse ps-B } >expect && - test_all_modes get_merge_bases_many + test_all_modes get_merge_bases_many && + test_paint_down_steps 6 6 6 6 ' test_expect_success 'get_merge_bases_many:infinity-both-sides' ' @@ -291,7 +303,8 @@ test_expect_success 'get_merge_bases_many:infinity-both-sides' ' echo "get_merge_bases_many(A,X):" && git rev-parse pi-B } >expect && - test_all_modes get_merge_bases_many + test_all_modes get_merge_bases_many && + test_paint_down_steps 5 5 5 5 ' test_expect_success 'setup mixed finite/INFINITY topology' ' @@ -316,7 +329,15 @@ test_expect_success 'get_merge_bases_many:mixed-finite-infinity' ' echo "get_merge_bases_many(A,X):" && git rev-parse ps-X } >expect && - test_all_modes get_merge_bases_many + test_all_modes get_merge_bases_many && + test_paint_down_steps 3 3 3 3 +' + +test_expect_success 'merge-base --all commit-walk steps' ' + >input && + git rev-parse commit-9-1 >expect && + run_all_modes git merge-base --all commit-9-9 commit-9-1 && + 
test_paint_down_steps 81 80 81 81 ' test_expect_success 'reduce_heads' ' From fed9f2c36827f17d636e0ef986b358c15fd86239 Mon Sep 17 00:00:00 2001 From: Kristofer Karlsson Date: Wed, 24 Jun 2026 20:53:35 +0200 Subject: [PATCH 5/8] commit-reach: introduce struct paint_state with per-side counters Add a paint_state struct for use by paint_down_to_common() that wraps a prio_queue with per-side commit counters. Each non-stale queued commit occupies exactly one counter bucket based on its paint flags: PARENT1-only, PARENT2-only, or both sides (a pending merge-base candidate). The counters are maintained by paint_count_update() which adjusts the appropriate bucket by a signed delta. An exhaustive switch on the paint+stale bits documents all valid flag combinations in one place. Convert paint_down_to_common() to use paint_state. The loop now drains the queue via paint_queue_get() which returns NULL when all counters reach zero, replacing the old pointer-based termination (max_nonstale). This is equivalent behavior -- both conditions detect that no non-stale entries remain. paint_queue_get() uses a "pop first" form: it dequeues a commit, then checks the counters. This means the loop exits one iteration earlier than the old code in some topologies (the popped stale commit is never processed), so a few step counts drop by one. The existing nonstale_queue is left in place for ahead_behind(). Signed-off-by: Kristofer Karlsson --- .../technical/paint-down-to-common.adoc | 9 +- commit-reach.c | 94 ++++++++++++++++--- t/t6600-test-reach.sh | 4 +- 3 files changed, 85 insertions(+), 22 deletions(-) diff --git a/Documentation/technical/paint-down-to-common.adoc b/Documentation/technical/paint-down-to-common.adoc index a4dfcba038dd4d..ac3e2b39a5aeed 100644 --- a/Documentation/technical/paint-down-to-common.adoc +++ b/Documentation/technical/paint-down-to-common.adoc @@ -88,15 +88,12 @@ re-enqueued is bounded by the number of flag transitions. 
Termination ----------- -The walk uses a `nonstale_queue` wrapper around `prio_queue` that -tracks `max_nonstale`: the lowest-priority non-stale commit enqueued -so far. Once that commit is dequeued, every remaining entry is known -to be STALE and the loop terminates. Specifically, the main loop +The walk tracks the number of commits of each type in the queue +(PARENT1-only, PARENT2-only, pending merge-base). The main loop ends when one of the following conditions holds: 1. The queue is empty. - 2. `max_nonstale` has been dequeued, meaning the queue only contains - STALE entries. + 2. The queue contains only stale entries. 3. Generation cutoff: the dequeued commit's generation is below a caller-supplied `min_generation` threshold. 4. Single result: the caller only needs one merge base, one has diff --git a/commit-reach.c b/commit-reach.c index f6a438550b7902..9ae306f60cf123 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -97,6 +97,75 @@ static struct commit *nonstale_queue_get_dedup(struct nonstale_queue *queue) return commit; } +/* + * Priority queue with per-side commit counters for paint_down_to_common(). + * Each non-stale queued commit occupies exactly one bucket: PARENT1-only, + * PARENT2-only, or both (a pending merge-base candidate). 
+ */ +struct paint_state { + struct prio_queue queue; + size_t parent1_count; + size_t parent2_count; + size_t mb_candidate_count; +}; + +static void paint_count_update(struct paint_state *state, + unsigned flags, int delta) +{ + switch (flags & (PARENT1 | PARENT2 | STALE)) { + case PARENT1: + state->parent1_count += delta; + break; + + case PARENT2: + state->parent2_count += delta; + break; + + case PARENT1 | PARENT2: + state->mb_candidate_count += delta; + break; + + case PARENT1 | PARENT2 | STALE: + break; + + default: + BUG("unexpected paint state"); + } +} + +static void paint_queue_put(struct paint_state *state, + struct commit *c, unsigned add_flags) +{ + unsigned old_flags = c->object.flags; + c->object.flags |= add_flags; + + if (old_flags & ENQUEUED) { + paint_count_update(state, old_flags, -1); + paint_count_update(state, c->object.flags, 1); + } else { + c->object.flags |= ENQUEUED; + prio_queue_put(&state->queue, c); + paint_count_update(state, c->object.flags, 1); + } +} + +static struct commit *paint_queue_get(struct paint_state *state) +{ + struct commit *commit = prio_queue_get(&state->queue); + + if (!commit) + return NULL; + + commit->object.flags &= ~ENQUEUED; + + if (!state->parent1_count && !state->parent2_count && + !state->mb_candidate_count) + return NULL; + + paint_count_update(state, commit->object.flags, -1); + return commit; +} + /* * See Documentation/technical/paint-down-to-common.adoc * @@ -109,31 +178,29 @@ static int paint_down_to_common(struct repository *r, enum merge_base_flags mb_flags, struct commit_list **result) { - struct nonstale_queue queue = { - { compare_commits_by_gen_then_commit_date } + struct paint_state state = { + .queue = { compare_commits_by_gen_then_commit_date } }; + struct commit *commit; int i; int steps = 0; timestamp_t last_gen = GENERATION_NUMBER_INFINITY; struct commit_list **tail = result; if (!min_generation && !corrected_commit_dates_enabled(r)) - queue.pq.compare = compare_commits_by_commit_date; + 
state.queue.compare = compare_commits_by_commit_date; one->object.flags |= PARENT1; if (!n) { commit_list_append(one, result); return 0; } - nonstale_queue_put_dedup(&queue, one); + paint_queue_put(&state, one, 0); - for (i = 0; i < n; i++) { - twos[i]->object.flags |= PARENT2; - nonstale_queue_put_dedup(&queue, twos[i]); - } + for (i = 0; i < n; i++) + paint_queue_put(&state, twos[i], PARENT2); - while (queue.max_nonstale) { - struct commit *commit = nonstale_queue_get_dedup(&queue); + while ((commit = paint_queue_get(&state))) { struct commit_list *parents; int flags; timestamp_t generation = commit_graph_generation(commit); @@ -172,7 +239,7 @@ static int paint_down_to_common(struct repository *r, if ((p->object.flags & flags) == flags) continue; if (repo_parse_commit(r, p)) { - clear_nonstale_queue(&queue); + clear_prio_queue(&state.queue); commit_list_free(*result); *result = NULL; /* @@ -187,12 +254,11 @@ static int paint_down_to_common(struct repository *r, return error(_("could not parse commit %s"), oid_to_hex(&p->object.oid)); } - p->object.flags |= flags; - nonstale_queue_put_dedup(&queue, p); + paint_queue_put(&state, p, flags); } } - clear_nonstale_queue(&queue); + clear_prio_queue(&state.queue); trace2_data_intmax("paint_down_to_common", r, "steps", steps); commit_list_sort_by_date(result); diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh index b3a31b80acdf66..51f3d70492553b 100755 --- a/t/t6600-test-reach.sh +++ b/t/t6600-test-reach.sh @@ -289,7 +289,7 @@ test_expect_success 'get_merge_bases_many:pending-stale' ' git rev-parse ps-B } >expect && test_all_modes get_merge_bases_many && - test_paint_down_steps 6 6 6 6 + test_paint_down_steps 5 5 5 5 ' test_expect_success 'get_merge_bases_many:infinity-both-sides' ' @@ -304,7 +304,7 @@ test_expect_success 'get_merge_bases_many:infinity-both-sides' ' git rev-parse pi-B } >expect && test_all_modes get_merge_bases_many && - test_paint_down_steps 5 5 5 5 + test_paint_down_steps 5 4 5 5 ' 
test_expect_success 'setup mixed finite/INFINITY topology' ' From 4db485b48aae810eeba28ea4feb47401ab352e88 Mon Sep 17 00:00:00 2001 From: Kristofer Karlsson Date: Tue, 23 Jun 2026 10:49:37 +0200 Subject: [PATCH 6/8] commit-reach: remove unused nonstale_queue dedup wrappers nonstale_queue_put_dedup() and nonstale_queue_get_dedup() became unused after the previous commit. The core nonstale_queue functions remain in use by ahead_behind(). Signed-off-by: Kristofer Karlsson --- commit-reach.c | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/commit-reach.c b/commit-reach.c index 9ae306f60cf123..176ffd68d0db63 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -79,24 +79,6 @@ static void clear_nonstale_queue(struct nonstale_queue *queue) queue->max_nonstale = NULL; } -static void nonstale_queue_put_dedup(struct nonstale_queue *queue, - struct commit *c) -{ - if (c->object.flags & ENQUEUED) - return; - c->object.flags |= ENQUEUED; - nonstale_queue_put(queue, c); -} - -static struct commit *nonstale_queue_get_dedup(struct nonstale_queue *queue) -{ - struct commit *commit = nonstale_queue_get(queue); - - if (commit) - commit->object.flags &= ~ENQUEUED; - return commit; -} - /* * Priority queue with per-side commit counters for paint_down_to_common(). * Each non-stale queued commit occupies exactly one bucket: PARENT1-only, From 450678064972beae5f21b9135e34069a1a8ea585 Mon Sep 17 00:00:00 2001 From: Kristofer Karlsson Date: Wed, 24 Jun 2026 20:55:23 +0200 Subject: [PATCH 7/8] commit-reach: terminate merge-base walk when one paint side is exhausted Add an early termination check to paint_down_to_common() using the per-side counters introduced earlier. Once the walk enters the finite-generation region, terminate early when one side's exclusive count drops to zero -- no new merge-base can form without both paint sides meeting. 
The check also waits for mb_candidate_count (the number of pending merge-base candidates in the queue) to reach zero, ensuring all merge-base candidates have been dequeued and recorded before exiting. The INFINITY gate ensures correctness: commits without a commit-graph entry have GENERATION_NUMBER_INFINITY and are ordered by commit date, which is not topologically reliable. The optimization only fires once the walk enters the finite-generation region where ordering guarantees hold. Step counts measured with trace2 on git.git with commit-graph: merge-base --all v2.0.0 v2.55.0-rc1: before: 72264 steps after: 44589 steps merge-base --all v2.55.0-rc1 v2.55.0-rc1~5: before: 110 steps after: 7 steps Helped-by: Derrick Stolee Helped-by: Elijah Newren Signed-off-by: Kristofer Karlsson --- .../technical/paint-down-to-common.adoc | 17 +++++++++++++++++ commit-reach.c | 17 ++++++++++++++--- t/t6600-test-reach.sh | 4 ++-- 3 files changed, 33 insertions(+), 5 deletions(-) diff --git a/Documentation/technical/paint-down-to-common.adoc b/Documentation/technical/paint-down-to-common.adoc index ac3e2b39a5aeed..15adac78851ca4 100644 --- a/Documentation/technical/paint-down-to-common.adoc +++ b/Documentation/technical/paint-down-to-common.adoc @@ -99,6 +99,9 @@ ends when one of the following conditions holds: 4. Single result: the caller only needs one merge base, one has been found, and the walk has entered the finite-generation region. + 5. Side exhaustion: one side has no exclusive (PARENT1-only or + PARENT2-only) commits left in the queue, no pending merge-base + candidates exist, and the walk has entered the finite-generation region. Stale entry condition ~~~~~~~~~~~~~~~~~~~~~ @@ -109,6 +112,20 @@ existing candidates by proving one is an ancestor of another, but `remove_redundant()` handles that as a post-processing step, so it is safe to exit early. +Side-exhaustion condition +~~~~~~~~~~~~~~~~~~~~~~~~~ +A new merge-base requires commits from both sides to meet. 
When one +side's exclusive counter reaches zero and there are no pending +merge-base candidates, no future traversal step can produce a new +candidate. + +This optimization only activates in the finite-generation region +where topological ordering holds. In that region, children are +always visited before parents, so paint flags are final at visit +time and an exhausted side cannot reappear. In the INFINITY region, +commit-date ordering can violate this guarantee, so the check is +skipped. + Generation cutoff ~~~~~~~~~~~~~~~~~ Some callers (notably `remove_redundant()`) supply a `min_generation` diff --git a/commit-reach.c b/commit-reach.c index 176ffd68d0db63..e174b219c60dd8 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -131,6 +131,10 @@ static void paint_queue_put(struct paint_state *state, } } +/* + * Dequeue the next commit for the paint walk, or return NULL when + * no more merge bases can be discovered. + */ static struct commit *paint_queue_get(struct paint_state *state) { struct commit *commit = prio_queue_get(&state->queue); @@ -140,9 +144,16 @@ static struct commit *paint_queue_get(struct paint_state *state) commit->object.flags &= ~ENQUEUED; - if (!state->parent1_count && !state->parent2_count && - !state->mb_candidate_count) - return NULL; + if (!state->mb_candidate_count) { + /* only stale entries remain */ + if (!state->parent1_count && !state->parent2_count) + return NULL; + + /* one side is exhausted */ + if ((!state->parent1_count || !state->parent2_count) && + commit_graph_generation(commit) < GENERATION_NUMBER_INFINITY) + return NULL; + } paint_count_update(state, commit->object.flags, -1); return commit; diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh index 51f3d70492553b..63650075603109 100755 --- a/t/t6600-test-reach.sh +++ b/t/t6600-test-reach.sh @@ -220,7 +220,7 @@ test_expect_success 'in_merge_bases_many:self' ' EOF echo "in_merge_bases_many(A,X):1" >expect && test_all_modes in_merge_bases_many && - test_paint_down_steps 45 
2 25 3 + test_paint_down_steps 45 1 25 1 ' test_expect_success 'is_descendant_of:hit' ' @@ -337,7 +337,7 @@ test_expect_success 'merge-base --all commit-walk steps' ' >input && git rev-parse commit-9-1 >expect && run_all_modes git merge-base --all commit-9-9 commit-9-1 && - test_paint_down_steps 81 80 81 81 + test_paint_down_steps 81 9 57 10 ' test_expect_success 'reduce_heads' ' From 8dd15d44e6a60fc39bbf6d894628507e839f9248 Mon Sep 17 00:00:00 2001 From: Kristofer Karlsson Date: Fri, 26 Jun 2026 17:18:20 +0200 Subject: [PATCH 8/8] commit-reach: move min_generation check into paint_queue_get() Consolidate the min_generation termination condition into paint_queue_get(), alongside the existing stale-entry and side-exhaustion checks. Move last_gen into struct paint_state so that commit_graph_generation() is called exactly once per dequeued commit and the result is shared across all termination checks and the monotonicity BUG assertion. Signed-off-by: Kristofer Karlsson --- commit-reach.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/commit-reach.c b/commit-reach.c index e174b219c60dd8..5c5c54d66ec660 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -89,6 +89,8 @@ struct paint_state { size_t parent1_count; size_t parent2_count; size_t mb_candidate_count; + timestamp_t min_generation; + timestamp_t last_gen; }; static void paint_count_update(struct paint_state *state, @@ -138,11 +140,23 @@ static void paint_queue_put(struct paint_state *state, static struct commit *paint_queue_get(struct paint_state *state) { struct commit *commit = prio_queue_get(&state->queue); + timestamp_t generation; if (!commit) return NULL; commit->object.flags &= ~ENQUEUED; + generation = commit_graph_generation(commit); + + if (state->min_generation && generation > state->last_gen) + BUG("bad generation skip %"PRItime" > %"PRItime" at %s", + generation, state->last_gen, + oid_to_hex(&commit->object.oid)); + state->last_gen = generation; + 
+ /* generation cutoff */ + if (generation < state->min_generation) + return NULL; if (!state->mb_candidate_count) { /* only stale entries remain */ @@ -151,7 +165,7 @@ static struct commit *paint_queue_get(struct paint_state *state) /* one side is exhausted */ if ((!state->parent1_count || !state->parent2_count) && - commit_graph_generation(commit) < GENERATION_NUMBER_INFINITY) + generation < GENERATION_NUMBER_INFINITY) return NULL; } @@ -177,9 +191,10 @@ static int paint_down_to_common(struct repository *r, struct commit *commit; int i; int steps = 0; - timestamp_t last_gen = GENERATION_NUMBER_INFINITY; struct commit_list **tail = result; + state.min_generation = min_generation; + state.last_gen = GENERATION_NUMBER_INFINITY; if (!min_generation && !corrected_commit_dates_enabled(r)) state.queue.compare = compare_commits_by_commit_date; @@ -196,18 +211,8 @@ static int paint_down_to_common(struct repository *r, while ((commit = paint_queue_get(&state))) { struct commit_list *parents; int flags; - timestamp_t generation = commit_graph_generation(commit); steps++; - if (min_generation && generation > last_gen) - BUG("bad generation skip %"PRItime" > %"PRItime" at %s", - generation, last_gen, - oid_to_hex(&commit->object.oid)); - last_gen = generation; - - if (generation < min_generation) - break; - flags = commit->object.flags & (PARENT1 | PARENT2 | STALE); if (flags == (PARENT1 | PARENT2)) { if (!(commit->object.flags & RESULT)) { @@ -219,7 +224,7 @@ static int paint_down_to_common(struct repository *r, * descendant of this one. */ if (!(mb_flags & MERGE_BASE_FIND_ALL) && - generation < GENERATION_NUMBER_INFINITY) + state.last_gen < GENERATION_NUMBER_INFINITY) break; } /* Mark parents of a found merge stale */