From bb1d885a348e886ded68227e3e7446f2e9a6e77d Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Mon, 19 May 2025 11:58:06 +0200 Subject: [PATCH 1/7] refs: add function to translate errors to strings The commit 76e760b999 (refs: introduce enum-based transaction error types, 2025-04-08) introduced enum-based transaction error types. The refs transaction logic was also modified to propagate these errors. For clients of the ref transaction system, it would be beneficial to provide human readable messages for these errors. There is already an existing mapping in 'builtin/update-ref.c', move it to 'refs.c' as `ref_transaction_error_msg()` and use the same within the 'builtin/update-ref.c'. Helped-by: Junio C Hamano Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- builtin/update-ref.c | 25 +------------------------ refs.c | 20 ++++++++++++++++++++ refs.h | 5 +++++ 3 files changed, 26 insertions(+), 24 deletions(-) diff --git a/builtin/update-ref.c b/builtin/update-ref.c index 2b1e336ba1a..1e6131e04ad 100644 --- a/builtin/update-ref.c +++ b/builtin/update-ref.c @@ -575,30 +575,7 @@ static void print_rejected_refs(const char *refname, void *cb_data UNUSED) { struct strbuf sb = STRBUF_INIT; - const char *reason = ""; - - switch (err) { - case REF_TRANSACTION_ERROR_NAME_CONFLICT: - reason = "refname conflict"; - break; - case REF_TRANSACTION_ERROR_CREATE_EXISTS: - reason = "reference already exists"; - break; - case REF_TRANSACTION_ERROR_NONEXISTENT_REF: - reason = "reference does not exist"; - break; - case REF_TRANSACTION_ERROR_INCORRECT_OLD_VALUE: - reason = "incorrect old value provided"; - break; - case REF_TRANSACTION_ERROR_INVALID_NEW_VALUE: - reason = "invalid new value provided"; - break; - case REF_TRANSACTION_ERROR_EXPECTED_SYMREF: - reason = "expected symref but found regular ref"; - break; - default: - reason = "unkown failure"; - } + const char *reason = ref_transaction_error_msg(err); strbuf_addf(&sb, "rejected %s %s %s %s\n", refname, new_oid ? oid_to_hex(new_oid) : new_target, diff --git a/refs.c b/refs.c index dce5c49ca2b..3d580d03bb5 100644 --- a/refs.c +++ b/refs.c @@ -3314,3 +3314,23 @@ int ref_update_expects_existing_old_ref(struct ref_update *update) return (update->flags & REF_HAVE_OLD) && (!is_null_oid(&update->old_oid) || update->old_target); } + +const char *ref_transaction_error_msg(enum ref_transaction_error err) +{ + switch (err) { + case REF_TRANSACTION_ERROR_NAME_CONFLICT: + return "refname conflict"; + case REF_TRANSACTION_ERROR_CREATE_EXISTS: + return "reference already exists"; + case REF_TRANSACTION_ERROR_NONEXISTENT_REF: + return "reference does not exist"; + case REF_TRANSACTION_ERROR_INCORRECT_OLD_VALUE: + return "incorrect old value provided"; + case REF_TRANSACTION_ERROR_INVALID_NEW_VALUE: + return "invalid new value provided"; + case REF_TRANSACTION_ERROR_EXPECTED_SYMREF: + return "expected symref but found regular ref"; + default: + return "unknown failure"; + } +} diff --git a/refs.h b/refs.h index 46a6008e07f..2d58af3d882 100644 --- a/refs.h +++ b/refs.h @@ -907,6 +907,11 @@ void ref_transaction_for_each_rejected_update(struct ref_transaction *transactio ref_transaction_for_each_rejected_update_fn cb, void *cb_data); +/* + * Translate errors to human readable error messages. + */ +const char *ref_transaction_error_msg(enum ref_transaction_error err); + /* * Free `*transaction` and all associated data. */ -- GitLab From 344410ce07cfa2496124979f79db29c8d598c305 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Mon, 19 May 2025 11:58:07 +0200 Subject: [PATCH 2/7] fetch: use batched reference updates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The reference updates performed as a part of 'git-fetch(1)', take place one at a time. For each reference update, a new transaction is created and committed. This is necessary to ensure we can allow individual updates to fail without failing the entire command. The command also supports an '--atomic' mode, which uses a single transaction to update all of the references. But this mode has an all-or-nothing approach, where if a single update fails, all updates would fail. In 23fc8e4f61 (refs: implement batch reference update support, 2025-04-08), we introduced a new mechanism to batch reference updates. Under the hood, this uses a single transaction to perform a batch of reference updates, while allowing only individual updates to fail. Utilize this newly introduced batch update mechanism in 'git-fetch(1)'. This provides a significant bump in performance, especially when dealing with repositories with large number of references. Adding support for batched updates is simply modifying the flow to also create a batch update transaction in the non-atomic flow. With the reftable backend there is a 22x performance improvement, when performing 'git-fetch(1)' with 10000 refs: Benchmark 1: fetch: many refs (refformat = reftable, refcount = 10000, revision = master) Time (mean ± σ): 3.403 s ± 0.775 s [User: 1.875 s, System: 1.417 s] Range (min … max): 2.454 s … 4.529 s 10 runs Benchmark 2: fetch: many refs (refformat = reftable, refcount = 10000, revision = HEAD) Time (mean ± σ): 154.3 ms ± 17.6 ms [User: 102.5 ms, System: 56.1 ms] Range (min … max): 145.2 ms … 220.5 ms 18 runs Summary fetch: many refs (refformat = reftable, refcount = 10000, revision = HEAD) ran 22.06 ± 5.62 times faster than fetch: many refs (refformat = reftable, refcount = 10000, revision = master) In similar conditions, the files backend sees a 1.25x performance improvement: Benchmark 1: fetch: many refs (refformat = files, refcount = 10000, revision = master) Time (mean ± σ): 605.5 ms ± 9.4 ms [User: 117.8 ms, System: 483.3 ms] Range (min … max): 595.6 ms … 621.5 ms 10 runs Benchmark 2: fetch: many refs (refformat = files, refcount = 10000, revision = HEAD) Time (mean ± σ): 485.8 ms ± 4.3 ms [User: 91.1 ms, System: 396.7 ms] Range (min … max): 477.6 ms … 494.3 ms 10 runs Summary fetch: many refs (refformat = files, refcount = 10000, revision = HEAD) ran 1.25 ± 0.02 times faster than fetch: many refs (refformat = files, refcount = 10000, revision = master) With this we'll either be using a regular transaction or a batch update transaction. This helps cleanup some code which is no longer needed as we'll now always have some type of 'ref_transaction' object being propagated. One big change is that earlier, each individual update would propagate a failure. Whereas now, the `ref_transaction_for_each_rejected_update` function is called at the end of the flow to capture the exit status for 'git-fetch(1)' and also to print F/D conflict errors. This does change the order of the errors being printed, but the behavior stays the same. Since transaction errors are now explicitly defined as part of 76e760b999 (refs: introduce enum-based transaction error types, 2025-04-08), utilize them and get rid of custom errors defined within 'builtin/fetch.c'. Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- builtin/fetch.c | 127 ++++++++++++++++++++++++++++-------------------- 1 file changed, 73 insertions(+), 54 deletions(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index 40a0e8d2443..13c4583f69e 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -640,9 +640,6 @@ static struct ref *get_ref_map(struct remote *remote, return ref_map; } -#define STORE_REF_ERROR_OTHER 1 -#define STORE_REF_ERROR_DF_CONFLICT 2 - static int s_update_ref(const char *action, struct ref *ref, struct ref_transaction *transaction, @@ -650,7 +647,6 @@ static int s_update_ref(const char *action, { char *msg; char *rla = getenv("GIT_REFLOG_ACTION"); - struct ref_transaction *our_transaction = NULL; struct strbuf err = STRBUF_INIT; int ret; @@ -660,43 +656,10 @@ static int s_update_ref(const char *action, rla = default_rla.buf; msg = xstrfmt("%s: %s", rla, action); - /* - * If no transaction was passed to us, we manage the transaction - * ourselves. Otherwise, we trust the caller to handle the transaction - * lifecycle. - */ - if (!transaction) { - transaction = our_transaction = ref_store_transaction_begin(get_main_ref_store(the_repository), - 0, &err); - if (!transaction) { - ret = STORE_REF_ERROR_OTHER; - goto out; - } - } - ret = ref_transaction_update(transaction, ref->name, &ref->new_oid, check_old ? &ref->old_oid : NULL, NULL, NULL, 0, msg, &err); - if (ret) { - ret = STORE_REF_ERROR_OTHER; - goto out; - } - - if (our_transaction) { - switch (ref_transaction_commit(our_transaction, &err)) { - case 0: - break; - case REF_TRANSACTION_ERROR_NAME_CONFLICT: - ret = STORE_REF_ERROR_DF_CONFLICT; - goto out; - default: - ret = STORE_REF_ERROR_OTHER; - goto out; - } - } -out: - ref_transaction_free(our_transaction); if (ret) error("%s", err.buf); strbuf_release(&err); @@ -1139,7 +1102,6 @@ N_("it took %.2f seconds to check forced updates; you can use\n" "to avoid this check\n"); static int store_updated_refs(struct display_state *display_state, - const char *remote_name, int connectivity_checked, struct ref_transaction *transaction, struct ref *ref_map, struct fetch_head *fetch_head, @@ -1277,11 +1239,6 @@ static int store_updated_refs(struct display_state *display_state, } } - if (rc & STORE_REF_ERROR_DF_CONFLICT) - error(_("some local refs could not be updated; try running\n" - " 'git remote prune %s' to remove any old, conflicting " - "branches"), remote_name); - if (advice_enabled(ADVICE_FETCH_SHOW_FORCED_UPDATES)) { if (!config->show_forced_updates) { warning(_(warn_show_forced_updates)); @@ -1365,9 +1322,8 @@ static int fetch_and_consume_refs(struct display_state *display_state, } trace2_region_enter("fetch", "consume_refs", the_repository); - ret = store_updated_refs(display_state, transport->remote->name, - connectivity_checked, transaction, ref_map, - fetch_head, config); + ret = store_updated_refs(display_state, connectivity_checked, + transaction, ref_map, fetch_head, config); trace2_region_leave("fetch", "consume_refs", the_repository); out: @@ -1687,6 +1643,36 @@ static int set_head(const struct ref *remote_refs, struct remote *remote) return result; } +struct ref_rejection_data { + int *retcode; + int conflict_msg_shown; + const char *remote_name; +}; + +static void ref_transaction_rejection_handler(const char *refname, + const struct object_id *old_oid UNUSED, + const struct object_id *new_oid UNUSED, + const char *old_target UNUSED, + const char *new_target UNUSED, + enum ref_transaction_error err, + void *cb_data) +{ + struct ref_rejection_data *data = cb_data; + + if (err == REF_TRANSACTION_ERROR_NAME_CONFLICT && !data->conflict_msg_shown) { + error(_("some local refs could not be updated; try running\n" + " 'git remote prune %s' to remove any old, conflicting " + "branches"), data->remote_name); + data->conflict_msg_shown = 1; + } else { + const char *reason = ref_transaction_error_msg(err); + + error(_("fetching ref %s failed: %s"), refname, reason); + } + + *data->retcode = 1; +} + static int do_fetch(struct transport *transport, struct refspec *rs, const struct fetch_config *config) @@ -1807,6 +1793,24 @@ static int do_fetch(struct transport *transport, retcode = 1; } + /* + * If not atomic, we can still use batched updates, which would be much + * more performant. We don't initiate the transaction before pruning, + * since pruning must be an independent step, to avoid F/D conflicts. + * + * TODO: if reference transactions gain logical conflict resolution, we + * can delete and create refs (with F/D conflicts) in the same transaction + * and this can be moved above the 'prune_refs()' block. + */ + if (!transaction) { + transaction = ref_store_transaction_begin(get_main_ref_store(the_repository), + REF_TRANSACTION_ALLOW_FAILURE, &err); + if (!transaction) { + retcode = -1; + goto cleanup; + } + } + if (fetch_and_consume_refs(&display_state, transport, transaction, ref_map, &fetch_head, config)) { retcode = 1; @@ -1838,16 +1842,31 @@ static int do_fetch(struct transport *transport, free_refs(tags_ref_map); } - if (transaction) { - if (retcode) - goto cleanup; + if (retcode) + goto cleanup; - retcode = ref_transaction_commit(transaction, &err); + retcode = ref_transaction_commit(transaction, &err); + if (retcode) { + /* + * Explicitly handle transaction cleanup to avoid + * aborting an already closed transaction. + */ + ref_transaction_free(transaction); + transaction = NULL; + goto cleanup; + } + + if (!atomic_fetch) { + struct ref_rejection_data data = { + .retcode = &retcode, + .conflict_msg_shown = 0, + .remote_name = transport->remote->name, + }; + + ref_transaction_for_each_rejected_update(transaction, + ref_transaction_rejection_handler, + &data); if (retcode) { - /* - * Explicitly handle transaction cleanup to avoid - * aborting an already closed transaction. - */ ref_transaction_free(transaction); transaction = NULL; goto cleanup; -- GitLab From 04a548ead75c015aa8e93f786696d013366480b8 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Mon, 19 May 2025 11:58:08 +0200 Subject: [PATCH 3/7] send-pack: fix memory leak around duplicate refs The 'git-send-pack(1)' allows users to push objects to a remote repository and explicitly list the references to be pushed. The status of each reference pushed is captured into a list mapped by refname. If a reference fails to be updated, its error message is captured in the `ref->remote_status` field. While the command allows duplicate ref inputs, the list doesn't accommodate this behavior as a particular refname is linked to a single `struct ref*` element. So if the user inputs a reference twice like: git send-pack remote.git A:foo B:foo where the user is trying to update the same reference 'foo' twice and the reference fails to be updated, we first fill `ref->remote_status` with error message for the input 'A:foo' then we override the same field with the error message for 'B:foo'. This override happens without first free'ing the previous value. Fix this leak. The current tests already incorporate the above example, but in the test 'A:foo' succeeds while 'B:foo' fails, meaning that the memory leak isn't triggered. Add a new test with multiple duplicates. Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- send-pack.c | 7 +++++++ t/t5408-send-pack-stdin.sh | 6 ++++++ 2 files changed, 13 insertions(+) diff --git a/send-pack.c b/send-pack.c index 86592ce526d..e2faa25b98e 100644 --- a/send-pack.c +++ b/send-pack.c @@ -257,6 +257,13 @@ static int receive_status(struct repository *r, refname); continue; } + + /* + * Clients sending duplicate refs can cause the same value + * to be overridden, causing a memory leak. + */ + free(hint->remote_status); + if (!strcmp(head, "ng")) { hint->status = REF_STATUS_REMOTE_REJECT; if (p) diff --git a/t/t5408-send-pack-stdin.sh b/t/t5408-send-pack-stdin.sh index 526a6750459..45fb20179b7 100755 --- a/t/t5408-send-pack-stdin.sh +++ b/t/t5408-send-pack-stdin.sh @@ -73,6 +73,12 @@ test_expect_success 'cmdline refs written in order' ' verify_push A foo ' +test_expect_success 'cmdline refs with multiple duplicates' ' + clear_remote && + test_must_fail git send-pack remote.git A:foo B:foo C:foo && + verify_push A foo +' + test_expect_success '--stdin refs come after cmdline' ' clear_remote && echo A:foo >input && -- GitLab From 931c39f05e078e0df968a439379cb04b5c4666ef Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Mon, 19 May 2025 11:58:09 +0200 Subject: [PATCH 4/7] receive-pack: use batched reference updates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The reference updates performed as a part of 'git-receive-pack(1)', take place one at a time. For each reference update, a new transaction is created and committed. This is necessary to ensure we can allow individual updates to fail without failing the entire command. The command also supports an 'atomic' mode, which uses a single transaction to update all of the references. But this mode has an all-or-nothing approach, where if a single update fails, all updates would fail. In 23fc8e4f61 (refs: implement batch reference update support, 2025-04-08), we introduced a new mechanism to batch reference updates. Under the hood, this uses a single transaction to perform a batch of reference updates, while allowing only individual updates to fail. Utilize this newly introduced batch update mechanism in 'git-receive-pack(1)'. This provides a significant bump in performance, especially when dealing with repositories with large number of references. With the reftable backend there is a 18x performance improvement, when performing receive-pack with 10000 refs: Benchmark 1: receive: many refs (refformat = reftable, refcount = 10000, revision = master) Time (mean ± σ): 4.276 s ± 0.078 s [User: 0.796 s, System: 3.318 s] Range (min … max): 4.185 s … 4.430 s 10 runs Benchmark 2: receive: many refs (refformat = reftable, refcount = 10000, revision = HEAD) Time (mean ± σ): 235.4 ms ± 6.9 ms [User: 75.4 ms, System: 157.3 ms] Range (min … max): 228.5 ms … 254.2 ms 11 runs Summary receive: many refs (refformat = reftable, refcount = 10000, revision = HEAD) ran 18.16 ± 0.63 times faster than receive: many refs (refformat = reftable, refcount = 10000, revision = master) In similar conditions, the files backend sees a 1.21x performance improvement: Benchmark 1: receive: many refs (refformat = files, refcount = 10000, revision = master) Time (mean ± σ): 1.121 s ± 0.021 s [User: 0.128 s, System: 0.975 s] Range (min … max): 1.097 s … 1.156 s 10 runs Benchmark 2: receive: many refs (refformat = files, refcount = 10000, revision = HEAD) Time (mean ± σ): 927.9 ms ± 22.6 ms [User: 99.0 ms, System: 815.2 ms] Range (min … max): 903.1 ms … 978.0 ms 10 runs Summary receive: many refs (refformat = files, refcount = 10000, revision = HEAD) ran 1.21 ± 0.04 times faster than receive: many refs (refformat = files, refcount = 10000, revision = master) As using batched updates requires the error handling to be moved to the end of the flow, create and use a 'struct strset' to track the failed refs and attribute the correct errors to them. This change also uncovers an issue when a client provides multiple updates to the same reference. For example: $ git send-pack remote.git A:foo B:foo Enumerating objects: 3, done. Counting objects: 100% (3/3), done. Delta compression using up to 20 threads Compressing objects: 100% (2/2), done. Writing objects: 100% (3/3), 226 bytes | 226.00 KiB/s, done. Total 3 (delta 1), reused 0 (delta 0), pack-reused 0 (from 0) remote: error: cannot lock ref 'refs/heads/foo': reference already exists To remote.git ! [remote rejected] A -> foo (failed to update ref) ! [remote failure] B -> foo (remote failed to report status) As you can see, the remote runs into an error because it cannot lock the target reference for the second update. Furthermore, the remote complains that the first update has been rejected whereas the second update didn't receive any status update because we failed to lock it. Reading this status message alone a user would probably expect that `foo` has not been updated at all. But that's not the case: while we claim that the ref wasn't updated, it surprisingly points to `A` now. One could argue that this is merely an error in how we report the result of this push. But ultimately, the user's request itself is already broken and doesn't make any sense in the first place and cannot ever lead to a sensible outcome that honors the full request. The conversion to batched transactions fixes the issue because we now try to queue both updates in the same transaction. As such, the transaction itself will notice this conflict and refuse the update altogether before we commit any of the values. Note that this requires changes to a couple of tests in t5408 that happened to exercise this behaviour. Given that the generated output is misleading and given that the user request cannot ever be fully honored this really feels more like a bug than properly designed behaviour. As such, changing the behaviour feels like the right thing to do. Since now reference updates are batched, the 'reference-transaction' hook will be invoked with all updates together. Currently git will 'die' when the hook returns with a non-zero exit status in the 'prepared' stage. For 'git-receive-pack(1)', this allowed users to reject an individual reference update, git would have applied previous updates but immediately abort further execution. This is definitely an incorrect usage of this hook, since the right place to do this would be the 'update' hook. This patch retains the latter behavior, but 'reference-transaction' hook now changes to a all-or-nothing behavior when a non-zero exit status is returned in the 'prepared' stage, since batch updates use a transaction under the hood. This explains the change in 't1416'. Helped-by: Jeff King Helped-by: Patrick Steinhardt Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- builtin/receive-pack.c | 64 ++++++++++++++++++++++++-------- t/t1416-ref-transaction-hooks.sh | 2 - t/t5408-send-pack-stdin.sh | 13 ++++--- 3 files changed, 56 insertions(+), 23 deletions(-) diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index a317d6c278d..9e3cfb85cf7 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -1846,35 +1846,67 @@ static void BUG_if_skipped_connectivity_check(struct command *commands, BUG_if_bug("connectivity check skipped???"); } +static void ref_transaction_rejection_handler(const char *refname, + const struct object_id *old_oid UNUSED, + const struct object_id *new_oid UNUSED, + const char *old_target UNUSED, + const char *new_target UNUSED, + enum ref_transaction_error err, + void *cb_data) +{ + struct strmap *failed_refs = cb_data; + + strmap_put(failed_refs, refname, (char *)ref_transaction_error_msg(err)); +} + static void execute_commands_non_atomic(struct command *commands, struct shallow_info *si) { struct command *cmd; struct strbuf err = STRBUF_INIT; + const char *reported_error = NULL; + struct strmap failed_refs = STRMAP_INIT; + + transaction = ref_store_transaction_begin(get_main_ref_store(the_repository), + REF_TRANSACTION_ALLOW_FAILURE, &err); + if (!transaction) { + rp_error("%s", err.buf); + strbuf_reset(&err); + reported_error = "transaction failed to start"; + goto failure; + } for (cmd = commands; cmd; cmd = cmd->next) { if (!should_process_cmd(cmd) || cmd->run_proc_receive) continue; - transaction = ref_store_transaction_begin(get_main_ref_store(the_repository), - 0, &err); - if (!transaction) { - rp_error("%s", err.buf); - strbuf_reset(&err); - cmd->error_string = "transaction failed to start"; - continue; - } - cmd->error_string = update(cmd, si); + } - if (!cmd->error_string - && ref_transaction_commit(transaction, &err)) { - rp_error("%s", err.buf); - strbuf_reset(&err); - cmd->error_string = "failed to update ref"; - } - ref_transaction_free(transaction); + if (ref_transaction_commit(transaction, &err)) { + rp_error("%s", err.buf); + reported_error = "failed to update refs"; + goto failure; + } + + ref_transaction_for_each_rejected_update(transaction, + ref_transaction_rejection_handler, + &failed_refs); + + if (strmap_empty(&failed_refs)) + goto cleanup; + +failure: + for (cmd = commands; cmd; cmd = cmd->next) { + if (reported_error) + cmd->error_string = reported_error; + else if (strmap_contains(&failed_refs, cmd->ref_name)) + cmd->error_string = strmap_get(&failed_refs, cmd->ref_name); } + +cleanup: + ref_transaction_free(transaction); + strmap_clear(&failed_refs, 0); strbuf_release(&err); } diff --git a/t/t1416-ref-transaction-hooks.sh b/t/t1416-ref-transaction-hooks.sh index 8c777f7cf87..d91dd3a3b55 100755 --- a/t/t1416-ref-transaction-hooks.sh +++ b/t/t1416-ref-transaction-hooks.sh @@ -120,8 +120,6 @@ test_expect_success 'interleaving hook calls succeed' ' cat >expect <<-EOF && hooks/update refs/tags/PRE $ZERO_OID $PRE_OID - hooks/reference-transaction prepared - hooks/reference-transaction committed hooks/update refs/tags/POST $ZERO_OID $POST_OID hooks/reference-transaction prepared hooks/reference-transaction committed diff --git a/t/t5408-send-pack-stdin.sh b/t/t5408-send-pack-stdin.sh index 45fb20179b7..ec339761c2d 100755 --- a/t/t5408-send-pack-stdin.sh +++ b/t/t5408-send-pack-stdin.sh @@ -69,21 +69,24 @@ test_expect_success 'stdin mixed with cmdline' ' test_expect_success 'cmdline refs written in order' ' clear_remote && - test_must_fail git send-pack remote.git A:foo B:foo && - verify_push A foo + test_must_fail git send-pack remote.git A:foo B:foo 2>err && + test_grep "multiple updates for ref ${SQ}refs/heads/foo${SQ} not allowed" err && + test_must_fail git --git-dir=remote.git rev-parse foo ' test_expect_success 'cmdline refs with multiple duplicates' ' clear_remote && - test_must_fail git send-pack remote.git A:foo B:foo C:foo && - verify_push A foo + test_must_fail git send-pack remote.git A:foo B:foo C:foo 2>err && + test_grep "multiple updates for ref ${SQ}refs/heads/foo${SQ} not allowed" err && + test_must_fail git --git-dir=remote.git rev-parse foo ' test_expect_success '--stdin refs come after cmdline' ' clear_remote && echo A:foo >input && test_must_fail git send-pack remote.git --stdin B:foo Date: Wed, 28 May 2025 10:02:44 +0200 Subject: [PATCH 5/7] refs: fix some bugs with batched-updates In 23fc8e4f61 (refs: implement batch reference update support, 2025-04-08) we introduced a mechanism to batch reference updates. The idea being that we wanted to batch updates similar to a transaction, but allow certain updates to fail. This would help reference backends optimize such operations and also remove the overhead of processing updates individually. Especially for the reftable backend, where batching updates would ensure that the autocompaction would only occur at the end of the batch instead of after every reference update. As of 9d2962a7c4 (receive-pack: use batched reference updates, 2025-05-19) we also updated the 'git-fetch(1)' and 'git-receive-pack(1)' command to use batched reference updates. This series fixes some bugs that we found at GitLab by running our Gitaly service with the `next` build of Git. The first being in the files backend, which missed skipping over failed updates in certain flows. When certain individual updates fail, we mark them as such. However, we missed skipping over such failed updates, which can cause a SEGFAULT. The other is in the git-receive-pack(1) implementation when a user pushes multiple branches such as: delete refs/heads/branch/conflict create refs/heads/branch Before using batched updates, the references would be applied sequentially and hence no conflicts would arise. With batched updates, while the first update applies, the second fails due to F/D conflict. A similar issue was present in 'git-fetch(1)' and was fixed by using separating out reference pruning into a separate transaction. Apply a similar mechanism for 'git-receive-pack(1)' and separate out reference deletions into its own batch. This is based off master 7014b55638 (A bit more topics for -rc1, 2025-05-30), with the changes from kn/fetch-push-bulk-ref-update merged in. --- Changes in v5: - EDITME: describe what is new in this series revision. - EDITME: use bulletpoints and terse descriptions. - Link to v4: https://lore.kernel.org/r/20250613-6769-address-test-failures-in-the-next-branch-caused-by-batched-reference-updates-v4-0-ebf53edb9795@gmail.com Changes in v4: - Swapped out F/D for D/F in the second commit, since we are talking about conflicts between a directory and a file, also D/F is more consistent. - Fixed some typos in the second commit. - Changed comment to single line. - Link to v3: https://lore.kernel.org/r/20250606-6769-address-test-failures-in-the-next-branch-caused-by-batched-reference-updates-v3-0-e1c41693bd35@gmail.com Changes in v3: - Cleanup the second commit message and remove stale comments. - In the second commit, only create the transaction if needed. - Link to v2: https://lore.kernel.org/r/20250605-6769-address-test-failures-in-the-next-branch-caused-by-batched-reference-updates-v2-0-26cd05b8a79e@gmail.com Changes in v2: - Modify the test in the first commit to no longer do a quiet grep, and some more tests. - Remove the second commit as it was unnecessary. - Modify the commit message in the last commit, to also talk about how we now use 2 transactions at minimum but this is still better than before. - Modify the logic in the last commit to no longer use an XOR and instead loop over the two different scenarios (deletion updates, other updates). - Link to v1: https://lore.kernel.org/r/20250602-6769-address-test-failures-in-the-next-branch-caused-by-batched-reference-updates-v1-0-903d1db3f10e@gmail.com --- b4-submit-tracking --- # This section is used internally by b4 prep for tracking purposes. { "series": { "revision": 5, "change-id": "20250528-6769-address-test-failures-in-the-next-branch-caused-by-batched-reference-updates-24ff53680144", "prefixes": [], "history": { "v1": [ "20250602-6769-address-test-failures-in-the-next-branch-caused-by-batched-reference-updates-v1-0-903d1db3f10e@gmail.com" ], "v2": [ "20250605-6769-address-test-failures-in-the-next-branch-caused-by-batched-reference-updates-v2-0-26cd05b8a79e@gmail.com" ], "v3": [ "20250606-6769-address-test-failures-in-the-next-branch-caused-by-batched-reference-updates-v3-0-e1c41693bd35@gmail.com" ], "v4": [ "20250613-6769-address-test-failures-in-the-next-branch-caused-by-batched-reference-updates-v4-0-ebf53edb9795@gmail.com" ] } } } -- GitLab From afda0a95fd69cfb8d72c21bb2d9e09d884233ade Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Fri, 30 May 2025 16:17:42 +0200 Subject: [PATCH 6/7] refs/files: skip updates with errors in batched updates The commit 23fc8e4f61 (refs: implement batch reference update support, 2025-04-08) introduced support for batched reference updates. This allows users to batch updates together, while allowing some of the updates to fail. Under the hood, batched updates use the reference transaction mechanism. Each update which fails is marked as such. Any failed updates must be skipped over in the rest of the code, as they wouldn't apply any more. In two of the loops within 'files_transaction_finish()' of the files backend, the failed updates aren't skipped over. This can cause a SEGFAULT otherwise. Add the missing skips and a test to validate the same. Signed-off-by: Karthik Nayak --- refs/files-backend.c | 7 +++++++ t/t1400-update-ref.sh | 45 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/refs/files-backend.c b/refs/files-backend.c index 4d1f65a57a5..c4a0f290727 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -3208,6 +3208,10 @@ static int files_transaction_finish(struct ref_store *ref_store, */ for (i = 0; i < transaction->nr; i++) { struct ref_update *update = transaction->updates[i]; + + if (update->rejection_err) + continue; + if (update->flags & REF_DELETING && !(update->flags & REF_LOG_ONLY) && !(update->flags & REF_IS_PRUNING)) { @@ -3239,6 +3243,9 @@ static int files_transaction_finish(struct ref_store *ref_store, struct ref_update *update = transaction->updates[i]; struct ref_lock *lock = update->backend_data; + if (update->rejection_err) + continue; + if (update->flags & REF_DELETING && !(update->flags & REF_LOG_ONLY)) { update->flags |= REF_DELETED_RMDIR; diff --git a/t/t1400-update-ref.sh b/t/t1400-update-ref.sh index d29d23cb890..ca7eee7de21 100755 --- a/t/t1400-update-ref.sh +++ b/t/t1400-update-ref.sh @@ -2299,6 +2299,51 @@ do test_grep -q "refname conflict" stdout ) ' + + test_expect_success "stdin $type batch-updates delete incorrect symbolic ref" ' + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + test_commit c1 && + head=$(git rev-parse HEAD) && + git symbolic-ref refs/heads/symbolic refs/heads/non-existent && + + format_command $type "delete refs/heads/symbolic" "$head" >stdin && + git update-ref $type --stdin --batch-updates stdout && + test_grep "reference does not exist" stdout + ) + ' + + test_expect_success "stdin $type batch-updates delete with incorrect old_oid" ' + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + test_commit c1 && + git branch new-branch && + test_commit c2 && + head=$(git rev-parse HEAD) && + + format_command $type "delete refs/heads/new-branch" "$head" >stdin && + git update-ref $type --stdin --batch-updates stdout && + test_grep "incorrect old value provided" stdout + ) + ' + + test_expect_success "stdin $type batch-updates delete non-existent ref" ' + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + test_commit commit && + head=$(git rev-parse HEAD) && + + format_command $type "delete refs/heads/non-existent" "$head" >stdin && + git update-ref $type --stdin --batch-updates stdout && + test_grep "reference does not exist" stdout + ) + ' done test_expect_success 'update-ref should also create reflog for HEAD' ' -- GitLab From b0e402ad7bf891d851f3a9000128c84abac75f3b Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Wed, 28 May 2025 18:02:12 +0200 Subject: [PATCH 7/7] receive-pack: handle reference deletions separately In 9d2962a7c4 (receive-pack: use batched reference updates, 2025-05-19) we updated the 'git-receive-pack(1)' command to use batched reference updates. One edge case which was missed during this implementation was when a user pushes multiple branches such as: delete refs/heads/branch/conflict create refs/heads/branch Before using batched updates, the references would be applied sequentially and hence no conflicts would arise. With batched updates, while the first update applies, the second fails due to D/F conflict. A similar issue was present in 'git-fetch(1)' and was fixed by separating out reference pruning into a separate transaction in the commit 'fetch: use batched reference updates'. Apply a similar mechanism for 'git-receive-pack(1)' and separate out reference deletions into its own batch. This means 'git-receive-pack(1)' will now use up to two transactions, whereas before using batched updates it would use _at least_ two transactions. So using batched updates is still the better option. Add a test to validate this behavior. Signed-off-by: Karthik Nayak --- builtin/receive-pack.c | 102 +++++++++++++++++++++++++++-------------- t/t5516-fetch-push.sh | 17 +++++-- 2 files changed, 81 insertions(+), 38 deletions(-) diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index 9e3cfb85cf7..1809539201a 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -1867,47 +1867,81 @@ static void execute_commands_non_atomic(struct command *commands, const char *reported_error = NULL; struct strmap failed_refs = STRMAP_INIT; - transaction = ref_store_transaction_begin(get_main_ref_store(the_repository), - REF_TRANSACTION_ALLOW_FAILURE, &err); - if (!transaction) { - rp_error("%s", err.buf); - strbuf_reset(&err); - reported_error = "transaction failed to start"; - goto failure; - } + /* + * Reference updates, where D/F conflicts shouldn't arise due to + * one reference being deleted, while the other being created + * are treated as conflicts in batched updates. This is because + * we don't do conflict resolution inside a transaction. To + * mitigate this, delete references in a separate batch. + * + * NEEDSWORK: Add conflict resolution between deletion and creation + * of reference updates within a transaction. With that, we can + * combine the two phases. + */ + enum processing_phase { + PHASE_DELETIONS, + PHASE_OTHERS + }; - for (cmd = commands; cmd; cmd = cmd->next) { - if (!should_process_cmd(cmd) || cmd->run_proc_receive) - continue; + for (enum processing_phase phase = PHASE_DELETIONS; phase <= PHASE_OTHERS; phase++) { + for (cmd = commands; cmd; cmd = cmd->next) { + if (!should_process_cmd(cmd) || cmd->run_proc_receive) + continue; - cmd->error_string = update(cmd, si); - } + if (phase == PHASE_DELETIONS && !is_null_oid(&cmd->new_oid)) + continue; + else if (phase == PHASE_OTHERS && is_null_oid(&cmd->new_oid)) + continue; - if (ref_transaction_commit(transaction, &err)) { - rp_error("%s", err.buf); - reported_error = "failed to update refs"; - goto failure; - } + /* + * Lazily create a transaction only when we know there are + * updates to be added. + */ + if (!transaction) { + transaction = ref_store_transaction_begin(get_main_ref_store(the_repository), + REF_TRANSACTION_ALLOW_FAILURE, &err); + if (!transaction) { + rp_error("%s", err.buf); + strbuf_reset(&err); + reported_error = "transaction failed to start"; + goto failure; + } + } - ref_transaction_for_each_rejected_update(transaction, - ref_transaction_rejection_handler, - &failed_refs); + cmd->error_string = update(cmd, si); + } - if (strmap_empty(&failed_refs)) - goto cleanup; + /* No transaction, so nothing to commit */ + if (!transaction) + goto cleanup; -failure: - for (cmd = commands; cmd; cmd = cmd->next) { - if (reported_error) - cmd->error_string = reported_error; - else if (strmap_contains(&failed_refs, cmd->ref_name)) - cmd->error_string = strmap_get(&failed_refs, cmd->ref_name); - } + if (ref_transaction_commit(transaction, &err)) { + rp_error("%s", err.buf); + reported_error = "failed to update refs"; + goto failure; + } -cleanup: - ref_transaction_free(transaction); - strmap_clear(&failed_refs, 0); - strbuf_release(&err); + ref_transaction_for_each_rejected_update(transaction, + ref_transaction_rejection_handler, + &failed_refs); + + if (strmap_empty(&failed_refs)) + goto cleanup; + + failure: + for (cmd = commands; cmd; cmd = cmd->next) { + if (reported_error) + cmd->error_string = reported_error; + else if (strmap_contains(&failed_refs, cmd->ref_name)) + cmd->error_string = strmap_get(&failed_refs, cmd->ref_name); + } + + cleanup: + ref_transaction_free(transaction); + transaction = NULL; + strmap_clear(&failed_refs, 0); + strbuf_release(&err); + } } static void execute_commands_atomic(struct command *commands, diff --git a/t/t5516-fetch-push.sh b/t/t5516-fetch-push.sh index dabcc5f8117..1649667441f 100755 --- a/t/t5516-fetch-push.sh +++ b/t/t5516-fetch-push.sh @@ -744,8 +744,8 @@ test_expect_success 'pushing valid refs triggers post-receive and post-update ho EOF cat >update.expect <<-EOF && - refs/heads/main $orgmain $newmain refs/heads/next $orgnext $newnext + refs/heads/main $orgmain $newmain EOF cat >post-receive.expect <<-EOF && @@ -808,8 +808,8 @@ test_expect_success 'deletion of a non-existent ref is not fed to post-receive a EOF cat >update.expect <<-EOF && - refs/heads/main $orgmain $newmain refs/heads/nonexistent $ZERO_OID $ZERO_OID + refs/heads/main $orgmain $newmain EOF cat >post-receive.expect <<-EOF && @@ -868,10 +868,10 @@ test_expect_success 'mixed ref updates, deletes, invalid deletes trigger hooks w EOF cat >update.expect <<-EOF && - refs/heads/main $orgmain $newmain refs/heads/next $orgnext $newnext - refs/heads/seen $orgseen $newseen refs/heads/nonexistent $ZERO_OID $ZERO_OID + refs/heads/main $orgmain $newmain + refs/heads/seen $orgseen $newseen EOF cat >post-receive.expect <<-EOF && @@ -1909,4 +1909,13 @@ test_expect_success 'push with config push.useBitmaps' ' --thin --delta-base-offset -q --no-use-bitmap-index