From eeef176f118cb8fe7db1f957340def64d2a2bde9 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Wed, 15 Jan 2025 11:54:51 +0000 Subject: [PATCH 1/2] reftable: write correct max_update_index to header In 297c09eabb (refs: allow multiple reflog entries for the same refname, 2024-12-16), the reftable backend learned to handle multiple reflog entries within the same transaction. This was done modifying the `update_index` for reflogs with multiple indices. During writing the logs, the `max_update_index` of the writer was modified to ensure the limits were raised to the modified `update_index`s. However, since ref entries are written before the modification to the `max_update_index`, if there are multiple blocks to be written, the reftable backend writes the header with the old `max_update_index`. When all logs are finally written, the footer will be written with the new `min_update_index`. This causes a mismatch between the header and the footer and causes the reftable file to be corrupted. The existing tests only spawn a single block and since headers are lazily written with the first block, the tests didn't capture this bug. To fix the issue, the appropriate `max_update_index` limit must be set even before the first block is written. Add a `max_index` field to the transaction which holds the `max_index` within all its updates, then propagate this value to the reftable backend, wherein this is used to the set the `max_update_index` correctly. Add a test which creates a few thousand reference updates with multiple reflog entries, which should trigger the bug. Reported-by: brian m. carlson Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- refs.c | 7 +++++++ refs/refs-internal.h | 1 + refs/reftable-backend.c | 20 ++++++++++---------- t/t1460-refs-migrate.sh | 12 ++++++++++++ 4 files changed, 30 insertions(+), 10 deletions(-) diff --git a/refs.c b/refs.c index 0f41b2fd4a6..f7b6f0f897e 100644 --- a/refs.c +++ b/refs.c @@ -1345,6 +1345,13 @@ int ref_transaction_update_reflog(struct ref_transaction *transaction, update->flags &= ~REF_HAVE_OLD; update->index = index; + /* + * Reference backends may need to know the max index to optimize + * their writes. So we store the max_index on the transaction level. + */ + if (index > transaction->max_index) + transaction->max_index = index; + return 0; } diff --git a/refs/refs-internal.h b/refs/refs-internal.h index 16550862d3e..aaab711bb96 100644 --- a/refs/refs-internal.h +++ b/refs/refs-internal.h @@ -203,6 +203,7 @@ struct ref_transaction { enum ref_transaction_state state; void *backend_data; unsigned int flags; + unsigned int max_index; }; /* diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 00d95a9a2f4..289496058e6 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -942,6 +942,7 @@ struct write_transaction_table_arg { size_t updates_nr; size_t updates_alloc; size_t updates_expected; + unsigned int max_index; }; struct reftable_transaction_data { @@ -1428,7 +1429,6 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data struct reftable_log_record *logs = NULL; struct ident_split committer_ident = {0}; size_t logs_nr = 0, logs_alloc = 0, i; - uint64_t max_update_index = ts; const char *committer_info; int ret = 0; @@ -1438,7 +1438,12 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data QSORT(arg->updates, arg->updates_nr, transaction_update_cmp); - reftable_writer_set_limits(writer, ts, ts); + /* + * During reflog migration, we add indexes for a single reflog with + * multiple entries. Each entry will contain a different update_index, + * so set the limits accordingly. + */ + reftable_writer_set_limits(writer, ts, ts + arg->max_index); for (i = 0; i < arg->updates_nr; i++) { struct reftable_transaction_update *tx_update = &arg->updates[i]; @@ -1540,12 +1545,6 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data */ log->update_index = ts + u->index; - /* - * Note the max update_index so the limit can be set later on. - */ - if (log->update_index > max_update_index) - max_update_index = log->update_index; - log->refname = xstrdup(u->refname); memcpy(log->value.update.new_hash, u->new_oid.hash, GIT_MAX_RAWSZ); @@ -1609,8 +1608,6 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data * and log blocks. */ if (logs) { - reftable_writer_set_limits(writer, ts, max_update_index); - ret = reftable_writer_add_logs(writer, logs, logs_nr); if (ret < 0) goto done; @@ -1631,6 +1628,9 @@ static int reftable_be_transaction_finish(struct ref_store *ref_store UNUSED, struct reftable_transaction_data *tx_data = transaction->backend_data; int ret = 0; + if (tx_data->args) + tx_data->args->max_index = transaction->max_index; + for (size_t i = 0; i < tx_data->args_nr; i++) { ret = reftable_addition_add(tx_data->args[i].addition, write_transaction_table, &tx_data->args[i]); diff --git a/t/t1460-refs-migrate.sh b/t/t1460-refs-migrate.sh index f59bc4860f1..307b2998efe 100755 --- a/t/t1460-refs-migrate.sh +++ b/t/t1460-refs-migrate.sh @@ -227,6 +227,18 @@ do done done +test_expect_success 'multiple reftable blocks with multiple entries' ' + test_when_finished "rm -rf repo" && + git init --ref-format=files repo && + test_commit -C repo first && + printf "create refs/heads/ref-%d HEAD\n" $(test_seq 5000) >stdin && + git -C repo update-ref --stdin stdin && + git -C repo update-ref --stdin Date: Fri, 24 Jan 2025 12:08:45 +0100 Subject: [PATCH 2/2] refs: fix uninitialized memory access of `max_index` When migrating reflogs between reference backends, maintaining the original order of the reflog entries is crucial. To achieve this, an `index` field is stored within the `ref_update` struct. In the reftable backend, before writing any references, the writer must be configured with the minimum and maximum update index values. The `max_update_index` is derived from the maximum `ref_update.index` value in a transaction . The commit bc67b4ab5f (reftable: write correct max_update_index to header, 2025-01-15) addressed this by propagating the `max_update_index` value from the transaction to `write_transaction_table_arg` and, ultimately, to `reftable_writer_set_limits()`, which sets the min and max index for the reftable writer. However, that commit introduced two issues: - The `max_index` field in `write_transaction_table_arg` was not initialized to 0. - In `reftable_transaction_data`, which contains an array of `write_transaction_table_arg`, only the first element was assigned the `max_index` value. As a result, any elements beyond the first in the array contained uninitialized `max_index`. This uninitialized value was later used to set the `max_update_index` for the writer, potentially causing overflow or undefined behavior. Fix this by: - Initializing the `max_index` field to 0. - Moving the assignment of `max_index` in `reftable_be_transaction_finish()` inside the loop, ensuring all elements of the array are correctly initialized. Initializing `max_index` to `0` is not strictly necessary, as all elements of `write_transaction_table_arg.max_index` are now assigned correctly. However, this initialization is added for consistency and to safeguard against potential future changes that might inadvertently introduce uninitialized memory access. Reported-by: Johannes Schindelin Signed-off-by: Karthik Nayak --- refs/reftable-backend.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 289496058e6..d39a14c5a46 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -1020,6 +1020,7 @@ static int prepare_transaction_update(struct write_transaction_table_arg **out, arg->updates_nr = 0; arg->updates_alloc = 0; arg->updates_expected = 0; + arg->max_index = 0; } arg->updates_expected++; @@ -1628,10 +1629,9 @@ static int reftable_be_transaction_finish(struct ref_store *ref_store UNUSED, struct reftable_transaction_data *tx_data = transaction->backend_data; int ret = 0; - if (tx_data->args) - tx_data->args->max_index = transaction->max_index; - for (size_t i = 0; i < tx_data->args_nr; i++) { + tx_data->args[i].max_index = transaction->max_index; + ret = reftable_addition_add(tx_data->args[i].addition, write_transaction_table, &tx_data->args[i]); if (ret < 0) -- GitLab