From af1ddcb2b55e7e91894fb911c04a5a4a509e2dc7 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:07:14 -0400 Subject: [PATCH 01/65] builtin/repack.c: avoid "the_repository" in `cmd_repack()` Reduce builtin/repack.c's reliance on `the_repository` by using the currently-UNUSED "repo" parameter within cmd_repack(). The following commits will continue to reduce the usage of the_repository in other places within builtin/repack.c. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index e8730808c53..305782b2c9f 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -1247,7 +1247,7 @@ static const char *find_pack_prefix(const char *packdir, const char *packtmp) int cmd_repack(int argc, const char **argv, const char *prefix, - struct repository *repo UNUSED) + struct repository *repo) { struct child_process cmd = CHILD_PROCESS_INIT; struct string_list_item *item; @@ -1344,7 +1344,7 @@ int cmd_repack(int argc, list_objects_filter_init(&po_args.filter_options); - repo_config(the_repository, repack_config, &cruft_po_args); + repo_config(repo, repack_config, &cruft_po_args); argc = parse_options(argc, argv, prefix, builtin_repack_options, git_repack_usage, 0); @@ -1354,7 +1354,7 @@ int cmd_repack(int argc, po_args.depth = xstrdup_or_null(opt_depth); po_args.threads = xstrdup_or_null(opt_threads); - if (delete_redundant && the_repository->repository_format_precious_objects) + if (delete_redundant && repo->repository_format_precious_objects) die(_("cannot delete packs in a precious-objects repo")); die_for_incompatible_opt3(unpack_unreachable || (pack_everything & LOOSEN_UNREACHABLE), "-A", @@ -1376,7 +1376,7 @@ int cmd_repack(int argc, die(_(incremental_bitmap_conflict_error)); if (write_bitmaps && po_args.local && - odb_has_alternates(the_repository->objects)) { + odb_has_alternates(repo->objects)) { /* * When asked to do a local repack, but we have * packfiles that are inherited from an alternate, then @@ -1391,7 +1391,8 @@ int cmd_repack(int argc, if (write_midx && write_bitmaps) { struct strbuf path = STRBUF_INIT; - strbuf_addf(&path, "%s/%s_XXXXXX", repo_get_object_directory(the_repository), + strbuf_addf(&path, "%s/%s_XXXXXX", + repo_get_object_directory(repo), "bitmap-ref-tips"); refs_snapshot = xmks_tempfile(path.buf); @@ -1400,7 +1401,7 @@ int cmd_repack(int argc, strbuf_release(&path); } - packdir = mkpathdup("%s/pack", repo_get_object_directory(the_repository)); + packdir = mkpathdup("%s/pack", repo_get_object_directory(repo)); packtmp_name = xstrfmt(".tmp-%d-pack", (int)getpid()); packtmp = mkpathdup("%s/%s", packdir, packtmp_name); @@ -1439,7 +1440,7 @@ int cmd_repack(int argc, strvec_push(&cmd.args, "--reflog"); strvec_push(&cmd.args, "--indexed-objects"); } - if (repo_has_promisor_remote(the_repository)) + if (repo_has_promisor_remote(repo)) strvec_push(&cmd.args, "--exclude-promisor-objects"); if (!write_midx) { if (write_bitmaps > 0) @@ -1535,7 +1536,7 @@ int cmd_repack(int argc, * midx_has_unknown_packs() will make the decision for * us. */ - if (!get_multi_pack_index(the_repository->objects->sources)) + if (!get_multi_pack_index(repo->objects->sources)) midx_must_contain_cruft = 1; } @@ -1618,9 +1619,9 @@ int cmd_repack(int argc, string_list_sort(&names); - if (get_multi_pack_index(the_repository->objects->sources)) { + if (get_multi_pack_index(repo->objects->sources)) { struct multi_pack_index *m = - get_multi_pack_index(the_repository->objects->sources); + get_multi_pack_index(repo->objects->sources); ALLOC_ARRAY(midx_pack_names, m->num_packs + m->num_packs_in_base); @@ -1631,7 +1632,7 @@ int cmd_repack(int argc, xstrdup(m->pack_names[i]); } - close_object_store(the_repository->objects); + close_object_store(repo->objects); /* * Ok we have prepared all new packfiles. @@ -1688,7 +1689,7 @@ int cmd_repack(int argc, goto cleanup; } - odb_reprepare(the_repository->objects); + odb_reprepare(repo->objects); if (delete_redundant) { int opts = 0; @@ -1704,18 +1705,18 @@ int cmd_repack(int argc, if (!keep_unreachable && (!(pack_everything & LOOSEN_UNREACHABLE) || unpack_unreachable) && - is_repository_shallow(the_repository)) + is_repository_shallow(repo)) prune_shallow(PRUNE_QUICK); } if (run_update_server_info) - update_server_info(the_repository, 0); + update_server_info(repo, 0); if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX, 0)) { unsigned flags = 0; if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX_WRITE_INCREMENTAL, 0)) flags |= MIDX_WRITE_INCREMENTAL; - write_midx_file(the_repository->objects->sources, + write_midx_file(repo->objects->sources, NULL, NULL, flags); } -- GitLab From 73ba2c9b39e56829fd9a114f7bbe425e69ebf7d8 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:07:18 -0400 Subject: [PATCH 02/65] builtin/repack.c: avoid "the_repository" in existing packs API There are a number of spots within builtin/repack.c which refer to "the_repository", and either make use of the "existing packs" API or otherwise have a 'struct existing_packs *' in scope. Add a "repo" member to "struct existing_packs" and use that instead of "the_repository" in such locations. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 305782b2c9f..7223553bed8 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -126,6 +126,7 @@ static void pack_objects_args_release(struct pack_objects_args *args) } struct existing_packs { + struct repository *repo; struct string_list kept_packs; struct string_list non_kept_packs; struct string_list cruft_packs; @@ -265,7 +266,7 @@ static void existing_packs_release(struct existing_packs *existing) static void collect_pack_filenames(struct existing_packs *existing, const struct string_list *extra_keep) { - struct packfile_store *packs = the_repository->objects->packfiles; + struct packfile_store *packs = existing->repo->objects->packfiles; struct packed_git *p; struct strbuf buf = STRBUF_INIT; @@ -498,7 +499,7 @@ static void init_pack_geometry(struct pack_geometry *geometry, struct existing_packs *existing, const struct pack_objects_args *args) { - struct packfile_store *packs = the_repository->objects->packfiles; + struct packfile_store *packs = existing->repo->objects->packfiles; struct packed_git *p; struct strbuf buf = STRBUF_INIT; @@ -1139,7 +1140,7 @@ static int write_filtered_pack(const struct pack_objects_args *args, static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, struct existing_packs *existing) { - struct packfile_store *packs = the_repository->objects->packfiles; + struct packfile_store *packs = existing->repo->objects->packfiles; struct packed_git *p; struct strbuf buf = STRBUF_INIT; size_t i; @@ -1405,6 +1406,7 @@ int cmd_repack(int argc, packtmp_name = xstrfmt(".tmp-%d-pack", (int)getpid()); packtmp = mkpathdup("%s/%s", packdir, packtmp_name); + existing.repo = repo; collect_pack_filenames(&existing, &keep_pack_list); if (geometry.split_factor) { -- GitLab From 2f425fe2fcc6aef33576acc99335e57ffd3a217f Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:07:22 -0400 Subject: [PATCH 03/65] builtin/repack.c: avoid "the_repository" when taking a ref snapshot Avoid using "the_repository" in various MIDX-related ref snapshotting functions. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 7223553bed8..113f5fc67f9 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -771,6 +771,7 @@ static int midx_has_unknown_packs(char **midx_pack_names, } struct midx_snapshot_ref_data { + struct repository *repo; struct tempfile *f; struct oidset seen; int preferred; @@ -784,13 +785,13 @@ static int midx_snapshot_ref_one(const char *refname UNUSED, struct midx_snapshot_ref_data *data = _data; struct object_id peeled; - if (!peel_iterated_oid(the_repository, oid, &peeled)) + if (!peel_iterated_oid(data->repo, oid, &peeled)) oid = &peeled; if (oidset_insert(&data->seen, oid)) return 0; /* already seen */ - if (odb_read_object_info(the_repository->objects, oid, NULL) != OBJ_COMMIT) + if (odb_read_object_info(data->repo->objects, oid, NULL) != OBJ_COMMIT) return 0; fprintf(data->f->fp, "%s%s\n", data->preferred ? "+" : "", @@ -799,11 +800,12 @@ static int midx_snapshot_ref_one(const char *refname UNUSED, return 0; } -static void midx_snapshot_refs(struct tempfile *f) +static void midx_snapshot_refs(struct repository *repo, struct tempfile *f) { struct midx_snapshot_ref_data data; - const struct string_list *preferred = bitmap_preferred_tips(the_repository); + const struct string_list *preferred = bitmap_preferred_tips(repo); + data.repo = repo; data.f = f; data.preferred = 0; oidset_init(&data.seen, 0); @@ -817,13 +819,13 @@ static void midx_snapshot_refs(struct tempfile *f) data.preferred = 1; for_each_string_list_item(item, preferred) - refs_for_each_ref_in(get_main_ref_store(the_repository), + refs_for_each_ref_in(get_main_ref_store(repo), item->string, midx_snapshot_ref_one, &data); data.preferred = 0; } - refs_for_each_ref(get_main_ref_store(the_repository), + refs_for_each_ref(get_main_ref_store(repo), midx_snapshot_ref_one, &data); if (close_tempfile_gently(f)) { @@ -1397,7 +1399,7 @@ int cmd_repack(int argc, "bitmap-ref-tips"); refs_snapshot = xmks_tempfile(path.buf); - midx_snapshot_refs(refs_snapshot); + midx_snapshot_refs(repo, refs_snapshot); strbuf_release(&path); } -- GitLab From c97715584399e494245dd25c3012dc210a02be8b Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:07:26 -0400 Subject: [PATCH 04/65] builtin/repack.c: avoid "the_repository" when removing packs The 'remove_redundant_pack()' function uses "the_repository" to obtain, and optionally remove, the repository's MIDX. Instead of relying on "the_repository", pass around a "struct repository *" parameter through its callers, and use that instead. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 113f5fc67f9..93802531e1a 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -221,33 +221,35 @@ static void mark_packs_for_deletion(struct existing_packs *existing, mark_packs_for_deletion_1(names, &existing->cruft_packs); } -static void remove_redundant_pack(const char *dir_name, const char *base_name) +static void remove_redundant_pack(struct repository *repo, + const char *dir_name, const char *base_name) { struct strbuf buf = STRBUF_INIT; - struct odb_source *source = the_repository->objects->sources; + struct odb_source *source = repo->objects->sources; struct multi_pack_index *m = get_multi_pack_index(source); strbuf_addf(&buf, "%s.pack", base_name); if (m && source->local && midx_contains_pack(m, buf.buf)) - clear_midx_file(the_repository); + clear_midx_file(repo); strbuf_insertf(&buf, 0, "%s/", dir_name); unlink_pack_path(buf.buf, 1); strbuf_release(&buf); } -static void remove_redundant_packs_1(struct string_list *packs) +static void remove_redundant_packs_1(struct repository *repo, + struct string_list *packs) { struct string_list_item *item; for_each_string_list_item(item, packs) { if (!pack_is_marked_for_deletion(item)) continue; - remove_redundant_pack(packdir, item->string); + remove_redundant_pack(repo, packdir, item->string); } } static void remove_redundant_existing_packs(struct existing_packs *existing) { - remove_redundant_packs_1(&existing->non_kept_packs); - remove_redundant_packs_1(&existing->cruft_packs); + remove_redundant_packs_1(existing->repo, &existing->non_kept_packs); + remove_redundant_packs_1(existing->repo, &existing->cruft_packs); } static void existing_packs_release(struct existing_packs *existing) @@ -685,7 +687,7 @@ static void geometry_remove_redundant_packs(struct pack_geometry *geometry, (string_list_has_string(&existing->kept_packs, buf.buf))) continue; - remove_redundant_pack(packdir, buf.buf); + remove_redundant_pack(existing->repo, packdir, buf.buf); } strbuf_release(&buf); -- GitLab From 14cae948432bae0f753b9ffd8afc84dd73063991 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:07:30 -0400 Subject: [PATCH 05/65] builtin/repack.c: avoid "the_repository" when repacking promisor objects Pass a "struct repository" pointer to the 'repack_promisor_objects()' function to avoid using "the_repository". Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 93802531e1a..4f08b57ddbd 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -407,7 +407,8 @@ static int has_pack_ext(const struct generated_pack_data *data, BUG("unknown pack extension: '%s'", ext); } -static void repack_promisor_objects(const struct pack_objects_args *args, +static void repack_promisor_objects(struct repository *repo, + const struct pack_objects_args *args, struct string_list *names) { struct child_process cmd = CHILD_PROCESS_INIT; @@ -424,7 +425,7 @@ static void repack_promisor_objects(const struct pack_objects_args *args, * {type -> existing pack order} ordering when computing deltas instead * of a {type -> size} ordering, which may produce better deltas. */ - for_each_packed_object(the_repository, write_oid, &cmd, + for_each_packed_object(repo, write_oid, &cmd, FOR_EACH_OBJECT_PROMISOR_ONLY); if (cmd.in == -1) { @@ -1458,7 +1459,7 @@ int cmd_repack(int argc, strvec_push(&cmd.args, "--delta-islands"); if (pack_everything & ALL_INTO_ONE) { - repack_promisor_objects(&po_args, &names); + repack_promisor_objects(repo, &po_args, &names); if (has_existing_non_kept_packs(&existing) && delete_redundant && -- GitLab From d5649bbff5b9a2b00bd8a325af1420ce3246fd9a Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:07:33 -0400 Subject: [PATCH 06/65] builtin/repack.c: avoid "the_hash_algo" when deleting packs The "mark_packs_for_deletion_1" function uses "the_hash_algo->hexsz" to isolate a pack's checksum before deleting it to avoid deleting a newly written pack having the same checksum (that is, some generated pack wound up identical to an existing pack). Avoid this by passing down a "struct git_hash_algo" pointer, and refer to the hash algorithm through it instead. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 4f08b57ddbd..094f5a0cc26 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -168,11 +168,12 @@ static int pack_is_retained(struct string_list_item *item) return (uintptr_t)item->util & RETAIN_PACK; } -static void mark_packs_for_deletion_1(struct string_list *names, +static void mark_packs_for_deletion_1(const struct git_hash_algo *algop, + struct string_list *names, struct string_list *list) { struct string_list_item *item; - const int hexsz = the_hash_algo->hexsz; + const int hexsz = algop->hexsz; for_each_string_list_item(item, list) { char *sha1; @@ -217,8 +218,9 @@ static void mark_packs_for_deletion(struct existing_packs *existing, struct string_list *names) { - mark_packs_for_deletion_1(names, &existing->non_kept_packs); - mark_packs_for_deletion_1(names, &existing->cruft_packs); + const struct git_hash_algo *algop = existing->repo->hash_algo; + mark_packs_for_deletion_1(algop, names, &existing->non_kept_packs); + mark_packs_for_deletion_1(algop, names, &existing->cruft_packs); } static void remove_redundant_pack(struct repository *repo, -- GitLab From 9347979fb0f4c68ef27c8360096161ed69b5715c Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:07:37 -0400 Subject: [PATCH 07/65] builtin/repack.c: avoid "the_hash_algo" in `write_oid()` In a similar spirit as the previous commit, avoid referring directly to "the_hash_algo" within builtin/repack.c::write_oid(). Unlike the previous commit, we are within a callback function, so must introduce a new struct to pass additional data through its "data" pointer. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 094f5a0cc26..7d62959dc28 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -339,6 +339,11 @@ static void prepare_pack_objects(struct child_process *cmd, cmd->out = -1; } +struct write_oid_context { + struct child_process *cmd; + const struct git_hash_algo *algop; +}; + /* * Write oid to the given struct child_process's stdin, starting it first if * necessary. @@ -347,14 +352,15 @@ static int write_oid(const struct object_id *oid, struct packed_git *pack UNUSED, uint32_t pos UNUSED, void *data) { - struct child_process *cmd = data; + struct write_oid_context *ctx = data; + struct child_process *cmd = ctx->cmd; if (cmd->in == -1) { if (start_command(cmd)) die(_("could not start pack-objects to repack promisor objects")); } - if (write_in_full(cmd->in, oid_to_hex(oid), the_hash_algo->hexsz) < 0 || + if (write_in_full(cmd->in, oid_to_hex(oid), ctx->algop->hexsz) < 0 || write_in_full(cmd->in, "\n", 1) < 0) die(_("failed to feed promisor objects to pack-objects")); return 0; @@ -413,6 +419,7 @@ static void repack_promisor_objects(struct repository *repo, const struct pack_objects_args *args, struct string_list *names) { + struct write_oid_context ctx; struct child_process cmd = CHILD_PROCESS_INIT; FILE *out; struct strbuf line = STRBUF_INIT; @@ -427,7 +434,9 @@ static void repack_promisor_objects(struct repository *repo, * {type -> existing pack order} ordering when computing deltas instead * of a {type -> size} ordering, which may produce better deltas. */ - for_each_packed_object(repo, write_oid, &cmd, + ctx.cmd = &cmd; + ctx.algop = repo->hash_algo; + for_each_packed_object(repo, write_oid, &ctx, FOR_EACH_OBJECT_PROMISOR_ONLY); if (cmd.in == -1) { -- GitLab From 62b7efe90300ef718d6e89dfae9394b86ba72b81 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:07:41 -0400 Subject: [PATCH 08/65] builtin/repack: avoid "the_hash_algo" in `repack_promisor_objects()` In a similar spirit as the previous commits, avoid referring directly to "the_hash_algo" within builtin/repack.c::repack_promisor_objects(). Since there is already a repository pointer in scope, use its hash_algo value instead. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/repack.c b/builtin/repack.c index 7d62959dc28..a7e94ed03c3 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -452,7 +452,7 @@ static void repack_promisor_objects(struct repository *repo, struct string_list_item *item; char *promisor_name; - if (line.len != the_hash_algo->hexsz) + if (line.len != repo->hash_algo->hexsz) die(_("repack: Expecting full hex object ID lines only from pack-objects.")); item = string_list_append(names, line.buf); -- GitLab From 29b820b234127270d9cb021ba511039dd701df74 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:07:45 -0400 Subject: [PATCH 09/65] builtin/repack.c: avoid "the_hash_algo" in `finish_pack_objects_cmd()` In a similar spirit as previous commits, avoid referring directly to "the_hash_algo" in builtin/repack.c::finish_pack_objects_cmd() and instead accept one as a parameter to the function. Since this function has a number of callers throughout the builtin, the diff is a little noisier than previous commits. However, each hunk is limited to passing the hash_algo parameter from a repository pointer that is already in scope. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index a7e94ed03c3..a043704aa83 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -1073,7 +1073,8 @@ static void remove_redundant_bitmaps(struct string_list *include, strbuf_release(&path); } -static int finish_pack_objects_cmd(struct child_process *cmd, +static int finish_pack_objects_cmd(const struct git_hash_algo *algop, + struct child_process *cmd, struct string_list *names, int local) { @@ -1084,7 +1085,7 @@ static int finish_pack_objects_cmd(struct child_process *cmd, while (strbuf_getline_lf(&line, out) != EOF) { struct string_list_item *item; - if (line.len != the_hash_algo->hexsz) + if (line.len != algop->hexsz) die(_("repack: Expecting full hex object ID lines only " "from pack-objects.")); /* @@ -1150,7 +1151,8 @@ static int write_filtered_pack(const struct pack_objects_args *args, fprintf(in, "%s%s.pack\n", caret, item->string); fclose(in); - return finish_pack_objects_cmd(&cmd, names, local); + return finish_pack_objects_cmd(existing->repo->hash_algo, &cmd, names, + local); } static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, @@ -1247,7 +1249,8 @@ static int write_cruft_pack(const struct pack_objects_args *args, fprintf(in, "%s.pack\n", item->string); fclose(in); - return finish_pack_objects_cmd(&cmd, names, local); + return finish_pack_objects_cmd(existing->repo->hash_algo, &cmd, names, + local); } static const char *find_pack_prefix(const char *packdir, const char *packtmp) @@ -1534,7 +1537,7 @@ int cmd_repack(int argc, fclose(in); } - ret = finish_pack_objects_cmd(&cmd, &names, 1); + ret = finish_pack_objects_cmd(repo->hash_algo, &cmd, &names, 1); if (ret) goto cleanup; -- GitLab From b908981ce8047c92b2229943412c28c12c1b7aee Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:07:49 -0400 Subject: [PATCH 10/65] builtin/repack.c: avoid using `hash_to_hex()` in pack geometry In previous commits, we started passing either repository or git_hash_algo pointers around to various spots within builtin/repack.c to reduce our dependency on the_repository in the hope of undef'ing USE_THE_REPOSITORY_VARIABLE. This commit takes us as far as we can (easily) go in that direction by removing the only use of a convenience function that only exists when USE_THE_REPOSITORY_VARIABLE is defined. Unfortunately, the only other such function is "is_bare_repository()", which is less than straightforward to convert into, say, "repo_is_bare()", the latter of the two accepting a repository pointer. Punt on that for now, and declare this commit as the stopping point for our efforts in the direction of undef'ing USE_THE_REPOSITORY_VARIABLE. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/builtin/repack.c b/builtin/repack.c index a043704aa83..0d35f15b4b1 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -683,12 +683,14 @@ static void geometry_remove_redundant_packs(struct pack_geometry *geometry, struct string_list *names, struct existing_packs *existing) { + const struct git_hash_algo *algop = existing->repo->hash_algo; struct strbuf buf = STRBUF_INIT; uint32_t i; for (i = 0; i < geometry->split; i++) { struct packed_git *p = geometry->pack[i]; - if (string_list_has_string(names, hash_to_hex(p->hash))) + if (string_list_has_string(names, hash_to_hex_algop(p->hash, + algop))) continue; strbuf_reset(&buf); -- GitLab From 0ec811ac1868845e1dea1dac3b0de8f3a4c64f14 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:07:53 -0400 Subject: [PATCH 11/65] repack: introduce new compilation unit Over the years, builtin/repack.c has turned into a grab-bag of functionality powering the 'git repack' builtin. Among its many capabilities, it: - can build and spawn 'git pack-objects' commands, which in turn generate new packs - has infrastructure to manage the set of existing packs in a repository - has infrastructure to split a sequence of packs into a geometric progression based on object size - can manage both generating and combining cruft packs together - can write new MIDXs to name a few. As a result, this builtin has accumulated a lot of code, making adding new functionality difficult. In the future, 'repack' will learn how to manage a chain of incremental MIDXs, adding yet more functionality into the builtin. As a prerequisite step, let's first move some of the functionality in the builtin into its own repack.[ch]. This will be done over the course of many steps, since there are many individual components, some of which will end up in other, yet-to-exist compilation units of their own. Some of the code movement here is also non-trivial, so performing it in individual steps will make it easier to verify. Let's start by migrating 'struct pack_objects_args' (and the related corresponding pack_objects_args_release() function) into repack.h, and teach both the Makefile and Meson how to build the new compilation unit. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Makefile | 1 + builtin/repack.c | 25 +------------------------ meson.build | 1 + repack.c | 11 +++++++++++ repack.h | 23 +++++++++++++++++++++++ 5 files changed, 37 insertions(+), 24 deletions(-) create mode 100644 repack.c create mode 100644 repack.h diff --git a/Makefile b/Makefile index 4c95affadb5..c0df6da2371 100644 --- a/Makefile +++ b/Makefile @@ -1136,6 +1136,7 @@ LIB_OBJS += refs/packed-backend.o LIB_OBJS += refs/ref-cache.o LIB_OBJS += refspec.o LIB_OBJS += remote.o +LIB_OBJS += repack.o LIB_OBJS += replace-object.o LIB_OBJS += repo-settings.o LIB_OBJS += repository.o diff --git a/builtin/repack.c b/builtin/repack.c index 0d35f15b4b1..6dfcb3327e6 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -19,6 +19,7 @@ #include "prune-packed.h" #include "odb.h" #include "promisor-remote.h" +#include "repack.h" #include "shallow.h" #include "pack.h" #include "pack-bitmap.h" @@ -53,21 +54,6 @@ static const char incremental_bitmap_conflict_error[] = N_( "--no-write-bitmap-index or disable the pack.writeBitmaps configuration." ); -struct pack_objects_args { - char *window; - char *window_memory; - char *depth; - char *threads; - unsigned long max_pack_size; - int no_reuse_delta; - int no_reuse_object; - int quiet; - int local; - int name_hash_version; - int path_walk; - struct list_objects_filter_options filter_options; -}; - static int repack_config(const char *var, const char *value, const struct config_context *ctx, void *cb) { @@ -116,15 +102,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -static void pack_objects_args_release(struct pack_objects_args *args) -{ - free(args->window); - free(args->window_memory); - free(args->depth); - free(args->threads); - list_objects_filter_release(&args->filter_options); -} - struct existing_packs { struct repository *repo; struct string_list kept_packs; diff --git a/meson.build b/meson.build index b3dfcc04972..993e8f368ff 100644 --- a/meson.build +++ b/meson.build @@ -462,6 +462,7 @@ libgit_sources = [ 'reftable/tree.c', 'reftable/writer.c', 'remote.c', + 'repack.c', 'replace-object.c', 'repo-settings.c', 'repository.c', diff --git a/repack.c b/repack.c new file mode 100644 index 00000000000..a1f5b796fba --- /dev/null +++ b/repack.c @@ -0,0 +1,11 @@ +#include "git-compat-util.h" +#include "repack.h" + +void pack_objects_args_release(struct pack_objects_args *args) +{ + free(args->window); + free(args->window_memory); + free(args->depth); + free(args->threads); + list_objects_filter_release(&args->filter_options); +} diff --git a/repack.h b/repack.h new file mode 100644 index 00000000000..421d439d5a5 --- /dev/null +++ b/repack.h @@ -0,0 +1,23 @@ +#ifndef REPACK_H +#define REPACK_H + +#include "list-objects-filter-options.h" + +struct pack_objects_args { + char *window; + char *window_memory; + char *depth; + char *threads; + unsigned long max_pack_size; + int no_reuse_delta; + int no_reuse_object; + int quiet; + int local; + int name_hash_version; + int path_walk; + struct list_objects_filter_options filter_options; +}; + +void pack_objects_args_release(struct pack_objects_args *args); + +#endif /* REPACK_H */ -- GitLab From a953609865ac4d0875cc878146c6b8e0c7c28643 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:07:56 -0400 Subject: [PATCH 12/65] builtin/repack.c: pass both pack_objects args to repack_config A subsequent commit will remove 'delta_base_offset' as a static variable within builtin/repack.c, and reintroduce it as a member of the 'struct pack_objects_args'. As a result, the repack_config callback will need to have both the cruft- and non-cruft 'struct pack_objects_args's in scope. Introduce a new 'struct repack_config_ctx' to allow the callee to provide both pointers to the callback. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 6dfcb3327e6..af6de8d77ae 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -54,10 +54,16 @@ static const char incremental_bitmap_conflict_error[] = N_( "--no-write-bitmap-index or disable the pack.writeBitmaps configuration." ); +struct repack_config_ctx { + struct pack_objects_args *po_args; + struct pack_objects_args *cruft_po_args; +}; + static int repack_config(const char *var, const char *value, const struct config_context *ctx, void *cb) { - struct pack_objects_args *cruft_po_args = cb; + struct repack_config_ctx *repack_ctx = cb; + struct pack_objects_args *cruft_po_args = repack_ctx->cruft_po_args; if (!strcmp(var, "repack.usedeltabaseoffset")) { delta_base_offset = git_config_bool(var, value); return 0; @@ -1260,6 +1266,7 @@ int cmd_repack(int argc, size_t midx_pack_names_nr = 0; /* variables to be filled by option parsing */ + struct repack_config_ctx config_ctx; int delete_redundant = 0; const char *unpack_unreachable = NULL; int keep_unreachable = 0; @@ -1343,7 +1350,11 @@ int cmd_repack(int argc, list_objects_filter_init(&po_args.filter_options); - repo_config(repo, repack_config, &cruft_po_args); + memset(&config_ctx, 0, sizeof(config_ctx)); + config_ctx.po_args = &po_args; + config_ctx.cruft_po_args = &cruft_po_args; + + repo_config(repo, repack_config, &config_ctx); argc = parse_options(argc, argv, prefix, builtin_repack_options, git_repack_usage, 0); -- GitLab From 17ee881425c68c58f08a5bc62b8074b3e04c6296 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:00 -0400 Subject: [PATCH 13/65] repack: move 'delta_base_offset' to 'struct pack_objects_args' The static variable 'delta_base_offset' determines whether or not we pass the "--delta-base-offset" command-line argument when spawning pack-objects as a child process. Its introduction dates back to when repack was rewritten in C, all the way back in a1bbc6c017 (repack: rewrite the shell script in C, 2013-09-15). 'struct pack_objects_args' was introduced much later on in 4571324b99 (builtin/repack.c: allow configuring cruft pack generation, 2022-05-20), but did not move the 'delta_base_offset' variable. Since the 'delta_base_offset' is a property of an individual pack-objects command, re-introduce that variable as a member of 'struct pack_objects_args', which will enable further code movement in the subsequent commits. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 11 ++++++----- repack.h | 3 +++ 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index af6de8d77ae..f4af8303532 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -34,7 +34,6 @@ #define RETAIN_PACK 2 static int pack_everything; -static int delta_base_offset = 1; static int pack_kept_objects = -1; static int write_bitmaps = -1; static int use_delta_islands; @@ -63,9 +62,10 @@ static int repack_config(const char *var, const char *value, const struct config_context *ctx, void *cb) { struct repack_config_ctx *repack_ctx = cb; + struct pack_objects_args *po_args = repack_ctx->po_args; struct pack_objects_args *cruft_po_args = repack_ctx->cruft_po_args; if (!strcmp(var, "repack.usedeltabaseoffset")) { - delta_base_offset = git_config_bool(var, value); + po_args->delta_base_offset = git_config_bool(var, value); return 0; } if (!strcmp(var, "repack.packkeptobjects")) { @@ -315,7 +315,7 @@ static void prepare_pack_objects(struct child_process *cmd, strvec_push(&cmd->args, "--local"); if (args->quiet) strvec_push(&cmd->args, "--quiet"); - if (delta_base_offset) + if (args->delta_base_offset) strvec_push(&cmd->args, "--delta-base-offset"); strvec_push(&cmd->args, out); cmd->git_cmd = 1; @@ -1271,8 +1271,8 @@ int cmd_repack(int argc, const char *unpack_unreachable = NULL; int keep_unreachable = 0; struct string_list keep_pack_list = STRING_LIST_INIT_NODUP; - struct pack_objects_args po_args = { 0 }; - struct pack_objects_args cruft_po_args = { 0 }; + struct pack_objects_args po_args = PACK_OBJECTS_ARGS_INIT; + struct pack_objects_args cruft_po_args = PACK_OBJECTS_ARGS_INIT; int write_midx = 0; const char *cruft_expiration = NULL; const char *expire_to = NULL; @@ -1567,6 +1567,7 @@ int cmd_repack(int argc, cruft_po_args.local = po_args.local; cruft_po_args.quiet = po_args.quiet; + cruft_po_args.delta_base_offset = po_args.delta_base_offset; ret = write_cruft_pack(&cruft_po_args, packtmp, pack_prefix, cruft_expiration, diff --git a/repack.h b/repack.h index 421d439d5a5..12632d7fec1 100644 --- a/repack.h +++ b/repack.h @@ -15,9 +15,12 @@ struct pack_objects_args { int local; int name_hash_version; int path_walk; + int delta_base_offset; struct list_objects_filter_options filter_options; }; +#define PACK_OBJECTS_ARGS_INIT { .delta_base_offset = 1 } + void pack_objects_args_release(struct pack_objects_args *args); #endif /* REPACK_H */ -- GitLab From 1ebaa9376eea424a9bd955a0782547a040a8b44e Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:04 -0400 Subject: [PATCH 14/65] repack: remove 'prepare_pack_objects' from the builtin Now that the 'prepare_pack_objects' function no longer refers to external, static variables, move it out to repack.h as generic functionality. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 34 ---------------------------------- repack.c | 35 +++++++++++++++++++++++++++++++++++ repack.h | 5 +++++ 3 files changed, 40 insertions(+), 34 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index f4af8303532..ff93654cfed 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -288,40 +288,6 @@ static void collect_pack_filenames(struct existing_packs *existing, strbuf_release(&buf); } -static void prepare_pack_objects(struct child_process *cmd, - const struct pack_objects_args *args, - const char *out) -{ - strvec_push(&cmd->args, "pack-objects"); - if (args->window) - strvec_pushf(&cmd->args, "--window=%s", args->window); - if (args->window_memory) - strvec_pushf(&cmd->args, "--window-memory=%s", args->window_memory); - if (args->depth) - strvec_pushf(&cmd->args, "--depth=%s", args->depth); - if (args->threads) - strvec_pushf(&cmd->args, "--threads=%s", args->threads); - if (args->max_pack_size) - strvec_pushf(&cmd->args, "--max-pack-size=%lu", args->max_pack_size); - if (args->no_reuse_delta) - strvec_pushf(&cmd->args, "--no-reuse-delta"); - if (args->no_reuse_object) - strvec_pushf(&cmd->args, "--no-reuse-object"); - if (args->name_hash_version) - strvec_pushf(&cmd->args, "--name-hash-version=%d", args->name_hash_version); - if (args->path_walk) - strvec_pushf(&cmd->args, "--path-walk"); - if (args->local) - strvec_push(&cmd->args, "--local"); - if (args->quiet) - strvec_push(&cmd->args, "--quiet"); - if (args->delta_base_offset) - strvec_push(&cmd->args, "--delta-base-offset"); - strvec_push(&cmd->args, out); - cmd->git_cmd = 1; - cmd->out = -1; -} - struct write_oid_context { struct child_process *cmd; const struct git_hash_algo *algop; diff --git a/repack.c b/repack.c index a1f5b796fba..91b6e1cc09c 100644 --- a/repack.c +++ b/repack.c @@ -1,5 +1,40 @@ #include "git-compat-util.h" #include "repack.h" +#include "run-command.h" + +void prepare_pack_objects(struct child_process *cmd, + const struct pack_objects_args *args, + const char *out) +{ + strvec_push(&cmd->args, "pack-objects"); + if (args->window) + strvec_pushf(&cmd->args, "--window=%s", args->window); + if (args->window_memory) + strvec_pushf(&cmd->args, "--window-memory=%s", args->window_memory); + if (args->depth) + strvec_pushf(&cmd->args, "--depth=%s", args->depth); + if (args->threads) + strvec_pushf(&cmd->args, "--threads=%s", args->threads); + if (args->max_pack_size) + strvec_pushf(&cmd->args, "--max-pack-size=%lu", args->max_pack_size); + if (args->no_reuse_delta) + strvec_pushf(&cmd->args, "--no-reuse-delta"); + if (args->no_reuse_object) + strvec_pushf(&cmd->args, "--no-reuse-object"); + if (args->name_hash_version) + strvec_pushf(&cmd->args, "--name-hash-version=%d", args->name_hash_version); + if (args->path_walk) + strvec_pushf(&cmd->args, "--path-walk"); + if (args->local) + strvec_push(&cmd->args, "--local"); + if (args->quiet) + strvec_push(&cmd->args, "--quiet"); + if (args->delta_base_offset) + strvec_push(&cmd->args, "--delta-base-offset"); + strvec_push(&cmd->args, out); + cmd->git_cmd = 1; + cmd->out = -1; +} void pack_objects_args_release(struct pack_objects_args *args) { diff --git a/repack.h b/repack.h index 12632d7fec1..3f7ec207355 100644 --- a/repack.h +++ b/repack.h @@ -21,6 +21,11 @@ struct pack_objects_args { #define PACK_OBJECTS_ARGS_INIT { .delta_base_offset = 1 } +struct child_process; + +void prepare_pack_objects(struct child_process *cmd, + const struct pack_objects_args *args, + const char *out); void pack_objects_args_release(struct pack_objects_args *args); #endif /* REPACK_H */ -- GitLab From f989b706d1fafd60e2f37dfc2d3f99e093b980be Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:08 -0400 Subject: [PATCH 15/65] builtin/repack.c: rename many 'struct existing_packs' functions Rename many of the 'struct existing_packs'-related functions according to the convention introduced in and described by 541204aabe (Documentation: document naming schema for structs and their functions, 2024-07-30). Note that some functions which operate over an individual entry in the list of existing packs are prefixed with "existing_pack_" instead of the plural form. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 66 +++++++++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 32 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index ff93654cfed..f82e6c39307 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -121,39 +121,39 @@ struct existing_packs { .cruft_packs = STRING_LIST_INIT_DUP, \ } -static int has_existing_non_kept_packs(const struct existing_packs *existing) +static int existing_packs_has_non_kept(const struct existing_packs *existing) { return existing->non_kept_packs.nr || existing->cruft_packs.nr; } -static void pack_mark_for_deletion(struct string_list_item *item) +static void existing_pack_mark_for_deletion(struct string_list_item *item) { item->util = (void*)((uintptr_t)item->util | DELETE_PACK); } -static void pack_unmark_for_deletion(struct string_list_item *item) +static void existing_pack_unmark_for_deletion(struct string_list_item *item) { item->util = (void*)((uintptr_t)item->util & ~DELETE_PACK); } -static int pack_is_marked_for_deletion(struct string_list_item *item) +static int existing_pack_is_marked_for_deletion(struct string_list_item *item) { return (uintptr_t)item->util & DELETE_PACK; } -static void pack_mark_retained(struct string_list_item *item) +static void existing_packs_mark_retained(struct string_list_item *item) { item->util = (void*)((uintptr_t)item->util | RETAIN_PACK); } -static int pack_is_retained(struct string_list_item *item) +static int existing_pack_is_retained(struct string_list_item *item) { return (uintptr_t)item->util & RETAIN_PACK; } -static void mark_packs_for_deletion_1(const struct git_hash_algo *algop, - struct string_list *names, - struct string_list *list) +static void existing_packs_mark_for_deletion_1(const struct git_hash_algo *algop, + struct string_list *names, + struct string_list *list) { struct string_list_item *item; const int hexsz = algop->hexsz; @@ -165,8 +165,8 @@ static void mark_packs_for_deletion_1(const struct git_hash_algo *algop, continue; sha1 = item->string + len - hexsz; - if (pack_is_retained(item)) { - pack_unmark_for_deletion(item); + if (existing_pack_is_retained(item)) { + existing_pack_unmark_for_deletion(item); } else if (!string_list_has_string(names, sha1)) { /* * Mark this pack for deletion, which ensures @@ -175,13 +175,13 @@ static void mark_packs_for_deletion_1(const struct git_hash_algo *algop, * will actually delete this pack (if `-d` was * given). */ - pack_mark_for_deletion(item); + existing_pack_mark_for_deletion(item); } } } -static void retain_cruft_pack(struct existing_packs *existing, - struct packed_git *cruft) +static void existing_packs_retain_cruft(struct existing_packs *existing, + struct packed_git *cruft) { struct strbuf buf = STRBUF_INIT; struct string_list_item *item; @@ -193,17 +193,19 @@ static void retain_cruft_pack(struct existing_packs *existing, if (!item) BUG("could not find cruft pack '%s'", pack_basename(cruft)); - pack_mark_retained(item); + existing_packs_mark_retained(item); strbuf_release(&buf); } -static void mark_packs_for_deletion(struct existing_packs *existing, - struct string_list *names) +static void existing_packs_mark_for_deletion(struct existing_packs *existing, + struct string_list *names) { const struct git_hash_algo *algop = existing->repo->hash_algo; - mark_packs_for_deletion_1(algop, names, &existing->non_kept_packs); - mark_packs_for_deletion_1(algop, names, &existing->cruft_packs); + existing_packs_mark_for_deletion_1(algop, names, + &existing->non_kept_packs); + existing_packs_mark_for_deletion_1(algop, names, + &existing->cruft_packs); } static void remove_redundant_pack(struct repository *repo, @@ -225,13 +227,13 @@ static void remove_redundant_packs_1(struct repository *repo, { struct string_list_item *item; for_each_string_list_item(item, packs) { - if (!pack_is_marked_for_deletion(item)) + if (!existing_pack_is_marked_for_deletion(item)) continue; remove_redundant_pack(repo, packdir, item->string); } } -static void remove_redundant_existing_packs(struct existing_packs *existing) +static void existing_packs_remove_redundant(struct existing_packs *existing) { remove_redundant_packs_1(existing->repo, &existing->non_kept_packs); remove_redundant_packs_1(existing->repo, &existing->cruft_packs); @@ -250,7 +252,7 @@ static void existing_packs_release(struct existing_packs *existing) * .keep file or not. Packs without a .keep file are not to be kept * if we are going to pack everything into one file. */ -static void collect_pack_filenames(struct existing_packs *existing, +static void existing_packs_collect(struct existing_packs *existing, const struct string_list *extra_keep) { struct packfile_store *packs = existing->repo->objects->packfiles; @@ -721,7 +723,7 @@ static int midx_has_unknown_packs(char **midx_pack_names, item = string_list_lookup(&existing->non_kept_packs, pack_name); - if (item && !pack_is_marked_for_deletion(item)) + if (item && !existing_pack_is_marked_for_deletion(item)) continue; } @@ -851,7 +853,7 @@ static void midx_included_packs(struct string_list *include, } } else { for_each_string_list_item(item, &existing->non_kept_packs) { - if (pack_is_marked_for_deletion(item)) + if (existing_pack_is_marked_for_deletion(item)) continue; strbuf_reset(&buf); @@ -888,10 +890,10 @@ static void midx_included_packs(struct string_list *include, * --geometric case, but doing so is unnecessary * since no packs are marked as pending * deletion (since we only call - * `mark_packs_for_deletion()` when doing an - * all-into-one repack). + * `existing_packs_mark_for_deletion()` when + * doing an all-into-one repack). */ - if (pack_is_marked_for_deletion(item)) + if (existing_pack_is_marked_for_deletion(item)) continue; strbuf_reset(&buf); @@ -1128,7 +1130,7 @@ static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, if (p->pack_size < combine_cruft_below_size) { fprintf(in, "-%s\n", pack_basename(p)); } else { - retain_cruft_pack(existing, p); + existing_packs_retain_cruft(existing, p); fprintf(in, "%s\n", pack_basename(p)); } } @@ -1382,7 +1384,7 @@ int cmd_repack(int argc, packtmp = mkpathdup("%s/%s", packdir, packtmp_name); existing.repo = repo; - collect_pack_filenames(&existing, &keep_pack_list); + existing_packs_collect(&existing, &keep_pack_list); if (geometry.split_factor) { if (pack_everything) @@ -1431,7 +1433,7 @@ int cmd_repack(int argc, if (pack_everything & ALL_INTO_ONE) { repack_promisor_objects(repo, &po_args, &names); - if (has_existing_non_kept_packs(&existing) && + if (existing_packs_has_non_kept(&existing) && delete_redundant && !(pack_everything & PACK_CRUFT)) { for_each_string_list_item(item, &names) { @@ -1647,7 +1649,7 @@ int cmd_repack(int argc, /* End of pack replacement. */ if (delete_redundant && pack_everything & ALL_INTO_ONE) - mark_packs_for_deletion(&existing, &names); + existing_packs_mark_for_deletion(&existing, &names); if (write_midx) { struct string_list include = STRING_LIST_INIT_DUP; @@ -1671,7 +1673,7 @@ int cmd_repack(int argc, if (delete_redundant) { int opts = 0; - remove_redundant_existing_packs(&existing); + existing_packs_remove_redundant(&existing); if (geometry.split_factor) geometry_remove_redundant_packs(&geometry, &names, -- GitLab From 7be7f73cf21356a2feb701e5968a82e2a762879c Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:12 -0400 Subject: [PATCH 16/65] repack: remove 'remove_redundant_pack' from the builtin Extract "remove_redundant_pack()" as generic repack-related functionality by moving its implementation to the repack.[ch] compilation unit. This is a prerequisite to moving the "existing_packs" API, which is one of the callers of this function. (The remaining caller in the pack geometry code will eventually move to its own compilation unit as well, and will likewise rely on this function.) While moving it over, prefix the function name with "repack_" to indicate that it belongs to the repack-subsystem. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 18 ++---------------- repack.c | 18 ++++++++++++++++++ repack.h | 3 +++ 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index f82e6c39307..31137cf7112 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -208,20 +208,6 @@ static void existing_packs_mark_for_deletion(struct existing_packs *existing, &existing->cruft_packs); } -static void remove_redundant_pack(struct repository *repo, - const char *dir_name, const char *base_name) -{ - struct strbuf buf = STRBUF_INIT; - struct odb_source *source = repo->objects->sources; - struct multi_pack_index *m = get_multi_pack_index(source); - strbuf_addf(&buf, "%s.pack", base_name); - if (m && source->local && midx_contains_pack(m, buf.buf)) - clear_midx_file(repo); - strbuf_insertf(&buf, 0, "%s/", dir_name); - unlink_pack_path(buf.buf, 1); - strbuf_release(&buf); -} - static void remove_redundant_packs_1(struct repository *repo, struct string_list *packs) { @@ -229,7 +215,7 @@ static void remove_redundant_packs_1(struct repository *repo, for_each_string_list_item(item, packs) { if (!existing_pack_is_marked_for_deletion(item)) continue; - remove_redundant_pack(repo, packdir, item->string); + repack_remove_redundant_pack(repo, packdir, item->string); } } @@ -652,7 +638,7 @@ static void geometry_remove_redundant_packs(struct pack_geometry *geometry, (string_list_has_string(&existing->kept_packs, buf.buf))) continue; - remove_redundant_pack(existing->repo, packdir, buf.buf); + repack_remove_redundant_pack(existing->repo, packdir, buf.buf); } strbuf_release(&buf); diff --git a/repack.c b/repack.c index 91b6e1cc09c..3aaa351b5b5 100644 --- a/repack.c +++ b/repack.c @@ -1,5 +1,9 @@ #include "git-compat-util.h" +#include "midx.h" +#include "odb.h" +#include "packfile.h" #include "repack.h" +#include "repository.h" #include "run-command.h" void prepare_pack_objects(struct child_process *cmd, @@ -44,3 +48,17 @@ void pack_objects_args_release(struct pack_objects_args *args) free(args->threads); list_objects_filter_release(&args->filter_options); } + +void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, + const char *base_name) +{ + struct strbuf buf = STRBUF_INIT; + struct odb_source *source = repo->objects->sources; + struct multi_pack_index *m = get_multi_pack_index(source); + strbuf_addf(&buf, "%s.pack", base_name); + if (m && source->local && midx_contains_pack(m, buf.buf)) + clear_midx_file(repo); + strbuf_insertf(&buf, 0, "%s/", dir_name); + unlink_pack_path(buf.buf, 1); + strbuf_release(&buf); +} diff --git a/repack.h b/repack.h index 3f7ec207355..a62bfa2ff97 100644 --- a/repack.h +++ b/repack.h @@ -28,4 +28,7 @@ void prepare_pack_objects(struct child_process *cmd, const char *out); void pack_objects_args_release(struct pack_objects_args *args); +void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, + const char *base_name); + #endif /* REPACK_H */ -- GitLab From fc808b39a40939714e26d355f2a01e995e65f871 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:16 -0400 Subject: [PATCH 17/65] builtin/repack.c: pass "packdir" when removing packs builtin/repack.c defines a static "packdir" to instruct pack-objects on where to write any new packfiles. This is also the directory scanned when removing any packfiles which were made redundant by the latest repack. Prepare to move the "existing_packs_remove_redundant" function to its own compilation unit by passing in this information as a parameter to that function. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 31137cf7112..c5a88eda129 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -209,7 +209,8 @@ static void existing_packs_mark_for_deletion(struct existing_packs *existing, } static void remove_redundant_packs_1(struct repository *repo, - struct string_list *packs) + struct string_list *packs, + const char *packdir) { struct string_list_item *item; for_each_string_list_item(item, packs) { @@ -219,10 +220,13 @@ static void remove_redundant_packs_1(struct repository *repo, } } -static void existing_packs_remove_redundant(struct existing_packs *existing) +static void existing_packs_remove_redundant(struct existing_packs *existing, + const char *packdir) { - remove_redundant_packs_1(existing->repo, &existing->non_kept_packs); - remove_redundant_packs_1(existing->repo, &existing->cruft_packs); + remove_redundant_packs_1(existing->repo, &existing->non_kept_packs, + packdir); + remove_redundant_packs_1(existing->repo, &existing->cruft_packs, + packdir); } static void existing_packs_release(struct existing_packs *existing) @@ -1659,7 +1663,7 @@ int cmd_repack(int argc, if (delete_redundant) { int opts = 0; - existing_packs_remove_redundant(&existing); + existing_packs_remove_redundant(&existing, packdir); if (geometry.split_factor) geometry_remove_redundant_packs(&geometry, &names, -- GitLab From 92e1e725f1d165a5cfd94fcab98e359b86388a4c Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:19 -0400 Subject: [PATCH 18/65] builtin/repack.c: avoid unnecessary numeric casts in existing_packs There are a couple of spots that cause warnings within the existing_packs API without DISABLE_SIGN_COMPARE_WARNINGS under DEVELOPER=1 mode. In both cases, we have int values that are being compared against size_t ones. Neither of these two cases are incorrect, and the cast is completely OK in practice. But both are unnecessary, since: - in existing_packs_mark_for_deletion_1(), 'hexsz' should be defined as a size_t anyway, since algop->hexsz is. - in existing_packs_collect(), 'i' should be defined as a size_t since it is counting up to the value of a string_list's 'nr' field. (This patch is a little bit of noise, but I would rather see us squelch these warnings ahead of moving the existing_packs API into a separate compilation unit to avoid having to define DISABLE_SIGN_COMPARE_WARNINGS in repack.c.) Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index c5a88eda129..e13943b6377 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -156,7 +156,7 @@ static void existing_packs_mark_for_deletion_1(const struct git_hash_algo *algop struct string_list *list) { struct string_list_item *item; - const int hexsz = algop->hexsz; + const size_t hexsz = algop->hexsz; for_each_string_list_item(item, list) { char *sha1; @@ -250,7 +250,7 @@ static void existing_packs_collect(struct existing_packs *existing, struct strbuf buf = STRBUF_INIT; for (p = packfile_store_get_all_packs(packs); p; p = p->next) { - int i; + size_t i; const char *base; if (!p->pack_local) -- GitLab From f2bfe705d9cbffa61f9fd129d0a7b913b832440e Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:23 -0400 Subject: [PATCH 19/65] repack: remove 'existing_packs' API from the builtin The repack builtin defines an API for keeping track of which packs were found in the repository at the beginning of the repack operation. This is used to classify what state a pack was in (kept, non-kept, or cruft), and is also used to mark which packs to delete (or keep) at the end of a repack operation. Now that the prerequisite refactoring is complete, this API is isolated enough that it can be moved out to repack.ch and removed from the builtin entirely. As a result, some of its functions become static within repack.c, cleaning up the visible API. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 173 ----------------------------------------------- repack.c | 157 ++++++++++++++++++++++++++++++++++++++++++ repack.h | 35 ++++++++++ 3 files changed, 192 insertions(+), 173 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index e13943b6377..a168c887911 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -3,7 +3,6 @@ #include "builtin.h" #include "config.h" -#include "dir.h" #include "environment.h" #include "gettext.h" #include "hex.h" @@ -108,178 +107,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -struct existing_packs { - struct repository *repo; - struct string_list kept_packs; - struct string_list non_kept_packs; - struct string_list cruft_packs; -}; - -#define EXISTING_PACKS_INIT { \ - .kept_packs = STRING_LIST_INIT_DUP, \ - .non_kept_packs = STRING_LIST_INIT_DUP, \ - .cruft_packs = STRING_LIST_INIT_DUP, \ -} - -static int existing_packs_has_non_kept(const struct existing_packs *existing) -{ - return existing->non_kept_packs.nr || existing->cruft_packs.nr; -} - -static void existing_pack_mark_for_deletion(struct string_list_item *item) -{ - item->util = (void*)((uintptr_t)item->util | DELETE_PACK); -} - -static void existing_pack_unmark_for_deletion(struct string_list_item *item) -{ - item->util = (void*)((uintptr_t)item->util & ~DELETE_PACK); -} - -static int existing_pack_is_marked_for_deletion(struct string_list_item *item) -{ - return (uintptr_t)item->util & DELETE_PACK; -} - -static void existing_packs_mark_retained(struct string_list_item *item) -{ - item->util = (void*)((uintptr_t)item->util | RETAIN_PACK); -} - -static int existing_pack_is_retained(struct string_list_item *item) -{ - return (uintptr_t)item->util & RETAIN_PACK; -} - -static void existing_packs_mark_for_deletion_1(const struct git_hash_algo *algop, - struct string_list *names, - struct string_list *list) -{ - struct string_list_item *item; - const size_t hexsz = algop->hexsz; - - for_each_string_list_item(item, list) { - char *sha1; - size_t len = strlen(item->string); - if (len < hexsz) - continue; - sha1 = item->string + len - hexsz; - - if (existing_pack_is_retained(item)) { - existing_pack_unmark_for_deletion(item); - } else if (!string_list_has_string(names, sha1)) { - /* - * Mark this pack for deletion, which ensures - * that this pack won't be included in a MIDX - * (if `--write-midx` was given) and that we - * will actually delete this pack (if `-d` was - * given). - */ - existing_pack_mark_for_deletion(item); - } - } -} - -static void existing_packs_retain_cruft(struct existing_packs *existing, - struct packed_git *cruft) -{ - struct strbuf buf = STRBUF_INIT; - struct string_list_item *item; - - strbuf_addstr(&buf, pack_basename(cruft)); - strbuf_strip_suffix(&buf, ".pack"); - - item = string_list_lookup(&existing->cruft_packs, buf.buf); - if (!item) - BUG("could not find cruft pack '%s'", pack_basename(cruft)); - - existing_packs_mark_retained(item); - strbuf_release(&buf); -} - -static void existing_packs_mark_for_deletion(struct existing_packs *existing, - struct string_list *names) - -{ - const struct git_hash_algo *algop = existing->repo->hash_algo; - existing_packs_mark_for_deletion_1(algop, names, - &existing->non_kept_packs); - existing_packs_mark_for_deletion_1(algop, names, - &existing->cruft_packs); -} - -static void remove_redundant_packs_1(struct repository *repo, - struct string_list *packs, - const char *packdir) -{ - struct string_list_item *item; - for_each_string_list_item(item, packs) { - if (!existing_pack_is_marked_for_deletion(item)) - continue; - repack_remove_redundant_pack(repo, packdir, item->string); - } -} - -static void existing_packs_remove_redundant(struct existing_packs *existing, - const char *packdir) -{ - remove_redundant_packs_1(existing->repo, &existing->non_kept_packs, - packdir); - remove_redundant_packs_1(existing->repo, &existing->cruft_packs, - packdir); -} - -static void existing_packs_release(struct existing_packs *existing) -{ - string_list_clear(&existing->kept_packs, 0); - string_list_clear(&existing->non_kept_packs, 0); - string_list_clear(&existing->cruft_packs, 0); -} - -/* - * Adds all packs hex strings (pack-$HASH) to either packs->non_kept - * or packs->kept based on whether each pack has a corresponding - * .keep file or not. Packs without a .keep file are not to be kept - * if we are going to pack everything into one file. - */ -static void existing_packs_collect(struct existing_packs *existing, - const struct string_list *extra_keep) -{ - struct packfile_store *packs = existing->repo->objects->packfiles; - struct packed_git *p; - struct strbuf buf = STRBUF_INIT; - - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { - size_t i; - const char *base; - - if (!p->pack_local) - continue; - - base = pack_basename(p); - - for (i = 0; i < extra_keep->nr; i++) - if (!fspathcmp(base, extra_keep->items[i].string)) - break; - - strbuf_reset(&buf); - strbuf_addstr(&buf, base); - strbuf_strip_suffix(&buf, ".pack"); - - if ((extra_keep->nr > 0 && i < extra_keep->nr) || p->pack_keep) - string_list_append(&existing->kept_packs, buf.buf); - else if (p->is_cruft) - string_list_append(&existing->cruft_packs, buf.buf); - else - string_list_append(&existing->non_kept_packs, buf.buf); - } - - string_list_sort(&existing->kept_packs); - string_list_sort(&existing->non_kept_packs); - string_list_sort(&existing->cruft_packs); - strbuf_release(&buf); -} - struct write_oid_context { struct child_process *cmd; const struct git_hash_algo *algop; diff --git a/repack.c b/repack.c index 3aaa351b5b5..9182e1c50bc 100644 --- a/repack.c +++ b/repack.c @@ -1,4 +1,5 @@ #include "git-compat-util.h" +#include "dir.h" #include "midx.h" #include "odb.h" #include "packfile.h" @@ -62,3 +63,159 @@ void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, unlink_pack_path(buf.buf, 1); strbuf_release(&buf); } + +#define DELETE_PACK 1 +#define RETAIN_PACK 2 + +void existing_packs_collect(struct existing_packs *existing, + const struct string_list *extra_keep) +{ + struct packfile_store *packs = existing->repo->objects->packfiles; + struct packed_git *p; + struct strbuf buf = STRBUF_INIT; + + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + size_t i; + const char *base; + + if (!p->pack_local) + continue; + + base = pack_basename(p); + + for (i = 0; i < extra_keep->nr; i++) + if (!fspathcmp(base, extra_keep->items[i].string)) + break; + + strbuf_reset(&buf); + strbuf_addstr(&buf, base); + strbuf_strip_suffix(&buf, ".pack"); + + if ((extra_keep->nr > 0 && i < extra_keep->nr) || p->pack_keep) + string_list_append(&existing->kept_packs, buf.buf); + else if (p->is_cruft) + string_list_append(&existing->cruft_packs, buf.buf); + else + string_list_append(&existing->non_kept_packs, buf.buf); + } + + string_list_sort(&existing->kept_packs); + string_list_sort(&existing->non_kept_packs); + string_list_sort(&existing->cruft_packs); + strbuf_release(&buf); +} + +int existing_packs_has_non_kept(const struct existing_packs *existing) +{ + return existing->non_kept_packs.nr || existing->cruft_packs.nr; +} + +static void existing_pack_mark_for_deletion(struct string_list_item *item) +{ + item->util = (void*)((uintptr_t)item->util | DELETE_PACK); +} + +static void existing_pack_unmark_for_deletion(struct string_list_item *item) +{ + item->util = (void*)((uintptr_t)item->util & ~DELETE_PACK); +} + +int existing_pack_is_marked_for_deletion(struct string_list_item *item) +{ + return (uintptr_t)item->util & DELETE_PACK; +} + +static void existing_packs_mark_retained(struct string_list_item *item) +{ + item->util = (void*)((uintptr_t)item->util | RETAIN_PACK); +} + +static int existing_pack_is_retained(struct string_list_item *item) +{ + return (uintptr_t)item->util & RETAIN_PACK; +} + +static void existing_packs_mark_for_deletion_1(const struct git_hash_algo *algop, + struct string_list *names, + struct string_list *list) +{ + struct string_list_item *item; + const size_t hexsz = algop->hexsz; + + for_each_string_list_item(item, list) { + char *sha1; + size_t len = strlen(item->string); + if (len < hexsz) + continue; + sha1 = item->string + len - hexsz; + + if (existing_pack_is_retained(item)) { + existing_pack_unmark_for_deletion(item); + } else if (!string_list_has_string(names, sha1)) { + /* + * Mark this pack for deletion, which ensures + * that this pack won't be included in a MIDX + * (if `--write-midx` was given) and that we + * will actually delete this pack (if `-d` was + * given). + */ + existing_pack_mark_for_deletion(item); + } + } +} + +void existing_packs_retain_cruft(struct existing_packs *existing, + struct packed_git *cruft) +{ + struct strbuf buf = STRBUF_INIT; + struct string_list_item *item; + + strbuf_addstr(&buf, pack_basename(cruft)); + strbuf_strip_suffix(&buf, ".pack"); + + item = string_list_lookup(&existing->cruft_packs, buf.buf); + if (!item) + BUG("could not find cruft pack '%s'", pack_basename(cruft)); + + existing_packs_mark_retained(item); + strbuf_release(&buf); +} + +void existing_packs_mark_for_deletion(struct existing_packs *existing, + struct string_list *names) + +{ + const struct git_hash_algo *algop = existing->repo->hash_algo; + existing_packs_mark_for_deletion_1(algop, names, + &existing->non_kept_packs); + existing_packs_mark_for_deletion_1(algop, names, + &existing->cruft_packs); +} + +static void remove_redundant_packs_1(struct repository *repo, + struct string_list *packs, + const char *packdir) +{ + struct string_list_item *item; + for_each_string_list_item(item, packs) { + if (!existing_pack_is_marked_for_deletion(item)) + continue; + repack_remove_redundant_pack(repo, packdir, item->string); + } +} + +void existing_packs_remove_redundant(struct existing_packs *existing, + const char *packdir) +{ + remove_redundant_packs_1(existing->repo, &existing->non_kept_packs, + packdir); + remove_redundant_packs_1(existing->repo, &existing->cruft_packs, + packdir); +} + +void existing_packs_release(struct existing_packs *existing) +{ + string_list_clear(&existing->kept_packs, 0); + string_list_clear(&existing->non_kept_packs, 0); + string_list_clear(&existing->cruft_packs, 0); +} diff --git a/repack.h b/repack.h index a62bfa2ff97..19796e22432 100644 --- a/repack.h +++ b/repack.h @@ -2,6 +2,7 @@ #define REPACK_H #include "list-objects-filter-options.h" +#include "string-list.h" struct pack_objects_args { char *window; @@ -31,4 +32,38 @@ void pack_objects_args_release(struct pack_objects_args *args); void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, const char *base_name); +struct repository; +struct packed_git; + +struct existing_packs { + struct repository *repo; + struct string_list kept_packs; + struct string_list non_kept_packs; + struct string_list cruft_packs; +}; + +#define EXISTING_PACKS_INIT { \ + .kept_packs = STRING_LIST_INIT_DUP, \ + .non_kept_packs = STRING_LIST_INIT_DUP, \ + .cruft_packs = STRING_LIST_INIT_DUP, \ +} + +/* + * Adds all packs hex strings (pack-$HASH) to either packs->non_kept + * or packs->kept based on whether each pack has a corresponding + * .keep file or not. Packs without a .keep file are not to be kept + * if we are going to pack everything into one file. + */ +void existing_packs_collect(struct existing_packs *existing, + const struct string_list *extra_keep); +int existing_packs_has_non_kept(const struct existing_packs *existing); +int existing_pack_is_marked_for_deletion(struct string_list_item *item); +void existing_packs_retain_cruft(struct existing_packs *existing, + struct packed_git *cruft); +void existing_packs_mark_for_deletion(struct existing_packs *existing, + struct string_list *names); +void existing_packs_remove_redundant(struct existing_packs *existing, + const char *packdir); +void existing_packs_release(struct existing_packs *existing); + #endif /* REPACK_H */ -- GitLab From 93876709017308bfe5fbdd026778425018aa3a7f Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:27 -0400 Subject: [PATCH 20/65] builtin/repack.c: rename "struct generated_pack_data" The name "generated_pack_data" is somewhat redundant, since the contents of the struct *is* the data associated with the generated pack. Rename the structure to just "generated_pack", resulting in less awkward function names, like "generated_pack_has_ext()" which is preferable to "generated_pack_data_has_ext()". Rename a few related functions to align with the convention that functions to do with a struct "S" should be prefixed with "S_". Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index a168c887911..a4d80b6b044 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -146,15 +146,15 @@ static struct { {".idx"}, }; -struct generated_pack_data { +struct generated_pack { struct tempfile *tempfiles[ARRAY_SIZE(exts)]; }; -static struct generated_pack_data *populate_pack_exts(const char *name) +static struct generated_pack *generated_pack_populate(const char *name) { struct stat statbuf; struct strbuf path = STRBUF_INIT; - struct generated_pack_data *data = xcalloc(1, sizeof(*data)); + struct generated_pack *pack = xcalloc(1, sizeof(*pack)); int i; for (i = 0; i < ARRAY_SIZE(exts); i++) { @@ -164,21 +164,21 @@ static struct generated_pack_data *populate_pack_exts(const char *name) if (stat(path.buf, &statbuf)) continue; - data->tempfiles[i] = register_tempfile(path.buf); + pack->tempfiles[i] = register_tempfile(path.buf); } strbuf_release(&path); - return data; + return pack; } -static int has_pack_ext(const struct generated_pack_data *data, - const char *ext) +static int generated_pack_has_ext(const struct generated_pack *pack, + const char *ext) { int i; for (i = 0; i < ARRAY_SIZE(exts); i++) { if (strcmp(exts[i].name, ext)) continue; - return !!data->tempfiles[i]; + return !!pack->tempfiles[i]; } BUG("unknown pack extension: '%s'", ext); } @@ -239,7 +239,7 @@ static void repack_promisor_objects(struct repository *repo, line.buf); write_promisor_file(promisor_name, NULL, 0); - item->util = populate_pack_exts(item->string); + item->util = generated_pack_populate(item->string); free(promisor_name); } @@ -780,8 +780,8 @@ static int write_midx_included_packs(struct string_list *include, * will suffice, so pick the first one.) */ for_each_string_list_item(item, names) { - struct generated_pack_data *data = item->util; - if (has_pack_ext(data, ".mtimes")) + struct generated_pack *pack = item->util; + if (generated_pack_has_ext(pack, ".mtimes")) continue; strvec_pushf(&cmd.args, "--preferred-pack=pack-%s.pack", @@ -864,7 +864,7 @@ static int finish_pack_objects_cmd(const struct git_hash_algo *algop, */ if (local) { item = string_list_append(names, line.buf); - item->util = populate_pack_exts(line.buf); + item->util = generated_pack_populate(line.buf); } } fclose(out); @@ -1435,7 +1435,7 @@ int cmd_repack(int argc, * Ok we have prepared all new packfiles. */ for_each_string_list_item(item, &names) { - struct generated_pack_data *data = item->util; + struct generated_pack *pack = item->util; for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { char *fname; @@ -1443,8 +1443,8 @@ int cmd_repack(int argc, fname = mkpathdup("%s/pack-%s%s", packdir, item->string, exts[ext].name); - if (data->tempfiles[ext]) { - const char *fname_old = get_tempfile_path(data->tempfiles[ext]); + if (pack->tempfiles[ext]) { + const char *fname_old = get_tempfile_path(pack->tempfiles[ext]); struct stat statbuffer; if (!stat(fname_old, &statbuffer)) { @@ -1452,7 +1452,7 @@ int cmd_repack(int argc, chmod(fname_old, statbuffer.st_mode); } - if (rename_tempfile(&data->tempfiles[ext], fname)) + if (rename_tempfile(&pack->tempfiles[ext], fname)) die_errno(_("renaming pack to '%s' failed"), fname); } else if (!exts[ext].optional) die(_("pack-objects did not write a '%s' file for pack %s-%s"), -- GitLab From 59f0759d260d705a0e3ad0397c2412d2c16cddb3 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:31 -0400 Subject: [PATCH 21/65] builtin/repack.c: factor our "generated_pack_install" Once all new packs are known to exist, 'repack' installs their contents from their temporary location into their permanent one. This is a semi-involved procedure for each pack, since for each extension (e.g., ".idx", ".pack", ".mtimes", and so on) we have to either: - adjust the filemode of the temporary file before renaming it into place, or - die() if we are missing a non-optional extension, or - unlink() any existing file for extensions that we did not generate (e.g., if a non-cruft pack we generated was identical to, say, a cruft pack which existed at the beginning of the process, we have to remove the ".mtimes" file). Extract this procedure into its own function, and call it "generated_pack_install"(). This will set us up for pulling this function out of the builtin entirely and making it part of the repack.h API, which will be done in a future commit. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 66 ++++++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 30 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index a4d80b6b044..8c3a5f4f80b 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -183,6 +183,38 @@ static int generated_pack_has_ext(const struct generated_pack *pack, BUG("unknown pack extension: '%s'", ext); } +static void generated_pack_install(struct generated_pack *pack, + const char *name) +{ + int ext; + for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { + char *fname; + + fname = mkpathdup("%s/pack-%s%s", packdir, name, + exts[ext].name); + + if (pack->tempfiles[ext]) { + const char *fname_old = get_tempfile_path(pack->tempfiles[ext]); + struct stat statbuffer; + + if (!stat(fname_old, &statbuffer)) { + statbuffer.st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); + chmod(fname_old, statbuffer.st_mode); + } + + if (rename_tempfile(&pack->tempfiles[ext], fname)) + die_errno(_("renaming pack to '%s' failed"), + fname); + } else if (!exts[ext].optional) + die(_("pack-objects did not write a '%s' file for pack %s-%s"), + exts[ext].name, packtmp, name); + else if (unlink(fname) < 0 && errno != ENOENT) + die_errno(_("could not unlink: %s"), fname); + + free(fname); + } +} + static void repack_promisor_objects(struct repository *repo, const struct pack_objects_args *args, struct string_list *names) @@ -1045,7 +1077,7 @@ int cmd_repack(int argc, struct existing_packs existing = EXISTING_PACKS_INIT; struct pack_geometry geometry = { 0 }; struct tempfile *refs_snapshot = NULL; - int i, ext, ret; + int i, ret; int show_progress; char **midx_pack_names = NULL; size_t midx_pack_names_nr = 0; @@ -1434,35 +1466,9 @@ int cmd_repack(int argc, /* * Ok we have prepared all new packfiles. */ - for_each_string_list_item(item, &names) { - struct generated_pack *pack = item->util; - - for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { - char *fname; - - fname = mkpathdup("%s/pack-%s%s", - packdir, item->string, exts[ext].name); - - if (pack->tempfiles[ext]) { - const char *fname_old = get_tempfile_path(pack->tempfiles[ext]); - struct stat statbuffer; - - if (!stat(fname_old, &statbuffer)) { - statbuffer.st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); - chmod(fname_old, statbuffer.st_mode); - } - - if (rename_tempfile(&pack->tempfiles[ext], fname)) - die_errno(_("renaming pack to '%s' failed"), fname); - } else if (!exts[ext].optional) - die(_("pack-objects did not write a '%s' file for pack %s-%s"), - exts[ext].name, packtmp, item->string); - else if (unlink(fname) < 0 && errno != ENOENT) - die_errno(_("could not unlink: %s"), fname); - - free(fname); - } - } + for_each_string_list_item(item, &names) + generated_pack_install((struct generated_pack *)item->util, + item->string); /* End of pack replacement. */ if (delete_redundant && pack_everything & ALL_INTO_ONE) -- GitLab From d99a1faa9d99d9c6d3be80fd52cb59c136b477bb Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:35 -0400 Subject: [PATCH 22/65] builtin/repack.c: pass "packtmp" to `generated_pack_populate()` In a similar spirit as previous commits, this function needs to know the temporary pack prefix, which it currently accesses through the static "packtmp" variable within builtin/repack.c. Pass it explicitly as a function parameter to facilitate moving this function out of builtin/repack.c entirely. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 8c3a5f4f80b..2141c43bd27 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -150,7 +150,8 @@ struct generated_pack { struct tempfile *tempfiles[ARRAY_SIZE(exts)]; }; -static struct generated_pack *generated_pack_populate(const char *name) +static struct generated_pack *generated_pack_populate(const char *name, + const char *packtmp) { struct stat statbuf; struct strbuf path = STRBUF_INIT; @@ -271,7 +272,7 @@ static void repack_promisor_objects(struct repository *repo, line.buf); write_promisor_file(promisor_name, NULL, 0); - item->util = generated_pack_populate(item->string); + item->util = generated_pack_populate(item->string, packtmp); free(promisor_name); } @@ -896,7 +897,7 @@ static int finish_pack_objects_cmd(const struct git_hash_algo *algop, */ if (local) { item = string_list_append(names, line.buf); - item->util = generated_pack_populate(line.buf); + item->util = generated_pack_populate(line.buf, packtmp); } } fclose(out); -- GitLab From 4bf53de93eedaf33fd9937da68b6e9d487a3b6d7 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:39 -0400 Subject: [PATCH 23/65] builtin/repack.c: provide pack locations to `generated_pack_install()` Repeat what was done in the preceding commit for the `generated_pack_install()` function, which needs both "packdir" and "packtmp". (As an aside, it is somewhat unfortunate that the final three parameters to this function are all "const char *", making errors like passing "packdir" and "packtmp" in the wrong order easy. We could define a new structure here, but that may be too heavy-handed.) Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 2141c43bd27..a4f0a194534 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -185,7 +185,8 @@ static int generated_pack_has_ext(const struct generated_pack *pack, } static void generated_pack_install(struct generated_pack *pack, - const char *name) + const char *name, + const char *packdir, const char *packtmp) { int ext; for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { @@ -1469,7 +1470,7 @@ int cmd_repack(int argc, */ for_each_string_list_item(item, &names) generated_pack_install((struct generated_pack *)item->util, - item->string); + item->string, packdir, packtmp); /* End of pack replacement. */ if (delete_redundant && pack_everything & ALL_INTO_ONE) -- GitLab From 494fee74dc4ea9561d5bbda63026bb1eb641c85c Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:43 -0400 Subject: [PATCH 24/65] repack: remove 'generated_pack' API from the builtin Now that we have factored the "generated_pack" API, we can move it to repack.ch, further slimming down builtin/repack.c. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 83 ------------------------------------------------ repack.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++ repack.h | 8 +++++ 3 files changed, 91 insertions(+), 83 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index a4f0a194534..b7826e676b3 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -134,89 +134,6 @@ static int write_oid(const struct object_id *oid, return 0; } -static struct { - const char *name; - unsigned optional:1; -} exts[] = { - {".pack"}, - {".rev", 1}, - {".mtimes", 1}, - {".bitmap", 1}, - {".promisor", 1}, - {".idx"}, -}; - -struct generated_pack { - struct tempfile *tempfiles[ARRAY_SIZE(exts)]; -}; - -static struct generated_pack *generated_pack_populate(const char *name, - const char *packtmp) -{ - struct stat statbuf; - struct strbuf path = STRBUF_INIT; - struct generated_pack *pack = xcalloc(1, sizeof(*pack)); - int i; - - for (i = 0; i < ARRAY_SIZE(exts); i++) { - strbuf_reset(&path); - strbuf_addf(&path, "%s-%s%s", packtmp, name, exts[i].name); - - if (stat(path.buf, &statbuf)) - continue; - - pack->tempfiles[i] = register_tempfile(path.buf); - } - - strbuf_release(&path); - return pack; -} - -static int generated_pack_has_ext(const struct generated_pack *pack, - const char *ext) -{ - int i; - for (i = 0; i < ARRAY_SIZE(exts); i++) { - if (strcmp(exts[i].name, ext)) - continue; - return !!pack->tempfiles[i]; - } - BUG("unknown pack extension: '%s'", ext); -} - -static void generated_pack_install(struct generated_pack *pack, - const char *name, - const char *packdir, const char *packtmp) -{ - int ext; - for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { - char *fname; - - fname = mkpathdup("%s/pack-%s%s", packdir, name, - exts[ext].name); - - if (pack->tempfiles[ext]) { - const char *fname_old = get_tempfile_path(pack->tempfiles[ext]); - struct stat statbuffer; - - if (!stat(fname_old, &statbuffer)) { - statbuffer.st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); - chmod(fname_old, statbuffer.st_mode); - } - - if (rename_tempfile(&pack->tempfiles[ext], fname)) - die_errno(_("renaming pack to '%s' failed"), - fname); - } else if (!exts[ext].optional) - die(_("pack-objects did not write a '%s' file for pack %s-%s"), - exts[ext].name, packtmp, name); - else if (unlink(fname) < 0 && errno != ENOENT) - die_errno(_("could not unlink: %s"), fname); - - free(fname); - } -} - static void repack_promisor_objects(struct repository *repo, const struct pack_objects_args *args, struct string_list *names) diff --git a/repack.c b/repack.c index 9182e1c50bc..d8afdd352d4 100644 --- a/repack.c +++ b/repack.c @@ -3,9 +3,11 @@ #include "midx.h" #include "odb.h" #include "packfile.h" +#include "path.h" #include "repack.h" #include "repository.h" #include "run-command.h" +#include "tempfile.h" void prepare_pack_objects(struct child_process *cmd, const struct pack_objects_args *args, @@ -219,3 +221,84 @@ void existing_packs_release(struct existing_packs *existing) string_list_clear(&existing->non_kept_packs, 0); string_list_clear(&existing->cruft_packs, 0); } + +static struct { + const char *name; + unsigned optional:1; +} exts[] = { + {".pack"}, + {".rev", 1}, + {".mtimes", 1}, + {".bitmap", 1}, + {".promisor", 1}, + {".idx"}, +}; + +struct generated_pack { + struct tempfile *tempfiles[ARRAY_SIZE(exts)]; +}; + +struct generated_pack *generated_pack_populate(const char *name, + const char *packtmp) +{ + struct stat statbuf; + struct strbuf path = STRBUF_INIT; + struct generated_pack *pack = xcalloc(1, sizeof(*pack)); + size_t i; + + for (i = 0; i < ARRAY_SIZE(exts); i++) { + strbuf_reset(&path); + strbuf_addf(&path, "%s-%s%s", packtmp, name, exts[i].name); + + if (stat(path.buf, &statbuf)) + continue; + + pack->tempfiles[i] = register_tempfile(path.buf); + } + + strbuf_release(&path); + return pack; +} + +int generated_pack_has_ext(const struct generated_pack *pack, const char *ext) +{ + size_t i; + for (i = 0; i < ARRAY_SIZE(exts); i++) { + if (strcmp(exts[i].name, ext)) + continue; + return !!pack->tempfiles[i]; + } + BUG("unknown pack extension: '%s'", ext); +} + +void generated_pack_install(struct generated_pack *pack, const char *name, + const char *packdir, const char *packtmp) +{ + size_t ext; + for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { + char *fname; + + fname = mkpathdup("%s/pack-%s%s", packdir, name, + exts[ext].name); + + if (pack->tempfiles[ext]) { + const char *fname_old = get_tempfile_path(pack->tempfiles[ext]); + struct stat statbuffer; + + if (!stat(fname_old, &statbuffer)) { + statbuffer.st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); + chmod(fname_old, statbuffer.st_mode); + } + + if (rename_tempfile(&pack->tempfiles[ext], fname)) + die_errno(_("renaming pack to '%s' failed"), + fname); + } else if (!exts[ext].optional) + die(_("pack-objects did not write a '%s' file for pack %s-%s"), + exts[ext].name, packtmp, name); + else if (unlink(fname) < 0 && errno != ENOENT) + die_errno(_("could not unlink: %s"), fname); + + free(fname); + } +} diff --git a/repack.h b/repack.h index 19796e22432..f37eb495249 100644 --- a/repack.h +++ b/repack.h @@ -66,4 +66,12 @@ void existing_packs_remove_redundant(struct existing_packs *existing, const char *packdir); void existing_packs_release(struct existing_packs *existing); +struct generated_pack; + +struct generated_pack *generated_pack_populate(const char *name, + const char *packtmp); +int generated_pack_has_ext(const struct generated_pack *pack, const char *ext); +void generated_pack_install(struct generated_pack *pack, const char *name, + const char *packdir, const char *packtmp); + #endif /* REPACK_H */ -- GitLab From f4f36b2990f15806d499fa6aaaed5459694e87a9 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:47 -0400 Subject: [PATCH 25/65] builtin/repack.c: pass "packtmp" to `repack_promisor_objects()` In a similar spirit as previous commit(s), pass the "packtmp" variable to "repack_promisor_objects()" as an explicit parameter of the function, preparing us to move this function in a following commit. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index b7826e676b3..ca7658e38fa 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -136,7 +136,8 @@ static int write_oid(const struct object_id *oid, static void repack_promisor_objects(struct repository *repo, const struct pack_objects_args *args, - struct string_list *names) + struct string_list *names, + const char *packtmp) { struct write_oid_context ctx; struct child_process cmd = CHILD_PROCESS_INIT; @@ -1199,7 +1200,7 @@ int cmd_repack(int argc, strvec_push(&cmd.args, "--delta-islands"); if (pack_everything & ALL_INTO_ONE) { - repack_promisor_objects(repo, &po_args, &names); + repack_promisor_objects(repo, &po_args, &names, packtmp); if (existing_packs_has_non_kept(&existing) && delete_redundant && -- GitLab From 9b063b13e28a1ac153c1c54a283164900bb09624 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:51 -0400 Subject: [PATCH 26/65] builtin/repack.c: remove "repack_promisor_objects()" from the builtin Now that we have properly factored the portion of the builtin which is responsible for repacking promisor objects, we can move that function (and associated dependencies) out of the builtin entirely. Similar to previous extractions, this function is declared in repack.h, but implemented in a separate repack-promisor.c file. This is done to separate promisor-specific repacking functionality from generic repack utilities (like "existing_packs", and "generated_pack" APIs). Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Makefile | 1 + builtin/repack.c | 95 ------------------------------------------ meson.build | 1 + repack-promisor.c | 102 ++++++++++++++++++++++++++++++++++++++++++++++ repack.h | 4 ++ 5 files changed, 108 insertions(+), 95 deletions(-) create mode 100644 repack-promisor.c diff --git a/Makefile b/Makefile index c0df6da2371..2a01bd92dc7 100644 --- a/Makefile +++ b/Makefile @@ -1137,6 +1137,7 @@ LIB_OBJS += refs/ref-cache.o LIB_OBJS += refspec.o LIB_OBJS += remote.o LIB_OBJS += repack.o +LIB_OBJS += repack-promisor.o LIB_OBJS += replace-object.o LIB_OBJS += repo-settings.o LIB_OBJS += repository.o diff --git a/builtin/repack.c b/builtin/repack.c index ca7658e38fa..18c3df72001 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -107,101 +107,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -struct write_oid_context { - struct child_process *cmd; - const struct git_hash_algo *algop; -}; - -/* - * Write oid to the given struct child_process's stdin, starting it first if - * necessary. - */ -static int write_oid(const struct object_id *oid, - struct packed_git *pack UNUSED, - uint32_t pos UNUSED, void *data) -{ - struct write_oid_context *ctx = data; - struct child_process *cmd = ctx->cmd; - - if (cmd->in == -1) { - if (start_command(cmd)) - die(_("could not start pack-objects to repack promisor objects")); - } - - if (write_in_full(cmd->in, oid_to_hex(oid), ctx->algop->hexsz) < 0 || - write_in_full(cmd->in, "\n", 1) < 0) - die(_("failed to feed promisor objects to pack-objects")); - return 0; -} - -static void repack_promisor_objects(struct repository *repo, - const struct pack_objects_args *args, - struct string_list *names, - const char *packtmp) -{ - struct write_oid_context ctx; - struct child_process cmd = CHILD_PROCESS_INIT; - FILE *out; - struct strbuf line = STRBUF_INIT; - - prepare_pack_objects(&cmd, args, packtmp); - cmd.in = -1; - - /* - * NEEDSWORK: Giving pack-objects only the OIDs without any ordering - * hints may result in suboptimal deltas in the resulting pack. See if - * the OIDs can be sent with fake paths such that pack-objects can use a - * {type -> existing pack order} ordering when computing deltas instead - * of a {type -> size} ordering, which may produce better deltas. - */ - ctx.cmd = &cmd; - ctx.algop = repo->hash_algo; - for_each_packed_object(repo, write_oid, &ctx, - FOR_EACH_OBJECT_PROMISOR_ONLY); - - if (cmd.in == -1) { - /* No packed objects; cmd was never started */ - child_process_clear(&cmd); - return; - } - - close(cmd.in); - - out = xfdopen(cmd.out, "r"); - while (strbuf_getline_lf(&line, out) != EOF) { - struct string_list_item *item; - char *promisor_name; - - if (line.len != repo->hash_algo->hexsz) - die(_("repack: Expecting full hex object ID lines only from pack-objects.")); - item = string_list_append(names, line.buf); - - /* - * pack-objects creates the .pack and .idx files, but not the - * .promisor file. Create the .promisor file, which is empty. - * - * NEEDSWORK: fetch-pack sometimes generates non-empty - * .promisor files containing the ref names and associated - * hashes at the point of generation of the corresponding - * packfile, but this would not preserve their contents. Maybe - * concatenate the contents of all .promisor files instead of - * just creating a new empty file. - */ - promisor_name = mkpathdup("%s-%s.promisor", packtmp, - line.buf); - write_promisor_file(promisor_name, NULL, 0); - - item->util = generated_pack_populate(item->string, packtmp); - - free(promisor_name); - } - - fclose(out); - if (finish_command(&cmd)) - die(_("could not finish pack-objects to repack promisor objects")); - strbuf_release(&line); -} - struct pack_geometry { struct packed_git **pack; uint32_t pack_nr, pack_alloc; diff --git a/meson.build b/meson.build index 993e8f368ff..1fbb8c52a64 100644 --- a/meson.build +++ b/meson.build @@ -463,6 +463,7 @@ libgit_sources = [ 'reftable/writer.c', 'remote.c', 'repack.c', + 'repack-promisor.c', 'replace-object.c', 'repo-settings.c', 'repository.c', diff --git a/repack-promisor.c b/repack-promisor.c new file mode 100644 index 00000000000..8bf42fc7157 --- /dev/null +++ b/repack-promisor.c @@ -0,0 +1,102 @@ +#include "git-compat-util.h" +#include "repack.h" +#include "run-command.h" +#include "hex.h" +#include "repository.h" +#include "packfile.h" +#include "path.h" +#include "pack.h" + +struct write_oid_context { + struct child_process *cmd; + const struct git_hash_algo *algop; +}; + +/* + * Write oid to the given struct child_process's stdin, starting it first if + * necessary. + */ +static int write_oid(const struct object_id *oid, + struct packed_git *pack UNUSED, + uint32_t pos UNUSED, void *data) +{ + struct write_oid_context *ctx = data; + struct child_process *cmd = ctx->cmd; + + if (cmd->in == -1) { + if (start_command(cmd)) + die(_("could not start pack-objects to repack promisor objects")); + } + + if (write_in_full(cmd->in, oid_to_hex(oid), ctx->algop->hexsz) < 0 || + write_in_full(cmd->in, "\n", 1) < 0) + die(_("failed to feed promisor objects to pack-objects")); + return 0; +} + +void repack_promisor_objects(struct repository *repo, + const struct pack_objects_args *args, + struct string_list *names, const char *packtmp) +{ + struct write_oid_context ctx; + struct child_process cmd = CHILD_PROCESS_INIT; + FILE *out; + struct strbuf line = STRBUF_INIT; + + prepare_pack_objects(&cmd, args, packtmp); + cmd.in = -1; + + /* + * NEEDSWORK: Giving pack-objects only the OIDs without any ordering + * hints may result in suboptimal deltas in the resulting pack. See if + * the OIDs can be sent with fake paths such that pack-objects can use a + * {type -> existing pack order} ordering when computing deltas instead + * of a {type -> size} ordering, which may produce better deltas. + */ + ctx.cmd = &cmd; + ctx.algop = repo->hash_algo; + for_each_packed_object(repo, write_oid, &ctx, + FOR_EACH_OBJECT_PROMISOR_ONLY); + + if (cmd.in == -1) { + /* No packed objects; cmd was never started */ + child_process_clear(&cmd); + return; + } + + close(cmd.in); + + out = xfdopen(cmd.out, "r"); + while (strbuf_getline_lf(&line, out) != EOF) { + struct string_list_item *item; + char *promisor_name; + + if (line.len != repo->hash_algo->hexsz) + die(_("repack: Expecting full hex object ID lines only from pack-objects.")); + item = string_list_append(names, line.buf); + + /* + * pack-objects creates the .pack and .idx files, but not the + * .promisor file. Create the .promisor file, which is empty. + * + * NEEDSWORK: fetch-pack sometimes generates non-empty + * .promisor files containing the ref names and associated + * hashes at the point of generation of the corresponding + * packfile, but this would not preserve their contents. Maybe + * concatenate the contents of all .promisor files instead of + * just creating a new empty file. + */ + promisor_name = mkpathdup("%s-%s.promisor", packtmp, + line.buf); + write_promisor_file(promisor_name, NULL, 0); + + item->util = generated_pack_populate(item->string, packtmp); + + free(promisor_name); + } + + fclose(out); + if (finish_command(&cmd)) + die(_("could not finish pack-objects to repack promisor objects")); + strbuf_release(&line); +} diff --git a/repack.h b/repack.h index f37eb495249..19dc4fd738d 100644 --- a/repack.h +++ b/repack.h @@ -74,4 +74,8 @@ int generated_pack_has_ext(const struct generated_pack *pack, const char *ext); void generated_pack_install(struct generated_pack *pack, const char *name, const char *packdir, const char *packtmp); +void repack_promisor_objects(struct repository *repo, + const struct pack_objects_args *args, + struct string_list *names, const char *packtmp); + #endif /* REPACK_H */ -- GitLab From ee590bc1f23df2a0034c09bb0779c3c0e8fe25b5 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:54 -0400 Subject: [PATCH 27/65] builtin/repack.c: rename various pack_geometry functions Rename functions which work with 'struct pack_geometry' to begin with "pack_geometry_". Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 52 ++++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 18c3df72001..2ce1ae33642 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -115,17 +115,17 @@ struct pack_geometry { int split_factor; }; -static uint32_t geometry_pack_weight(struct packed_git *p) +static uint32_t pack_geometry_weight(struct packed_git *p) { if (open_pack_index(p)) die(_("cannot open index for %s"), p->pack_name); return p->num_objects; } -static int geometry_cmp(const void *va, const void *vb) +static int pack_geometry_cmp(const void *va, const void *vb) { - uint32_t aw = geometry_pack_weight(*(struct packed_git **)va), - bw = geometry_pack_weight(*(struct packed_git **)vb); + uint32_t aw = pack_geometry_weight(*(struct packed_git **)va), + bw = pack_geometry_weight(*(struct packed_git **)vb); if (aw < bw) return -1; @@ -134,7 +134,7 @@ static int geometry_cmp(const void *va, const void *vb) return 0; } -static void init_pack_geometry(struct pack_geometry *geometry, +static void pack_geometry_init(struct pack_geometry *geometry, struct existing_packs *existing, const struct pack_objects_args *args) { @@ -184,11 +184,11 @@ static void init_pack_geometry(struct pack_geometry *geometry, geometry->pack_nr++; } - QSORT(geometry->pack, geometry->pack_nr, geometry_cmp); + QSORT(geometry->pack, geometry->pack_nr, pack_geometry_cmp); strbuf_release(&buf); } -static void split_pack_geometry(struct pack_geometry *geometry) +static void pack_geometry_split(struct pack_geometry *geometry) { uint32_t i; uint32_t split; @@ -208,13 +208,13 @@ static void split_pack_geometry(struct pack_geometry *geometry) struct packed_git *prev = geometry->pack[i - 1]; if (unsigned_mult_overflows(geometry->split_factor, - geometry_pack_weight(prev))) + pack_geometry_weight(prev))) die(_("pack %s too large to consider in geometric " "progression"), prev->pack_name); - if (geometry_pack_weight(ours) < - geometry->split_factor * geometry_pack_weight(prev)) + if (pack_geometry_weight(ours) < + geometry->split_factor * pack_geometry_weight(prev)) break; } @@ -242,9 +242,9 @@ static void split_pack_geometry(struct pack_geometry *geometry) for (i = 0; i < split; i++) { struct packed_git *p = geometry->pack[i]; - if (unsigned_add_overflows(total_size, geometry_pack_weight(p))) + if (unsigned_add_overflows(total_size, pack_geometry_weight(p))) die(_("pack %s too large to roll up"), p->pack_name); - total_size += geometry_pack_weight(p); + total_size += pack_geometry_weight(p); } for (i = split; i < geometry->pack_nr; i++) { struct packed_git *ours = geometry->pack[i]; @@ -253,15 +253,15 @@ static void split_pack_geometry(struct pack_geometry *geometry) total_size)) die(_("pack %s too large to roll up"), ours->pack_name); - if (geometry_pack_weight(ours) < + if (pack_geometry_weight(ours) < geometry->split_factor * total_size) { if (unsigned_add_overflows(total_size, - geometry_pack_weight(ours))) + pack_geometry_weight(ours))) die(_("pack %s too large to roll up"), ours->pack_name); split++; - total_size += geometry_pack_weight(ours); + total_size += pack_geometry_weight(ours); } else break; } @@ -269,7 +269,7 @@ static void split_pack_geometry(struct pack_geometry *geometry) geometry->split = split; } -static struct packed_git *get_preferred_pack(struct pack_geometry *geometry) +static struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry) { uint32_t i; @@ -304,9 +304,9 @@ static struct packed_git *get_preferred_pack(struct pack_geometry *geometry) return NULL; } -static void geometry_remove_redundant_packs(struct pack_geometry *geometry, - struct string_list *names, - struct existing_packs *existing) +static void pack_geometry_remove_redundant(struct pack_geometry *geometry, + struct string_list *names, + struct existing_packs *existing) { const struct git_hash_algo *algop = existing->repo->hash_algo; struct strbuf buf = STRBUF_INIT; @@ -332,7 +332,7 @@ static void geometry_remove_redundant_packs(struct pack_geometry *geometry, strbuf_release(&buf); } -static void free_pack_geometry(struct pack_geometry *geometry) +static void pack_geometry_release(struct pack_geometry *geometry) { if (!geometry) return; @@ -599,7 +599,7 @@ static int write_midx_included_packs(struct string_list *include, { struct child_process cmd = CHILD_PROCESS_INIT; struct string_list_item *item; - struct packed_git *preferred = get_preferred_pack(geometry); + struct packed_git *preferred = pack_geometry_preferred_pack(geometry); FILE *in; int ret; @@ -1063,8 +1063,8 @@ int cmd_repack(int argc, if (geometry.split_factor) { if (pack_everything) die(_("options '%s' and '%s' cannot be used together"), "--geometric", "-A/-a"); - init_pack_geometry(&geometry, &existing, &po_args); - split_pack_geometry(&geometry); + pack_geometry_init(&geometry, &existing, &po_args); + pack_geometry_split(&geometry); } prepare_pack_objects(&cmd, &po_args, packtmp); @@ -1324,8 +1324,8 @@ int cmd_repack(int argc, existing_packs_remove_redundant(&existing, packdir); if (geometry.split_factor) - geometry_remove_redundant_packs(&geometry, &names, - &existing); + pack_geometry_remove_redundant(&geometry, &names, + &existing); if (show_progress) opts |= PRUNE_PACKED_VERBOSE; prune_packed_objects(opts); @@ -1352,7 +1352,7 @@ int cmd_repack(int argc, string_list_clear(&keep_pack_list, 0); string_list_clear(&names, 1); existing_packs_release(&existing); - free_pack_geometry(&geometry); + pack_geometry_release(&geometry); for (size_t i = 0; i < midx_pack_names_nr; i++) free(midx_pack_names[i]); free(midx_pack_names); -- GitLab From ce852b95800c81c17c08f7a45425cac52d208279 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:58 -0400 Subject: [PATCH 28/65] builtin/repack.c: pass 'pack_kept_objects' to `pack_geometry_init()` Prepare to move pack_geometry-related APIs to their own compilation unit by passing in the static "pack_kept_objects" variable directly as a parameter to the 'pack_geometry_init()' function. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 2ce1ae33642..60dce455567 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -136,7 +136,8 @@ static int pack_geometry_cmp(const void *va, const void *vb) static void pack_geometry_init(struct pack_geometry *geometry, struct existing_packs *existing, - const struct pack_objects_args *args) + const struct pack_objects_args *args, + int pack_kept_objects) { struct packfile_store *packs = existing->repo->objects->packfiles; struct packed_git *p; @@ -1063,7 +1064,8 @@ int cmd_repack(int argc, if (geometry.split_factor) { if (pack_everything) die(_("options '%s' and '%s' cannot be used together"), "--geometric", "-A/-a"); - pack_geometry_init(&geometry, &existing, &po_args); + pack_geometry_init(&geometry, &existing, &po_args, + pack_kept_objects); pack_geometry_split(&geometry); } -- GitLab From 4738f57237927d34c7e13a1ff29c1000eda2ffca Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:02 -0400 Subject: [PATCH 29/65] builtin/repack.c: pass 'packdir' to `pack_geometry_remove_redundant()` For similar reasons as the preceding commit, pass the "packdir" variable directly to `pack_geometry_remove_redundant()` as a parameter to the function. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 60dce455567..9e2523d9483 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -307,7 +307,8 @@ static struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geo static void pack_geometry_remove_redundant(struct pack_geometry *geometry, struct string_list *names, - struct existing_packs *existing) + struct existing_packs *existing, + const char *packdir) { const struct git_hash_algo *algop = existing->repo->hash_algo; struct strbuf buf = STRBUF_INIT; @@ -1327,7 +1328,7 @@ int cmd_repack(int argc, if (geometry.split_factor) pack_geometry_remove_redundant(&geometry, &names, - &existing); + &existing, packdir); if (show_progress) opts |= PRUNE_PACKED_VERBOSE; prune_packed_objects(opts); -- GitLab From eab133767233eaeb9bd2c78108f20059e4ae7675 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:06 -0400 Subject: [PATCH 30/65] repack: remove pack_geometry API from the builtin Now that the pack_geometry API is fully factored and isolated from the rest of the builtin, declare it within repack.h and move its implementation to "repack-geometry.c" as a separate component. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Makefile | 1 + builtin/repack.c | 235 ---------------------------------------------- meson.build | 1 + repack-geometry.c | 233 +++++++++++++++++++++++++++++++++++++++++++++ repack.h | 20 ++++ 5 files changed, 255 insertions(+), 235 deletions(-) create mode 100644 repack-geometry.c diff --git a/Makefile b/Makefile index 2a01bd92dc7..3ee8d27dba2 100644 --- a/Makefile +++ b/Makefile @@ -1137,6 +1137,7 @@ LIB_OBJS += refs/ref-cache.o LIB_OBJS += refspec.o LIB_OBJS += remote.o LIB_OBJS += repack.o +LIB_OBJS += repack-geometry.o LIB_OBJS += repack-promisor.o LIB_OBJS += replace-object.o LIB_OBJS += repo-settings.o diff --git a/builtin/repack.c b/builtin/repack.c index 9e2523d9483..b49e2dab9a6 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -107,241 +107,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -struct pack_geometry { - struct packed_git **pack; - uint32_t pack_nr, pack_alloc; - uint32_t split; - - int split_factor; -}; - -static uint32_t pack_geometry_weight(struct packed_git *p) -{ - if (open_pack_index(p)) - die(_("cannot open index for %s"), p->pack_name); - return p->num_objects; -} - -static int pack_geometry_cmp(const void *va, const void *vb) -{ - uint32_t aw = pack_geometry_weight(*(struct packed_git **)va), - bw = pack_geometry_weight(*(struct packed_git **)vb); - - if (aw < bw) - return -1; - if (aw > bw) - return 1; - return 0; -} - -static void pack_geometry_init(struct pack_geometry *geometry, - struct existing_packs *existing, - const struct pack_objects_args *args, - int pack_kept_objects) -{ - struct packfile_store *packs = existing->repo->objects->packfiles; - struct packed_git *p; - struct strbuf buf = STRBUF_INIT; - - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { - if (args->local && !p->pack_local) - /* - * When asked to only repack local packfiles we skip - * over any packfiles that are borrowed from alternate - * object directories. - */ - continue; - - if (!pack_kept_objects) { - /* - * Any pack that has its pack_keep bit set will - * appear in existing->kept_packs below, but - * this saves us from doing a more expensive - * check. - */ - if (p->pack_keep) - continue; - - /* - * The pack may be kept via the --keep-pack - * option; check 'existing->kept_packs' to - * determine whether to ignore it. - */ - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(p)); - strbuf_strip_suffix(&buf, ".pack"); - - if (string_list_has_string(&existing->kept_packs, buf.buf)) - continue; - } - if (p->is_cruft) - continue; - - ALLOC_GROW(geometry->pack, - geometry->pack_nr + 1, - geometry->pack_alloc); - - geometry->pack[geometry->pack_nr] = p; - geometry->pack_nr++; - } - - QSORT(geometry->pack, geometry->pack_nr, pack_geometry_cmp); - strbuf_release(&buf); -} - -static void pack_geometry_split(struct pack_geometry *geometry) -{ - uint32_t i; - uint32_t split; - off_t total_size = 0; - - if (!geometry->pack_nr) { - geometry->split = geometry->pack_nr; - return; - } - - /* - * First, count the number of packs (in descending order of size) which - * already form a geometric progression. - */ - for (i = geometry->pack_nr - 1; i > 0; i--) { - struct packed_git *ours = geometry->pack[i]; - struct packed_git *prev = geometry->pack[i - 1]; - - if (unsigned_mult_overflows(geometry->split_factor, - pack_geometry_weight(prev))) - die(_("pack %s too large to consider in geometric " - "progression"), - prev->pack_name); - - if (pack_geometry_weight(ours) < - geometry->split_factor * pack_geometry_weight(prev)) - break; - } - - split = i; - - if (split) { - /* - * Move the split one to the right, since the top element in the - * last-compared pair can't be in the progression. Only do this - * when we split in the middle of the array (otherwise if we got - * to the end, then the split is in the right place). - */ - split++; - } - - /* - * Then, anything to the left of 'split' must be in a new pack. But, - * creating that new pack may cause packs in the heavy half to no longer - * form a geometric progression. - * - * Compute an expected size of the new pack, and then determine how many - * packs in the heavy half need to be joined into it (if any) to restore - * the geometric progression. - */ - for (i = 0; i < split; i++) { - struct packed_git *p = geometry->pack[i]; - - if (unsigned_add_overflows(total_size, pack_geometry_weight(p))) - die(_("pack %s too large to roll up"), p->pack_name); - total_size += pack_geometry_weight(p); - } - for (i = split; i < geometry->pack_nr; i++) { - struct packed_git *ours = geometry->pack[i]; - - if (unsigned_mult_overflows(geometry->split_factor, - total_size)) - die(_("pack %s too large to roll up"), ours->pack_name); - - if (pack_geometry_weight(ours) < - geometry->split_factor * total_size) { - if (unsigned_add_overflows(total_size, - pack_geometry_weight(ours))) - die(_("pack %s too large to roll up"), - ours->pack_name); - - split++; - total_size += pack_geometry_weight(ours); - } else - break; - } - - geometry->split = split; -} - -static struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry) -{ - uint32_t i; - - if (!geometry) { - /* - * No geometry means either an all-into-one repack (in which - * case there is only one pack left and it is the largest) or an - * incremental one. - * - * If repacking incrementally, then we could check the size of - * all packs to determine which should be preferred, but leave - * this for later. - */ - return NULL; - } - if (geometry->split == geometry->pack_nr) - return NULL; - - /* - * The preferred pack is the largest pack above the split line. In - * other words, it is the largest pack that does not get rolled up in - * the geometric repack. - */ - for (i = geometry->pack_nr; i > geometry->split; i--) - /* - * A pack that is not local would never be included in a - * multi-pack index. We thus skip over any non-local packs. - */ - if (geometry->pack[i - 1]->pack_local) - return geometry->pack[i - 1]; - - return NULL; -} - -static void pack_geometry_remove_redundant(struct pack_geometry *geometry, - struct string_list *names, - struct existing_packs *existing, - const char *packdir) -{ - const struct git_hash_algo *algop = existing->repo->hash_algo; - struct strbuf buf = STRBUF_INIT; - uint32_t i; - - for (i = 0; i < geometry->split; i++) { - struct packed_git *p = geometry->pack[i]; - if (string_list_has_string(names, hash_to_hex_algop(p->hash, - algop))) - continue; - - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(p)); - strbuf_strip_suffix(&buf, ".pack"); - - if ((p->pack_keep) || - (string_list_has_string(&existing->kept_packs, buf.buf))) - continue; - - repack_remove_redundant_pack(existing->repo, packdir, buf.buf); - } - - strbuf_release(&buf); -} - -static void pack_geometry_release(struct pack_geometry *geometry) -{ - if (!geometry) - return; - - free(geometry->pack); -} - static int midx_has_unknown_packs(char **midx_pack_names, size_t midx_pack_names_nr, struct string_list *include, diff --git a/meson.build b/meson.build index 1fbb8c52a64..47b05089eec 100644 --- a/meson.build +++ b/meson.build @@ -463,6 +463,7 @@ libgit_sources = [ 'reftable/writer.c', 'remote.c', 'repack.c', + 'repack-geometry.c', 'repack-promisor.c', 'replace-object.c', 'repo-settings.c', diff --git a/repack-geometry.c b/repack-geometry.c new file mode 100644 index 00000000000..a879f2fe49c --- /dev/null +++ b/repack-geometry.c @@ -0,0 +1,233 @@ +#define DISABLE_SIGN_COMPARE_WARNINGS + +#include "git-compat-util.h" +#include "repack.h" +#include "hex.h" +#include "packfile.h" + +static uint32_t pack_geometry_weight(struct packed_git *p) +{ + if (open_pack_index(p)) + die(_("cannot open index for %s"), p->pack_name); + return p->num_objects; +} + +static int pack_geometry_cmp(const void *va, const void *vb) +{ + uint32_t aw = pack_geometry_weight(*(struct packed_git **)va), + bw = pack_geometry_weight(*(struct packed_git **)vb); + + if (aw < bw) + return -1; + if (aw > bw) + return 1; + return 0; +} + +void pack_geometry_init(struct pack_geometry *geometry, + struct existing_packs *existing, + const struct pack_objects_args *args, + int pack_kept_objects) +{ + struct packfile_store *packs = existing->repo->objects->packfiles; + struct packed_git *p; + struct strbuf buf = STRBUF_INIT; + + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + if (args->local && !p->pack_local) + /* + * When asked to only repack local packfiles we skip + * over any packfiles that are borrowed from alternate + * object directories. + */ + continue; + + if (!pack_kept_objects) { + /* + * Any pack that has its pack_keep bit set will + * appear in existing->kept_packs below, but + * this saves us from doing a more expensive + * check. + */ + if (p->pack_keep) + continue; + + /* + * The pack may be kept via the --keep-pack + * option; check 'existing->kept_packs' to + * determine whether to ignore it. + */ + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + + if (string_list_has_string(&existing->kept_packs, buf.buf)) + continue; + } + if (p->is_cruft) + continue; + + ALLOC_GROW(geometry->pack, + geometry->pack_nr + 1, + geometry->pack_alloc); + + geometry->pack[geometry->pack_nr] = p; + geometry->pack_nr++; + } + + QSORT(geometry->pack, geometry->pack_nr, pack_geometry_cmp); + strbuf_release(&buf); +} + +void pack_geometry_split(struct pack_geometry *geometry) +{ + uint32_t i; + uint32_t split; + off_t total_size = 0; + + if (!geometry->pack_nr) { + geometry->split = geometry->pack_nr; + return; + } + + /* + * First, count the number of packs (in descending order of size) which + * already form a geometric progression. + */ + for (i = geometry->pack_nr - 1; i > 0; i--) { + struct packed_git *ours = geometry->pack[i]; + struct packed_git *prev = geometry->pack[i - 1]; + + if (unsigned_mult_overflows(geometry->split_factor, + pack_geometry_weight(prev))) + die(_("pack %s too large to consider in geometric " + "progression"), + prev->pack_name); + + if (pack_geometry_weight(ours) < + geometry->split_factor * pack_geometry_weight(prev)) + break; + } + + split = i; + + if (split) { + /* + * Move the split one to the right, since the top element in the + * last-compared pair can't be in the progression. Only do this + * when we split in the middle of the array (otherwise if we got + * to the end, then the split is in the right place). + */ + split++; + } + + /* + * Then, anything to the left of 'split' must be in a new pack. But, + * creating that new pack may cause packs in the heavy half to no longer + * form a geometric progression. + * + * Compute an expected size of the new pack, and then determine how many + * packs in the heavy half need to be joined into it (if any) to restore + * the geometric progression. + */ + for (i = 0; i < split; i++) { + struct packed_git *p = geometry->pack[i]; + + if (unsigned_add_overflows(total_size, pack_geometry_weight(p))) + die(_("pack %s too large to roll up"), p->pack_name); + total_size += pack_geometry_weight(p); + } + for (i = split; i < geometry->pack_nr; i++) { + struct packed_git *ours = geometry->pack[i]; + + if (unsigned_mult_overflows(geometry->split_factor, + total_size)) + die(_("pack %s too large to roll up"), ours->pack_name); + + if (pack_geometry_weight(ours) < + geometry->split_factor * total_size) { + if (unsigned_add_overflows(total_size, + pack_geometry_weight(ours))) + die(_("pack %s too large to roll up"), + ours->pack_name); + + split++; + total_size += pack_geometry_weight(ours); + } else + break; + } + + geometry->split = split; +} + +struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry) +{ + uint32_t i; + + if (!geometry) { + /* + * No geometry means either an all-into-one repack (in which + * case there is only one pack left and it is the largest) or an + * incremental one. + * + * If repacking incrementally, then we could check the size of + * all packs to determine which should be preferred, but leave + * this for later. + */ + return NULL; + } + if (geometry->split == geometry->pack_nr) + return NULL; + + /* + * The preferred pack is the largest pack above the split line. In + * other words, it is the largest pack that does not get rolled up in + * the geometric repack. + */ + for (i = geometry->pack_nr; i > geometry->split; i--) + /* + * A pack that is not local would never be included in a + * multi-pack index. We thus skip over any non-local packs. + */ + if (geometry->pack[i - 1]->pack_local) + return geometry->pack[i - 1]; + + return NULL; +} + +void pack_geometry_remove_redundant(struct pack_geometry *geometry, + struct string_list *names, + struct existing_packs *existing, + const char *packdir) +{ + const struct git_hash_algo *algop = existing->repo->hash_algo; + struct strbuf buf = STRBUF_INIT; + uint32_t i; + + for (i = 0; i < geometry->split; i++) { + struct packed_git *p = geometry->pack[i]; + if (string_list_has_string(names, hash_to_hex_algop(p->hash, + algop))) + continue; + + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + + if ((p->pack_keep) || + (string_list_has_string(&existing->kept_packs, buf.buf))) + continue; + + repack_remove_redundant_pack(existing->repo, packdir, buf.buf); + } + + strbuf_release(&buf); +} + +void pack_geometry_release(struct pack_geometry *geometry) +{ + if (!geometry) + return; + + free(geometry->pack); +} diff --git a/repack.h b/repack.h index 19dc4fd738d..cea7969ae47 100644 --- a/repack.h +++ b/repack.h @@ -78,4 +78,24 @@ void repack_promisor_objects(struct repository *repo, const struct pack_objects_args *args, struct string_list *names, const char *packtmp); +struct pack_geometry { + struct packed_git **pack; + uint32_t pack_nr, pack_alloc; + uint32_t split; + + int split_factor; +}; + +void pack_geometry_init(struct pack_geometry *geometry, + struct existing_packs *existing, + const struct pack_objects_args *args, + int pack_kept_objects); +void pack_geometry_split(struct pack_geometry *geometry); +struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry); +void pack_geometry_remove_redundant(struct pack_geometry *geometry, + struct string_list *names, + struct existing_packs *existing, + const char *packdir); +void pack_geometry_release(struct pack_geometry *geometry); + #endif /* REPACK_H */ -- GitLab From d5adcba5ee1e78d85ff60520862c2c4a3dfb3d9c Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:10 -0400 Subject: [PATCH 31/65] builtin/repack.c: remove ref snapshotting from builtin When writing a MIDX, 'git repack' takes a snapshot of the repository's references and writes the result out to a file, which it then passes to 'git multi-pack-index write' via the '--refs-snapshot'. This is done in order to make bitmap selections with respect to what we are packing, thus avoiding a race where an incoming reference update causes us to try and write a bitmap for a commit not present in the MIDX. Extract this functionality out into a new repack-midx.c compilation unit, and expose the necessary functions via the repack.h API. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Makefile | 1 + builtin/repack.c | 68 ------------------------------------------ meson.build | 1 + repack-midx.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++ repack.h | 4 +++ 5 files changed, 83 insertions(+), 68 deletions(-) create mode 100644 repack-midx.c diff --git a/Makefile b/Makefile index 3ee8d27dba2..b2142771635 100644 --- a/Makefile +++ b/Makefile @@ -1138,6 +1138,7 @@ LIB_OBJS += refspec.o LIB_OBJS += remote.o LIB_OBJS += repack.o LIB_OBJS += repack-geometry.o +LIB_OBJS += repack-midx.o LIB_OBJS += repack-promisor.o LIB_OBJS += replace-object.o LIB_OBJS += repo-settings.o diff --git a/builtin/repack.c b/builtin/repack.c index b49e2dab9a6..5a1f1cb5629 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -178,74 +178,6 @@ static int midx_has_unknown_packs(char **midx_pack_names, return 0; } -struct midx_snapshot_ref_data { - struct repository *repo; - struct tempfile *f; - struct oidset seen; - int preferred; -}; - -static int midx_snapshot_ref_one(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flag UNUSED, void *_data) -{ - struct midx_snapshot_ref_data *data = _data; - struct object_id peeled; - - if (!peel_iterated_oid(data->repo, oid, &peeled)) - oid = &peeled; - - if (oidset_insert(&data->seen, oid)) - return 0; /* already seen */ - - if (odb_read_object_info(data->repo->objects, oid, NULL) != OBJ_COMMIT) - return 0; - - fprintf(data->f->fp, "%s%s\n", data->preferred ? "+" : "", - oid_to_hex(oid)); - - return 0; -} - -static void midx_snapshot_refs(struct repository *repo, struct tempfile *f) -{ - struct midx_snapshot_ref_data data; - const struct string_list *preferred = bitmap_preferred_tips(repo); - - data.repo = repo; - data.f = f; - data.preferred = 0; - oidset_init(&data.seen, 0); - - if (!fdopen_tempfile(f, "w")) - die(_("could not open tempfile %s for writing"), - get_tempfile_path(f)); - - if (preferred) { - struct string_list_item *item; - - data.preferred = 1; - for_each_string_list_item(item, preferred) - refs_for_each_ref_in(get_main_ref_store(repo), - item->string, - midx_snapshot_ref_one, &data); - data.preferred = 0; - } - - refs_for_each_ref(get_main_ref_store(repo), - midx_snapshot_ref_one, &data); - - if (close_tempfile_gently(f)) { - int save_errno = errno; - delete_tempfile(&f); - errno = save_errno; - die_errno(_("could not close refs snapshot tempfile")); - } - - oidset_clear(&data.seen); -} - static void midx_included_packs(struct string_list *include, struct existing_packs *existing, char **midx_pack_names, diff --git a/meson.build b/meson.build index 47b05089eec..0423ed30c41 100644 --- a/meson.build +++ b/meson.build @@ -464,6 +464,7 @@ libgit_sources = [ 'remote.c', 'repack.c', 'repack-geometry.c', + 'repack-midx.c', 'repack-promisor.c', 'replace-object.c', 'repo-settings.c', diff --git a/repack-midx.c b/repack-midx.c new file mode 100644 index 00000000000..354df729a5e --- /dev/null +++ b/repack-midx.c @@ -0,0 +1,77 @@ +#include "git-compat-util.h" +#include "repack.h" +#include "hash.h" +#include "hex.h" +#include "odb.h" +#include "oidset.h" +#include "pack-bitmap.h" +#include "refs.h" +#include "tempfile.h" + +struct midx_snapshot_ref_data { + struct repository *repo; + struct tempfile *f; + struct oidset seen; + int preferred; +}; + +static int midx_snapshot_ref_one(const char *refname UNUSED, + const char *referent UNUSED, + const struct object_id *oid, + int flag UNUSED, void *_data) +{ + struct midx_snapshot_ref_data *data = _data; + struct object_id peeled; + + if (!peel_iterated_oid(data->repo, oid, &peeled)) + oid = &peeled; + + if (oidset_insert(&data->seen, oid)) + return 0; /* already seen */ + + if (odb_read_object_info(data->repo->objects, oid, NULL) != OBJ_COMMIT) + return 0; + + fprintf(data->f->fp, "%s%s\n", data->preferred ? "+" : "", + oid_to_hex(oid)); + + return 0; +} + +void midx_snapshot_refs(struct repository *repo, struct tempfile *f) +{ + struct midx_snapshot_ref_data data; + const struct string_list *preferred = bitmap_preferred_tips(repo); + + data.repo = repo; + data.f = f; + data.preferred = 0; + oidset_init(&data.seen, 0); + + if (!fdopen_tempfile(f, "w")) + die(_("could not open tempfile %s for writing"), + get_tempfile_path(f)); + + if (preferred) { + struct string_list_item *item; + + data.preferred = 1; + for_each_string_list_item(item, preferred) + refs_for_each_ref_in(get_main_ref_store(repo), + item->string, + midx_snapshot_ref_one, &data); + data.preferred = 0; + } + + refs_for_each_ref(get_main_ref_store(repo), + midx_snapshot_ref_one, &data); + + if (close_tempfile_gently(f)) { + int save_errno = errno; + delete_tempfile(&f); + errno = save_errno; + die_errno(_("could not close refs snapshot tempfile")); + } + + oidset_clear(&data.seen); +} diff --git a/repack.h b/repack.h index cea7969ae47..803e1292240 100644 --- a/repack.h +++ b/repack.h @@ -98,4 +98,8 @@ void pack_geometry_remove_redundant(struct pack_geometry *geometry, const char *packdir); void pack_geometry_release(struct pack_geometry *geometry); +struct tempfile; + +void midx_snapshot_refs(struct repository *repo, struct tempfile *f); + #endif /* REPACK_H */ -- GitLab From 3fc50b4f4f667f2c47d0581d879a3953a51cc592 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:14 -0400 Subject: [PATCH 32/65] builtin/repack.c: extract opts struct for 'write_midx_included_packs()' The function 'write_midx_included_packs()', which is responsible for writing a new MIDX with a given set of included packs, currently takes a list of six arguments. In order to extract this function out of the builtin, we have to pass in a few additional parameters, like 'midx_must_contain_cruft' and 'packdir', which are currently declared as static variables within the builtin/repack.c compilation unit. Instead of adding additional parameters to `write_midx_included_packs()` extract out an "opts" struct that names these parameters, and pass a pointer to that, making it less cumbersome to add additional parameters. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 52 +++++++++++++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 5a1f1cb5629..e4b8aa2c6f0 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -107,6 +107,17 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } +struct repack_write_midx_opts { + struct string_list *include; + struct pack_geometry *geometry; + struct string_list *names; + const char *refs_snapshot; + const char *packdir; + int show_progress; + int write_bitmaps; + int midx_must_contain_cruft; +}; + static int midx_has_unknown_packs(char **midx_pack_names, size_t midx_pack_names_nr, struct string_list *include, @@ -290,19 +301,15 @@ static void midx_included_packs(struct string_list *include, strbuf_release(&buf); } -static int write_midx_included_packs(struct string_list *include, - struct pack_geometry *geometry, - struct string_list *names, - const char *refs_snapshot, - int show_progress, int write_bitmaps) +static int write_midx_included_packs(struct repack_write_midx_opts *opts) { struct child_process cmd = CHILD_PROCESS_INIT; struct string_list_item *item; - struct packed_git *preferred = pack_geometry_preferred_pack(geometry); + struct packed_git *preferred = pack_geometry_preferred_pack(opts->geometry); FILE *in; int ret; - if (!include->nr) + if (!opts->include->nr) return 0; cmd.in = -1; @@ -311,18 +318,18 @@ static int write_midx_included_packs(struct string_list *include, strvec_push(&cmd.args, "multi-pack-index"); strvec_pushl(&cmd.args, "write", "--stdin-packs", NULL); - if (show_progress) + if (opts->show_progress) strvec_push(&cmd.args, "--progress"); else strvec_push(&cmd.args, "--no-progress"); - if (write_bitmaps) + if (opts->write_bitmaps) strvec_push(&cmd.args, "--bitmap"); if (preferred) strvec_pushf(&cmd.args, "--preferred-pack=%s", pack_basename(preferred)); - else if (names->nr) { + else if (opts->names->nr) { /* The largest pack was repacked, meaning that either * one or two packs exist depending on whether the * repository has a cruft pack or not. @@ -335,7 +342,7 @@ static int write_midx_included_packs(struct string_list *include, * `--max-pack-size` was given, but any one of them * will suffice, so pick the first one.) */ - for_each_string_list_item(item, names) { + for_each_string_list_item(item, opts->names) { struct generated_pack *pack = item->util; if (generated_pack_has_ext(pack, ".mtimes")) continue; @@ -355,15 +362,16 @@ static int write_midx_included_packs(struct string_list *include, ; } - if (refs_snapshot) - strvec_pushf(&cmd.args, "--refs-snapshot=%s", refs_snapshot); + if (opts->refs_snapshot) + strvec_pushf(&cmd.args, "--refs-snapshot=%s", + opts->refs_snapshot); ret = start_command(&cmd); if (ret) return ret; in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, include) + for_each_string_list_item(item, opts->include) fprintf(in, "%s\n", item->string); fclose(in); @@ -1001,15 +1009,23 @@ int cmd_repack(int argc, if (write_midx) { struct string_list include = STRING_LIST_INIT_DUP; + struct repack_write_midx_opts opts = { + .include = &include, + .geometry = &geometry, + .names = &names, + .refs_snapshot = refs_snapshot ? get_tempfile_path(refs_snapshot) : NULL, + .packdir = packdir, + .show_progress = show_progress, + .write_bitmaps = write_bitmaps > 0, + .midx_must_contain_cruft = midx_must_contain_cruft + }; midx_included_packs(&include, &existing, midx_pack_names, midx_pack_names_nr, &names, &geometry); - ret = write_midx_included_packs(&include, &geometry, &names, - refs_snapshot ? get_tempfile_path(refs_snapshot) : NULL, - show_progress, write_bitmaps > 0); + ret = write_midx_included_packs(&opts); if (!ret && write_bitmaps) - remove_redundant_bitmaps(&include, packdir); + remove_redundant_bitmaps(&include, opts.packdir); string_list_clear(&include, 0); -- GitLab From c64f873571b85878d0451d74c19d96109fbb3e0c Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:18 -0400 Subject: [PATCH 33/65] builtin/repack.c: use a string_list for 'midx_pack_names' When writing a new MIDX, repack must determine whether or not there are any packs in the MIDX it is replacing (if one exists) that are not somehow represented in the new MIDX (e.g., either by preserving the pack verbatim, or rolling it up as part of a geometric repack, etc.). In order to do this, it keeps track of a list of pack names from the MIDX present in the repository at the start of the repack operation. Since we manipulate and close the object store, we cannot rely on the repository's in-core representation of the MIDX, since this is subject to change and/or go away. When this behavior was introduced in 5ee86c273b (repack: exclude cruft pack(s) from the MIDX where possible, 2025-06-23), we maintained an array of character pointers instead of using a convenience API, such as string-list.h. Store the list of MIDX pack names in a string_list, thereby reducing the number of parameters we have to pass to `midx_has_unknown_packs()`. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 40 +++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index e4b8aa2c6f0..8ae56b05e96 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -118,18 +118,17 @@ struct repack_write_midx_opts { int midx_must_contain_cruft; }; -static int midx_has_unknown_packs(char **midx_pack_names, - size_t midx_pack_names_nr, +static int midx_has_unknown_packs(struct string_list *midx_pack_names, struct string_list *include, struct pack_geometry *geometry, struct existing_packs *existing) { - size_t i; + struct string_list_item *item; string_list_sort(include); - for (i = 0; i < midx_pack_names_nr; i++) { - const char *pack_name = midx_pack_names[i]; + for_each_string_list_item(item, midx_pack_names) { + const char *pack_name = item->string; /* * Determine whether or not each MIDX'd pack from the existing @@ -191,8 +190,7 @@ static int midx_has_unknown_packs(char **midx_pack_names, static void midx_included_packs(struct string_list *include, struct existing_packs *existing, - char **midx_pack_names, - size_t midx_pack_names_nr, + struct string_list *midx_pack_names, struct string_list *names, struct pack_geometry *geometry) { @@ -247,8 +245,8 @@ static void midx_included_packs(struct string_list *include, } if (midx_must_contain_cruft || - midx_has_unknown_packs(midx_pack_names, midx_pack_names_nr, - include, geometry, existing)) { + midx_has_unknown_packs(midx_pack_names, include, geometry, + existing)) { /* * If there are one or more unknown pack(s) present (see * midx_has_unknown_packs() for what makes a pack @@ -606,13 +604,12 @@ int cmd_repack(int argc, struct child_process cmd = CHILD_PROCESS_INIT; struct string_list_item *item; struct string_list names = STRING_LIST_INIT_DUP; + struct string_list midx_pack_names = STRING_LIST_INIT_DUP; struct existing_packs existing = EXISTING_PACKS_INIT; struct pack_geometry geometry = { 0 }; struct tempfile *refs_snapshot = NULL; int i, ret; int show_progress; - char **midx_pack_names = NULL; - size_t midx_pack_names_nr = 0; /* variables to be filled by option parsing */ struct repack_config_ctx config_ctx; @@ -985,13 +982,12 @@ int cmd_repack(int argc, struct multi_pack_index *m = get_multi_pack_index(repo->objects->sources); - ALLOC_ARRAY(midx_pack_names, - m->num_packs + m->num_packs_in_base); - - for (; m; m = m->base_midx) - for (uint32_t i = 0; i < m->num_packs; i++) - midx_pack_names[midx_pack_names_nr++] = - xstrdup(m->pack_names[i]); + for (; m; m = m->base_midx) { + for (uint32_t i = 0; i < m->num_packs; i++) { + string_list_append(&midx_pack_names, + m->pack_names[i]); + } + } } close_object_store(repo->objects); @@ -1019,8 +1015,8 @@ int cmd_repack(int argc, .write_bitmaps = write_bitmaps > 0, .midx_must_contain_cruft = midx_must_contain_cruft }; - midx_included_packs(&include, &existing, midx_pack_names, - midx_pack_names_nr, &names, &geometry); + midx_included_packs(&include, &existing, &midx_pack_names, + &names, &geometry); ret = write_midx_included_packs(&opts); @@ -1067,11 +1063,9 @@ int cmd_repack(int argc, cleanup: string_list_clear(&keep_pack_list, 0); string_list_clear(&names, 1); + string_list_clear(&midx_pack_names, 0); existing_packs_release(&existing); pack_geometry_release(&geometry); - for (size_t i = 0; i < midx_pack_names_nr; i++) - free(midx_pack_names[i]); - free(midx_pack_names); pack_objects_args_release(&po_args); pack_objects_args_release(&cruft_po_args); -- GitLab From 3e4f895ce8853f859d3b15633a8f8a64f1f52ba8 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:22 -0400 Subject: [PATCH 34/65] repack: keep track of MIDX pack names using existing_packs Instead of storing the list of MIDX pack names separately, let's inline it into the existing_packs struct, further reducing the number of parameters we have to pass around. This amounts to adding a new string_list to the existing_packs struct, and populating it via `existing_packs_collect()`. This is fairly straightforward to do, since we are already looping over all packs, all we need to do is: if (p->multi_pack_index) string_list_append(&existing->midx_packs, pack_basename(p)); Note, however, that this check *must* come before other conditions where we discard and do not keep track of a pack, including the condition "if (!p->pack_local)" immediately below. This is because the existing routine which collects MIDX pack names does so blindly, and does not discard, for example, non-local packs. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 26 ++++---------------------- repack.c | 5 +++++ repack.h | 1 + 3 files changed, 10 insertions(+), 22 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 8ae56b05e96..251dd08b0ad 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -118,8 +118,7 @@ struct repack_write_midx_opts { int midx_must_contain_cruft; }; -static int midx_has_unknown_packs(struct string_list *midx_pack_names, - struct string_list *include, +static int midx_has_unknown_packs(struct string_list *include, struct pack_geometry *geometry, struct existing_packs *existing) { @@ -127,7 +126,7 @@ static int midx_has_unknown_packs(struct string_list *midx_pack_names, string_list_sort(include); - for_each_string_list_item(item, midx_pack_names) { + for_each_string_list_item(item, &existing->midx_packs) { const char *pack_name = item->string; /* @@ -190,7 +189,6 @@ static int midx_has_unknown_packs(struct string_list *midx_pack_names, static void midx_included_packs(struct string_list *include, struct existing_packs *existing, - struct string_list *midx_pack_names, struct string_list *names, struct pack_geometry *geometry) { @@ -245,8 +243,7 @@ static void midx_included_packs(struct string_list *include, } if (midx_must_contain_cruft || - midx_has_unknown_packs(midx_pack_names, include, geometry, - existing)) { + midx_has_unknown_packs(include, geometry, existing)) { /* * If there are one or more unknown pack(s) present (see * midx_has_unknown_packs() for what makes a pack @@ -604,7 +601,6 @@ int cmd_repack(int argc, struct child_process cmd = CHILD_PROCESS_INIT; struct string_list_item *item; struct string_list names = STRING_LIST_INIT_DUP; - struct string_list midx_pack_names = STRING_LIST_INIT_DUP; struct existing_packs existing = EXISTING_PACKS_INIT; struct pack_geometry geometry = { 0 }; struct tempfile *refs_snapshot = NULL; @@ -978,18 +974,6 @@ int cmd_repack(int argc, string_list_sort(&names); - if (get_multi_pack_index(repo->objects->sources)) { - struct multi_pack_index *m = - get_multi_pack_index(repo->objects->sources); - - for (; m; m = m->base_midx) { - for (uint32_t i = 0; i < m->num_packs; i++) { - string_list_append(&midx_pack_names, - m->pack_names[i]); - } - } - } - close_object_store(repo->objects); /* @@ -1015,8 +999,7 @@ int cmd_repack(int argc, .write_bitmaps = write_bitmaps > 0, .midx_must_contain_cruft = midx_must_contain_cruft }; - midx_included_packs(&include, &existing, &midx_pack_names, - &names, &geometry); + midx_included_packs(&include, &existing, &names, &geometry); ret = write_midx_included_packs(&opts); @@ -1063,7 +1046,6 @@ int cmd_repack(int argc, cleanup: string_list_clear(&keep_pack_list, 0); string_list_clear(&names, 1); - string_list_clear(&midx_pack_names, 0); existing_packs_release(&existing); pack_geometry_release(&geometry); pack_objects_args_release(&po_args); diff --git a/repack.c b/repack.c index d8afdd352d4..1d485e01124 100644 --- a/repack.c +++ b/repack.c @@ -80,6 +80,9 @@ void existing_packs_collect(struct existing_packs *existing, size_t i; const char *base; + if (p->multi_pack_index) + string_list_append(&existing->midx_packs, + pack_basename(p)); if (!p->pack_local) continue; @@ -104,6 +107,7 @@ void existing_packs_collect(struct existing_packs *existing, string_list_sort(&existing->kept_packs); string_list_sort(&existing->non_kept_packs); string_list_sort(&existing->cruft_packs); + string_list_sort(&existing->midx_packs); strbuf_release(&buf); } @@ -220,6 +224,7 @@ void existing_packs_release(struct existing_packs *existing) string_list_clear(&existing->kept_packs, 0); string_list_clear(&existing->non_kept_packs, 0); string_list_clear(&existing->cruft_packs, 0); + string_list_clear(&existing->midx_packs, 0); } static struct { diff --git a/repack.h b/repack.h index 803e1292240..6aa5b4e0f0f 100644 --- a/repack.h +++ b/repack.h @@ -40,6 +40,7 @@ struct existing_packs { struct string_list kept_packs; struct string_list non_kept_packs; struct string_list cruft_packs; + struct string_list midx_packs; }; #define EXISTING_PACKS_INIT { \ -- GitLab From d13c9522c1683c21a4aa485c8a6aa40e4b9c06af Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:26 -0400 Subject: [PATCH 35/65] builtin/repack.c: reorder `remove_redundant_bitmaps()` The next commit will inline the call to `remove_redundant_bitmaps()` into `write_midx_included_packs()`. Reorder these two functions to avoid a forward declaration to `remove_redundant_bitmaps()`. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 58 ++++++++++++++++++++++++------------------------ 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 251dd08b0ad..957a9b5f9e8 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -296,6 +296,35 @@ static void midx_included_packs(struct string_list *include, strbuf_release(&buf); } +static void remove_redundant_bitmaps(struct string_list *include, + const char *packdir) +{ + struct strbuf path = STRBUF_INIT; + struct string_list_item *item; + size_t packdir_len; + + strbuf_addstr(&path, packdir); + strbuf_addch(&path, '/'); + packdir_len = path.len; + + /* + * Remove any pack bitmaps corresponding to packs which are now + * included in the MIDX. + */ + for_each_string_list_item(item, include) { + strbuf_addstr(&path, item->string); + strbuf_strip_suffix(&path, ".idx"); + strbuf_addstr(&path, ".bitmap"); + + if (unlink(path.buf) && errno != ENOENT) + warning_errno(_("could not remove stale bitmap: %s"), + path.buf); + + strbuf_setlen(&path, packdir_len); + } + strbuf_release(&path); +} + static int write_midx_included_packs(struct repack_write_midx_opts *opts) { struct child_process cmd = CHILD_PROCESS_INIT; @@ -373,35 +402,6 @@ static int write_midx_included_packs(struct repack_write_midx_opts *opts) return finish_command(&cmd); } -static void remove_redundant_bitmaps(struct string_list *include, - const char *packdir) -{ - struct strbuf path = STRBUF_INIT; - struct string_list_item *item; - size_t packdir_len; - - strbuf_addstr(&path, packdir); - strbuf_addch(&path, '/'); - packdir_len = path.len; - - /* - * Remove any pack bitmaps corresponding to packs which are now - * included in the MIDX. - */ - for_each_string_list_item(item, include) { - strbuf_addstr(&path, item->string); - strbuf_strip_suffix(&path, ".idx"); - strbuf_addstr(&path, ".bitmap"); - - if (unlink(path.buf) && errno != ENOENT) - warning_errno(_("could not remove stale bitmap: %s"), - path.buf); - - strbuf_setlen(&path, packdir_len); - } - strbuf_release(&path); -} - static int finish_pack_objects_cmd(const struct git_hash_algo *algop, struct child_process *cmd, struct string_list *names, -- GitLab From 168c70c6899debf1eace0bd7e94ef8ad20c8521f Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:30 -0400 Subject: [PATCH 36/65] builtin/repack.c: inline `remove_redundant_bitmaps()` After writing a new MIDX, the repack command removes any bitmaps belonging to packs which were written into the MIDX. This is currently done in a separate function outside of `write_midx_included_packs()`, which forces the caller to keep track of the set of packs written into the MIDX. Prepare to no longer require the caller to keep track of such information by inlining the clean-up into `write_midx_included_packs()`. Future commits will make the caller oblivious to the set of packs included in the MIDX altogether. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 957a9b5f9e8..b55c8934e88 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -331,10 +331,10 @@ static int write_midx_included_packs(struct repack_write_midx_opts *opts) struct string_list_item *item; struct packed_git *preferred = pack_geometry_preferred_pack(opts->geometry); FILE *in; - int ret; + int ret = 0; if (!opts->include->nr) - return 0; + goto done; cmd.in = -1; cmd.git_cmd = 1; @@ -392,14 +392,18 @@ static int write_midx_included_packs(struct repack_write_midx_opts *opts) ret = start_command(&cmd); if (ret) - return ret; + goto done; in = xfdopen(cmd.in, "w"); for_each_string_list_item(item, opts->include) fprintf(in, "%s\n", item->string); fclose(in); - return finish_command(&cmd); + ret = finish_command(&cmd); +done: + if (!ret && opts->write_bitmaps) + remove_redundant_bitmaps(opts->include, opts->packdir); + return ret; } static int finish_pack_objects_cmd(const struct git_hash_algo *algop, @@ -1003,9 +1007,6 @@ int cmd_repack(int argc, ret = write_midx_included_packs(&opts); - if (!ret && write_bitmaps) - remove_redundant_bitmaps(&include, opts.packdir); - string_list_clear(&include, 0); if (ret) -- GitLab From e500d06af50bc3c879fda8139fcb23177d4318dc Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:35 -0400 Subject: [PATCH 37/65] builtin/repack.c: pass `repack_write_midx_opts` to `midx_included_packs` Instead of passing individual parameters (in this case, "existing", "names", and "geometry") to `midx_included_packs()`, pass a pointer to a `repack_write_midx_opts` structure instead. Besides reducing the number of parameters necessary to call the `midx_included_packs` function, this refactoring sets us up nicely to inline the call to `midx_included_packs()` into `write_midx_included_packs()`, thus making the caller (in this case, `cmd_repack()`) oblivious to the set of packs being written into the MIDX. In order to do this, `repack_write_midx_opts` has to keep track of the set of existing packs, so add an additional field to point to that set. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index b55c8934e88..bd3034a4f07 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -108,6 +108,7 @@ static int repack_config(const char *var, const char *value, } struct repack_write_midx_opts { + struct existing_packs *existing; struct string_list *include; struct pack_geometry *geometry; struct string_list *names; @@ -188,10 +189,11 @@ static int midx_has_unknown_packs(struct string_list *include, } static void midx_included_packs(struct string_list *include, - struct existing_packs *existing, - struct string_list *names, - struct pack_geometry *geometry) + struct repack_write_midx_opts *opts) { + struct existing_packs *existing = opts->existing; + struct pack_geometry *geometry = opts->geometry; + struct string_list *names = opts->names; struct string_list_item *item; struct strbuf buf = STRBUF_INIT; @@ -242,7 +244,7 @@ static void midx_included_packs(struct string_list *include, } } - if (midx_must_contain_cruft || + if (opts->midx_must_contain_cruft || midx_has_unknown_packs(include, geometry, existing)) { /* * If there are one or more unknown pack(s) present (see @@ -994,6 +996,7 @@ int cmd_repack(int argc, if (write_midx) { struct string_list include = STRING_LIST_INIT_DUP; struct repack_write_midx_opts opts = { + .existing = &existing, .include = &include, .geometry = &geometry, .names = &names, @@ -1003,7 +1006,7 @@ int cmd_repack(int argc, .write_bitmaps = write_bitmaps > 0, .midx_must_contain_cruft = midx_must_contain_cruft }; - midx_included_packs(&include, &existing, &names, &geometry); + midx_included_packs(&include, &opts); ret = write_midx_included_packs(&opts); -- GitLab From fff699f45d590619939d16d36c44e4a5a099744b Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:40 -0400 Subject: [PATCH 38/65] builtin/repack.c: inline packs within `write_midx_included_packs()` To write a MIDX at the end of a repack operation, 'git repack' presently computes the set of packs to write into the MIDX, before invoking `write_midx_included_packs()` with a `string_list` containing those packs. The logic for computing which packs are supposed to appear in the resulting MIDX is within `midx_included_packs()`, where it is aware of details like which cruft pack(s) were written/combined, if/how we did a geometric repack, etc. Computing this list ourselves before providing it to the sole function to make use of that list `write_midx_included_packs()` is somewhat awkward. In the future, repack will learn how to write incremental MIDXs, which will use a very different pack selection routine. Instead of doing something like: struct string_list included_packs = STRING_LIST_INIT_DUP; if (incremental) { midx_incremental_included_packs(&included_packs, ...): write_midx_incremental_included_packs(&included_packs, ...); } else { midx_included_packs(&included_packs, ...): write_midx_included_packs(&included_packs, ...); } in the future, let's have each function which writes a MIDX be responsible for itself computing the list of included packs. Inline the declaration and initialization of `included_packs` into the `write_midx_included_packs()` function itself, and repeat that pattern in the future when we introduce new ways to write MIDXs. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index bd3034a4f07..e2acba63124 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -109,7 +109,6 @@ static int repack_config(const char *var, const char *value, struct repack_write_midx_opts { struct existing_packs *existing; - struct string_list *include; struct pack_geometry *geometry; struct string_list *names; const char *refs_snapshot; @@ -330,12 +329,14 @@ static void remove_redundant_bitmaps(struct string_list *include, static int write_midx_included_packs(struct repack_write_midx_opts *opts) { struct child_process cmd = CHILD_PROCESS_INIT; + struct string_list include = STRING_LIST_INIT_DUP; struct string_list_item *item; struct packed_git *preferred = pack_geometry_preferred_pack(opts->geometry); FILE *in; int ret = 0; - if (!opts->include->nr) + midx_included_packs(&include, opts); + if (!include.nr) goto done; cmd.in = -1; @@ -397,14 +398,17 @@ static int write_midx_included_packs(struct repack_write_midx_opts *opts) goto done; in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, opts->include) + for_each_string_list_item(item, &include) fprintf(in, "%s\n", item->string); fclose(in); ret = finish_command(&cmd); done: if (!ret && opts->write_bitmaps) - remove_redundant_bitmaps(opts->include, opts->packdir); + remove_redundant_bitmaps(&include, opts->packdir); + + string_list_clear(&include, 0); + return ret; } @@ -994,10 +998,8 @@ int cmd_repack(int argc, existing_packs_mark_for_deletion(&existing, &names); if (write_midx) { - struct string_list include = STRING_LIST_INIT_DUP; struct repack_write_midx_opts opts = { .existing = &existing, - .include = &include, .geometry = &geometry, .names = &names, .refs_snapshot = refs_snapshot ? get_tempfile_path(refs_snapshot) : NULL, @@ -1006,12 +1008,9 @@ int cmd_repack(int argc, .write_bitmaps = write_bitmaps > 0, .midx_must_contain_cruft = midx_must_contain_cruft }; - midx_included_packs(&include, &opts); ret = write_midx_included_packs(&opts); - string_list_clear(&include, 0); - if (ret) goto cleanup; } -- GitLab From 2f8fb7d1da04f09b3b66ebffe9405db8587f8eae Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:46 -0400 Subject: [PATCH 39/65] repack: 'write_midx_included_packs' API from the builtin Now that we have sufficiently cleaned up the write_midx_included_packs() function, we can move it (along with the struct repack_write_midx_opts) out of the builtin, and into the repack.h header. Since this function (and the static ones that it depends on) are MIDX-specific details of the repacking process, move them to the repack-midx.c compilation unit instead of the general repack.c one. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 305 ----------------------------------------------- repack-midx.c | 295 +++++++++++++++++++++++++++++++++++++++++++++ repack.h | 12 ++ 3 files changed, 307 insertions(+), 305 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index e2acba63124..5fed79e8262 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -107,311 +107,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -struct repack_write_midx_opts { - struct existing_packs *existing; - struct pack_geometry *geometry; - struct string_list *names; - const char *refs_snapshot; - const char *packdir; - int show_progress; - int write_bitmaps; - int midx_must_contain_cruft; -}; - -static int midx_has_unknown_packs(struct string_list *include, - struct pack_geometry *geometry, - struct existing_packs *existing) -{ - struct string_list_item *item; - - string_list_sort(include); - - for_each_string_list_item(item, &existing->midx_packs) { - const char *pack_name = item->string; - - /* - * Determine whether or not each MIDX'd pack from the existing - * MIDX (if any) is represented in the new MIDX. For each pack - * in the MIDX, it must either be: - * - * - In the "include" list of packs to be included in the new - * MIDX. Note this function is called before the include - * list is populated with any cruft pack(s). - * - * - Below the geometric split line (if using pack geometry), - * indicating that the pack won't be included in the new - * MIDX, but its contents were rolled up as part of the - * geometric repack. - * - * - In the existing non-kept packs list (if not using pack - * geometry), and marked as non-deleted. - */ - if (string_list_has_string(include, pack_name)) { - continue; - } else if (geometry) { - struct strbuf buf = STRBUF_INIT; - uint32_t j; - - for (j = 0; j < geometry->split; j++) { - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(geometry->pack[j])); - strbuf_strip_suffix(&buf, ".pack"); - strbuf_addstr(&buf, ".idx"); - - if (!strcmp(pack_name, buf.buf)) { - strbuf_release(&buf); - break; - } - } - - strbuf_release(&buf); - - if (j < geometry->split) - continue; - } else { - struct string_list_item *item; - - item = string_list_lookup(&existing->non_kept_packs, - pack_name); - if (item && !existing_pack_is_marked_for_deletion(item)) - continue; - } - - /* - * If we got to this point, the MIDX includes some pack that we - * don't know about. - */ - return 1; - } - - return 0; -} - -static void midx_included_packs(struct string_list *include, - struct repack_write_midx_opts *opts) -{ - struct existing_packs *existing = opts->existing; - struct pack_geometry *geometry = opts->geometry; - struct string_list *names = opts->names; - struct string_list_item *item; - struct strbuf buf = STRBUF_INIT; - - for_each_string_list_item(item, &existing->kept_packs) { - strbuf_reset(&buf); - strbuf_addf(&buf, "%s.idx", item->string); - string_list_insert(include, buf.buf); - } - - for_each_string_list_item(item, names) { - strbuf_reset(&buf); - strbuf_addf(&buf, "pack-%s.idx", item->string); - string_list_insert(include, buf.buf); - } - - if (geometry->split_factor) { - uint32_t i; - - for (i = geometry->split; i < geometry->pack_nr; i++) { - struct packed_git *p = geometry->pack[i]; - - /* - * The multi-pack index never refers to packfiles part - * of an alternate object database, so we skip these. - * While git-multi-pack-index(1) would silently ignore - * them anyway, this allows us to skip executing the - * command completely when we have only non-local - * packfiles. - */ - if (!p->pack_local) - continue; - - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(p)); - strbuf_strip_suffix(&buf, ".pack"); - strbuf_addstr(&buf, ".idx"); - - string_list_insert(include, buf.buf); - } - } else { - for_each_string_list_item(item, &existing->non_kept_packs) { - if (existing_pack_is_marked_for_deletion(item)) - continue; - - strbuf_reset(&buf); - strbuf_addf(&buf, "%s.idx", item->string); - string_list_insert(include, buf.buf); - } - } - - if (opts->midx_must_contain_cruft || - midx_has_unknown_packs(include, geometry, existing)) { - /* - * If there are one or more unknown pack(s) present (see - * midx_has_unknown_packs() for what makes a pack - * "unknown") in the MIDX before the repack, keep them - * as they may be required to form a reachability - * closure if the MIDX is bitmapped. - * - * For example, a cruft pack can be required to form a - * reachability closure if the MIDX is bitmapped and one - * or more of the bitmap's selected commits reaches a - * once-cruft object that was later made reachable. - */ - for_each_string_list_item(item, &existing->cruft_packs) { - /* - * When doing a --geometric repack, there is no - * need to check for deleted packs, since we're - * by definition not doing an ALL_INTO_ONE - * repack (hence no packs will be deleted). - * Otherwise we must check for and exclude any - * packs which are enqueued for deletion. - * - * So we could omit the conditional below in the - * --geometric case, but doing so is unnecessary - * since no packs are marked as pending - * deletion (since we only call - * `existing_packs_mark_for_deletion()` when - * doing an all-into-one repack). - */ - if (existing_pack_is_marked_for_deletion(item)) - continue; - - strbuf_reset(&buf); - strbuf_addf(&buf, "%s.idx", item->string); - string_list_insert(include, buf.buf); - } - } else { - /* - * Modern versions of Git (with the appropriate - * configuration setting) will write new copies of - * once-cruft objects when doing a --geometric repack. - * - * If the MIDX has no cruft pack, new packs written - * during a --geometric repack will not rely on the - * cruft pack to form a reachability closure, so we can - * avoid including them in the MIDX in that case. - */ - ; - } - - strbuf_release(&buf); -} - -static void remove_redundant_bitmaps(struct string_list *include, - const char *packdir) -{ - struct strbuf path = STRBUF_INIT; - struct string_list_item *item; - size_t packdir_len; - - strbuf_addstr(&path, packdir); - strbuf_addch(&path, '/'); - packdir_len = path.len; - - /* - * Remove any pack bitmaps corresponding to packs which are now - * included in the MIDX. - */ - for_each_string_list_item(item, include) { - strbuf_addstr(&path, item->string); - strbuf_strip_suffix(&path, ".idx"); - strbuf_addstr(&path, ".bitmap"); - - if (unlink(path.buf) && errno != ENOENT) - warning_errno(_("could not remove stale bitmap: %s"), - path.buf); - - strbuf_setlen(&path, packdir_len); - } - strbuf_release(&path); -} - -static int write_midx_included_packs(struct repack_write_midx_opts *opts) -{ - struct child_process cmd = CHILD_PROCESS_INIT; - struct string_list include = STRING_LIST_INIT_DUP; - struct string_list_item *item; - struct packed_git *preferred = pack_geometry_preferred_pack(opts->geometry); - FILE *in; - int ret = 0; - - midx_included_packs(&include, opts); - if (!include.nr) - goto done; - - cmd.in = -1; - cmd.git_cmd = 1; - - strvec_push(&cmd.args, "multi-pack-index"); - strvec_pushl(&cmd.args, "write", "--stdin-packs", NULL); - - if (opts->show_progress) - strvec_push(&cmd.args, "--progress"); - else - strvec_push(&cmd.args, "--no-progress"); - - if (opts->write_bitmaps) - strvec_push(&cmd.args, "--bitmap"); - - if (preferred) - strvec_pushf(&cmd.args, "--preferred-pack=%s", - pack_basename(preferred)); - else if (opts->names->nr) { - /* The largest pack was repacked, meaning that either - * one or two packs exist depending on whether the - * repository has a cruft pack or not. - * - * Select the non-cruft one as preferred to encourage - * pack-reuse among packs containing reachable objects - * over unreachable ones. - * - * (Note we could write multiple packs here if - * `--max-pack-size` was given, but any one of them - * will suffice, so pick the first one.) - */ - for_each_string_list_item(item, opts->names) { - struct generated_pack *pack = item->util; - if (generated_pack_has_ext(pack, ".mtimes")) - continue; - - strvec_pushf(&cmd.args, "--preferred-pack=pack-%s.pack", - item->string); - break; - } - } else { - /* - * No packs were kept, and no packs were written. The - * only thing remaining are .keep packs (unless - * --pack-kept-objects was given). - * - * Set the `--preferred-pack` arbitrarily here. - */ - ; - } - - if (opts->refs_snapshot) - strvec_pushf(&cmd.args, "--refs-snapshot=%s", - opts->refs_snapshot); - - ret = start_command(&cmd); - if (ret) - goto done; - - in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, &include) - fprintf(in, "%s\n", item->string); - fclose(in); - - ret = finish_command(&cmd); -done: - if (!ret && opts->write_bitmaps) - remove_redundant_bitmaps(&include, opts->packdir); - - string_list_clear(&include, 0); - - return ret; -} - static int finish_pack_objects_cmd(const struct git_hash_algo *algop, struct child_process *cmd, struct string_list *names, diff --git a/repack-midx.c b/repack-midx.c index 354df729a5e..6f6202c5bcc 100644 --- a/repack-midx.c +++ b/repack-midx.c @@ -6,6 +6,7 @@ #include "oidset.h" #include "pack-bitmap.h" #include "refs.h" +#include "run-command.h" #include "tempfile.h" struct midx_snapshot_ref_data { @@ -75,3 +76,297 @@ void midx_snapshot_refs(struct repository *repo, struct tempfile *f) oidset_clear(&data.seen); } + +static int midx_has_unknown_packs(struct string_list *include, + struct pack_geometry *geometry, + struct existing_packs *existing) +{ + struct string_list_item *item; + + string_list_sort(include); + + for_each_string_list_item(item, &existing->midx_packs) { + const char *pack_name = item->string; + + /* + * Determine whether or not each MIDX'd pack from the existing + * MIDX (if any) is represented in the new MIDX. For each pack + * in the MIDX, it must either be: + * + * - In the "include" list of packs to be included in the new + * MIDX. Note this function is called before the include + * list is populated with any cruft pack(s). + * + * - Below the geometric split line (if using pack geometry), + * indicating that the pack won't be included in the new + * MIDX, but its contents were rolled up as part of the + * geometric repack. + * + * - In the existing non-kept packs list (if not using pack + * geometry), and marked as non-deleted. + */ + if (string_list_has_string(include, pack_name)) { + continue; + } else if (geometry) { + struct strbuf buf = STRBUF_INIT; + uint32_t j; + + for (j = 0; j < geometry->split; j++) { + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(geometry->pack[j])); + strbuf_strip_suffix(&buf, ".pack"); + strbuf_addstr(&buf, ".idx"); + + if (!strcmp(pack_name, buf.buf)) { + strbuf_release(&buf); + break; + } + } + + strbuf_release(&buf); + + if (j < geometry->split) + continue; + } else { + struct string_list_item *item; + + item = string_list_lookup(&existing->non_kept_packs, + pack_name); + if (item && !existing_pack_is_marked_for_deletion(item)) + continue; + } + + /* + * If we got to this point, the MIDX includes some pack that we + * don't know about. + */ + return 1; + } + + return 0; +} + +static void midx_included_packs(struct string_list *include, + struct repack_write_midx_opts *opts) +{ + struct existing_packs *existing = opts->existing; + struct pack_geometry *geometry = opts->geometry; + struct string_list *names = opts->names; + struct string_list_item *item; + struct strbuf buf = STRBUF_INIT; + + for_each_string_list_item(item, &existing->kept_packs) { + strbuf_reset(&buf); + strbuf_addf(&buf, "%s.idx", item->string); + string_list_insert(include, buf.buf); + } + + for_each_string_list_item(item, names) { + strbuf_reset(&buf); + strbuf_addf(&buf, "pack-%s.idx", item->string); + string_list_insert(include, buf.buf); + } + + if (geometry->split_factor) { + uint32_t i; + + for (i = geometry->split; i < geometry->pack_nr; i++) { + struct packed_git *p = geometry->pack[i]; + + /* + * The multi-pack index never refers to packfiles part + * of an alternate object database, so we skip these. + * While git-multi-pack-index(1) would silently ignore + * them anyway, this allows us to skip executing the + * command completely when we have only non-local + * packfiles. + */ + if (!p->pack_local) + continue; + + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + strbuf_addstr(&buf, ".idx"); + + string_list_insert(include, buf.buf); + } + } else { + for_each_string_list_item(item, &existing->non_kept_packs) { + if (existing_pack_is_marked_for_deletion(item)) + continue; + + strbuf_reset(&buf); + strbuf_addf(&buf, "%s.idx", item->string); + string_list_insert(include, buf.buf); + } + } + + if (opts->midx_must_contain_cruft || + midx_has_unknown_packs(include, geometry, existing)) { + /* + * If there are one or more unknown pack(s) present (see + * midx_has_unknown_packs() for what makes a pack + * "unknown") in the MIDX before the repack, keep them + * as they may be required to form a reachability + * closure if the MIDX is bitmapped. + * + * For example, a cruft pack can be required to form a + * reachability closure if the MIDX is bitmapped and one + * or more of the bitmap's selected commits reaches a + * once-cruft object that was later made reachable. + */ + for_each_string_list_item(item, &existing->cruft_packs) { + /* + * When doing a --geometric repack, there is no + * need to check for deleted packs, since we're + * by definition not doing an ALL_INTO_ONE + * repack (hence no packs will be deleted). + * Otherwise we must check for and exclude any + * packs which are enqueued for deletion. + * + * So we could omit the conditional below in the + * --geometric case, but doing so is unnecessary + * since no packs are marked as pending + * deletion (since we only call + * `existing_packs_mark_for_deletion()` when + * doing an all-into-one repack). + */ + if (existing_pack_is_marked_for_deletion(item)) + continue; + + strbuf_reset(&buf); + strbuf_addf(&buf, "%s.idx", item->string); + string_list_insert(include, buf.buf); + } + } else { + /* + * Modern versions of Git (with the appropriate + * configuration setting) will write new copies of + * once-cruft objects when doing a --geometric repack. + * + * If the MIDX has no cruft pack, new packs written + * during a --geometric repack will not rely on the + * cruft pack to form a reachability closure, so we can + * avoid including them in the MIDX in that case. + */ + ; + } + + strbuf_release(&buf); +} + +static void remove_redundant_bitmaps(struct string_list *include, + const char *packdir) +{ + struct strbuf path = STRBUF_INIT; + struct string_list_item *item; + size_t packdir_len; + + strbuf_addstr(&path, packdir); + strbuf_addch(&path, '/'); + packdir_len = path.len; + + /* + * Remove any pack bitmaps corresponding to packs which are now + * included in the MIDX. + */ + for_each_string_list_item(item, include) { + strbuf_addstr(&path, item->string); + strbuf_strip_suffix(&path, ".idx"); + strbuf_addstr(&path, ".bitmap"); + + if (unlink(path.buf) && errno != ENOENT) + warning_errno(_("could not remove stale bitmap: %s"), + path.buf); + + strbuf_setlen(&path, packdir_len); + } + strbuf_release(&path); +} + +int write_midx_included_packs(struct repack_write_midx_opts *opts) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + struct string_list include = STRING_LIST_INIT_DUP; + struct string_list_item *item; + struct packed_git *preferred = pack_geometry_preferred_pack(opts->geometry); + FILE *in; + int ret = 0; + + midx_included_packs(&include, opts); + if (!include.nr) + goto done; + + cmd.in = -1; + cmd.git_cmd = 1; + + strvec_push(&cmd.args, "multi-pack-index"); + strvec_pushl(&cmd.args, "write", "--stdin-packs", NULL); + + if (opts->show_progress) + strvec_push(&cmd.args, "--progress"); + else + strvec_push(&cmd.args, "--no-progress"); + + if (opts->write_bitmaps) + strvec_push(&cmd.args, "--bitmap"); + + if (preferred) + strvec_pushf(&cmd.args, "--preferred-pack=%s", + pack_basename(preferred)); + else if (opts->names->nr) { + /* The largest pack was repacked, meaning that either + * one or two packs exist depending on whether the + * repository has a cruft pack or not. + * + * Select the non-cruft one as preferred to encourage + * pack-reuse among packs containing reachable objects + * over unreachable ones. + * + * (Note we could write multiple packs here if + * `--max-pack-size` was given, but any one of them + * will suffice, so pick the first one.) + */ + for_each_string_list_item(item, opts->names) { + struct generated_pack *pack = item->util; + if (generated_pack_has_ext(pack, ".mtimes")) + continue; + + strvec_pushf(&cmd.args, "--preferred-pack=pack-%s.pack", + item->string); + break; + } + } else { + /* + * No packs were kept, and no packs were written. The + * only thing remaining are .keep packs (unless + * --pack-kept-objects was given). + * + * Set the `--preferred-pack` arbitrarily here. + */ + ; + } + + if (opts->refs_snapshot) + strvec_pushf(&cmd.args, "--refs-snapshot=%s", + opts->refs_snapshot); + + ret = start_command(&cmd); + if (ret) + goto done; + + in = xfdopen(cmd.in, "w"); + for_each_string_list_item(item, &include) + fprintf(in, "%s\n", item->string); + fclose(in); + + ret = finish_command(&cmd); +done: + if (!ret && opts->write_bitmaps) + remove_redundant_bitmaps(&include, opts->packdir); + + string_list_clear(&include, 0); + + return ret; +} diff --git a/repack.h b/repack.h index 6aa5b4e0f0f..25a31ac0a05 100644 --- a/repack.h +++ b/repack.h @@ -101,6 +101,18 @@ void pack_geometry_release(struct pack_geometry *geometry); struct tempfile; +struct repack_write_midx_opts { + struct existing_packs *existing; + struct pack_geometry *geometry; + struct string_list *names; + const char *refs_snapshot; + const char *packdir; + int show_progress; + int write_bitmaps; + int midx_must_contain_cruft; +}; + void midx_snapshot_refs(struct repository *repo, struct tempfile *f); +int write_midx_included_packs(struct repack_write_midx_opts *opts); #endif /* REPACK_H */ -- GitLab From 9251edd257129ad3b77c63260a77b5bbd27e674a Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:51 -0400 Subject: [PATCH 40/65] builtin/repack.c: introduce `struct write_pack_opts` There are various functions within the 'repack' builtin which are responsible for writing different kinds of packs. They include: - `static int write_filtered_pack(...)` - `static int write_cruft_pack(...)` as well as the function `finish_pack_objects_cmd()`, which is responsible for finalizing a new pack write, and recording the checksum of its contents in the 'names' list. Both of these `write_` functions have a few things in common. They both take a pointer to the 'pack_objects_args' struct, as well as a pair of character pointers for `destination` and `pack_prefix`. Instead of repeating those arguments for each function, let's extract an options struct called "write_pack_opts" which has these three parameters as member fields. While we're at it, add fields for "packdir," and "packtmp", both of which are static variables within the builtin, and need to be read from within these two functions. This will shorten the list of parameters that callers have to provide to `write_filtered_pack()`, avoid ambiguity when passing multiple variables of the same type, and provide a unified interface for the two functions mentioned earlier. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 30 ++++++++++++++++-------------- repack.h | 8 ++++++++ 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 5fed79e8262..6df7c880853 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -138,9 +138,7 @@ static int finish_pack_objects_cmd(const struct git_hash_algo *algop, return finish_command(cmd); } -static int write_filtered_pack(const struct pack_objects_args *args, - const char *destination, - const char *pack_prefix, +static int write_filtered_pack(struct write_pack_opts *opts, struct existing_packs *existing, struct string_list *names) { @@ -150,9 +148,9 @@ static int write_filtered_pack(const struct pack_objects_args *args, int ret; const char *caret; const char *scratch; - int local = skip_prefix(destination, packdir, &scratch); + int local = skip_prefix(opts->destination, opts->packdir, &scratch); - prepare_pack_objects(&cmd, args, destination); + prepare_pack_objects(&cmd, opts->po_args, opts->destination); strvec_push(&cmd.args, "--stdin-packs"); @@ -175,7 +173,7 @@ static int write_filtered_pack(const struct pack_objects_args *args, */ in = xfdopen(cmd.in, "w"); for_each_string_list_item(item, names) - fprintf(in, "^%s-%s.pack\n", pack_prefix, item->string); + fprintf(in, "^%s-%s.pack\n", opts->pack_prefix, item->string); for_each_string_list_item(item, &existing->non_kept_packs) fprintf(in, "%s.pack\n", item->string); for_each_string_list_item(item, &existing->cruft_packs) @@ -665,14 +663,18 @@ int cmd_repack(int argc, } if (po_args.filter_options.choice) { - if (!filter_to) - filter_to = packtmp; - - ret = write_filtered_pack(&po_args, - filter_to, - find_pack_prefix(packdir, packtmp), - &existing, - &names); + struct write_pack_opts opts = { + .po_args = &po_args, + .destination = filter_to, + .pack_prefix = find_pack_prefix(packdir, packtmp), + .packdir = packdir, + .packtmp = packtmp, + }; + + if (!opts.destination) + opts.destination = packtmp; + + ret = write_filtered_pack(&opts, &existing, &names); if (ret) goto cleanup; } diff --git a/repack.h b/repack.h index 25a31ac0a05..6ef503f6237 100644 --- a/repack.h +++ b/repack.h @@ -32,6 +32,14 @@ void pack_objects_args_release(struct pack_objects_args *args); void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, const char *base_name); +struct write_pack_opts { + struct pack_objects_args *po_args; + const char *destination; + const char *pack_prefix; + const char *packdir; + const char *packtmp; +}; + struct repository; struct packed_git; -- GitLab From f9568ff63ff2770a44cea7a996f9d19ca1de414c Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:57 -0400 Subject: [PATCH 41/65] builtin/repack.c: use `write_pack_opts` within `write_cruft_pack()` Similar to the changes made in the previous commit to `write_filtered_pack()`, teach `write_cruft_pack()` to take a `write_pack_opts` struct and use that where possible. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 6df7c880853..501359c580b 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -221,9 +221,7 @@ static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, strbuf_release(&buf); } -static int write_cruft_pack(const struct pack_objects_args *args, - const char *destination, - const char *pack_prefix, +static int write_cruft_pack(struct write_pack_opts *opts, const char *cruft_expiration, unsigned long combine_cruft_below_size, struct string_list *names, @@ -234,9 +232,9 @@ static int write_cruft_pack(const struct pack_objects_args *args, FILE *in; int ret; const char *scratch; - int local = skip_prefix(destination, packdir, &scratch); + int local = skip_prefix(opts->destination, opts->packdir, &scratch); - prepare_pack_objects(&cmd, args, destination); + prepare_pack_objects(&cmd, opts->po_args, opts->destination); strvec_push(&cmd.args, "--cruft"); if (cruft_expiration) @@ -267,7 +265,7 @@ static int write_cruft_pack(const struct pack_objects_args *args, */ in = xfdopen(cmd.in, "w"); for_each_string_list_item(item, names) - fprintf(in, "%s-%s.pack\n", pack_prefix, item->string); + fprintf(in, "%s-%s.pack\n", opts->pack_prefix, item->string); if (combine_cruft_below_size && !cruft_expiration) { combine_small_cruft_packs(in, combine_cruft_below_size, existing); @@ -599,6 +597,13 @@ int cmd_repack(int argc, if (pack_everything & PACK_CRUFT) { const char *pack_prefix = find_pack_prefix(packdir, packtmp); + struct write_pack_opts opts = { + .po_args = &cruft_po_args, + .destination = packtmp, + .pack_prefix = pack_prefix, + .packtmp = packtmp, + .packdir = packdir, + }; if (!cruft_po_args.window) cruft_po_args.window = xstrdup_or_null(po_args.window); @@ -615,8 +620,7 @@ int cmd_repack(int argc, cruft_po_args.quiet = po_args.quiet; cruft_po_args.delta_base_offset = po_args.delta_base_offset; - ret = write_cruft_pack(&cruft_po_args, packtmp, pack_prefix, - cruft_expiration, + ret = write_cruft_pack(&opts, cruft_expiration, combine_cruft_below_size, &names, &existing); if (ret) @@ -651,11 +655,8 @@ int cmd_repack(int argc, * pack, but rather removing all cruft packs from the * main repository regardless of size. */ - ret = write_cruft_pack(&cruft_po_args, expire_to, - pack_prefix, - NULL, - 0ul, - &names, + opts.destination = expire_to; + ret = write_cruft_pack(&opts, NULL, 0ul, &names, &existing); if (ret) goto cleanup; -- GitLab From 3b7102b6ed9127730ec4e01bf9ee4c383b421081 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:10:02 -0400 Subject: [PATCH 42/65] repack: move `find_pack_prefix()` out of the builtin Both callers within the repack builtin which call functions that take a 'write_pack_opts' structure have the following pattern: struct write_pack_opts opts = { .packdir = packdir, .packtmp = packtmp, .pack_prefix = find_pack_prefix(packdir, packtmp), /* ... */ }; int ret = write_some_kind_of_pack(&opts, /* ... */); , but both "packdir" and "packtmp" are fields within the write_pack_opts struct itself! Instead of also computing the pack_prefix ahead of time, let's have the callees compute it themselves by moving `find_pack_prefix()` out of the repack builtin, and have it take a write_pack_opts pointer instead of the "packdir" and "packtmp" fields directly. This avoids the callers having to do some prep work that is common between the two of them, but also avoids the potential pitfall of accidentally writing: .pack_prefix = find_pack_prefix(packtmp, packdir), (which is well-typed) when the caller meant to instead write: .pack_prefix = find_pack_prefix(packdir, packtmp), Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 20 ++++---------------- repack.c | 11 +++++++++++ repack.h | 3 ++- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 501359c580b..4d869206187 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -149,6 +149,7 @@ static int write_filtered_pack(struct write_pack_opts *opts, const char *caret; const char *scratch; int local = skip_prefix(opts->destination, opts->packdir, &scratch); + const char *pack_prefix = write_pack_opts_pack_prefix(opts); prepare_pack_objects(&cmd, opts->po_args, opts->destination); @@ -173,7 +174,7 @@ static int write_filtered_pack(struct write_pack_opts *opts, */ in = xfdopen(cmd.in, "w"); for_each_string_list_item(item, names) - fprintf(in, "^%s-%s.pack\n", opts->pack_prefix, item->string); + fprintf(in, "^%s-%s.pack\n", pack_prefix, item->string); for_each_string_list_item(item, &existing->non_kept_packs) fprintf(in, "%s.pack\n", item->string); for_each_string_list_item(item, &existing->cruft_packs) @@ -233,6 +234,7 @@ static int write_cruft_pack(struct write_pack_opts *opts, int ret; const char *scratch; int local = skip_prefix(opts->destination, opts->packdir, &scratch); + const char *pack_prefix = write_pack_opts_pack_prefix(opts); prepare_pack_objects(&cmd, opts->po_args, opts->destination); @@ -265,7 +267,7 @@ static int write_cruft_pack(struct write_pack_opts *opts, */ in = xfdopen(cmd.in, "w"); for_each_string_list_item(item, names) - fprintf(in, "%s-%s.pack\n", opts->pack_prefix, item->string); + fprintf(in, "%s-%s.pack\n", pack_prefix, item->string); if (combine_cruft_below_size && !cruft_expiration) { combine_small_cruft_packs(in, combine_cruft_below_size, existing); @@ -283,17 +285,6 @@ static int write_cruft_pack(struct write_pack_opts *opts, local); } -static const char *find_pack_prefix(const char *packdir, const char *packtmp) -{ - const char *pack_prefix; - if (!skip_prefix(packtmp, packdir, &pack_prefix)) - die(_("pack prefix %s does not begin with objdir %s"), - packtmp, packdir); - if (*pack_prefix == '/') - pack_prefix++; - return pack_prefix; -} - int cmd_repack(int argc, const char **argv, const char *prefix, @@ -596,11 +587,9 @@ int cmd_repack(int argc, } if (pack_everything & PACK_CRUFT) { - const char *pack_prefix = find_pack_prefix(packdir, packtmp); struct write_pack_opts opts = { .po_args = &cruft_po_args, .destination = packtmp, - .pack_prefix = pack_prefix, .packtmp = packtmp, .packdir = packdir, }; @@ -667,7 +656,6 @@ int cmd_repack(int argc, struct write_pack_opts opts = { .po_args = &po_args, .destination = filter_to, - .pack_prefix = find_pack_prefix(packdir, packtmp), .packdir = packdir, .packtmp = packtmp, }; diff --git a/repack.c b/repack.c index 1d485e01124..c4326a532df 100644 --- a/repack.c +++ b/repack.c @@ -66,6 +66,17 @@ void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, strbuf_release(&buf); } +const char *write_pack_opts_pack_prefix(struct write_pack_opts *opts) +{ + const char *pack_prefix; + if (!skip_prefix(opts->packtmp, opts->packdir, &pack_prefix)) + die(_("pack prefix %s does not begin with objdir %s"), + opts->packtmp, opts->packdir); + if (*pack_prefix == '/') + pack_prefix++; + return pack_prefix; +} + #define DELETE_PACK 1 #define RETAIN_PACK 2 diff --git a/repack.h b/repack.h index 6ef503f6237..46d2312fa96 100644 --- a/repack.h +++ b/repack.h @@ -35,11 +35,12 @@ void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, struct write_pack_opts { struct pack_objects_args *po_args; const char *destination; - const char *pack_prefix; const char *packdir; const char *packtmp; }; +const char *write_pack_opts_pack_prefix(struct write_pack_opts *opts); + struct repository; struct packed_git; -- GitLab From cbd91c8d15ca552604c9489371dd77c652b009ef Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:10:08 -0400 Subject: [PATCH 43/65] repack: extract `write_pack_opts_is_local()` Similar to the previous commit, the functions `write_cruft_pack()` and `write_filtered_pack()` both compute a "local" variable via the exact same mechanism: const char *scratch; int local = skip_prefix(opts->destination, opts->packdir, &scratch); Not only does this cause us to repeat the same pair of lines, it also introduces an unnecessary "scratch" variable that is common between both functions. Instead of repeating ourselves, let's extract that functionality into a new function in the repack.h API called "write_pack_opts_is_local()". That function takes a pointer to a "struct write_pack_opts" (which has as fields both "destination" and "packdir"), and can encapsulate the dangling "scratch" field. Extract that function and make it visible within the repack.h API, and use it within both `write_cruft_pack()` and `write_filtered_pack()`. The remaining duplication (that is, that both `write_cruft_pack()` and `write_filtered_pack()` still both call `write_pack_opts_is_local()`) will be addressed in the following commit. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 6 ++---- repack.c | 6 ++++++ repack.h | 1 + 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 4d869206187..be8e6689fc0 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -147,8 +147,7 @@ static int write_filtered_pack(struct write_pack_opts *opts, FILE *in; int ret; const char *caret; - const char *scratch; - int local = skip_prefix(opts->destination, opts->packdir, &scratch); + int local = write_pack_opts_is_local(opts); const char *pack_prefix = write_pack_opts_pack_prefix(opts); prepare_pack_objects(&cmd, opts->po_args, opts->destination); @@ -232,8 +231,7 @@ static int write_cruft_pack(struct write_pack_opts *opts, struct string_list_item *item; FILE *in; int ret; - const char *scratch; - int local = skip_prefix(opts->destination, opts->packdir, &scratch); + int local = write_pack_opts_is_local(opts); const char *pack_prefix = write_pack_opts_pack_prefix(opts); prepare_pack_objects(&cmd, opts->po_args, opts->destination); diff --git a/repack.c b/repack.c index c4326a532df..7af297ae48a 100644 --- a/repack.c +++ b/repack.c @@ -77,6 +77,12 @@ const char *write_pack_opts_pack_prefix(struct write_pack_opts *opts) return pack_prefix; } +int write_pack_opts_is_local(struct write_pack_opts *opts) +{ + const char *scratch; + return skip_prefix(opts->destination, opts->packdir, &scratch); +} + #define DELETE_PACK 1 #define RETAIN_PACK 2 diff --git a/repack.h b/repack.h index 46d2312fa96..16f2de2ea9a 100644 --- a/repack.h +++ b/repack.h @@ -40,6 +40,7 @@ struct write_pack_opts { }; const char *write_pack_opts_pack_prefix(struct write_pack_opts *opts); +int write_pack_opts_is_local(struct write_pack_opts *opts); struct repository; struct packed_git; -- GitLab From 6c5b90171fae963581ecfd9e30ed2e04bd0b02c3 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:10:14 -0400 Subject: [PATCH 44/65] builtin/repack.c: pass `write_pack_opts` to `finish_pack_objects_cmd()` To prepare to move the `finish_pack_objects_cmd()` function out of the builtin and into the repack.h API, there are a couple of things we need to do first: - First, let's take advantage of `write_pack_opts_is_local()` function introduced in the previous commit instead of passing "local" explicitly. - Let's also avoid referring to the static 'packtmp' field within builtin/repack.c by instead accessing it through the write_pack_opts argument. There are three callers which need to adjust themselves in order to account for this change. The callers which reside in write_cruft_pack() and write_filtered_pack() both already have an "opts" in scope, so they can pass it through transparently. The other call (at the bottom of `cmd_repack()`) needs to initialize its own write_pack_opts to pass the necessary fields over to the direct call to `finish_pack_objects_cmd()`. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index be8e6689fc0..8db95305c8d 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -108,11 +108,12 @@ static int repack_config(const char *var, const char *value, } static int finish_pack_objects_cmd(const struct git_hash_algo *algop, + struct write_pack_opts *opts, struct child_process *cmd, - struct string_list *names, - int local) + struct string_list *names) { FILE *out; + int local = write_pack_opts_is_local(opts); struct strbuf line = STRBUF_INIT; out = xfdopen(cmd->out, "r"); @@ -128,7 +129,8 @@ static int finish_pack_objects_cmd(const struct git_hash_algo *algop, */ if (local) { item = string_list_append(names, line.buf); - item->util = generated_pack_populate(line.buf, packtmp); + item->util = generated_pack_populate(line.buf, + opts->packtmp); } } fclose(out); @@ -147,7 +149,6 @@ static int write_filtered_pack(struct write_pack_opts *opts, FILE *in; int ret; const char *caret; - int local = write_pack_opts_is_local(opts); const char *pack_prefix = write_pack_opts_pack_prefix(opts); prepare_pack_objects(&cmd, opts->po_args, opts->destination); @@ -183,8 +184,8 @@ static int write_filtered_pack(struct write_pack_opts *opts, fprintf(in, "%s%s.pack\n", caret, item->string); fclose(in); - return finish_pack_objects_cmd(existing->repo->hash_algo, &cmd, names, - local); + return finish_pack_objects_cmd(existing->repo->hash_algo, opts, &cmd, + names); } static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, @@ -231,7 +232,6 @@ static int write_cruft_pack(struct write_pack_opts *opts, struct string_list_item *item; FILE *in; int ret; - int local = write_pack_opts_is_local(opts); const char *pack_prefix = write_pack_opts_pack_prefix(opts); prepare_pack_objects(&cmd, opts->po_args, opts->destination); @@ -279,8 +279,8 @@ static int write_cruft_pack(struct write_pack_opts *opts, fprintf(in, "%s.pack\n", item->string); fclose(in); - return finish_pack_objects_cmd(existing->repo->hash_algo, &cmd, names, - local); + return finish_pack_objects_cmd(existing->repo->hash_algo, opts, &cmd, + names); } int cmd_repack(int argc, @@ -294,6 +294,7 @@ int cmd_repack(int argc, struct existing_packs existing = EXISTING_PACKS_INIT; struct pack_geometry geometry = { 0 }; struct tempfile *refs_snapshot = NULL; + struct write_pack_opts opts = { 0 }; int i, ret; int show_progress; @@ -560,7 +561,10 @@ int cmd_repack(int argc, fclose(in); } - ret = finish_pack_objects_cmd(repo->hash_algo, &cmd, &names, 1); + opts.packdir = packdir; + opts.destination = packdir; + opts.packtmp = packtmp; + ret = finish_pack_objects_cmd(repo->hash_algo, &opts, &cmd, &names); if (ret) goto cleanup; -- GitLab From f6cc2aed8a9b9124719ba4bbf4f779eda298d2d8 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:10:19 -0400 Subject: [PATCH 45/65] repack: move `finish_pack_objects_cmd()` out of the builtin In a similar spirit as the previous commit(s), now that the function `finish_pack_objects_cmd()` has no explicit dependencies within the repack builtin, let's extract it. This prepares us to extract the remaining two functions within the repack builtin that explicitly write packfiles, which are `write_cruft_pack()` and `write_filtered_pack()`, which will be done in the future commits. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 33 --------------------------------- repack.c | 33 +++++++++++++++++++++++++++++++++ repack.h | 5 +++++ 3 files changed, 38 insertions(+), 33 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 8db95305c8d..836a0066072 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -107,39 +107,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -static int finish_pack_objects_cmd(const struct git_hash_algo *algop, - struct write_pack_opts *opts, - struct child_process *cmd, - struct string_list *names) -{ - FILE *out; - int local = write_pack_opts_is_local(opts); - struct strbuf line = STRBUF_INIT; - - out = xfdopen(cmd->out, "r"); - while (strbuf_getline_lf(&line, out) != EOF) { - struct string_list_item *item; - - if (line.len != algop->hexsz) - die(_("repack: Expecting full hex object ID lines only " - "from pack-objects.")); - /* - * Avoid putting packs written outside of the repository in the - * list of names. - */ - if (local) { - item = string_list_append(names, line.buf); - item->util = generated_pack_populate(line.buf, - opts->packtmp); - } - } - fclose(out); - - strbuf_release(&line); - - return finish_command(cmd); -} - static int write_filtered_pack(struct write_pack_opts *opts, struct existing_packs *existing, struct string_list *names) diff --git a/repack.c b/repack.c index 7af297ae48a..8a0e4789fad 100644 --- a/repack.c +++ b/repack.c @@ -83,6 +83,39 @@ int write_pack_opts_is_local(struct write_pack_opts *opts) return skip_prefix(opts->destination, opts->packdir, &scratch); } +int finish_pack_objects_cmd(const struct git_hash_algo *algop, + struct write_pack_opts *opts, + struct child_process *cmd, + struct string_list *names) +{ + FILE *out; + int local = write_pack_opts_is_local(opts); + struct strbuf line = STRBUF_INIT; + + out = xfdopen(cmd->out, "r"); + while (strbuf_getline_lf(&line, out) != EOF) { + struct string_list_item *item; + + if (line.len != algop->hexsz) + die(_("repack: Expecting full hex object ID lines only " + "from pack-objects.")); + /* + * Avoid putting packs written outside of the repository in the + * list of names. + */ + if (local) { + item = string_list_append(names, line.buf); + item->util = generated_pack_populate(line.buf, + opts->packtmp); + } + } + fclose(out); + + strbuf_release(&line); + + return finish_command(cmd); +} + #define DELETE_PACK 1 #define RETAIN_PACK 2 diff --git a/repack.h b/repack.h index 16f2de2ea9a..9351293233c 100644 --- a/repack.h +++ b/repack.h @@ -42,6 +42,11 @@ struct write_pack_opts { const char *write_pack_opts_pack_prefix(struct write_pack_opts *opts); int write_pack_opts_is_local(struct write_pack_opts *opts); +int finish_pack_objects_cmd(const struct git_hash_algo *algop, + struct write_pack_opts *opts, + struct child_process *cmd, + struct string_list *names); + struct repository; struct packed_git; -- GitLab From 7db10535cc5ed1eb18a07f9ce3639bbc7706610e Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:10:25 -0400 Subject: [PATCH 46/65] repack: move `pack_kept_objects` to `struct pack_objects_args` The "pack_kept_objects" variable is defined as static to the repack builtin, but is inherently related to the pack-objects arguments that the builtin uses when generating new packs. Move that field into the "struct pack_objects_args", and shuffle around where we append the corresponding command-line option when preparing a pack-objects process. Specifically: - `write_cruft_pack()` always wants to pass "--honor-pack-keep", so explicitly set the `pack_kept_objects` field to "0" when initializing the `write_pack_opts` struct before calling `write_cruft_pack()`. - `write_filtered_pack()` no longer needs to handle writing the command-line option "--honor-pack-keep" when preparing a pack-objects process, since its call to `prepare_pack_objects()` will have already taken care of that. `write_filtered_pack()` also reads the `pack_kept_objects` field to determine whether to write the existing kept packs with a leading "^" character, so update that to read through the `po_args` pointer instead. - `cmd_repack()` also no longer has to write the "--honor-pack-keep" flag explicitly, since this is also handled via its call to `prepare_pack_objects()`. Since there is a default value for "pack_kept_objects" that relies on whether or not we are writing a bitmap (and not writing a MIDX), extract a default initializer for `struct pack_objects_args` that keeps this conditional default behavior. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 20 +++++++------------- repack-geometry.c | 5 ++--- repack.c | 2 ++ repack.h | 9 ++++++--- 4 files changed, 17 insertions(+), 19 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 836a0066072..9d89217b772 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -33,7 +33,6 @@ #define RETAIN_PACK 2 static int pack_everything; -static int pack_kept_objects = -1; static int write_bitmaps = -1; static int use_delta_islands; static int run_update_server_info = 1; @@ -68,7 +67,7 @@ static int repack_config(const char *var, const char *value, return 0; } if (!strcmp(var, "repack.packkeptobjects")) { - pack_kept_objects = git_config_bool(var, value); + po_args->pack_kept_objects = git_config_bool(var, value); return 0; } if (!strcmp(var, "repack.writebitmaps") || @@ -122,8 +121,6 @@ static int write_filtered_pack(struct write_pack_opts *opts, strvec_push(&cmd.args, "--stdin-packs"); - if (!pack_kept_objects) - strvec_push(&cmd.args, "--honor-pack-keep"); for_each_string_list_item(item, &existing->kept_packs) strvec_pushf(&cmd.args, "--keep-pack=%s", item->string); @@ -146,7 +143,7 @@ static int write_filtered_pack(struct write_pack_opts *opts, fprintf(in, "%s.pack\n", item->string); for_each_string_list_item(item, &existing->cruft_packs) fprintf(in, "%s.pack\n", item->string); - caret = pack_kept_objects ? "" : "^"; + caret = opts->po_args->pack_kept_objects ? "" : "^"; for_each_string_list_item(item, &existing->kept_packs) fprintf(in, "%s%s.pack\n", caret, item->string); fclose(in); @@ -208,7 +205,6 @@ static int write_cruft_pack(struct write_pack_opts *opts, strvec_pushf(&cmd.args, "--cruft-expiration=%s", cruft_expiration); - strvec_push(&cmd.args, "--honor-pack-keep"); strvec_push(&cmd.args, "--non-empty"); cmd.in = -1; @@ -333,7 +329,7 @@ int cmd_repack(int argc, OPT_UNSIGNED(0, "max-pack-size", &po_args.max_pack_size, N_("maximum size of each packfile")), OPT_PARSE_LIST_OBJECTS_FILTER(&po_args.filter_options), - OPT_BOOL(0, "pack-kept-objects", &pack_kept_objects, + OPT_BOOL(0, "pack-kept-objects", &po_args.pack_kept_objects, N_("repack objects in packs marked with .keep")), OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"), N_("do not repack this pack")), @@ -379,8 +375,8 @@ int cmd_repack(int argc, (!(pack_everything & ALL_INTO_ONE) || !is_bare_repository())) write_bitmaps = 0; } - if (pack_kept_objects < 0) - pack_kept_objects = write_bitmaps > 0 && !write_midx; + if (po_args.pack_kept_objects < 0) + po_args.pack_kept_objects = write_bitmaps > 0 && !write_midx; if (write_bitmaps && !(pack_everything & ALL_INTO_ONE) && !write_midx) die(_(incremental_bitmap_conflict_error)); @@ -421,8 +417,7 @@ int cmd_repack(int argc, if (geometry.split_factor) { if (pack_everything) die(_("options '%s' and '%s' cannot be used together"), "--geometric", "-A/-a"); - pack_geometry_init(&geometry, &existing, &po_args, - pack_kept_objects); + pack_geometry_init(&geometry, &existing, &po_args); pack_geometry_split(&geometry); } @@ -431,8 +426,6 @@ int cmd_repack(int argc, show_progress = !po_args.quiet && isatty(2); strvec_push(&cmd.args, "--keep-true-parents"); - if (!pack_kept_objects) - strvec_push(&cmd.args, "--honor-pack-keep"); for (i = 0; i < keep_pack_list.nr; i++) strvec_pushf(&cmd.args, "--keep-pack=%s", keep_pack_list.items[i].string); @@ -577,6 +570,7 @@ int cmd_repack(int argc, cruft_po_args.local = po_args.local; cruft_po_args.quiet = po_args.quiet; cruft_po_args.delta_base_offset = po_args.delta_base_offset; + cruft_po_args.pack_kept_objects = 0; ret = write_cruft_pack(&opts, cruft_expiration, combine_cruft_below_size, &names, diff --git a/repack-geometry.c b/repack-geometry.c index a879f2fe49c..fd4a89a1159 100644 --- a/repack-geometry.c +++ b/repack-geometry.c @@ -26,8 +26,7 @@ static int pack_geometry_cmp(const void *va, const void *vb) void pack_geometry_init(struct pack_geometry *geometry, struct existing_packs *existing, - const struct pack_objects_args *args, - int pack_kept_objects) + const struct pack_objects_args *args) { struct packfile_store *packs = existing->repo->objects->packfiles; struct packed_git *p; @@ -42,7 +41,7 @@ void pack_geometry_init(struct pack_geometry *geometry, */ continue; - if (!pack_kept_objects) { + if (!args->pack_kept_objects) { /* * Any pack that has its pack_keep bit set will * appear in existing->kept_packs below, but diff --git a/repack.c b/repack.c index 8a0e4789fad..1982a481650 100644 --- a/repack.c +++ b/repack.c @@ -38,6 +38,8 @@ void prepare_pack_objects(struct child_process *cmd, strvec_push(&cmd->args, "--quiet"); if (args->delta_base_offset) strvec_push(&cmd->args, "--delta-base-offset"); + if (!args->pack_kept_objects) + strvec_push(&cmd->args, "--honor-pack-keep"); strvec_push(&cmd->args, out); cmd->git_cmd = 1; cmd->out = -1; diff --git a/repack.h b/repack.h index 9351293233c..4a1c4eb6066 100644 --- a/repack.h +++ b/repack.h @@ -17,10 +17,14 @@ struct pack_objects_args { int name_hash_version; int path_walk; int delta_base_offset; + int pack_kept_objects; struct list_objects_filter_options filter_options; }; -#define PACK_OBJECTS_ARGS_INIT { .delta_base_offset = 1 } +#define PACK_OBJECTS_ARGS_INIT { \ + .delta_base_offset = 1, \ + .pack_kept_objects = -1, \ +} struct child_process; @@ -104,8 +108,7 @@ struct pack_geometry { void pack_geometry_init(struct pack_geometry *geometry, struct existing_packs *existing, - const struct pack_objects_args *args, - int pack_kept_objects); + const struct pack_objects_args *args); void pack_geometry_split(struct pack_geometry *geometry); struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry); void pack_geometry_remove_redundant(struct pack_geometry *geometry, -- GitLab From d13115475862d9f699ffd75a661227e198bdf805 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:10:30 -0400 Subject: [PATCH 47/65] repack: move `write_filtered_pack()` out of the builtin In a similar fashion as in previous commits, move the function `write_filtered_pack()` out of the builtin and into its own compilation unit. This function is now part of the repack.h API, but implemented in its own "repack-filtered.c" unit as it is a separate component from other kinds of repacking operations. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Makefile | 1 + builtin/repack.c | 46 ------------------------------------------ meson.build | 1 + repack-filtered.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++ repack.h | 4 ++++ 5 files changed, 57 insertions(+), 46 deletions(-) create mode 100644 repack-filtered.c diff --git a/Makefile b/Makefile index b2142771635..ba4f3bbfa27 100644 --- a/Makefile +++ b/Makefile @@ -1137,6 +1137,7 @@ LIB_OBJS += refs/ref-cache.o LIB_OBJS += refspec.o LIB_OBJS += remote.o LIB_OBJS += repack.o +LIB_OBJS += repack-filtered.o LIB_OBJS += repack-geometry.o LIB_OBJS += repack-midx.o LIB_OBJS += repack-promisor.o diff --git a/builtin/repack.c b/builtin/repack.c index 9d89217b772..a9fc09a24d0 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -106,52 +106,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -static int write_filtered_pack(struct write_pack_opts *opts, - struct existing_packs *existing, - struct string_list *names) -{ - struct child_process cmd = CHILD_PROCESS_INIT; - struct string_list_item *item; - FILE *in; - int ret; - const char *caret; - const char *pack_prefix = write_pack_opts_pack_prefix(opts); - - prepare_pack_objects(&cmd, opts->po_args, opts->destination); - - strvec_push(&cmd.args, "--stdin-packs"); - - for_each_string_list_item(item, &existing->kept_packs) - strvec_pushf(&cmd.args, "--keep-pack=%s", item->string); - - cmd.in = -1; - - ret = start_command(&cmd); - if (ret) - return ret; - - /* - * Here 'names' contains only the pack(s) that were just - * written, which is exactly the packs we want to keep. Also - * 'existing_kept_packs' already contains the packs in - * 'keep_pack_list'. - */ - in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, names) - fprintf(in, "^%s-%s.pack\n", pack_prefix, item->string); - for_each_string_list_item(item, &existing->non_kept_packs) - fprintf(in, "%s.pack\n", item->string); - for_each_string_list_item(item, &existing->cruft_packs) - fprintf(in, "%s.pack\n", item->string); - caret = opts->po_args->pack_kept_objects ? "" : "^"; - for_each_string_list_item(item, &existing->kept_packs) - fprintf(in, "%s%s.pack\n", caret, item->string); - fclose(in); - - return finish_pack_objects_cmd(existing->repo->hash_algo, opts, &cmd, - names); -} - static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, struct existing_packs *existing) { diff --git a/meson.build b/meson.build index 0423ed30c41..7124b158ae8 100644 --- a/meson.build +++ b/meson.build @@ -463,6 +463,7 @@ libgit_sources = [ 'reftable/writer.c', 'remote.c', 'repack.c', + 'repack-filtered.c', 'repack-geometry.c', 'repack-midx.c', 'repack-promisor.c', diff --git a/repack-filtered.c b/repack-filtered.c new file mode 100644 index 00000000000..96c7b02bb6c --- /dev/null +++ b/repack-filtered.c @@ -0,0 +1,51 @@ +#include "git-compat-util.h" +#include "repack.h" +#include "repository.h" +#include "run-command.h" +#include "string-list.h" + +int write_filtered_pack(struct write_pack_opts *opts, + struct existing_packs *existing, + struct string_list *names) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + struct string_list_item *item; + FILE *in; + int ret; + const char *caret; + const char *pack_prefix = write_pack_opts_pack_prefix(opts); + + prepare_pack_objects(&cmd, opts->po_args, opts->destination); + + strvec_push(&cmd.args, "--stdin-packs"); + + for_each_string_list_item(item, &existing->kept_packs) + strvec_pushf(&cmd.args, "--keep-pack=%s", item->string); + + cmd.in = -1; + + ret = start_command(&cmd); + if (ret) + return ret; + + /* + * Here 'names' contains only the pack(s) that were just + * written, which is exactly the packs we want to keep. Also + * 'existing_kept_packs' already contains the packs in + * 'keep_pack_list'. + */ + in = xfdopen(cmd.in, "w"); + for_each_string_list_item(item, names) + fprintf(in, "^%s-%s.pack\n", pack_prefix, item->string); + for_each_string_list_item(item, &existing->non_kept_packs) + fprintf(in, "%s.pack\n", item->string); + for_each_string_list_item(item, &existing->cruft_packs) + fprintf(in, "%s.pack\n", item->string); + caret = opts->po_args->pack_kept_objects ? "" : "^"; + for_each_string_list_item(item, &existing->kept_packs) + fprintf(in, "%s%s.pack\n", caret, item->string); + fclose(in); + + return finish_pack_objects_cmd(existing->repo->hash_algo, opts, &cmd, + names); +} diff --git a/repack.h b/repack.h index 4a1c4eb6066..a7ddbe784b9 100644 --- a/repack.h +++ b/repack.h @@ -133,4 +133,8 @@ struct repack_write_midx_opts { void midx_snapshot_refs(struct repository *repo, struct tempfile *f); int write_midx_included_packs(struct repack_write_midx_opts *opts); +int write_filtered_pack(struct write_pack_opts *opts, + struct existing_packs *existing, + struct string_list *names); + #endif /* REPACK_H */ -- GitLab From 6f8838e96ba9ccdacbbe4cb009f2e9beef43ca88 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:10:34 -0400 Subject: [PATCH 48/65] repack: move `write_cruft_pack()` out of the builtin In an identical fashion as the previous commit, move the function `write_cruft_pack()` into its own compilation unit, and make the function visible through the repack.h API. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Makefile | 1 + builtin/repack.c | 94 --------------------------------------------- meson.build | 1 + repack-cruft.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++ repack.h | 6 +++ 5 files changed, 107 insertions(+), 94 deletions(-) create mode 100644 repack-cruft.c diff --git a/Makefile b/Makefile index ba4f3bbfa27..e3c4bf1b4aa 100644 --- a/Makefile +++ b/Makefile @@ -1137,6 +1137,7 @@ LIB_OBJS += refs/ref-cache.o LIB_OBJS += refspec.o LIB_OBJS += remote.o LIB_OBJS += repack.o +LIB_OBJS += repack-cruft.o LIB_OBJS += repack-filtered.o LIB_OBJS += repack-geometry.o LIB_OBJS += repack-midx.o diff --git a/builtin/repack.c b/builtin/repack.c index a9fc09a24d0..9171ca66a73 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -106,100 +106,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, - struct existing_packs *existing) -{ - struct packfile_store *packs = existing->repo->objects->packfiles; - struct packed_git *p; - struct strbuf buf = STRBUF_INIT; - size_t i; - - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { - if (!(p->is_cruft && p->pack_local)) - continue; - - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(p)); - strbuf_strip_suffix(&buf, ".pack"); - - if (!string_list_has_string(&existing->cruft_packs, buf.buf)) - continue; - - if (p->pack_size < combine_cruft_below_size) { - fprintf(in, "-%s\n", pack_basename(p)); - } else { - existing_packs_retain_cruft(existing, p); - fprintf(in, "%s\n", pack_basename(p)); - } - } - - for (i = 0; i < existing->non_kept_packs.nr; i++) - fprintf(in, "-%s.pack\n", - existing->non_kept_packs.items[i].string); - - strbuf_release(&buf); -} - -static int write_cruft_pack(struct write_pack_opts *opts, - const char *cruft_expiration, - unsigned long combine_cruft_below_size, - struct string_list *names, - struct existing_packs *existing) -{ - struct child_process cmd = CHILD_PROCESS_INIT; - struct string_list_item *item; - FILE *in; - int ret; - const char *pack_prefix = write_pack_opts_pack_prefix(opts); - - prepare_pack_objects(&cmd, opts->po_args, opts->destination); - - strvec_push(&cmd.args, "--cruft"); - if (cruft_expiration) - strvec_pushf(&cmd.args, "--cruft-expiration=%s", - cruft_expiration); - - strvec_push(&cmd.args, "--non-empty"); - - cmd.in = -1; - - ret = start_command(&cmd); - if (ret) - return ret; - - /* - * names has a confusing double use: it both provides the list - * of just-written new packs, and accepts the name of the cruft - * pack we are writing. - * - * By the time it is read here, it contains only the pack(s) - * that were just written, which is exactly the set of packs we - * want to consider kept. - * - * If `--expire-to` is given, the double-use served by `names` - * ensures that the pack written to `--expire-to` excludes any - * objects contained in the cruft pack. - */ - in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, names) - fprintf(in, "%s-%s.pack\n", pack_prefix, item->string); - if (combine_cruft_below_size && !cruft_expiration) { - combine_small_cruft_packs(in, combine_cruft_below_size, - existing); - } else { - for_each_string_list_item(item, &existing->non_kept_packs) - fprintf(in, "-%s.pack\n", item->string); - for_each_string_list_item(item, &existing->cruft_packs) - fprintf(in, "-%s.pack\n", item->string); - } - for_each_string_list_item(item, &existing->kept_packs) - fprintf(in, "%s.pack\n", item->string); - fclose(in); - - return finish_pack_objects_cmd(existing->repo->hash_algo, opts, &cmd, - names); -} - int cmd_repack(int argc, const char **argv, const char *prefix, diff --git a/meson.build b/meson.build index 7124b158ae8..39152b37ba1 100644 --- a/meson.build +++ b/meson.build @@ -463,6 +463,7 @@ libgit_sources = [ 'reftable/writer.c', 'remote.c', 'repack.c', + 'repack-cruft.c', 'repack-filtered.c', 'repack-geometry.c', 'repack-midx.c', diff --git a/repack-cruft.c b/repack-cruft.c new file mode 100644 index 00000000000..accb98bcdbc --- /dev/null +++ b/repack-cruft.c @@ -0,0 +1,99 @@ +#include "git-compat-util.h" +#include "repack.h" +#include "packfile.h" +#include "repository.h" +#include "run-command.h" + +static void combine_small_cruft_packs(FILE *in, off_t combine_cruft_below_size, + struct existing_packs *existing) +{ + struct packfile_store *packs = existing->repo->objects->packfiles; + struct packed_git *p; + struct strbuf buf = STRBUF_INIT; + size_t i; + + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + if (!(p->is_cruft && p->pack_local)) + continue; + + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + + if (!string_list_has_string(&existing->cruft_packs, buf.buf)) + continue; + + if (p->pack_size < combine_cruft_below_size) { + fprintf(in, "-%s\n", pack_basename(p)); + } else { + existing_packs_retain_cruft(existing, p); + fprintf(in, "%s\n", pack_basename(p)); + } + } + + for (i = 0; i < existing->non_kept_packs.nr; i++) + fprintf(in, "-%s.pack\n", + existing->non_kept_packs.items[i].string); + + strbuf_release(&buf); +} + +int write_cruft_pack(struct write_pack_opts *opts, + const char *cruft_expiration, + unsigned long combine_cruft_below_size, + struct string_list *names, + struct existing_packs *existing) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + struct string_list_item *item; + FILE *in; + int ret; + const char *pack_prefix = write_pack_opts_pack_prefix(opts); + + prepare_pack_objects(&cmd, opts->po_args, opts->destination); + + strvec_push(&cmd.args, "--cruft"); + if (cruft_expiration) + strvec_pushf(&cmd.args, "--cruft-expiration=%s", + cruft_expiration); + + strvec_push(&cmd.args, "--non-empty"); + + cmd.in = -1; + + ret = start_command(&cmd); + if (ret) + return ret; + + /* + * names has a confusing double use: it both provides the list + * of just-written new packs, and accepts the name of the cruft + * pack we are writing. + * + * By the time it is read here, it contains only the pack(s) + * that were just written, which is exactly the set of packs we + * want to consider kept. + * + * If `--expire-to` is given, the double-use served by `names` + * ensures that the pack written to `--expire-to` excludes any + * objects contained in the cruft pack. + */ + in = xfdopen(cmd.in, "w"); + for_each_string_list_item(item, names) + fprintf(in, "%s-%s.pack\n", pack_prefix, item->string); + if (combine_cruft_below_size && !cruft_expiration) { + combine_small_cruft_packs(in, combine_cruft_below_size, + existing); + } else { + for_each_string_list_item(item, &existing->non_kept_packs) + fprintf(in, "-%s.pack\n", item->string); + for_each_string_list_item(item, &existing->cruft_packs) + fprintf(in, "-%s.pack\n", item->string); + } + for_each_string_list_item(item, &existing->kept_packs) + fprintf(in, "%s.pack\n", item->string); + fclose(in); + + return finish_pack_objects_cmd(existing->repo->hash_algo, opts, &cmd, + names); +} diff --git a/repack.h b/repack.h index a7ddbe784b9..15886c34886 100644 --- a/repack.h +++ b/repack.h @@ -137,4 +137,10 @@ int write_filtered_pack(struct write_pack_opts *opts, struct existing_packs *existing, struct string_list *names); +int write_cruft_pack(struct write_pack_opts *opts, + const char *cruft_expiration, + unsigned long combine_cruft_below_size, + struct string_list *names, + struct existing_packs *existing); + #endif /* REPACK_H */ -- GitLab From e056dec3d6d09bba280b85d47ac1cd589c013e9f Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:10:38 -0400 Subject: [PATCH 49/65] builtin/repack.c: clean up unused `#include`s Over the past several dozen commits, we have moved a large amount of functionality out of the repack builtin and into other files like repack.c, repack-cruft.c, repack-filtered.c, repack-midx.c, and repack-promisor.c. These files specify the minimal set of `#include`s that they need to compile successfully, but we did not change the set of `#include`s in the repack builtin itself. Now that the code movement is complete, let's clean up that set of `#include`s and trim down the builtin to include the minimal amount of external headers necessary to compile. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 9171ca66a73..ad60c4290d4 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -4,26 +4,17 @@ #include "builtin.h" #include "config.h" #include "environment.h" -#include "gettext.h" -#include "hex.h" #include "parse-options.h" #include "path.h" #include "run-command.h" #include "server-info.h" -#include "strbuf.h" #include "string-list.h" -#include "strvec.h" #include "midx.h" #include "packfile.h" #include "prune-packed.h" -#include "odb.h" #include "promisor-remote.h" #include "repack.h" #include "shallow.h" -#include "pack.h" -#include "pack-bitmap.h" -#include "refs.h" -#include "list-objects-filter-options.h" #define ALL_INTO_ONE 1 #define LOOSEN_UNREACHABLE 2 -- GitLab From c886af90f8d25bd73c841a9925b8f03e1b232910 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 29 Sep 2025 15:40:59 -0700 Subject: [PATCH 50/65] SQUASH??? play well with other topics by preemptively including "repository.h" --- repack-geometry.c | 1 + 1 file changed, 1 insertion(+) diff --git a/repack-geometry.c b/repack-geometry.c index fd4a89a1159..56d651c9cee 100644 --- a/repack-geometry.c +++ b/repack-geometry.c @@ -1,6 +1,7 @@ #define DISABLE_SIGN_COMPARE_WARNINGS #include "git-compat-util.h" +#include #include "repack.h" #include "hex.h" #include "packfile.h" -- GitLab From fd319f689cda39531d475635729ae26edc92c096 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 9 Oct 2025 10:01:35 +0200 Subject: [PATCH 51/65] object-name: convert to use `packfile_store_get_all_packs()` When searching for abbreviated or when trying to disambiguate object IDs we do this in two steps: 1. We search through the multi-pack index. 2. We search through all packfiles not part of any multi-pack index. The second step uses `packfile_store_get_packs()`, which knows to skip loading any packfiles that are indexed by an MIDX; this is exactly what we want. But that function is somewhat problematic, as its behaviour is stateful and is influenced by `packfile_store_get_all_packs()`. This function basically does the same as `packfile_store_get_packs()`, but in addition it also loads all packfiles indexed by an MIDX. The problem here is that both of these functions act on the same linked list of packfiles, and thus depending on whether or not `get_all_packs()` was called the result returned by `get_packs()` will be different. Consequently, all callers of `get_packs()` need to be prepared to see MIDX'd packs even though these should in theory be excluded. This interface is confusing and thus potentially dangerous, which is why we're converting all callers of `get_packs()` to use `get_all_packs()` instead. Do so for the above functions in "object-name.c". As explained, we already know to skip any MIDX'd packs in both `find_abbrev_len_packed()` and `find_short_packed_object()`, so it's fine to start loading MIDX'd packfiles. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-name.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/object-name.c b/object-name.c index f6902e140dd..4e62bfa330e 100644 --- a/object-name.c +++ b/object-name.c @@ -213,7 +213,7 @@ static void find_short_packed_object(struct disambiguate_state *ds) unique_in_midx(m, ds); } - for (p = packfile_store_get_packs(ds->repo->objects->packfiles); p && !ds->ambiguous; + for (p = packfile_store_get_all_packs(ds->repo->objects->packfiles); p && !ds->ambiguous; p = p->next) unique_in_pack(p, ds); } @@ -805,7 +805,7 @@ static void find_abbrev_len_packed(struct min_abbrev_data *mad) find_abbrev_len_for_midx(m, mad); } - for (p = packfile_store_get_packs(mad->repo->objects->packfiles); p; p = p->next) + for (p = packfile_store_get_all_packs(mad->repo->objects->packfiles); p; p = p->next) find_abbrev_len_for_pack(p, mad); } -- GitLab From c1d293e987e845b245dac5f7b9716258472202fd Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 9 Oct 2025 10:01:36 +0200 Subject: [PATCH 52/65] builtin/gc: convert to use `packfile_store_get_all_packs()` When running maintenance tasks via git-maintenance(1) we have a couple of auto-conditions that check whether or not a specific task should be running. One such check is for incremental repacks, which essentially use `git multi-pack-index repack` to repack a set of smaller packfiles into one larger packfile. The auto-condition for this task checks how many packfiles there are that aren't indexed by any multi-pack index. If there is a sufficient number then we execute the above command to combine those into a single pack and add that pack to the MIDX. As we don't care about MIDX'd packs we use `packfile_store_get_packs()`, which knows to not load any packs that are indexed by a MIDX. But as explained in the preceding commit, we want to get rid of that function. We already handle packfiles that have a MIDX by the very nature of this function, as we explicitly count non-MIDX'd packs. As such, we can trivially switch over to use `packfile_store_get_all_packs()` instead. Do so. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/gc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/gc.c b/builtin/gc.c index e19e13d9788..ab6d6d3bd1b 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1425,7 +1425,7 @@ static int incremental_repack_auto_condition(struct gc_config *cfg UNUSED) if (incremental_repack_auto_limit < 0) return 1; - for (p = packfile_store_get_packs(the_repository->objects->packfiles); + for (p = packfile_store_get_all_packs(the_repository->objects->packfiles); count < incremental_repack_auto_limit && p; p = p->next) { if (!p->multi_pack_index) -- GitLab From 70164b92df30b6548b918cc4764f14fb2a9ceb30 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 9 Oct 2025 10:01:37 +0200 Subject: [PATCH 53/65] builtin/grep: simplify how we preload packs When using multiple threads in git-grep(1) we eagerly preload both the gitmodules file as well as the packfiles so that the threads won't race with one another to initialize these data structures. For packfiles, this is done by calling `packfile_store_get_packs()`, which first loads our packfiles and then returns a pointer to the first such packfile. This pointer is ignored though, as all we really care about is that `packfile_store_prepare()` was called. Historically, that function was file-local to "packfile.c", but that changed with 4188332569 (packfile: move `get_multi_pack_index()` into "midx.c", 2025-09-02). We can thus simplify the code by calling that function directly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/grep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/grep.c b/builtin/grep.c index 13841fbf00f..53cccf2d250 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1214,7 +1214,7 @@ int cmd_grep(int argc, if (recurse_submodules) repo_read_gitmodules(the_repository, 1); if (startup_info->have_repository) - (void)packfile_store_get_packs(the_repository->objects->packfiles); + packfile_store_prepare(the_repository->objects->packfiles); start_threads(&opt); } else { -- GitLab From 4bf92a12f5a81a24d94455cf8465aa3cbcd75234 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 9 Oct 2025 10:01:38 +0200 Subject: [PATCH 54/65] packfile: drop `packfile_store_get_packs()` In the preceding commits we have removed all remaining callers of `packfile_store_get_packs()`, the function is thus unused now. Remove it. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- packfile.c | 6 ------ packfile.h | 6 ------ 2 files changed, 12 deletions(-) diff --git a/packfile.c b/packfile.c index 5a7caec2925..db748b0bd48 100644 --- a/packfile.c +++ b/packfile.c @@ -1027,12 +1027,6 @@ void packfile_store_reprepare(struct packfile_store *store) packfile_store_prepare(store); } -struct packed_git *packfile_store_get_packs(struct packfile_store *store) -{ - packfile_store_prepare(store); - return store->packs; -} - struct packed_git *packfile_store_get_all_packs(struct packfile_store *store) { packfile_store_prepare(store); diff --git a/packfile.h b/packfile.h index e7a5792b6cf..3f38c63476d 100644 --- a/packfile.h +++ b/packfile.h @@ -136,12 +136,6 @@ void packfile_store_reprepare(struct packfile_store *store); void packfile_store_add_pack(struct packfile_store *store, struct packed_git *pack); -/* - * Get packs managed by the given store. Does not load the MIDX or any packs - * referenced by it. - */ -struct packed_git *packfile_store_get_packs(struct packfile_store *store); - /* * Get all packs managed by the given store, including packfiles that are * referenced by multi-pack indices. -- GitLab From 0da80dda3c7d1f79167a5afe4503fed9a23e1a1d Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 9 Oct 2025 10:01:39 +0200 Subject: [PATCH 55/65] packfile: introduce macro to iterate through packs We have a bunch of different sites that want to iterate through all packs of a given `struct packfile_store`. This pattern is somewhat verbose and repetitive, which makes it somewhat cumbersome. Introduce a new macro `repo_for_each_pack()` that removes some of the boilerplate. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 3 +-- builtin/count-objects.c | 3 +-- builtin/fsck.c | 15 ++++++--------- builtin/gc.c | 16 +++++++--------- builtin/pack-objects.c | 22 +++++++--------------- builtin/pack-redundant.c | 14 ++++---------- connected.c | 3 +-- http-backend.c | 5 ++--- http.c | 3 +-- object-name.c | 8 +++++--- pack-bitmap.c | 6 +++--- pack-objects.c | 5 ++--- packfile.c | 4 ++-- packfile.h | 8 ++++++++ repack-cruft.c | 3 +-- repack-geometry.c | 3 +-- repack.c | 3 +-- server-info.c | 3 +-- t/helper/test-find-pack.c | 3 ++- t/helper/test-pack-mtimes.c | 2 +- 20 files changed, 57 insertions(+), 75 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index ee6715fa523..0ab076aeb30 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -852,10 +852,9 @@ static void batch_each_object(struct batch_options *opt, if (bitmap && !for_each_bitmapped_object(bitmap, &opt->objects_filter, batch_one_object_bitmapped, &payload)) { - struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *pack; - for (pack = packfile_store_get_all_packs(packs); pack; pack = pack->next) { + repo_for_each_pack(the_repository, pack) { if (bitmap_index_contains_pack(bitmap, pack) || open_pack_index(pack)) continue; diff --git a/builtin/count-objects.c b/builtin/count-objects.c index f2f407c2a78..18f6e33b6f9 100644 --- a/builtin/count-objects.c +++ b/builtin/count-objects.c @@ -122,7 +122,6 @@ int cmd_count_objects(int argc, count_loose, count_cruft, NULL, NULL); if (verbose) { - struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; unsigned long num_pack = 0; off_t size_pack = 0; @@ -130,7 +129,7 @@ int cmd_count_objects(int argc, struct strbuf pack_buf = STRBUF_INIT; struct strbuf garbage_buf = STRBUF_INIT; - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + repo_for_each_pack(the_repository, p) { if (!p->pack_local) continue; if (open_pack_index(p)) diff --git a/builtin/fsck.c b/builtin/fsck.c index 8ee95e0d67c..b1a650c6731 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -867,20 +867,20 @@ static int mark_packed_for_connectivity(const struct object_id *oid, static int check_pack_rev_indexes(struct repository *r, int show_progress) { - struct packfile_store *packs = r->objects->packfiles; struct progress *progress = NULL; + struct packed_git *p; uint32_t pack_count = 0; int res = 0; if (show_progress) { - for (struct packed_git *p = packfile_store_get_all_packs(packs); p; p = p->next) + repo_for_each_pack(r, p) pack_count++; progress = start_delayed_progress(the_repository, "Verifying reverse pack-indexes", pack_count); pack_count = 0; } - for (struct packed_git *p = packfile_store_get_all_packs(packs); p; p = p->next) { + repo_for_each_pack(r, p) { int load_error = load_pack_revindex_from_disk(p); if (load_error < 0) { @@ -1000,8 +1000,6 @@ int cmd_fsck(int argc, for_each_packed_object(the_repository, mark_packed_for_connectivity, NULL, 0); } else { - struct packfile_store *packs = the_repository->objects->packfiles; - odb_prepare_alternates(the_repository->objects); for (source = the_repository->objects->sources; source; source = source->next) fsck_source(source); @@ -1012,8 +1010,7 @@ int cmd_fsck(int argc, struct progress *progress = NULL; if (show_progress) { - for (p = packfile_store_get_all_packs(packs); p; - p = p->next) { + repo_for_each_pack(the_repository, p) { if (open_pack_index(p)) continue; total += p->num_objects; @@ -1022,8 +1019,8 @@ int cmd_fsck(int argc, progress = start_progress(the_repository, _("Checking objects"), total); } - for (p = packfile_store_get_all_packs(packs); p; - p = p->next) { + + repo_for_each_pack(the_repository, p) { /* verify gives error messages itself */ if (verify_pack(the_repository, p, fsck_obj_buffer, diff --git a/builtin/gc.c b/builtin/gc.c index ab6d6d3bd1b..541d7471f19 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -487,10 +487,9 @@ static int too_many_loose_objects(struct gc_config *cfg) static struct packed_git *find_base_packs(struct string_list *packs, unsigned long limit) { - struct packfile_store *packfiles = the_repository->objects->packfiles; struct packed_git *p, *base = NULL; - for (p = packfile_store_get_all_packs(packfiles); p; p = p->next) { + repo_for_each_pack(the_repository, p) { if (!p->pack_local || p->is_cruft) continue; if (limit) { @@ -509,14 +508,13 @@ static struct packed_git *find_base_packs(struct string_list *packs, static int too_many_packs(struct gc_config *cfg) { - struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; - int cnt; + int cnt = 0; if (cfg->gc_auto_pack_limit <= 0) return 0; - for (cnt = 0, p = packfile_store_get_all_packs(packs); p; p = p->next) { + repo_for_each_pack(the_repository, p) { if (!p->pack_local) continue; if (p->pack_keep) @@ -1425,9 +1423,9 @@ static int incremental_repack_auto_condition(struct gc_config *cfg UNUSED) if (incremental_repack_auto_limit < 0) return 1; - for (p = packfile_store_get_all_packs(the_repository->objects->packfiles); - count < incremental_repack_auto_limit && p; - p = p->next) { + repo_for_each_pack(the_repository, p) { + if (count >= incremental_repack_auto_limit) + break; if (!p->multi_pack_index) count++; } @@ -1494,7 +1492,7 @@ static off_t get_auto_pack_size(void) struct repository *r = the_repository; odb_reprepare(r->objects); - for (p = packfile_store_get_all_packs(r->objects->packfiles); p; p = p->next) { + repo_for_each_pack(r, p) { if (p->pack_size > max_size) { second_largest_size = max_size; max_size = p->pack_size; diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index fe987fbb8b6..50618e1073e 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3831,12 +3831,10 @@ static int pack_mtime_cmp(const void *_a, const void *_b) static void read_packs_list_from_stdin(struct rev_info *revs) { - struct packfile_store *packs = the_repository->objects->packfiles; struct strbuf buf = STRBUF_INIT; struct string_list include_packs = STRING_LIST_INIT_DUP; struct string_list exclude_packs = STRING_LIST_INIT_DUP; struct string_list_item *item = NULL; - struct packed_git *p; while (strbuf_getline(&buf, stdin) != EOF) { @@ -3856,7 +3854,7 @@ static void read_packs_list_from_stdin(struct rev_info *revs) string_list_sort(&exclude_packs); string_list_remove_duplicates(&exclude_packs, 0); - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + repo_for_each_pack(the_repository, p) { const char *pack_name = pack_basename(p); if ((item = string_list_lookup(&include_packs, pack_name))) @@ -4077,7 +4075,6 @@ static void enumerate_cruft_objects(void) static void enumerate_and_traverse_cruft_objects(struct string_list *fresh_packs) { - struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; struct rev_info revs; int ret; @@ -4107,7 +4104,7 @@ static void enumerate_and_traverse_cruft_objects(struct string_list *fresh_packs * Re-mark only the fresh packs as kept so that objects in * unknown packs do not halt the reachability traversal early. */ - for (p = packfile_store_get_all_packs(packs); p; p = p->next) + repo_for_each_pack(the_repository, p) p->pack_keep_in_core = 0; mark_pack_kept_in_core(fresh_packs, 1); @@ -4124,7 +4121,6 @@ static void enumerate_and_traverse_cruft_objects(struct string_list *fresh_packs static void read_cruft_objects(void) { - struct packfile_store *packs = the_repository->objects->packfiles; struct strbuf buf = STRBUF_INIT; struct string_list discard_packs = STRING_LIST_INIT_DUP; struct string_list fresh_packs = STRING_LIST_INIT_DUP; @@ -4145,7 +4141,7 @@ static void read_cruft_objects(void) string_list_sort(&discard_packs); string_list_sort(&fresh_packs); - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + repo_for_each_pack(the_repository, p) { const char *pack_name = pack_basename(p); struct string_list_item *item; @@ -4440,13 +4436,12 @@ static int loosened_object_can_be_discarded(const struct object_id *oid, static void loosen_unused_packed_objects(void) { - struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; uint32_t i; uint32_t loosened_objects_nr = 0; struct object_id oid; - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + repo_for_each_pack(the_repository, p) { if (!p->pack_local || p->pack_keep || p->pack_keep_in_core) continue; @@ -4747,13 +4742,12 @@ static void get_object_list(struct rev_info *revs, struct strvec *argv) static void add_extra_kept_packs(const struct string_list *names) { - struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; if (!names->nr) return; - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + repo_for_each_pack(the_repository, p) { const char *name = basename(p->pack_name); int i; @@ -5191,10 +5185,9 @@ int cmd_pack_objects(int argc, add_extra_kept_packs(&keep_pack_list); if (ignore_packed_keep_on_disk) { - struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; - for (p = packfile_store_get_all_packs(packs); p; p = p->next) + repo_for_each_pack(the_repository, p) if (p->pack_local && p->pack_keep) break; if (!p) /* no keep-able packs found */ @@ -5206,10 +5199,9 @@ int cmd_pack_objects(int argc, * want to unset "local" based on looking at packs, as * it also covers non-local objects */ - struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + repo_for_each_pack(the_repository, p) { if (!p->pack_local) { have_non_local_packs = 1; break; diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c index dd28171f0a1..fca7f195d6d 100644 --- a/builtin/pack-redundant.c +++ b/builtin/pack-redundant.c @@ -566,29 +566,23 @@ static struct pack_list * add_pack(struct packed_git *p) static struct pack_list * add_pack_file(const char *filename) { - struct packfile_store *packs = the_repository->objects->packfiles; - struct packed_git *p = packfile_store_get_all_packs(packs); + struct packed_git *p; if (strlen(filename) < 40) die("Bad pack filename: %s", filename); - while (p) { + repo_for_each_pack(the_repository, p) if (strstr(p->pack_name, filename)) return add_pack(p); - p = p->next; - } die("Filename %s not found in packed_git", filename); } static void load_all(void) { - struct packfile_store *packs = the_repository->objects->packfiles; - struct packed_git *p = packfile_store_get_all_packs(packs); + struct packed_git *p; - while (p) { + repo_for_each_pack(the_repository, p) add_pack(p); - p = p->next; - } } int cmd_pack_redundant(int argc, const char **argv, const char *prefix UNUSED, struct repository *repo UNUSED) { diff --git a/connected.c b/connected.c index b288a18b17c..79403108dd8 100644 --- a/connected.c +++ b/connected.c @@ -74,10 +74,9 @@ int check_connected(oid_iterate_fn fn, void *cb_data, */ odb_reprepare(the_repository->objects); do { - struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + repo_for_each_pack(the_repository, p) { if (!p->pack_promisor) continue; if (find_pack_entry_one(oid, p)) diff --git a/http-backend.c b/http-backend.c index 9084058f1e9..52f0483dd30 100644 --- a/http-backend.c +++ b/http-backend.c @@ -603,19 +603,18 @@ static void get_head(struct strbuf *hdr, char *arg UNUSED) static void get_info_packs(struct strbuf *hdr, char *arg UNUSED) { size_t objdirlen = strlen(repo_get_object_directory(the_repository)); - struct packfile_store *packs = the_repository->objects->packfiles; struct strbuf buf = STRBUF_INIT; struct packed_git *p; size_t cnt = 0; select_getanyfile(hdr); - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + repo_for_each_pack(the_repository, p) { if (p->pack_local) cnt++; } strbuf_grow(&buf, cnt * 53 + 2); - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + repo_for_each_pack(the_repository, p) { if (p->pack_local) strbuf_addf(&buf, "P %s\n", p->pack_name + objdirlen + 6); } diff --git a/http.c b/http.c index 7e3af1e72f7..17130823f00 100644 --- a/http.c +++ b/http.c @@ -2416,7 +2416,6 @@ static char *fetch_pack_index(unsigned char *hash, const char *base_url) static int fetch_and_setup_pack_index(struct packed_git **packs_head, unsigned char *sha1, const char *base_url) { - struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *new_pack, *p; char *tmp_idx = NULL; int ret; @@ -2425,7 +2424,7 @@ static int fetch_and_setup_pack_index(struct packed_git **packs_head, * If we already have the pack locally, no need to fetch its index or * even add it to list; we already have all of its objects. */ - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + repo_for_each_pack(the_repository, p) { if (hasheq(p->hash, sha1, the_repository->hash_algo)) return 0; } diff --git a/object-name.c b/object-name.c index 4e62bfa330e..766c757042a 100644 --- a/object-name.c +++ b/object-name.c @@ -213,9 +213,11 @@ static void find_short_packed_object(struct disambiguate_state *ds) unique_in_midx(m, ds); } - for (p = packfile_store_get_all_packs(ds->repo->objects->packfiles); p && !ds->ambiguous; - p = p->next) + repo_for_each_pack(ds->repo, p) { + if (ds->ambiguous) + break; unique_in_pack(p, ds); + } } static int finish_object_disambiguation(struct disambiguate_state *ds, @@ -805,7 +807,7 @@ static void find_abbrev_len_packed(struct min_abbrev_data *mad) find_abbrev_len_for_midx(m, mad); } - for (p = packfile_store_get_all_packs(mad->repo->objects->packfiles); p; p = p->next) + repo_for_each_pack(mad->repo, p) find_abbrev_len_for_pack(p, mad); } diff --git a/pack-bitmap.c b/pack-bitmap.c index ac71035d771..291e1a9cf47 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -664,7 +664,7 @@ static int open_pack_bitmap(struct repository *r, struct packed_git *p; int ret = -1; - for (p = packfile_store_get_all_packs(r->objects->packfiles); p; p = p->next) { + repo_for_each_pack(r, p) { if (open_pack_bitmap_1(bitmap_git, p) == 0) { ret = 0; /* @@ -3347,6 +3347,7 @@ static int verify_bitmap_file(const struct git_hash_algo *algop, int verify_bitmap_files(struct repository *r) { struct odb_source *source; + struct packed_git *p; int res = 0; odb_prepare_alternates(r->objects); @@ -3362,8 +3363,7 @@ int verify_bitmap_files(struct repository *r) free(midx_bitmap_name); } - for (struct packed_git *p = packfile_store_get_all_packs(r->objects->packfiles); - p; p = p->next) { + repo_for_each_pack(r, p) { char *pack_bitmap_name = pack_bitmap_filename(p); res |= verify_bitmap_file(r->hash_algo, pack_bitmap_name); free(pack_bitmap_name); diff --git a/pack-objects.c b/pack-objects.c index d8eb6797354..d6adf0759cc 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -86,7 +86,6 @@ struct object_entry *packlist_find(struct packing_data *pdata, static void prepare_in_pack_by_idx(struct packing_data *pdata) { - struct packfile_store *packs = pdata->repo->objects->packfiles; struct packed_git **mapping, *p; int cnt = 0, nr = 1U << OE_IN_PACK_BITS; @@ -96,13 +95,13 @@ static void prepare_in_pack_by_idx(struct packing_data *pdata) * (i.e. in_pack_idx also zero) should return NULL. */ mapping[cnt++] = NULL; - for (p = packfile_store_get_all_packs(packs); p; p = p->next, cnt++) { + repo_for_each_pack(pdata->repo, p) { if (cnt == nr) { free(mapping); return; } p->index = cnt; - mapping[cnt] = p; + mapping[cnt++] = p; } pdata->in_pack_by_idx = mapping; } diff --git a/packfile.c b/packfile.c index db748b0bd48..ab5859518d4 100644 --- a/packfile.c +++ b/packfile.c @@ -2099,7 +2099,7 @@ struct packed_git **kept_pack_cache(struct repository *r, unsigned flags) * covers, one kept and one not kept, but the midx returns only * the non-kept version. */ - for (p = packfile_store_get_all_packs(r->objects->packfiles); p; p = p->next) { + repo_for_each_pack(r, p) { if ((p->pack_keep && (flags & ON_DISK_KEEP_PACKS)) || (p->pack_keep_in_core && (flags & IN_CORE_KEEP_PACKS))) { ALLOC_GROW(packs, nr + 1, alloc); @@ -2196,7 +2196,7 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, int r = 0; int pack_errors = 0; - for (p = packfile_store_get_all_packs(repo->objects->packfiles); p; p = p->next) { + repo_for_each_pack(repo, p) { if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) continue; if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) && diff --git a/packfile.h b/packfile.h index 3f38c63476d..49484a9b09b 100644 --- a/packfile.h +++ b/packfile.h @@ -136,6 +136,14 @@ void packfile_store_reprepare(struct packfile_store *store); void packfile_store_add_pack(struct packfile_store *store, struct packed_git *pack); +/* + * Load and iterate through all packs of the given repository. This helper + * function will yield packfiles from all object sources connected to the + * repository. + */ +#define repo_for_each_pack(repo, p) \ + for (p = packfile_store_get_all_packs(repo->objects->packfiles); p; p = p->next) + /* * Get all packs managed by the given store, including packfiles that are * referenced by multi-pack indices. diff --git a/repack-cruft.c b/repack-cruft.c index accb98bcdbc..544a6f2df3a 100644 --- a/repack-cruft.c +++ b/repack-cruft.c @@ -7,12 +7,11 @@ static void combine_small_cruft_packs(FILE *in, off_t combine_cruft_below_size, struct existing_packs *existing) { - struct packfile_store *packs = existing->repo->objects->packfiles; struct packed_git *p; struct strbuf buf = STRBUF_INIT; size_t i; - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + repo_for_each_pack(existing->repo, p) { if (!(p->is_cruft && p->pack_local)) continue; diff --git a/repack-geometry.c b/repack-geometry.c index 56d651c9cee..4e51003eccc 100644 --- a/repack-geometry.c +++ b/repack-geometry.c @@ -29,11 +29,10 @@ void pack_geometry_init(struct pack_geometry *geometry, struct existing_packs *existing, const struct pack_objects_args *args) { - struct packfile_store *packs = existing->repo->objects->packfiles; struct packed_git *p; struct strbuf buf = STRBUF_INIT; - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + repo_for_each_pack(existing->repo, p) { if (args->local && !p->pack_local) /* * When asked to only repack local packfiles we skip diff --git a/repack.c b/repack.c index 1982a481650..d690e687184 100644 --- a/repack.c +++ b/repack.c @@ -124,11 +124,10 @@ int finish_pack_objects_cmd(const struct git_hash_algo *algop, void existing_packs_collect(struct existing_packs *existing, const struct string_list *extra_keep) { - struct packfile_store *packs = existing->repo->objects->packfiles; struct packed_git *p; struct strbuf buf = STRBUF_INIT; - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + repo_for_each_pack(existing->repo, p) { size_t i; const char *base; diff --git a/server-info.c b/server-info.c index 1d33de821e9..b9a710544ab 100644 --- a/server-info.c +++ b/server-info.c @@ -287,13 +287,12 @@ static int compare_info(const void *a_, const void *b_) static void init_pack_info(struct repository *r, const char *infofile, int force) { - struct packfile_store *packs = r->objects->packfiles; struct packed_git *p; int stale; int i; size_t alloc = 0; - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + repo_for_each_pack(r, p) { /* we ignore things on alternate path since they are * not available to the pullers in general. */ diff --git a/t/helper/test-find-pack.c b/t/helper/test-find-pack.c index e001dc3066d..fc4b8a77b30 100644 --- a/t/helper/test-find-pack.c +++ b/t/helper/test-find-pack.c @@ -39,11 +39,12 @@ int cmd__find_pack(int argc, const char **argv) if (repo_get_oid(the_repository, argv[0], &oid)) die("cannot parse %s as an object name", argv[0]); - for (p = packfile_store_get_all_packs(the_repository->objects->packfiles); p; p = p->next) + repo_for_each_pack(the_repository, p) { if (find_pack_entry_one(&oid, p)) { printf("%s\n", p->pack_name); actual_count++; } + } if (count > -1 && count != actual_count) die("bad packfile count %d instead of %d", actual_count, count); diff --git a/t/helper/test-pack-mtimes.c b/t/helper/test-pack-mtimes.c index 7c428c16011..7a8ee1de24b 100644 --- a/t/helper/test-pack-mtimes.c +++ b/t/helper/test-pack-mtimes.c @@ -37,7 +37,7 @@ int cmd__pack_mtimes(int argc, const char **argv) if (argc != 2) usage(pack_mtimes_usage); - for (p = packfile_store_get_all_packs(the_repository->objects->packfiles); p; p = p->next) { + repo_for_each_pack(the_repository, p) { strbuf_addstr(&buf, basename(p->pack_name)); strbuf_strip_suffix(&buf, ".pack"); strbuf_addstr(&buf, ".mtimes"); -- GitLab From b009b55297aa8a771f7c49c223be85ca75d6f78f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 9 Oct 2025 10:01:40 +0200 Subject: [PATCH 56/65] packfile: rename `packfile_store_get_all_packs()` In a preceding commit we have removed `packfile_store_get_packs()`. With this function removed it's somewhat useless to still have the "all" infix in `packfile_store_get_all_packs()`. Rename the latter to drop that infix. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/fast-import.c | 4 ++-- builtin/pack-objects.c | 4 ++-- packfile.c | 2 +- packfile.h | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index b1d5549815a..fea914cf9eb 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -976,7 +976,7 @@ static int store_object( if (e->idx.offset) { duplicate_count_by_type[type]++; return 1; - } else if (find_oid_pack(&oid, packfile_store_get_all_packs(packs))) { + } else if (find_oid_pack(&oid, packfile_store_get_packs(packs))) { e->type = type; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! */ @@ -1177,7 +1177,7 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) duplicate_count_by_type[OBJ_BLOB]++; truncate_pack(&checkpoint); - } else if (find_oid_pack(&oid, packfile_store_get_all_packs(packs))) { + } else if (find_oid_pack(&oid, packfile_store_get_packs(packs))) { e->type = OBJ_BLOB; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! */ diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 50618e1073e..3a19bddd574 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -4394,7 +4394,7 @@ static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid) struct packed_git *p; p = (last_found != (void *)1) ? last_found : - packfile_store_get_all_packs(packs); + packfile_store_get_packs(packs); while (p) { if ((!p->pack_local || p->pack_keep || @@ -4404,7 +4404,7 @@ static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid) return 1; } if (p == last_found) - p = packfile_store_get_all_packs(packs); + p = packfile_store_get_packs(packs); else p = p->next; if (p == last_found) diff --git a/packfile.c b/packfile.c index ab5859518d4..1ae2b2fe1ed 100644 --- a/packfile.c +++ b/packfile.c @@ -1027,7 +1027,7 @@ void packfile_store_reprepare(struct packfile_store *store) packfile_store_prepare(store); } -struct packed_git *packfile_store_get_all_packs(struct packfile_store *store) +struct packed_git *packfile_store_get_packs(struct packfile_store *store) { packfile_store_prepare(store); diff --git a/packfile.h b/packfile.h index 49484a9b09b..c9d0b93446b 100644 --- a/packfile.h +++ b/packfile.h @@ -142,13 +142,13 @@ void packfile_store_add_pack(struct packfile_store *store, * repository. */ #define repo_for_each_pack(repo, p) \ - for (p = packfile_store_get_all_packs(repo->objects->packfiles); p; p = p->next) + for (p = packfile_store_get_packs(repo->objects->packfiles); p; p = p->next) /* * Get all packs managed by the given store, including packfiles that are * referenced by multi-pack indices. */ -struct packed_git *packfile_store_get_all_packs(struct packfile_store *store); +struct packed_git *packfile_store_get_packs(struct packfile_store *store); /* * Get all packs in most-recently-used order. -- GitLab From 6aec1babc35736047dfc2ecd2517527bc33d5fbb Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Oct 2025 09:01:09 +0200 Subject: [PATCH 57/65] packfiles: track pack lists via the packfile store Hi, while the recently-introduced packfile store tracks the head of the pack lists, the actual lists themselves are still stored in a globally linked list via the `struct packed_git::next` pointer. This makes it quite hard to split up that list into per-object-source lists, as the assumption is embedded in many places that one packfile will identify all the others. This patch series thus moves the ownership of the lists into the packfile store. This prepares us for a subsequent change where we can push the packfile store one level down, from the object database into the object source. Note: I'd like to have some extra careful eyes on the last patch. This patch merges the two packfile lists we currently have (MRU and mtime-sorted). This patch is not needed to achieve my goal in this series, but there was some discussion around whether we really need both lists. I don't think we do, and in fact I think it causes confusion which of these one should really use. The default is to use the mtime-sorted list, which I think is the wrong choice in many cases, but that is only by gut feeling. So I'm dropping that list in favor of the MRU list, but there is one gotcha here: when iterating through packfiles and then reading their respective objects, we end up in an infinite loop because we end up moving the respective packfile to the front of the list again. I'm fixing that with a new field that skips the MRU update, but I'm not quite sure wheter I think that this is too fragile or not. The series is built on top of 60f3f52f17 (The sixteenth batch, 2025-10-08) with ps/remove-packfile-store-get-packs at b009b55297 (packfile: rename `packfile_store_get_all_packs()`, 2025-10-09) merged into it. Thanks! Patrick To: git@vger.kernel.org --- b4-submit-tracking --- # This section is used internally by b4 prep for tracking purposes. { "series": { "revision": 1, "change-id": "20251010-pks-packfiles-store-drop-list-64ea0a4c9a3b", "prefixes": [] } } -- GitLab From c19f00750d8f2417e87bbc2a65cc4798be4b1347 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Oct 2025 08:56:26 +0200 Subject: [PATCH 58/65] packfile: use a `strmap` to store packs by name To allow fast lookups of a packfile by name we use a hashmap that has the packfile name as key and the pack itself as value. But while this is the perfect use case for a `strmap`, we instead use `struct hashmap` and store the hashmap entry in the packfile itself. Simplify the code by using a `strmap` instead. Signed-off-by: Patrick Steinhardt --- packfile.c | 24 ++++-------------------- packfile.h | 4 ++-- 2 files changed, 6 insertions(+), 22 deletions(-) diff --git a/packfile.c b/packfile.c index 1ae2b2fe1ed..04649e52920 100644 --- a/packfile.c +++ b/packfile.c @@ -788,8 +788,7 @@ void packfile_store_add_pack(struct packfile_store *store, pack->next = store->packs; store->packs = pack; - hashmap_entry_init(&pack->packmap_ent, strhash(pack->pack_name)); - hashmap_add(&store->map, &pack->packmap_ent); + strmap_put(&store->packs_by_path, pack->pack_name, pack); } struct packed_git *packfile_store_load_pack(struct packfile_store *store, @@ -806,8 +805,7 @@ struct packed_git *packfile_store_load_pack(struct packfile_store *store, strbuf_strip_suffix(&key, ".idx"); strbuf_addstr(&key, ".pack"); - p = hashmap_get_entry_from_hash(&store->map, strhash(key.buf), key.buf, - struct packed_git, packmap_ent); + p = strmap_get(&store->packs_by_path, key.buf); if (!p) { p = add_packed_git(store->odb->repo, idx_path, strlen(idx_path), local); @@ -2311,27 +2309,13 @@ int parse_pack_header_option(const char *in, unsigned char *out, unsigned int *l return 0; } -static int pack_map_entry_cmp(const void *cmp_data UNUSED, - const struct hashmap_entry *entry, - const struct hashmap_entry *entry2, - const void *keydata) -{ - const char *key = keydata; - const struct packed_git *pg1, *pg2; - - pg1 = container_of(entry, const struct packed_git, packmap_ent); - pg2 = container_of(entry2, const struct packed_git, packmap_ent); - - return strcmp(pg1->pack_name, key ? key : pg2->pack_name); -} - struct packfile_store *packfile_store_new(struct object_database *odb) { struct packfile_store *store; CALLOC_ARRAY(store, 1); store->odb = odb; INIT_LIST_HEAD(&store->mru); - hashmap_init(&store->map, pack_map_entry_cmp, NULL, 0); + strmap_init(&store->packs_by_path); return store; } @@ -2341,7 +2325,7 @@ void packfile_store_free(struct packfile_store *store) next = p->next; free(p); } - hashmap_clear(&store->map); + strmap_clear(&store->packs_by_path, 0); free(store); } diff --git a/packfile.h b/packfile.h index c9d0b93446b..9da7f14317b 100644 --- a/packfile.h +++ b/packfile.h @@ -5,12 +5,12 @@ #include "object.h" #include "odb.h" #include "oidset.h" +#include "strmap.h" /* in odb.h */ struct object_info; struct packed_git { - struct hashmap_entry packmap_ent; struct packed_git *next; struct list_head mru; struct pack_window *windows; @@ -85,7 +85,7 @@ struct packfile_store { * A map of packfile names to packed_git structs for tracking which * packs have been loaded already. */ - struct hashmap map; + struct strmap packs_by_path; /* * Whether packfiles have already been populated with this store's -- GitLab From 033fafe0f4af27a01d30924b73455266cf2d4481 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 15 Sep 2025 09:09:40 +0200 Subject: [PATCH 59/65] packfile: move the MRU list into the packfile store Packfiles have two lists associated to them: - A list that keeps track of packfiles in the order that they were added to a packfile store. - A list that keeps track of packfiles in most-recently-used order so that packfiles that are more likely to contain a specific object are ordered towards the front. Both of these lists are hosted by `struct packed_git` itself, So to identify all packfiles in a repository you simply need to grab the first packfile and then iterate the `->next` pointers or the MRU list. This pattern has the problem that all packfiles are part of the same list, regardless of whether or not they belong to the same object source. With the upcoming pluggable object database effort this needs to change: packfiles should be contained by a single object source, and reading an object from any such packfile should use that source to look up the object. Consequently, we need to break up the global lists of packfiles into per-object-source lists. A first step towards this goal is to move those lists ouf of `struct packed_git` and into the packfile store. While the packfile store is currently sitting on the `struct object_database` level, the intent is to push it down one level into the `struct odb_source` in a subsequent patch series. Introduce a new `struct packfile_list` that is used to manage lists of packfiles and use it to store the list of most-recently-used packfiles in `struct packfile_store`. For now, the new list type is only used in a single spot, but we'll expand its usage in subsequent patches. Signed-off-by: Patrick Steinhardt --- builtin/pack-objects.c | 9 ++--- midx.c | 2 +- packfile.c | 92 +++++++++++++++++++++++++++++++++++++----- packfile.h | 19 +++++++-- 4 files changed, 104 insertions(+), 18 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index b5454e5df13..5348aebbe9f 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1706,8 +1706,8 @@ static int want_object_in_pack_mtime(const struct object_id *oid, uint32_t found_mtime) { int want; + struct packfile_list_entry *e; struct odb_source *source; - struct list_head *pos; if (!exclude && local) { /* @@ -1748,12 +1748,11 @@ static int want_object_in_pack_mtime(const struct object_id *oid, } } - list_for_each(pos, packfile_store_get_packs_mru(the_repository->objects->packfiles)) { - struct packed_git *p = list_entry(pos, struct packed_git, mru); + for (e = the_repository->objects->packfiles->mru.head; e; e = e->next) { + struct packed_git *p = e->pack; want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime); if (!exclude && want > 0) - list_move(&p->mru, - packfile_store_get_packs_mru(the_repository->objects->packfiles)); + packfile_list_prepend(&the_repository->objects->packfiles->mru, p); if (want != -1) return want; } diff --git a/midx.c b/midx.c index 1d6269f957e..8022be9a45e 100644 --- a/midx.c +++ b/midx.c @@ -463,7 +463,7 @@ int prepare_midx_pack(struct multi_pack_index *m, p = packfile_store_load_pack(r->objects->packfiles, pack_name.buf, m->source->local); if (p) - list_add_tail(&p->mru, &r->objects->packfiles->mru); + packfile_list_append(&m->source->odb->packfiles->mru, p); strbuf_release(&pack_name); if (!p) { diff --git a/packfile.c b/packfile.c index 04649e52920..4d2d3b674f3 100644 --- a/packfile.c +++ b/packfile.c @@ -47,6 +47,80 @@ static size_t pack_mapped; #define SZ_FMT PRIuMAX static inline uintmax_t sz_fmt(size_t s) { return s; } +void packfile_list_clear(struct packfile_list *list) +{ + struct packfile_list_entry *e, *next; + + for (e = list->head; e; e = next) { + next = e->next; + free(e); + } + + list->head = list->tail = NULL; +} + +static struct packfile_list_entry *packfile_list_remove_internal(struct packfile_list *list, + struct packed_git *pack) +{ + struct packfile_list_entry *e, *prev; + + for (e = list->head, prev = NULL; e; prev = e, e = e->next) { + if (e->pack != pack) + continue; + + if (prev) + prev->next = e->next; + if (list->head == e) + list->head = e->next; + if (list->tail == e) + list->tail = prev; + + return e; + } + + return NULL; +} + +void packfile_list_remove(struct packfile_list *list, struct packed_git *pack) +{ + free(packfile_list_remove_internal(list, pack)); +} + +void packfile_list_prepend(struct packfile_list *list, struct packed_git *pack) +{ + struct packfile_list_entry *entry; + + entry = packfile_list_remove_internal(list, pack); + if (!entry) { + entry = xmalloc(sizeof(*entry)); + entry->pack = pack; + } + entry->next = list->head; + + list->head = entry; + if (!list->tail) + list->tail = entry; +} + +void packfile_list_append(struct packfile_list *list, struct packed_git *pack) +{ + struct packfile_list_entry *entry; + + entry = packfile_list_remove_internal(list, pack); + if (!entry) { + entry = xmalloc(sizeof(*entry)); + entry->pack = pack; + } + entry->next = NULL; + + if (list->tail) { + list->tail->next = entry; + list->tail = entry; + } else { + list->head = list->tail = entry; + } +} + void pack_report(struct repository *repo) { fprintf(stderr, @@ -995,10 +1069,10 @@ static void packfile_store_prepare_mru(struct packfile_store *store) { struct packed_git *p; - INIT_LIST_HEAD(&store->mru); + packfile_list_clear(&store->mru); for (p = store->packs; p; p = p->next) - list_add_tail(&p->mru, &store->mru); + packfile_list_append(&store->mru, p); } void packfile_store_prepare(struct packfile_store *store) @@ -1040,10 +1114,10 @@ struct packed_git *packfile_store_get_packs(struct packfile_store *store) return store->packs; } -struct list_head *packfile_store_get_packs_mru(struct packfile_store *store) +struct packfile_list_entry *packfile_store_get_packs_mru(struct packfile_store *store) { packfile_store_prepare(store); - return &store->mru; + return store->mru.head; } /* @@ -2048,7 +2122,7 @@ static int fill_pack_entry(const struct object_id *oid, int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e) { - struct list_head *pos; + struct packfile_list_entry *l; packfile_store_prepare(r->objects->packfiles); @@ -2059,10 +2133,11 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa if (!r->objects->packfiles->packs) return 0; - list_for_each(pos, &r->objects->packfiles->mru) { - struct packed_git *p = list_entry(pos, struct packed_git, mru); + for (l = r->objects->packfiles->mru.head; l; l = l->next) { + struct packed_git *p = l->pack; + if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) { - list_move(&p->mru, &r->objects->packfiles->mru); + packfile_list_prepend(&r->objects->packfiles->mru, p); return 1; } } @@ -2314,7 +2389,6 @@ struct packfile_store *packfile_store_new(struct object_database *odb) struct packfile_store *store; CALLOC_ARRAY(store, 1); store->odb = odb; - INIT_LIST_HEAD(&store->mru); strmap_init(&store->packs_by_path); return store; } diff --git a/packfile.h b/packfile.h index 9da7f14317b..39ed1073e4a 100644 --- a/packfile.h +++ b/packfile.h @@ -12,7 +12,6 @@ struct object_info; struct packed_git { struct packed_git *next; - struct list_head mru; struct pack_window *windows; off_t pack_size; const void *index_data; @@ -52,6 +51,20 @@ struct packed_git { char pack_name[FLEX_ARRAY]; /* more */ }; +struct packfile_list { + struct packfile_list_entry *head, *tail; +}; + +struct packfile_list_entry { + struct packfile_list_entry *next; + struct packed_git *pack; +}; + +void packfile_list_clear(struct packfile_list *list); +void packfile_list_remove(struct packfile_list *list, struct packed_git *pack); +void packfile_list_prepend(struct packfile_list *list, struct packed_git *pack); +void packfile_list_append(struct packfile_list *list, struct packed_git *pack); + /* * A store that manages packfiles for a given object database. */ @@ -79,7 +92,7 @@ struct packfile_store { } kept_cache; /* A most-recently-used ordered version of the packs list. */ - struct list_head mru; + struct packfile_list mru; /* * A map of packfile names to packed_git structs for tracking which @@ -153,7 +166,7 @@ struct packed_git *packfile_store_get_packs(struct packfile_store *store); /* * Get all packs in most-recently-used order. */ -struct list_head *packfile_store_get_packs_mru(struct packfile_store *store); +struct packfile_list_entry *packfile_store_get_packs_mru(struct packfile_store *store); /* * Open the packfile and add it to the store if it isn't yet known. Returns -- GitLab From 3be3f88c35511a4a93cfa5fcc495d5edc5b98368 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 11 Aug 2025 09:33:08 +0200 Subject: [PATCH 60/65] http: refactor subsystem to use `packfile_list`s The dumb HTTP protocol directly fetches packfiles from the remote server and temporarily stores them in a list of packfiles. Those packfiles are not yet added to the repository's packfile store until we finalize the whole fetch. Refactor the code to instead use a `struct packfile_list` to store those packs. This prepares us for a subsequent change where the `->next` pointer of `struct packed_git` will go away. Note that this refactoring creates some temporary duplication of code, as we now have both `packfile_list_find_oid()` and `find_oid_pack()`. The latter function will be removed in a subsequent commit though. Signed-off-by: Patrick Steinhardt --- http-push.c | 6 +++--- http-walker.c | 26 +++++++++----------------- http.c | 21 ++++++++------------- http.h | 5 +++-- packfile.c | 9 +++++++++ packfile.h | 8 ++++++++ 6 files changed, 40 insertions(+), 35 deletions(-) diff --git a/http-push.c b/http-push.c index a1c01e3b9b9..d86ce771198 100644 --- a/http-push.c +++ b/http-push.c @@ -104,7 +104,7 @@ struct repo { int has_info_refs; int can_update_info_refs; int has_info_packs; - struct packed_git *packs; + struct packfile_list packs; struct remote_lock *locks; }; @@ -311,7 +311,7 @@ static void start_fetch_packed(struct transfer_request *request) struct transfer_request *check_request = request_queue_head; struct http_pack_request *preq; - target = find_oid_pack(&request->obj->oid, repo->packs); + target = packfile_list_find_oid(repo->packs.head, &request->obj->oid); if (!target) { fprintf(stderr, "Unable to fetch %s, will not be able to update server info refs\n", oid_to_hex(&request->obj->oid)); repo->can_update_info_refs = 0; @@ -683,7 +683,7 @@ static int add_send_request(struct object *obj, struct remote_lock *lock) get_remote_object_list(obj->oid.hash[0]); if (obj->flags & (REMOTE | PUSHING)) return 0; - target = find_oid_pack(&obj->oid, repo->packs); + target = packfile_list_find_oid(repo->packs.head, &obj->oid); if (target) { obj->flags |= REMOTE; return 0; diff --git a/http-walker.c b/http-walker.c index 0f7ae46d7f1..e886e648664 100644 --- a/http-walker.c +++ b/http-walker.c @@ -15,7 +15,7 @@ struct alt_base { char *base; int got_indices; - struct packed_git *packs; + struct packfile_list packs; struct alt_base *next; }; @@ -324,11 +324,8 @@ static void process_alternates_response(void *callback_data) } else if (is_alternate_allowed(target.buf)) { warning("adding alternate object store: %s", target.buf); - newalt = xmalloc(sizeof(*newalt)); - newalt->next = NULL; + CALLOC_ARRAY(newalt, 1); newalt->base = strbuf_detach(&target, NULL); - newalt->got_indices = 0; - newalt->packs = NULL; while (tail->next != NULL) tail = tail->next; @@ -435,7 +432,7 @@ static int http_fetch_pack(struct walker *walker, struct alt_base *repo, if (fetch_indices(walker, repo)) return -1; - target = find_oid_pack(oid, repo->packs); + target = packfile_list_find_oid(repo->packs.head, oid); if (!target) return -1; close_pack_index(target); @@ -584,17 +581,15 @@ static void cleanup(struct walker *walker) if (data) { alt = data->alt; while (alt) { - struct packed_git *pack; + struct packfile_list_entry *e; alt_next = alt->next; - pack = alt->packs; - while (pack) { - struct packed_git *pack_next = pack->next; - close_pack(pack); - free(pack); - pack = pack_next; + for (e = alt->packs.head; e; e = e->next) { + close_pack(e->pack); + free(e->pack); } + packfile_list_clear(&alt->packs); free(alt->base); free(alt); @@ -612,14 +607,11 @@ struct walker *get_http_walker(const char *url) struct walker_data *data = xmalloc(sizeof(struct walker_data)); struct walker *walker = xmalloc(sizeof(struct walker)); - data->alt = xmalloc(sizeof(*data->alt)); + CALLOC_ARRAY(data->alt, 1); data->alt->base = xstrdup(url); for (s = data->alt->base + strlen(data->alt->base) - 1; *s == '/'; --s) *s = 0; - data->alt->got_indices = 0; - data->alt->packs = NULL; - data->alt->next = NULL; data->got_alternates = -1; walker->corrupt_object_found = 0; diff --git a/http.c b/http.c index 17130823f00..41f850db16d 100644 --- a/http.c +++ b/http.c @@ -2413,8 +2413,9 @@ static char *fetch_pack_index(unsigned char *hash, const char *base_url) return tmp; } -static int fetch_and_setup_pack_index(struct packed_git **packs_head, - unsigned char *sha1, const char *base_url) +static int fetch_and_setup_pack_index(struct packfile_list *packs, + unsigned char *sha1, + const char *base_url) { struct packed_git *new_pack, *p; char *tmp_idx = NULL; @@ -2448,12 +2449,11 @@ static int fetch_and_setup_pack_index(struct packed_git **packs_head, if (ret) return -1; - new_pack->next = *packs_head; - *packs_head = new_pack; + packfile_list_prepend(packs, new_pack); return 0; } -int http_get_info_packs(const char *base_url, struct packed_git **packs_head) +int http_get_info_packs(const char *base_url, struct packfile_list *packs) { struct http_get_options options = {0}; int ret = 0; @@ -2477,7 +2477,7 @@ int http_get_info_packs(const char *base_url, struct packed_git **packs_head) !parse_oid_hex(data, &oid, &data) && skip_prefix(data, ".pack", &data) && (*data == '\n' || *data == '\0')) { - fetch_and_setup_pack_index(packs_head, oid.hash, base_url); + fetch_and_setup_pack_index(packs, oid.hash, base_url); } else { data = strchrnul(data, '\n'); } @@ -2541,14 +2541,9 @@ int finish_http_pack_request(struct http_pack_request *preq) } void http_install_packfile(struct packed_git *p, - struct packed_git **list_to_remove_from) + struct packfile_list *list_to_remove_from) { - struct packed_git **lst = list_to_remove_from; - - while (*lst != p) - lst = &((*lst)->next); - *lst = (*lst)->next; - + packfile_list_remove(list_to_remove_from, p); packfile_store_add_pack(the_repository->objects->packfiles, p); } diff --git a/http.h b/http.h index 553e16205ce..f9d45934047 100644 --- a/http.h +++ b/http.h @@ -2,6 +2,7 @@ #define HTTP_H struct packed_git; +struct packfile_list; #include "git-zlib.h" @@ -190,7 +191,7 @@ struct curl_slist *http_append_auth_header(const struct credential *c, /* Helpers for fetching packs */ int http_get_info_packs(const char *base_url, - struct packed_git **packs_head); + struct packfile_list *packs); /* Helper for getting Accept-Language header */ const char *http_get_accept_language_header(void); @@ -226,7 +227,7 @@ void release_http_pack_request(struct http_pack_request *preq); * from http_get_info_packs() and have chosen a specific pack to fetch. */ void http_install_packfile(struct packed_git *p, - struct packed_git **list_to_remove_from); + struct packfile_list *list_to_remove_from); /* Helpers for fetching object */ struct http_object_request { diff --git a/packfile.c b/packfile.c index 4d2d3b674f3..6aa2ca8ac9e 100644 --- a/packfile.c +++ b/packfile.c @@ -121,6 +121,15 @@ void packfile_list_append(struct packfile_list *list, struct packed_git *pack) } } +struct packed_git *packfile_list_find_oid(struct packfile_list_entry *packs, + const struct object_id *oid) +{ + for (; packs; packs = packs->next) + if (find_pack_entry_one(oid, packs->pack)) + return packs->pack; + return NULL; +} + void pack_report(struct repository *repo) { fprintf(stderr, diff --git a/packfile.h b/packfile.h index 39ed1073e4a..a53336d722a 100644 --- a/packfile.h +++ b/packfile.h @@ -65,6 +65,14 @@ void packfile_list_remove(struct packfile_list *list, struct packed_git *pack); void packfile_list_prepend(struct packfile_list *list, struct packed_git *pack); void packfile_list_append(struct packfile_list *list, struct packed_git *pack); +/* + * Find the pack within the "packs" list whose index contains the object + * "oid". For general object lookups, you probably don't want this; use + * find_pack_entry() instead. + */ +struct packed_git *packfile_list_find_oid(struct packfile_list_entry *packs, + const struct object_id *oid); + /* * A store that manages packfiles for a given object database. */ -- GitLab From c868f3fd6cccb546b774f142d3f779496e14e944 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Oct 2025 09:45:56 +0200 Subject: [PATCH 61/65] packfile: fix approximation of object counts When approximating the number of objects in a repository we only take into account two data sources, the multi-pack index and the packfile indices, as both of these data structures allow us to easily figure out how many objects they contain. But the way we currently approximate the number of objects is broken in presence of a multi-pack index. This is due to two separate reasons: - We have recently introduced initial infrastructure for incremental multi-pack indices. Starting with that series, `num_objects` only counts the number of objects of a specific layer of the MIDX chain, so we do not take into account objects from parent layers. This issue is fixed by adding `num_objects_in_base`, which contains the sum of all objects in previous layers. - When using the multi-pack index we may count objects contained in packfiles twice: once via the multi-pack index, but then we again count them via the packfile itself. This issue is fixed by skipping any packfiles that have an MIDX. Overall, given that we _always_ count the packs, we can only end up overestimating the number of objects, and the overestimation is limited to a factor of two at most. The consequences of those issues are very limited though, as we only approximate object counts in a small number of cases: - When writing a commit-graph we use the approximate object count to display the upper limit of a progress display. - In `repo_find_unique_abbrev_r()` we use it to specify a lower limit of how many hex digits we want to abbreviate to. Given that we use power-of-two here to derive the lower limit we may end up with an abbreviated hash that is one digit longer than required. - In `estimate_repack_memory()` we may end up overestimating how much memory a repack needs to pack objects. Conseuqently, we may end up dropping some packfiles from a repack. None of these are really game-changing. But it's nice to fix those issues regardless. While at it, convert the code to use `repo_for_each_pack()`. Furthermore, use `odb_prepare_alternates()` instead of explicitly preparing the packfile store. We really only want to prepare the object database sources, and `get_multi_pack_index()` already knows to prepare the packfile store for us. Helped-by: Taylor Blau Signed-off-by: Patrick Steinhardt --- packfile.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packfile.c b/packfile.c index 6aa2ca8ac9e..6722c3b2b88 100644 --- a/packfile.c +++ b/packfile.c @@ -1143,16 +1143,16 @@ unsigned long repo_approximate_object_count(struct repository *r) unsigned long count = 0; struct packed_git *p; - packfile_store_prepare(r->objects->packfiles); + odb_prepare_alternates(r->objects); for (source = r->objects->sources; source; source = source->next) { struct multi_pack_index *m = get_multi_pack_index(source); if (m) - count += m->num_objects; + count += m->num_objects + m->num_objects_in_base; } - for (p = r->objects->packfiles->packs; p; p = p->next) { - if (open_pack_index(p)) + repo_for_each_pack(r, p) { + if (open_pack_index(p) || p->multi_pack_index) continue; count += p->num_objects; } -- GitLab From df4329ca23256dc7228206f6f919e36a1c84bf8c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 13 Oct 2025 10:52:17 +0200 Subject: [PATCH 62/65] builtin/pack-objects: simplify logic to find kept or nonlocal objects The function `has_sha1_pack_kept_or_nonlocal()` takes an object ID and then searches through packed objects to figure out whether the object exists is a kept or non-local pack. As a performance optimization we remember the packfile that contains a given object ID so that the next call to the function first checks that same packfile again. The way this is written is rather hard to follow though, as the caching mechanism is intertwined with the loop that iterates through the packs. Consequently, we need to do some gymnastics to re-start the iteration if the cached pack does not contain the objects. Refactor this so that we check the cached packfile at the beginning. We don't have to re-verify whether the packfile meets the properties as we have already verified those when storing the pack in `last_found` in the first place. So all we need to do is to use `find_pack_entry_one()` to check whether the pack contains the object ID, and to skip the cached pack in the loop so that we don't search it twice. This refactoring significantly simplifies the logic and makes it much easier to follow. Signed-off-by: Patrick Steinhardt --- builtin/pack-objects.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 5348aebbe9f..861fef3f38a 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -4388,27 +4388,27 @@ static void add_unreachable_loose_objects(struct rev_info *revs) static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid) { - struct packfile_store *packs = the_repository->objects->packfiles; static struct packed_git *last_found = (void *)1; struct packed_git *p; - p = (last_found != (void *)1) ? last_found : - packfile_store_get_packs(packs); + if (last_found != (void *)1 && find_pack_entry_one(oid, last_found)) + return 1; - while (p) { - if ((!p->pack_local || p->pack_keep || - p->pack_keep_in_core) && - find_pack_entry_one(oid, p)) { + repo_for_each_pack(the_repository, p) { + if ((!p->pack_local || p->pack_keep || p->pack_keep_in_core) && + find_pack_entry_one(oid, p)) { last_found = p; return 1; } - if (p == last_found) - p = packfile_store_get_packs(packs); - else - p = p->next; - if (p == last_found) - p = p->next; + + /* + * We have already checked `last_found`, so there is no need to + * re-check here. + */ + if (p == last_found && last_found != (void *)1) + continue; } + return 0; } -- GitLab From 7564cf184196745678e110ef57126d31e21e3aee Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Oct 2025 10:04:51 +0200 Subject: [PATCH 63/65] packfile: move list of packs into the packfile store Move the list of packs into the packfile store. This follows the same logic as in a previous commit, where we moved the most-recently-used list of packs, as well. Signed-off-by: Patrick Steinhardt --- builtin/fast-import.c | 4 +-- packfile.c | 83 +++++++++++++++++++------------------------ packfile.h | 16 +++------ 3 files changed, 43 insertions(+), 60 deletions(-) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 215295c1561..6fe6e9bc61d 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -978,7 +978,7 @@ static int store_object( if (e->idx.offset) { duplicate_count_by_type[type]++; return 1; - } else if (find_oid_pack(&oid, packfile_store_get_packs(packs))) { + } else if (packfile_list_find_oid(packfile_store_get_packs(packs), &oid)) { e->type = type; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! */ @@ -1179,7 +1179,7 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) duplicate_count_by_type[OBJ_BLOB]++; truncate_pack(&checkpoint); - } else if (find_oid_pack(&oid, packfile_store_get_packs(packs))) { + } else if (packfile_list_find_oid(packfile_store_get_packs(packs), &oid)) { e->type = OBJ_BLOB; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! */ diff --git a/packfile.c b/packfile.c index 6722c3b2b88..f8158c1aa52 100644 --- a/packfile.c +++ b/packfile.c @@ -356,13 +356,14 @@ static void scan_windows(struct packed_git *p, static int unuse_one_window(struct packed_git *current) { - struct packed_git *p, *lru_p = NULL; + struct packfile_list_entry *e; + struct packed_git *lru_p = NULL; struct pack_window *lru_w = NULL, *lru_l = NULL; if (current) scan_windows(current, &lru_p, &lru_w, &lru_l); - for (p = current->repo->objects->packfiles->packs; p; p = p->next) - scan_windows(p, &lru_p, &lru_w, &lru_l); + for (e = current->repo->objects->packfiles->packs.head; e; e = e->next) + scan_windows(e->pack, &lru_p, &lru_w, &lru_l); if (lru_p) { munmap(lru_w->base, lru_w->len); pack_mapped -= lru_w->len; @@ -542,14 +543,15 @@ static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struc static int close_one_pack(struct repository *r) { - struct packed_git *p, *lru_p = NULL; + struct packfile_list_entry *e; + struct packed_git *lru_p = NULL; struct pack_window *mru_w = NULL; int accept_windows_inuse = 1; - for (p = r->objects->packfiles->packs; p; p = p->next) { - if (p->pack_fd == -1) + for (e = r->objects->packfiles->packs.head; e; e = e->next) { + if (e->pack->pack_fd == -1) continue; - find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse); + find_lru_pack(e->pack, &lru_p, &mru_w, &accept_windows_inuse); } if (lru_p) @@ -868,8 +870,7 @@ void packfile_store_add_pack(struct packfile_store *store, if (pack->pack_fd != -1) pack_open_fds++; - pack->next = store->packs; - store->packs = pack; + packfile_list_prepend(&store->packs, pack); strmap_put(&store->packs_by_path, pack->pack_name, pack); } @@ -1046,9 +1047,10 @@ static void prepare_packed_git_one(struct odb_source *source) string_list_clear(data.garbage, 0); } -DEFINE_LIST_SORT(static, sort_packs, struct packed_git, next); +DEFINE_LIST_SORT(static, sort_packs, struct packfile_list_entry, next); -static int sort_pack(const struct packed_git *a, const struct packed_git *b) +static int sort_pack(const struct packfile_list_entry *a, + const struct packfile_list_entry *b) { int st; @@ -1058,7 +1060,7 @@ static int sort_pack(const struct packed_git *a, const struct packed_git *b) * remote ones could be on a network mounted filesystem. * Favor local ones for these reasons. */ - st = a->pack_local - b->pack_local; + st = a->pack->pack_local - b->pack->pack_local; if (st) return -st; @@ -1067,21 +1069,19 @@ static int sort_pack(const struct packed_git *a, const struct packed_git *b) * and more recent objects tend to get accessed more * often. */ - if (a->mtime < b->mtime) + if (a->pack->mtime < b->pack->mtime) return 1; - else if (a->mtime == b->mtime) + else if (a->pack->mtime == b->pack->mtime) return 0; return -1; } static void packfile_store_prepare_mru(struct packfile_store *store) { - struct packed_git *p; - packfile_list_clear(&store->mru); - for (p = store->packs; p; p = p->next) - packfile_list_append(&store->mru, p); + for (struct packfile_list_entry *e = store->packs.head; e; e = e->next) + packfile_list_append(&store->mru, e->pack); } void packfile_store_prepare(struct packfile_store *store) @@ -1096,7 +1096,11 @@ void packfile_store_prepare(struct packfile_store *store) prepare_multi_pack_index_one(source); prepare_packed_git_one(source); } - sort_packs(&store->packs, sort_pack); + + sort_packs(&store->packs.head, sort_pack); + for (struct packfile_list_entry *e = store->packs.head; e; e = e->next) + if (!e->next) + store->packs.tail = e; packfile_store_prepare_mru(store); store->initialized = true; @@ -1108,7 +1112,7 @@ void packfile_store_reprepare(struct packfile_store *store) packfile_store_prepare(store); } -struct packed_git *packfile_store_get_packs(struct packfile_store *store) +struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *store) { packfile_store_prepare(store); @@ -1120,7 +1124,7 @@ struct packed_git *packfile_store_get_packs(struct packfile_store *store) prepare_midx_pack(m, i); } - return store->packs; + return store->packs.head; } struct packfile_list_entry *packfile_store_get_packs_mru(struct packfile_store *store) @@ -1276,11 +1280,11 @@ void mark_bad_packed_object(struct packed_git *p, const struct object_id *oid) const struct packed_git *has_packed_and_bad(struct repository *r, const struct object_id *oid) { - struct packed_git *p; + struct packfile_list_entry *e; - for (p = r->objects->packfiles->packs; p; p = p->next) - if (oidset_contains(&p->bad_objects, oid)) - return p; + for (e = r->objects->packfiles->packs.head; e; e = e->next) + if (oidset_contains(&e->pack->bad_objects, oid)) + return e->pack; return NULL; } @@ -2088,19 +2092,6 @@ int is_pack_valid(struct packed_git *p) return !open_packed_git(p); } -struct packed_git *find_oid_pack(const struct object_id *oid, - struct packed_git *packs) -{ - struct packed_git *p; - - for (p = packs; p; p = p->next) { - if (find_pack_entry_one(oid, p)) - return p; - } - return NULL; - -} - static int fill_pack_entry(const struct object_id *oid, struct pack_entry *e, struct packed_git *p) @@ -2139,7 +2130,7 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa if (source->midx && fill_midx_entry(source->midx, oid, e)) return 1; - if (!r->objects->packfiles->packs) + if (!r->objects->packfiles->packs.head) return 0; for (l = r->objects->packfiles->mru.head; l; l = l->next) { @@ -2404,19 +2395,19 @@ struct packfile_store *packfile_store_new(struct object_database *odb) void packfile_store_free(struct packfile_store *store) { - for (struct packed_git *p = store->packs, *next; p; p = next) { - next = p->next; - free(p); - } + for (struct packfile_list_entry *e = store->packs.head; e; e = e->next) + free(e->pack); + packfile_list_clear(&store->packs); + strmap_clear(&store->packs_by_path, 0); free(store); } void packfile_store_close(struct packfile_store *store) { - for (struct packed_git *p = store->packs; p; p = p->next) { - if (p->do_not_close) + for (struct packfile_list_entry *e = store->packs.head; e; e = e->next) { + if (e->pack->do_not_close) BUG("want to close pack marked 'do-not-close'"); - close_pack(p); + close_pack(e->pack); } } diff --git a/packfile.h b/packfile.h index a53336d722a..d95275e666c 100644 --- a/packfile.h +++ b/packfile.h @@ -11,7 +11,6 @@ struct object_info; struct packed_git { - struct packed_git *next; struct pack_window *windows; off_t pack_size; const void *index_data; @@ -83,7 +82,7 @@ struct packfile_store { * The list of packfiles in the order in which they are being added to * the store. */ - struct packed_git *packs; + struct packfile_list packs; /* * Cache of packfiles which are marked as "kept", either because there @@ -163,13 +162,14 @@ void packfile_store_add_pack(struct packfile_store *store, * repository. */ #define repo_for_each_pack(repo, p) \ - for (p = packfile_store_get_packs(repo->objects->packfiles); p; p = p->next) + for (struct packfile_list_entry *e = packfile_store_get_packs(repo->objects->packfiles); \ + ((p) = (e ? e->pack : NULL)); e = e->next) /* * Get all packs managed by the given store, including packfiles that are * referenced by multi-pack indices. */ -struct packed_git *packfile_store_get_packs(struct packfile_store *store); +struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *store); /* * Get all packs in most-recently-used order. @@ -266,14 +266,6 @@ extern void (*report_garbage)(unsigned seen_bits, const char *path); */ unsigned long repo_approximate_object_count(struct repository *r); -/* - * Find the pack within the "packs" list whose index contains the object "oid". - * For general object lookups, you probably don't want this; use - * find_pack_entry() instead. - */ -struct packed_git *find_oid_pack(const struct object_id *oid, - struct packed_git *packs); - void pack_report(struct repository *repo); /* -- GitLab From 87c2fbb0d642f591a278c7e8afc7c088c5b9b1a8 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 15 Sep 2025 09:15:09 +0200 Subject: [PATCH 64/65] packfile: always add packfiles to MRU when adding a pack When adding a packfile to it store we add it both to the list and map of packfiles, but we don't append it to the most-recently-used list of packs. We do know to add the packfile to the MRU list as soon as we access any of its objects, but in between we're being inconistent. It doesn't help that there are some subsystems that _do_ add the packfile to the MRU after having added it, which only adds to the confusion. Refactor the code so that we unconditionally add packfiles to the MRU when adding them to a packfile store. Note that this does not allow us to drop `packfile_store_prepare_mru()` just yet: while the MRU list is already populated with all packs now, the order in which we add these packs is indeterministic for most of the part. So by first calling `sort_pack()` on the other packfile list and then re-preparing the MRU list we inherit its sorting. Signed-off-by: Patrick Steinhardt --- midx.c | 2 -- packfile.c | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/midx.c b/midx.c index 8022be9a45e..24e1e721754 100644 --- a/midx.c +++ b/midx.c @@ -462,8 +462,6 @@ int prepare_midx_pack(struct multi_pack_index *m, m->pack_names[pack_int_id]); p = packfile_store_load_pack(r->objects->packfiles, pack_name.buf, m->source->local); - if (p) - packfile_list_append(&m->source->odb->packfiles->mru, p); strbuf_release(&pack_name); if (!p) { diff --git a/packfile.c b/packfile.c index f8158c1aa52..79d2b27c42c 100644 --- a/packfile.c +++ b/packfile.c @@ -871,6 +871,7 @@ void packfile_store_add_pack(struct packfile_store *store, pack_open_fds++; packfile_list_prepend(&store->packs, pack); + packfile_list_append(&store->mru, pack); strmap_put(&store->packs_by_path, pack->pack_name, pack); } -- GitLab From a335062de7b2b97f99cabe407096fcec495357f5 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Oct 2025 08:59:27 +0200 Subject: [PATCH 65/65] packfile: track packs via the MRU list exclusively We track packfiles via two different lists: - `struct packfile_store::packs` is a list that sorts local packs first. In addition, these packs are sorted so that younger packs are sorted towards the front. - `struct packfile_store::mru` is a list that sorts packs so that most-recently used packs are at the front. The reasoning behind the ordering in the `packs` list is that younger objects stored in the local object store tend to be accessed more frequently, and that is certainly true for some cases. But there are going to be lots of cases where that isn't true. Especially when traversing history it is likely that one needs to access many older objects, and due to our housekeeping it is very likely that almost all of those older objects will be contained in one large pack that is oldest. So whether or not the ordering makes sense really depends on the use case at hand. A flexible approach like our MRU list addresses that need, as it will sort packs towards the front that are accessed all the time. Intuitively, this approach is thus able to satisfy more use cases more efficiently. This reasoning casts some doubt on whether or not it really makes sense to track packs via two different lists. It causes confusion, and it is not clear whether there are use cases where the `packs` list really is such an obvious choice. Merge these two lists into one most-recently-used list. Note that there is one important edge case: `for_each_packed_object()` uses the MRU list to iterate through packs, and then it lists each object in those packs. This would have the effect that we now sort the current pack towards the front, thus modifying the list of packfiles we are iterating over, with the consequence that we'll see an infinite loop. This edge case is worked around by introducing a new field that allows us to skip updating the MRU. Signed-off-by: Patrick Steinhardt --- builtin/pack-objects.c | 4 ++-- packfile.c | 27 +++++++-------------------- packfile.h | 27 +++++++++++++++++---------- 3 files changed, 26 insertions(+), 32 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 861fef3f38a..3b73ab9f614 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1748,11 +1748,11 @@ static int want_object_in_pack_mtime(const struct object_id *oid, } } - for (e = the_repository->objects->packfiles->mru.head; e; e = e->next) { + for (e = the_repository->objects->packfiles->packs.head; e; e = e->next) { struct packed_git *p = e->pack; want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime); if (!exclude && want > 0) - packfile_list_prepend(&the_repository->objects->packfiles->mru, p); + packfile_list_prepend(&the_repository->objects->packfiles->packs, p); if (want != -1) return want; } diff --git a/packfile.c b/packfile.c index 79d2b27c42c..8785e397104 100644 --- a/packfile.c +++ b/packfile.c @@ -870,9 +870,7 @@ void packfile_store_add_pack(struct packfile_store *store, if (pack->pack_fd != -1) pack_open_fds++; - packfile_list_prepend(&store->packs, pack); - packfile_list_append(&store->mru, pack); - + packfile_list_append(&store->packs, pack); strmap_put(&store->packs_by_path, pack->pack_name, pack); } @@ -1077,14 +1075,6 @@ static int sort_pack(const struct packfile_list_entry *a, return -1; } -static void packfile_store_prepare_mru(struct packfile_store *store) -{ - packfile_list_clear(&store->mru); - - for (struct packfile_list_entry *e = store->packs.head; e; e = e->next) - packfile_list_append(&store->mru, e->pack); -} - void packfile_store_prepare(struct packfile_store *store) { struct odb_source *source; @@ -1103,7 +1093,6 @@ void packfile_store_prepare(struct packfile_store *store) if (!e->next) store->packs.tail = e; - packfile_store_prepare_mru(store); store->initialized = true; } @@ -1128,12 +1117,6 @@ struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *stor return store->packs.head; } -struct packfile_list_entry *packfile_store_get_packs_mru(struct packfile_store *store) -{ - packfile_store_prepare(store); - return store->mru.head; -} - /* * Give a fast, rough count of the number of objects in the repository. This * ignores loose objects completely. If you have a lot of them, then either @@ -2134,11 +2117,12 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa if (!r->objects->packfiles->packs.head) return 0; - for (l = r->objects->packfiles->mru.head; l; l = l->next) { + for (l = r->objects->packfiles->packs.head; l; l = l->next) { struct packed_git *p = l->pack; if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) { - packfile_list_prepend(&r->objects->packfiles->mru, p); + if (!r->objects->packfiles->skip_mru_updates) + packfile_list_prepend(&r->objects->packfiles->packs, p); return 1; } } @@ -2270,6 +2254,7 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, int r = 0; int pack_errors = 0; + repo->objects->packfiles->skip_mru_updates = true; repo_for_each_pack(repo, p) { if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) continue; @@ -2290,6 +2275,8 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, if (r) break; } + repo->objects->packfiles->skip_mru_updates = false; + return r ? r : pack_errors; } diff --git a/packfile.h b/packfile.h index d95275e666c..27ba607e7c5 100644 --- a/packfile.h +++ b/packfile.h @@ -79,8 +79,8 @@ struct packfile_store { struct object_database *odb; /* - * The list of packfiles in the order in which they are being added to - * the store. + * The list of packfiles in the order in which they have been most + * recently used. */ struct packfile_list packs; @@ -98,9 +98,6 @@ struct packfile_store { unsigned flags; } kept_cache; - /* A most-recently-used ordered version of the packs list. */ - struct packfile_list mru; - /* * A map of packfile names to packed_git structs for tracking which * packs have been loaded already. @@ -112,6 +109,21 @@ struct packfile_store { * packs. */ bool initialized; + + /* + * Usually, packfiles will be reordered to the front of the `packs` + * list whenever an object is looked up via them. This has the effect + * that packs that contain a lot of accessed objects will be located + * towards the front. + * + * This is usually desireable, but there are exceptions. One exception + * is when the looking up multiple objects in a loop for each packfile. + * In that case, we may easily end up with an infinite loop as the + * packfiles get reordered to the front repeatedly. + * + * Setting this field to `true` thus disables these reorderings. + */ + bool skip_mru_updates; }; /* @@ -171,11 +183,6 @@ void packfile_store_add_pack(struct packfile_store *store, */ struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *store); -/* - * Get all packs in most-recently-used order. - */ -struct packfile_list_entry *packfile_store_get_packs_mru(struct packfile_store *store); - /* * Open the packfile and add it to the store if it isn't yet known. Returns * either the newly opened packfile or the preexisting packfile. Returns a -- GitLab