From af1ddcb2b55e7e91894fb911c04a5a4a509e2dc7 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:07:14 -0400 Subject: [PATCH 01/60] builtin/repack.c: avoid "the_repository" in `cmd_repack()` Reduce builtin/repack.c's reliance on `the_repository` by using the currently-UNUSED "repo" parameter within cmd_repack(). The following commits will continue to reduce the usage of the_repository in other places within builtin/repack.c. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index e8730808c53..305782b2c9f 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -1247,7 +1247,7 @@ static const char *find_pack_prefix(const char *packdir, const char *packtmp) int cmd_repack(int argc, const char **argv, const char *prefix, - struct repository *repo UNUSED) + struct repository *repo) { struct child_process cmd = CHILD_PROCESS_INIT; struct string_list_item *item; @@ -1344,7 +1344,7 @@ int cmd_repack(int argc, list_objects_filter_init(&po_args.filter_options); - repo_config(the_repository, repack_config, &cruft_po_args); + repo_config(repo, repack_config, &cruft_po_args); argc = parse_options(argc, argv, prefix, builtin_repack_options, git_repack_usage, 0); @@ -1354,7 +1354,7 @@ int cmd_repack(int argc, po_args.depth = xstrdup_or_null(opt_depth); po_args.threads = xstrdup_or_null(opt_threads); - if (delete_redundant && the_repository->repository_format_precious_objects) + if (delete_redundant && repo->repository_format_precious_objects) die(_("cannot delete packs in a precious-objects repo")); die_for_incompatible_opt3(unpack_unreachable || (pack_everything & LOOSEN_UNREACHABLE), "-A", @@ -1376,7 +1376,7 @@ int cmd_repack(int argc, die(_(incremental_bitmap_conflict_error)); if (write_bitmaps && po_args.local && - odb_has_alternates(the_repository->objects)) { + odb_has_alternates(repo->objects)) { /* * When asked to do a local repack, but we have * packfiles that are inherited from an alternate, then @@ -1391,7 +1391,8 @@ int cmd_repack(int argc, if (write_midx && write_bitmaps) { struct strbuf path = STRBUF_INIT; - strbuf_addf(&path, "%s/%s_XXXXXX", repo_get_object_directory(the_repository), + strbuf_addf(&path, "%s/%s_XXXXXX", + repo_get_object_directory(repo), "bitmap-ref-tips"); refs_snapshot = xmks_tempfile(path.buf); @@ -1400,7 +1401,7 @@ int cmd_repack(int argc, strbuf_release(&path); } - packdir = mkpathdup("%s/pack", repo_get_object_directory(the_repository)); + packdir = mkpathdup("%s/pack", repo_get_object_directory(repo)); packtmp_name = xstrfmt(".tmp-%d-pack", (int)getpid()); packtmp = mkpathdup("%s/%s", packdir, packtmp_name); @@ -1439,7 +1440,7 @@ int cmd_repack(int argc, strvec_push(&cmd.args, "--reflog"); strvec_push(&cmd.args, "--indexed-objects"); } - if (repo_has_promisor_remote(the_repository)) + if (repo_has_promisor_remote(repo)) strvec_push(&cmd.args, "--exclude-promisor-objects"); if (!write_midx) { if (write_bitmaps > 0) @@ -1535,7 +1536,7 @@ int cmd_repack(int argc, * midx_has_unknown_packs() will make the decision for * us. */ - if (!get_multi_pack_index(the_repository->objects->sources)) + if (!get_multi_pack_index(repo->objects->sources)) midx_must_contain_cruft = 1; } @@ -1618,9 +1619,9 @@ int cmd_repack(int argc, string_list_sort(&names); - if (get_multi_pack_index(the_repository->objects->sources)) { + if (get_multi_pack_index(repo->objects->sources)) { struct multi_pack_index *m = - get_multi_pack_index(the_repository->objects->sources); + get_multi_pack_index(repo->objects->sources); ALLOC_ARRAY(midx_pack_names, m->num_packs + m->num_packs_in_base); @@ -1631,7 +1632,7 @@ int cmd_repack(int argc, xstrdup(m->pack_names[i]); } - close_object_store(the_repository->objects); + close_object_store(repo->objects); /* * Ok we have prepared all new packfiles. @@ -1688,7 +1689,7 @@ int cmd_repack(int argc, goto cleanup; } - odb_reprepare(the_repository->objects); + odb_reprepare(repo->objects); if (delete_redundant) { int opts = 0; @@ -1704,18 +1705,18 @@ int cmd_repack(int argc, if (!keep_unreachable && (!(pack_everything & LOOSEN_UNREACHABLE) || unpack_unreachable) && - is_repository_shallow(the_repository)) + is_repository_shallow(repo)) prune_shallow(PRUNE_QUICK); } if (run_update_server_info) - update_server_info(the_repository, 0); + update_server_info(repo, 0); if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX, 0)) { unsigned flags = 0; if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX_WRITE_INCREMENTAL, 0)) flags |= MIDX_WRITE_INCREMENTAL; - write_midx_file(the_repository->objects->sources, + write_midx_file(repo->objects->sources, NULL, NULL, flags); } -- GitLab From 73ba2c9b39e56829fd9a114f7bbe425e69ebf7d8 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:07:18 -0400 Subject: [PATCH 02/60] builtin/repack.c: avoid "the_repository" in existing packs API There are a number of spots within builtin/repack.c which refer to "the_repository", and either make use of the "existing packs" API or otherwise have a 'struct existing_packs *' in scope. Add a "repo" member to "struct existing_packs" and use that instead of "the_repository" in such locations. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 305782b2c9f..7223553bed8 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -126,6 +126,7 @@ static void pack_objects_args_release(struct pack_objects_args *args) } struct existing_packs { + struct repository *repo; struct string_list kept_packs; struct string_list non_kept_packs; struct string_list cruft_packs; @@ -265,7 +266,7 @@ static void existing_packs_release(struct existing_packs *existing) static void collect_pack_filenames(struct existing_packs *existing, const struct string_list *extra_keep) { - struct packfile_store *packs = the_repository->objects->packfiles; + struct packfile_store *packs = existing->repo->objects->packfiles; struct packed_git *p; struct strbuf buf = STRBUF_INIT; @@ -498,7 +499,7 @@ static void init_pack_geometry(struct pack_geometry *geometry, struct existing_packs *existing, const struct pack_objects_args *args) { - struct packfile_store *packs = the_repository->objects->packfiles; + struct packfile_store *packs = existing->repo->objects->packfiles; struct packed_git *p; struct strbuf buf = STRBUF_INIT; @@ -1139,7 +1140,7 @@ static int write_filtered_pack(const struct pack_objects_args *args, static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, struct existing_packs *existing) { - struct packfile_store *packs = the_repository->objects->packfiles; + struct packfile_store *packs = existing->repo->objects->packfiles; struct packed_git *p; struct strbuf buf = STRBUF_INIT; size_t i; @@ -1405,6 +1406,7 @@ int cmd_repack(int argc, packtmp_name = xstrfmt(".tmp-%d-pack", (int)getpid()); packtmp = mkpathdup("%s/%s", packdir, packtmp_name); + existing.repo = repo; collect_pack_filenames(&existing, &keep_pack_list); if (geometry.split_factor) { -- GitLab From 2f425fe2fcc6aef33576acc99335e57ffd3a217f Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:07:22 -0400 Subject: [PATCH 03/60] builtin/repack.c: avoid "the_repository" when taking a ref snapshot Avoid using "the_repository" in various MIDX-related ref snapshotting functions. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 7223553bed8..113f5fc67f9 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -771,6 +771,7 @@ static int midx_has_unknown_packs(char **midx_pack_names, } struct midx_snapshot_ref_data { + struct repository *repo; struct tempfile *f; struct oidset seen; int preferred; @@ -784,13 +785,13 @@ static int midx_snapshot_ref_one(const char *refname UNUSED, struct midx_snapshot_ref_data *data = _data; struct object_id peeled; - if (!peel_iterated_oid(the_repository, oid, &peeled)) + if (!peel_iterated_oid(data->repo, oid, &peeled)) oid = &peeled; if (oidset_insert(&data->seen, oid)) return 0; /* already seen */ - if (odb_read_object_info(the_repository->objects, oid, NULL) != OBJ_COMMIT) + if (odb_read_object_info(data->repo->objects, oid, NULL) != OBJ_COMMIT) return 0; fprintf(data->f->fp, "%s%s\n", data->preferred ? "+" : "", @@ -799,11 +800,12 @@ static int midx_snapshot_ref_one(const char *refname UNUSED, return 0; } -static void midx_snapshot_refs(struct tempfile *f) +static void midx_snapshot_refs(struct repository *repo, struct tempfile *f) { struct midx_snapshot_ref_data data; - const struct string_list *preferred = bitmap_preferred_tips(the_repository); + const struct string_list *preferred = bitmap_preferred_tips(repo); + data.repo = repo; data.f = f; data.preferred = 0; oidset_init(&data.seen, 0); @@ -817,13 +819,13 @@ static void midx_snapshot_refs(struct tempfile *f) data.preferred = 1; for_each_string_list_item(item, preferred) - refs_for_each_ref_in(get_main_ref_store(the_repository), + refs_for_each_ref_in(get_main_ref_store(repo), item->string, midx_snapshot_ref_one, &data); data.preferred = 0; } - refs_for_each_ref(get_main_ref_store(the_repository), + refs_for_each_ref(get_main_ref_store(repo), midx_snapshot_ref_one, &data); if (close_tempfile_gently(f)) { @@ -1397,7 +1399,7 @@ int cmd_repack(int argc, "bitmap-ref-tips"); refs_snapshot = xmks_tempfile(path.buf); - midx_snapshot_refs(refs_snapshot); + midx_snapshot_refs(repo, refs_snapshot); strbuf_release(&path); } -- GitLab From c97715584399e494245dd25c3012dc210a02be8b Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:07:26 -0400 Subject: [PATCH 04/60] builtin/repack.c: avoid "the_repository" when removing packs The 'remove_redundant_pack()' function uses "the_repository" to obtain, and optionally remove, the repository's MIDX. Instead of relying on "the_repository", pass around a "struct repository *" parameter through its callers, and use that instead. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 113f5fc67f9..93802531e1a 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -221,33 +221,35 @@ static void mark_packs_for_deletion(struct existing_packs *existing, mark_packs_for_deletion_1(names, &existing->cruft_packs); } -static void remove_redundant_pack(const char *dir_name, const char *base_name) +static void remove_redundant_pack(struct repository *repo, + const char *dir_name, const char *base_name) { struct strbuf buf = STRBUF_INIT; - struct odb_source *source = the_repository->objects->sources; + struct odb_source *source = repo->objects->sources; struct multi_pack_index *m = get_multi_pack_index(source); strbuf_addf(&buf, "%s.pack", base_name); if (m && source->local && midx_contains_pack(m, buf.buf)) - clear_midx_file(the_repository); + clear_midx_file(repo); strbuf_insertf(&buf, 0, "%s/", dir_name); unlink_pack_path(buf.buf, 1); strbuf_release(&buf); } -static void remove_redundant_packs_1(struct string_list *packs) +static void remove_redundant_packs_1(struct repository *repo, + struct string_list *packs) { struct string_list_item *item; for_each_string_list_item(item, packs) { if (!pack_is_marked_for_deletion(item)) continue; - remove_redundant_pack(packdir, item->string); + remove_redundant_pack(repo, packdir, item->string); } } static void remove_redundant_existing_packs(struct existing_packs *existing) { - remove_redundant_packs_1(&existing->non_kept_packs); - remove_redundant_packs_1(&existing->cruft_packs); + remove_redundant_packs_1(existing->repo, &existing->non_kept_packs); + remove_redundant_packs_1(existing->repo, &existing->cruft_packs); } static void existing_packs_release(struct existing_packs *existing) @@ -685,7 +687,7 @@ static void geometry_remove_redundant_packs(struct pack_geometry *geometry, (string_list_has_string(&existing->kept_packs, buf.buf))) continue; - remove_redundant_pack(packdir, buf.buf); + remove_redundant_pack(existing->repo, packdir, buf.buf); } strbuf_release(&buf); -- GitLab From 14cae948432bae0f753b9ffd8afc84dd73063991 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:07:30 -0400 Subject: [PATCH 05/60] builtin/repack.c: avoid "the_repository" when repacking promisor objects Pass a "struct repository" pointer to the 'repack_promisor_objects()' function to avoid using "the_repository". Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 93802531e1a..4f08b57ddbd 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -407,7 +407,8 @@ static int has_pack_ext(const struct generated_pack_data *data, BUG("unknown pack extension: '%s'", ext); } -static void repack_promisor_objects(const struct pack_objects_args *args, +static void repack_promisor_objects(struct repository *repo, + const struct pack_objects_args *args, struct string_list *names) { struct child_process cmd = CHILD_PROCESS_INIT; @@ -424,7 +425,7 @@ static void repack_promisor_objects(const struct pack_objects_args *args, * {type -> existing pack order} ordering when computing deltas instead * of a {type -> size} ordering, which may produce better deltas. */ - for_each_packed_object(the_repository, write_oid, &cmd, + for_each_packed_object(repo, write_oid, &cmd, FOR_EACH_OBJECT_PROMISOR_ONLY); if (cmd.in == -1) { @@ -1458,7 +1459,7 @@ int cmd_repack(int argc, strvec_push(&cmd.args, "--delta-islands"); if (pack_everything & ALL_INTO_ONE) { - repack_promisor_objects(&po_args, &names); + repack_promisor_objects(repo, &po_args, &names); if (has_existing_non_kept_packs(&existing) && delete_redundant && -- GitLab From d5649bbff5b9a2b00bd8a325af1420ce3246fd9a Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:07:33 -0400 Subject: [PATCH 06/60] builtin/repack.c: avoid "the_hash_algo" when deleting packs The "mark_packs_for_deletion_1" function uses "the_hash_algo->hexsz" to isolate a pack's checksum before deleting it to avoid deleting a newly written pack having the same checksum (that is, some generated pack wound up identical to an existing pack). Avoid this by passing down a "struct git_hash_algo" pointer, and refer to the hash algorithm through it instead. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 4f08b57ddbd..094f5a0cc26 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -168,11 +168,12 @@ static int pack_is_retained(struct string_list_item *item) return (uintptr_t)item->util & RETAIN_PACK; } -static void mark_packs_for_deletion_1(struct string_list *names, +static void mark_packs_for_deletion_1(const struct git_hash_algo *algop, + struct string_list *names, struct string_list *list) { struct string_list_item *item; - const int hexsz = the_hash_algo->hexsz; + const int hexsz = algop->hexsz; for_each_string_list_item(item, list) { char *sha1; @@ -217,8 +218,9 @@ static void mark_packs_for_deletion(struct existing_packs *existing, struct string_list *names) { - mark_packs_for_deletion_1(names, &existing->non_kept_packs); - mark_packs_for_deletion_1(names, &existing->cruft_packs); + const struct git_hash_algo *algop = existing->repo->hash_algo; + mark_packs_for_deletion_1(algop, names, &existing->non_kept_packs); + mark_packs_for_deletion_1(algop, names, &existing->cruft_packs); } static void remove_redundant_pack(struct repository *repo, -- GitLab From 9347979fb0f4c68ef27c8360096161ed69b5715c Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:07:37 -0400 Subject: [PATCH 07/60] builtin/repack.c: avoid "the_hash_algo" in `write_oid()` In a similar spirit as the previous commit, avoid referring directly to "the_hash_algo" within builtin/repack.c::write_oid(). Unlike the previous commit, we are within a callback function, so must introduce a new struct to pass additional data through its "data" pointer. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 094f5a0cc26..7d62959dc28 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -339,6 +339,11 @@ static void prepare_pack_objects(struct child_process *cmd, cmd->out = -1; } +struct write_oid_context { + struct child_process *cmd; + const struct git_hash_algo *algop; +}; + /* * Write oid to the given struct child_process's stdin, starting it first if * necessary. @@ -347,14 +352,15 @@ static int write_oid(const struct object_id *oid, struct packed_git *pack UNUSED, uint32_t pos UNUSED, void *data) { - struct child_process *cmd = data; + struct write_oid_context *ctx = data; + struct child_process *cmd = ctx->cmd; if (cmd->in == -1) { if (start_command(cmd)) die(_("could not start pack-objects to repack promisor objects")); } - if (write_in_full(cmd->in, oid_to_hex(oid), the_hash_algo->hexsz) < 0 || + if (write_in_full(cmd->in, oid_to_hex(oid), ctx->algop->hexsz) < 0 || write_in_full(cmd->in, "\n", 1) < 0) die(_("failed to feed promisor objects to pack-objects")); return 0; @@ -413,6 +419,7 @@ static void repack_promisor_objects(struct repository *repo, const struct pack_objects_args *args, struct string_list *names) { + struct write_oid_context ctx; struct child_process cmd = CHILD_PROCESS_INIT; FILE *out; struct strbuf line = STRBUF_INIT; @@ -427,7 +434,9 @@ static void repack_promisor_objects(struct repository *repo, * {type -> existing pack order} ordering when computing deltas instead * of a {type -> size} ordering, which may produce better deltas. */ - for_each_packed_object(repo, write_oid, &cmd, + ctx.cmd = &cmd; + ctx.algop = repo->hash_algo; + for_each_packed_object(repo, write_oid, &ctx, FOR_EACH_OBJECT_PROMISOR_ONLY); if (cmd.in == -1) { -- GitLab From 62b7efe90300ef718d6e89dfae9394b86ba72b81 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:07:41 -0400 Subject: [PATCH 08/60] builtin/repack: avoid "the_hash_algo" in `repack_promisor_objects()` In a similar spirit as the previous commits, avoid referring directly to "the_hash_algo" within builtin/repack.c::repack_promisor_objects(). Since there is already a repository pointer in scope, use its hash_algo value instead. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/repack.c b/builtin/repack.c index 7d62959dc28..a7e94ed03c3 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -452,7 +452,7 @@ static void repack_promisor_objects(struct repository *repo, struct string_list_item *item; char *promisor_name; - if (line.len != the_hash_algo->hexsz) + if (line.len != repo->hash_algo->hexsz) die(_("repack: Expecting full hex object ID lines only from pack-objects.")); item = string_list_append(names, line.buf); -- GitLab From 29b820b234127270d9cb021ba511039dd701df74 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:07:45 -0400 Subject: [PATCH 09/60] builtin/repack.c: avoid "the_hash_algo" in `finish_pack_objects_cmd()` In a similar spirit as previous commits, avoid referring directly to "the_hash_algo" in builtin/repack.c::finish_pack_objects_cmd() and instead accept one as a parameter to the function. Since this function has a number of callers throughout the builtin, the diff is a little noisier than previous commits. However, each hunk is limited to passing the hash_algo parameter from a repository pointer that is already in scope. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index a7e94ed03c3..a043704aa83 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -1073,7 +1073,8 @@ static void remove_redundant_bitmaps(struct string_list *include, strbuf_release(&path); } -static int finish_pack_objects_cmd(struct child_process *cmd, +static int finish_pack_objects_cmd(const struct git_hash_algo *algop, + struct child_process *cmd, struct string_list *names, int local) { @@ -1084,7 +1085,7 @@ static int finish_pack_objects_cmd(struct child_process *cmd, while (strbuf_getline_lf(&line, out) != EOF) { struct string_list_item *item; - if (line.len != the_hash_algo->hexsz) + if (line.len != algop->hexsz) die(_("repack: Expecting full hex object ID lines only " "from pack-objects.")); /* @@ -1150,7 +1151,8 @@ static int write_filtered_pack(const struct pack_objects_args *args, fprintf(in, "%s%s.pack\n", caret, item->string); fclose(in); - return finish_pack_objects_cmd(&cmd, names, local); + return finish_pack_objects_cmd(existing->repo->hash_algo, &cmd, names, + local); } static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, @@ -1247,7 +1249,8 @@ static int write_cruft_pack(const struct pack_objects_args *args, fprintf(in, "%s.pack\n", item->string); fclose(in); - return finish_pack_objects_cmd(&cmd, names, local); + return finish_pack_objects_cmd(existing->repo->hash_algo, &cmd, names, + local); } static const char *find_pack_prefix(const char *packdir, const char *packtmp) @@ -1534,7 +1537,7 @@ int cmd_repack(int argc, fclose(in); } - ret = finish_pack_objects_cmd(&cmd, &names, 1); + ret = finish_pack_objects_cmd(repo->hash_algo, &cmd, &names, 1); if (ret) goto cleanup; -- GitLab From b908981ce8047c92b2229943412c28c12c1b7aee Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:07:49 -0400 Subject: [PATCH 10/60] builtin/repack.c: avoid using `hash_to_hex()` in pack geometry In previous commits, we started passing either repository or git_hash_algo pointers around to various spots within builtin/repack.c to reduce our dependency on the_repository in the hope of undef'ing USE_THE_REPOSITORY_VARIABLE. This commit takes us as far as we can (easily) go in that direction by removing the only use of a convenience function that only exists when USE_THE_REPOSITORY_VARIABLE is defined. Unfortunately, the only other such function is "is_bare_repository()", which is less than straightforward to convert into, say, "repo_is_bare()", the latter of the two accepting a repository pointer. Punt on that for now, and declare this commit as the stopping point for our efforts in the direction of undef'ing USE_THE_REPOSITORY_VARIABLE. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/builtin/repack.c b/builtin/repack.c index a043704aa83..0d35f15b4b1 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -683,12 +683,14 @@ static void geometry_remove_redundant_packs(struct pack_geometry *geometry, struct string_list *names, struct existing_packs *existing) { + const struct git_hash_algo *algop = existing->repo->hash_algo; struct strbuf buf = STRBUF_INIT; uint32_t i; for (i = 0; i < geometry->split; i++) { struct packed_git *p = geometry->pack[i]; - if (string_list_has_string(names, hash_to_hex(p->hash))) + if (string_list_has_string(names, hash_to_hex_algop(p->hash, + algop))) continue; strbuf_reset(&buf); -- GitLab From 0ec811ac1868845e1dea1dac3b0de8f3a4c64f14 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:07:53 -0400 Subject: [PATCH 11/60] repack: introduce new compilation unit Over the years, builtin/repack.c has turned into a grab-bag of functionality powering the 'git repack' builtin. Among its many capabilities, it: - can build and spawn 'git pack-objects' commands, which in turn generate new packs - has infrastructure to manage the set of existing packs in a repository - has infrastructure to split a sequence of packs into a geometric progression based on object size - can manage both generating and combining cruft packs together - can write new MIDXs to name a few. As a result, this builtin has accumulated a lot of code, making adding new functionality difficult. In the future, 'repack' will learn how to manage a chain of incremental MIDXs, adding yet more functionality into the builtin. As a prerequisite step, let's first move some of the functionality in the builtin into its own repack.[ch]. This will be done over the course of many steps, since there are many individual components, some of which will end up in other, yet-to-exist compilation units of their own. Some of the code movement here is also non-trivial, so performing it in individual steps will make it easier to verify. Let's start by migrating 'struct pack_objects_args' (and the related corresponding pack_objects_args_release() function) into repack.h, and teach both the Makefile and Meson how to build the new compilation unit. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Makefile | 1 + builtin/repack.c | 25 +------------------------ meson.build | 1 + repack.c | 11 +++++++++++ repack.h | 23 +++++++++++++++++++++++ 5 files changed, 37 insertions(+), 24 deletions(-) create mode 100644 repack.c create mode 100644 repack.h diff --git a/Makefile b/Makefile index 4c95affadb5..c0df6da2371 100644 --- a/Makefile +++ b/Makefile @@ -1136,6 +1136,7 @@ LIB_OBJS += refs/packed-backend.o LIB_OBJS += refs/ref-cache.o LIB_OBJS += refspec.o LIB_OBJS += remote.o +LIB_OBJS += repack.o LIB_OBJS += replace-object.o LIB_OBJS += repo-settings.o LIB_OBJS += repository.o diff --git a/builtin/repack.c b/builtin/repack.c index 0d35f15b4b1..6dfcb3327e6 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -19,6 +19,7 @@ #include "prune-packed.h" #include "odb.h" #include "promisor-remote.h" +#include "repack.h" #include "shallow.h" #include "pack.h" #include "pack-bitmap.h" @@ -53,21 +54,6 @@ static const char incremental_bitmap_conflict_error[] = N_( "--no-write-bitmap-index or disable the pack.writeBitmaps configuration." ); -struct pack_objects_args { - char *window; - char *window_memory; - char *depth; - char *threads; - unsigned long max_pack_size; - int no_reuse_delta; - int no_reuse_object; - int quiet; - int local; - int name_hash_version; - int path_walk; - struct list_objects_filter_options filter_options; -}; - static int repack_config(const char *var, const char *value, const struct config_context *ctx, void *cb) { @@ -116,15 +102,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -static void pack_objects_args_release(struct pack_objects_args *args) -{ - free(args->window); - free(args->window_memory); - free(args->depth); - free(args->threads); - list_objects_filter_release(&args->filter_options); -} - struct existing_packs { struct repository *repo; struct string_list kept_packs; diff --git a/meson.build b/meson.build index b3dfcc04972..993e8f368ff 100644 --- a/meson.build +++ b/meson.build @@ -462,6 +462,7 @@ libgit_sources = [ 'reftable/tree.c', 'reftable/writer.c', 'remote.c', + 'repack.c', 'replace-object.c', 'repo-settings.c', 'repository.c', diff --git a/repack.c b/repack.c new file mode 100644 index 00000000000..a1f5b796fba --- /dev/null +++ b/repack.c @@ -0,0 +1,11 @@ +#include "git-compat-util.h" +#include "repack.h" + +void pack_objects_args_release(struct pack_objects_args *args) +{ + free(args->window); + free(args->window_memory); + free(args->depth); + free(args->threads); + list_objects_filter_release(&args->filter_options); +} diff --git a/repack.h b/repack.h new file mode 100644 index 00000000000..421d439d5a5 --- /dev/null +++ b/repack.h @@ -0,0 +1,23 @@ +#ifndef REPACK_H +#define REPACK_H + +#include "list-objects-filter-options.h" + +struct pack_objects_args { + char *window; + char *window_memory; + char *depth; + char *threads; + unsigned long max_pack_size; + int no_reuse_delta; + int no_reuse_object; + int quiet; + int local; + int name_hash_version; + int path_walk; + struct list_objects_filter_options filter_options; +}; + +void pack_objects_args_release(struct pack_objects_args *args); + +#endif /* REPACK_H */ -- GitLab From a953609865ac4d0875cc878146c6b8e0c7c28643 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:07:56 -0400 Subject: [PATCH 12/60] builtin/repack.c: pass both pack_objects args to repack_config A subsequent commit will remove 'delta_base_offset' as a static variable within builtin/repack.c, and reintroduce it as a member of the 'struct pack_objects_args'. As a result, the repack_config callback will need to have both the cruft- and non-cruft 'struct pack_objects_args's in scope. Introduce a new 'struct repack_config_ctx' to allow the callee to provide both pointers to the callback. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 6dfcb3327e6..af6de8d77ae 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -54,10 +54,16 @@ static const char incremental_bitmap_conflict_error[] = N_( "--no-write-bitmap-index or disable the pack.writeBitmaps configuration." ); +struct repack_config_ctx { + struct pack_objects_args *po_args; + struct pack_objects_args *cruft_po_args; +}; + static int repack_config(const char *var, const char *value, const struct config_context *ctx, void *cb) { - struct pack_objects_args *cruft_po_args = cb; + struct repack_config_ctx *repack_ctx = cb; + struct pack_objects_args *cruft_po_args = repack_ctx->cruft_po_args; if (!strcmp(var, "repack.usedeltabaseoffset")) { delta_base_offset = git_config_bool(var, value); return 0; @@ -1260,6 +1266,7 @@ int cmd_repack(int argc, size_t midx_pack_names_nr = 0; /* variables to be filled by option parsing */ + struct repack_config_ctx config_ctx; int delete_redundant = 0; const char *unpack_unreachable = NULL; int keep_unreachable = 0; @@ -1343,7 +1350,11 @@ int cmd_repack(int argc, list_objects_filter_init(&po_args.filter_options); - repo_config(repo, repack_config, &cruft_po_args); + memset(&config_ctx, 0, sizeof(config_ctx)); + config_ctx.po_args = &po_args; + config_ctx.cruft_po_args = &cruft_po_args; + + repo_config(repo, repack_config, &config_ctx); argc = parse_options(argc, argv, prefix, builtin_repack_options, git_repack_usage, 0); -- GitLab From 17ee881425c68c58f08a5bc62b8074b3e04c6296 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:00 -0400 Subject: [PATCH 13/60] repack: move 'delta_base_offset' to 'struct pack_objects_args' The static variable 'delta_base_offset' determines whether or not we pass the "--delta-base-offset" command-line argument when spawning pack-objects as a child process. Its introduction dates back to when repack was rewritten in C, all the way back in a1bbc6c017 (repack: rewrite the shell script in C, 2013-09-15). 'struct pack_objects_args' was introduced much later on in 4571324b99 (builtin/repack.c: allow configuring cruft pack generation, 2022-05-20), but did not move the 'delta_base_offset' variable. Since the 'delta_base_offset' is a property of an individual pack-objects command, re-introduce that variable as a member of 'struct pack_objects_args', which will enable further code movement in the subsequent commits. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 11 ++++++----- repack.h | 3 +++ 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index af6de8d77ae..f4af8303532 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -34,7 +34,6 @@ #define RETAIN_PACK 2 static int pack_everything; -static int delta_base_offset = 1; static int pack_kept_objects = -1; static int write_bitmaps = -1; static int use_delta_islands; @@ -63,9 +62,10 @@ static int repack_config(const char *var, const char *value, const struct config_context *ctx, void *cb) { struct repack_config_ctx *repack_ctx = cb; + struct pack_objects_args *po_args = repack_ctx->po_args; struct pack_objects_args *cruft_po_args = repack_ctx->cruft_po_args; if (!strcmp(var, "repack.usedeltabaseoffset")) { - delta_base_offset = git_config_bool(var, value); + po_args->delta_base_offset = git_config_bool(var, value); return 0; } if (!strcmp(var, "repack.packkeptobjects")) { @@ -315,7 +315,7 @@ static void prepare_pack_objects(struct child_process *cmd, strvec_push(&cmd->args, "--local"); if (args->quiet) strvec_push(&cmd->args, "--quiet"); - if (delta_base_offset) + if (args->delta_base_offset) strvec_push(&cmd->args, "--delta-base-offset"); strvec_push(&cmd->args, out); cmd->git_cmd = 1; @@ -1271,8 +1271,8 @@ int cmd_repack(int argc, const char *unpack_unreachable = NULL; int keep_unreachable = 0; struct string_list keep_pack_list = STRING_LIST_INIT_NODUP; - struct pack_objects_args po_args = { 0 }; - struct pack_objects_args cruft_po_args = { 0 }; + struct pack_objects_args po_args = PACK_OBJECTS_ARGS_INIT; + struct pack_objects_args cruft_po_args = PACK_OBJECTS_ARGS_INIT; int write_midx = 0; const char *cruft_expiration = NULL; const char *expire_to = NULL; @@ -1567,6 +1567,7 @@ int cmd_repack(int argc, cruft_po_args.local = po_args.local; cruft_po_args.quiet = po_args.quiet; + cruft_po_args.delta_base_offset = po_args.delta_base_offset; ret = write_cruft_pack(&cruft_po_args, packtmp, pack_prefix, cruft_expiration, diff --git a/repack.h b/repack.h index 421d439d5a5..12632d7fec1 100644 --- a/repack.h +++ b/repack.h @@ -15,9 +15,12 @@ struct pack_objects_args { int local; int name_hash_version; int path_walk; + int delta_base_offset; struct list_objects_filter_options filter_options; }; +#define PACK_OBJECTS_ARGS_INIT { .delta_base_offset = 1 } + void pack_objects_args_release(struct pack_objects_args *args); #endif /* REPACK_H */ -- GitLab From 1ebaa9376eea424a9bd955a0782547a040a8b44e Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:04 -0400 Subject: [PATCH 14/60] repack: remove 'prepare_pack_objects' from the builtin Now that the 'prepare_pack_objects' function no longer refers to external, static variables, move it out to repack.h as generic functionality. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 34 ---------------------------------- repack.c | 35 +++++++++++++++++++++++++++++++++++ repack.h | 5 +++++ 3 files changed, 40 insertions(+), 34 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index f4af8303532..ff93654cfed 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -288,40 +288,6 @@ static void collect_pack_filenames(struct existing_packs *existing, strbuf_release(&buf); } -static void prepare_pack_objects(struct child_process *cmd, - const struct pack_objects_args *args, - const char *out) -{ - strvec_push(&cmd->args, "pack-objects"); - if (args->window) - strvec_pushf(&cmd->args, "--window=%s", args->window); - if (args->window_memory) - strvec_pushf(&cmd->args, "--window-memory=%s", args->window_memory); - if (args->depth) - strvec_pushf(&cmd->args, "--depth=%s", args->depth); - if (args->threads) - strvec_pushf(&cmd->args, "--threads=%s", args->threads); - if (args->max_pack_size) - strvec_pushf(&cmd->args, "--max-pack-size=%lu", args->max_pack_size); - if (args->no_reuse_delta) - strvec_pushf(&cmd->args, "--no-reuse-delta"); - if (args->no_reuse_object) - strvec_pushf(&cmd->args, "--no-reuse-object"); - if (args->name_hash_version) - strvec_pushf(&cmd->args, "--name-hash-version=%d", args->name_hash_version); - if (args->path_walk) - strvec_pushf(&cmd->args, "--path-walk"); - if (args->local) - strvec_push(&cmd->args, "--local"); - if (args->quiet) - strvec_push(&cmd->args, "--quiet"); - if (args->delta_base_offset) - strvec_push(&cmd->args, "--delta-base-offset"); - strvec_push(&cmd->args, out); - cmd->git_cmd = 1; - cmd->out = -1; -} - struct write_oid_context { struct child_process *cmd; const struct git_hash_algo *algop; diff --git a/repack.c b/repack.c index a1f5b796fba..91b6e1cc09c 100644 --- a/repack.c +++ b/repack.c @@ -1,5 +1,40 @@ #include "git-compat-util.h" #include "repack.h" +#include "run-command.h" + +void prepare_pack_objects(struct child_process *cmd, + const struct pack_objects_args *args, + const char *out) +{ + strvec_push(&cmd->args, "pack-objects"); + if (args->window) + strvec_pushf(&cmd->args, "--window=%s", args->window); + if (args->window_memory) + strvec_pushf(&cmd->args, "--window-memory=%s", args->window_memory); + if (args->depth) + strvec_pushf(&cmd->args, "--depth=%s", args->depth); + if (args->threads) + strvec_pushf(&cmd->args, "--threads=%s", args->threads); + if (args->max_pack_size) + strvec_pushf(&cmd->args, "--max-pack-size=%lu", args->max_pack_size); + if (args->no_reuse_delta) + strvec_pushf(&cmd->args, "--no-reuse-delta"); + if (args->no_reuse_object) + strvec_pushf(&cmd->args, "--no-reuse-object"); + if (args->name_hash_version) + strvec_pushf(&cmd->args, "--name-hash-version=%d", args->name_hash_version); + if (args->path_walk) + strvec_pushf(&cmd->args, "--path-walk"); + if (args->local) + strvec_push(&cmd->args, "--local"); + if (args->quiet) + strvec_push(&cmd->args, "--quiet"); + if (args->delta_base_offset) + strvec_push(&cmd->args, "--delta-base-offset"); + strvec_push(&cmd->args, out); + cmd->git_cmd = 1; + cmd->out = -1; +} void pack_objects_args_release(struct pack_objects_args *args) { diff --git a/repack.h b/repack.h index 12632d7fec1..3f7ec207355 100644 --- a/repack.h +++ b/repack.h @@ -21,6 +21,11 @@ struct pack_objects_args { #define PACK_OBJECTS_ARGS_INIT { .delta_base_offset = 1 } +struct child_process; + +void prepare_pack_objects(struct child_process *cmd, + const struct pack_objects_args *args, + const char *out); void pack_objects_args_release(struct pack_objects_args *args); #endif /* REPACK_H */ -- GitLab From f989b706d1fafd60e2f37dfc2d3f99e093b980be Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:08 -0400 Subject: [PATCH 15/60] builtin/repack.c: rename many 'struct existing_packs' functions Rename many of the 'struct existing_packs'-related functions according to the convention introduced in and described by 541204aabe (Documentation: document naming schema for structs and their functions, 2024-07-30). Note that some functions which operate over an individual entry in the list of existing packs are prefixed with "existing_pack_" instead of the plural form. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 66 +++++++++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 32 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index ff93654cfed..f82e6c39307 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -121,39 +121,39 @@ struct existing_packs { .cruft_packs = STRING_LIST_INIT_DUP, \ } -static int has_existing_non_kept_packs(const struct existing_packs *existing) +static int existing_packs_has_non_kept(const struct existing_packs *existing) { return existing->non_kept_packs.nr || existing->cruft_packs.nr; } -static void pack_mark_for_deletion(struct string_list_item *item) +static void existing_pack_mark_for_deletion(struct string_list_item *item) { item->util = (void*)((uintptr_t)item->util | DELETE_PACK); } -static void pack_unmark_for_deletion(struct string_list_item *item) +static void existing_pack_unmark_for_deletion(struct string_list_item *item) { item->util = (void*)((uintptr_t)item->util & ~DELETE_PACK); } -static int pack_is_marked_for_deletion(struct string_list_item *item) +static int existing_pack_is_marked_for_deletion(struct string_list_item *item) { return (uintptr_t)item->util & DELETE_PACK; } -static void pack_mark_retained(struct string_list_item *item) +static void existing_packs_mark_retained(struct string_list_item *item) { item->util = (void*)((uintptr_t)item->util | RETAIN_PACK); } -static int pack_is_retained(struct string_list_item *item) +static int existing_pack_is_retained(struct string_list_item *item) { return (uintptr_t)item->util & RETAIN_PACK; } -static void mark_packs_for_deletion_1(const struct git_hash_algo *algop, - struct string_list *names, - struct string_list *list) +static void existing_packs_mark_for_deletion_1(const struct git_hash_algo *algop, + struct string_list *names, + struct string_list *list) { struct string_list_item *item; const int hexsz = algop->hexsz; @@ -165,8 +165,8 @@ static void mark_packs_for_deletion_1(const struct git_hash_algo *algop, continue; sha1 = item->string + len - hexsz; - if (pack_is_retained(item)) { - pack_unmark_for_deletion(item); + if (existing_pack_is_retained(item)) { + existing_pack_unmark_for_deletion(item); } else if (!string_list_has_string(names, sha1)) { /* * Mark this pack for deletion, which ensures @@ -175,13 +175,13 @@ static void mark_packs_for_deletion_1(const struct git_hash_algo *algop, * will actually delete this pack (if `-d` was * given). */ - pack_mark_for_deletion(item); + existing_pack_mark_for_deletion(item); } } } -static void retain_cruft_pack(struct existing_packs *existing, - struct packed_git *cruft) +static void existing_packs_retain_cruft(struct existing_packs *existing, + struct packed_git *cruft) { struct strbuf buf = STRBUF_INIT; struct string_list_item *item; @@ -193,17 +193,19 @@ static void retain_cruft_pack(struct existing_packs *existing, if (!item) BUG("could not find cruft pack '%s'", pack_basename(cruft)); - pack_mark_retained(item); + existing_packs_mark_retained(item); strbuf_release(&buf); } -static void mark_packs_for_deletion(struct existing_packs *existing, - struct string_list *names) +static void existing_packs_mark_for_deletion(struct existing_packs *existing, + struct string_list *names) { const struct git_hash_algo *algop = existing->repo->hash_algo; - mark_packs_for_deletion_1(algop, names, &existing->non_kept_packs); - mark_packs_for_deletion_1(algop, names, &existing->cruft_packs); + existing_packs_mark_for_deletion_1(algop, names, + &existing->non_kept_packs); + existing_packs_mark_for_deletion_1(algop, names, + &existing->cruft_packs); } static void remove_redundant_pack(struct repository *repo, @@ -225,13 +227,13 @@ static void remove_redundant_packs_1(struct repository *repo, { struct string_list_item *item; for_each_string_list_item(item, packs) { - if (!pack_is_marked_for_deletion(item)) + if (!existing_pack_is_marked_for_deletion(item)) continue; remove_redundant_pack(repo, packdir, item->string); } } -static void remove_redundant_existing_packs(struct existing_packs *existing) +static void existing_packs_remove_redundant(struct existing_packs *existing) { remove_redundant_packs_1(existing->repo, &existing->non_kept_packs); remove_redundant_packs_1(existing->repo, &existing->cruft_packs); @@ -250,7 +252,7 @@ static void existing_packs_release(struct existing_packs *existing) * .keep file or not. Packs without a .keep file are not to be kept * if we are going to pack everything into one file. */ -static void collect_pack_filenames(struct existing_packs *existing, +static void existing_packs_collect(struct existing_packs *existing, const struct string_list *extra_keep) { struct packfile_store *packs = existing->repo->objects->packfiles; @@ -721,7 +723,7 @@ static int midx_has_unknown_packs(char **midx_pack_names, item = string_list_lookup(&existing->non_kept_packs, pack_name); - if (item && !pack_is_marked_for_deletion(item)) + if (item && !existing_pack_is_marked_for_deletion(item)) continue; } @@ -851,7 +853,7 @@ static void midx_included_packs(struct string_list *include, } } else { for_each_string_list_item(item, &existing->non_kept_packs) { - if (pack_is_marked_for_deletion(item)) + if (existing_pack_is_marked_for_deletion(item)) continue; strbuf_reset(&buf); @@ -888,10 +890,10 @@ static void midx_included_packs(struct string_list *include, * --geometric case, but doing so is unnecessary * since no packs are marked as pending * deletion (since we only call - * `mark_packs_for_deletion()` when doing an - * all-into-one repack). + * `existing_packs_mark_for_deletion()` when + * doing an all-into-one repack). */ - if (pack_is_marked_for_deletion(item)) + if (existing_pack_is_marked_for_deletion(item)) continue; strbuf_reset(&buf); @@ -1128,7 +1130,7 @@ static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, if (p->pack_size < combine_cruft_below_size) { fprintf(in, "-%s\n", pack_basename(p)); } else { - retain_cruft_pack(existing, p); + existing_packs_retain_cruft(existing, p); fprintf(in, "%s\n", pack_basename(p)); } } @@ -1382,7 +1384,7 @@ int cmd_repack(int argc, packtmp = mkpathdup("%s/%s", packdir, packtmp_name); existing.repo = repo; - collect_pack_filenames(&existing, &keep_pack_list); + existing_packs_collect(&existing, &keep_pack_list); if (geometry.split_factor) { if (pack_everything) @@ -1431,7 +1433,7 @@ int cmd_repack(int argc, if (pack_everything & ALL_INTO_ONE) { repack_promisor_objects(repo, &po_args, &names); - if (has_existing_non_kept_packs(&existing) && + if (existing_packs_has_non_kept(&existing) && delete_redundant && !(pack_everything & PACK_CRUFT)) { for_each_string_list_item(item, &names) { @@ -1647,7 +1649,7 @@ int cmd_repack(int argc, /* End of pack replacement. */ if (delete_redundant && pack_everything & ALL_INTO_ONE) - mark_packs_for_deletion(&existing, &names); + existing_packs_mark_for_deletion(&existing, &names); if (write_midx) { struct string_list include = STRING_LIST_INIT_DUP; @@ -1671,7 +1673,7 @@ int cmd_repack(int argc, if (delete_redundant) { int opts = 0; - remove_redundant_existing_packs(&existing); + existing_packs_remove_redundant(&existing); if (geometry.split_factor) geometry_remove_redundant_packs(&geometry, &names, -- GitLab From 7be7f73cf21356a2feb701e5968a82e2a762879c Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:12 -0400 Subject: [PATCH 16/60] repack: remove 'remove_redundant_pack' from the builtin Extract "remove_redundant_pack()" as generic repack-related functionality by moving its implementation to the repack.[ch] compilation unit. This is a prerequisite to moving the "existing_packs" API, which is one of the callers of this function. (The remaining caller in the pack geometry code will eventually move to its own compilation unit as well, and will likewise rely on this function.) While moving it over, prefix the function name with "repack_" to indicate that it belongs to the repack-subsystem. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 18 ++---------------- repack.c | 18 ++++++++++++++++++ repack.h | 3 +++ 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index f82e6c39307..31137cf7112 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -208,20 +208,6 @@ static void existing_packs_mark_for_deletion(struct existing_packs *existing, &existing->cruft_packs); } -static void remove_redundant_pack(struct repository *repo, - const char *dir_name, const char *base_name) -{ - struct strbuf buf = STRBUF_INIT; - struct odb_source *source = repo->objects->sources; - struct multi_pack_index *m = get_multi_pack_index(source); - strbuf_addf(&buf, "%s.pack", base_name); - if (m && source->local && midx_contains_pack(m, buf.buf)) - clear_midx_file(repo); - strbuf_insertf(&buf, 0, "%s/", dir_name); - unlink_pack_path(buf.buf, 1); - strbuf_release(&buf); -} - static void remove_redundant_packs_1(struct repository *repo, struct string_list *packs) { @@ -229,7 +215,7 @@ static void remove_redundant_packs_1(struct repository *repo, for_each_string_list_item(item, packs) { if (!existing_pack_is_marked_for_deletion(item)) continue; - remove_redundant_pack(repo, packdir, item->string); + repack_remove_redundant_pack(repo, packdir, item->string); } } @@ -652,7 +638,7 @@ static void geometry_remove_redundant_packs(struct pack_geometry *geometry, (string_list_has_string(&existing->kept_packs, buf.buf))) continue; - remove_redundant_pack(existing->repo, packdir, buf.buf); + repack_remove_redundant_pack(existing->repo, packdir, buf.buf); } strbuf_release(&buf); diff --git a/repack.c b/repack.c index 91b6e1cc09c..3aaa351b5b5 100644 --- a/repack.c +++ b/repack.c @@ -1,5 +1,9 @@ #include "git-compat-util.h" +#include "midx.h" +#include "odb.h" +#include "packfile.h" #include "repack.h" +#include "repository.h" #include "run-command.h" void prepare_pack_objects(struct child_process *cmd, @@ -44,3 +48,17 @@ void pack_objects_args_release(struct pack_objects_args *args) free(args->threads); list_objects_filter_release(&args->filter_options); } + +void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, + const char *base_name) +{ + struct strbuf buf = STRBUF_INIT; + struct odb_source *source = repo->objects->sources; + struct multi_pack_index *m = get_multi_pack_index(source); + strbuf_addf(&buf, "%s.pack", base_name); + if (m && source->local && midx_contains_pack(m, buf.buf)) + clear_midx_file(repo); + strbuf_insertf(&buf, 0, "%s/", dir_name); + unlink_pack_path(buf.buf, 1); + strbuf_release(&buf); +} diff --git a/repack.h b/repack.h index 3f7ec207355..a62bfa2ff97 100644 --- a/repack.h +++ b/repack.h @@ -28,4 +28,7 @@ void prepare_pack_objects(struct child_process *cmd, const char *out); void pack_objects_args_release(struct pack_objects_args *args); +void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, + const char *base_name); + #endif /* REPACK_H */ -- GitLab From fc808b39a40939714e26d355f2a01e995e65f871 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:16 -0400 Subject: [PATCH 17/60] builtin/repack.c: pass "packdir" when removing packs builtin/repack.c defines a static "packdir" to instruct pack-objects on where to write any new packfiles. This is also the directory scanned when removing any packfiles which were made redundant by the latest repack. Prepare to move the "existing_packs_remove_redundant" function to its own compilation unit by passing in this information as a parameter to that function. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 31137cf7112..c5a88eda129 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -209,7 +209,8 @@ static void existing_packs_mark_for_deletion(struct existing_packs *existing, } static void remove_redundant_packs_1(struct repository *repo, - struct string_list *packs) + struct string_list *packs, + const char *packdir) { struct string_list_item *item; for_each_string_list_item(item, packs) { @@ -219,10 +220,13 @@ static void remove_redundant_packs_1(struct repository *repo, } } -static void existing_packs_remove_redundant(struct existing_packs *existing) +static void existing_packs_remove_redundant(struct existing_packs *existing, + const char *packdir) { - remove_redundant_packs_1(existing->repo, &existing->non_kept_packs); - remove_redundant_packs_1(existing->repo, &existing->cruft_packs); + remove_redundant_packs_1(existing->repo, &existing->non_kept_packs, + packdir); + remove_redundant_packs_1(existing->repo, &existing->cruft_packs, + packdir); } static void existing_packs_release(struct existing_packs *existing) @@ -1659,7 +1663,7 @@ int cmd_repack(int argc, if (delete_redundant) { int opts = 0; - existing_packs_remove_redundant(&existing); + existing_packs_remove_redundant(&existing, packdir); if (geometry.split_factor) geometry_remove_redundant_packs(&geometry, &names, -- GitLab From 92e1e725f1d165a5cfd94fcab98e359b86388a4c Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:19 -0400 Subject: [PATCH 18/60] builtin/repack.c: avoid unnecessary numeric casts in existing_packs There are a couple of spots that cause warnings within the existing_packs API without DISABLE_SIGN_COMPARE_WARNINGS under DEVELOPER=1 mode. In both cases, we have int values that are being compared against size_t ones. Neither of these two cases are incorrect, and the cast is completely OK in practice. But both are unnecessary, since: - in existing_packs_mark_for_deletion_1(), 'hexsz' should be defined as a size_t anyway, since algop->hexsz is. - in existing_packs_collect(), 'i' should be defined as a size_t since it is counting up to the value of a string_list's 'nr' field. (This patch is a little bit of noise, but I would rather see us squelch these warnings ahead of moving the existing_packs API into a separate compilation unit to avoid having to define DISABLE_SIGN_COMPARE_WARNINGS in repack.c.) Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index c5a88eda129..e13943b6377 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -156,7 +156,7 @@ static void existing_packs_mark_for_deletion_1(const struct git_hash_algo *algop struct string_list *list) { struct string_list_item *item; - const int hexsz = algop->hexsz; + const size_t hexsz = algop->hexsz; for_each_string_list_item(item, list) { char *sha1; @@ -250,7 +250,7 @@ static void existing_packs_collect(struct existing_packs *existing, struct strbuf buf = STRBUF_INIT; for (p = packfile_store_get_all_packs(packs); p; p = p->next) { - int i; + size_t i; const char *base; if (!p->pack_local) -- GitLab From f2bfe705d9cbffa61f9fd129d0a7b913b832440e Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:23 -0400 Subject: [PATCH 19/60] repack: remove 'existing_packs' API from the builtin The repack builtin defines an API for keeping track of which packs were found in the repository at the beginning of the repack operation. This is used to classify what state a pack was in (kept, non-kept, or cruft), and is also used to mark which packs to delete (or keep) at the end of a repack operation. Now that the prerequisite refactoring is complete, this API is isolated enough that it can be moved out to repack.ch and removed from the builtin entirely. As a result, some of its functions become static within repack.c, cleaning up the visible API. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 173 ----------------------------------------------- repack.c | 157 ++++++++++++++++++++++++++++++++++++++++++ repack.h | 35 ++++++++++ 3 files changed, 192 insertions(+), 173 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index e13943b6377..a168c887911 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -3,7 +3,6 @@ #include "builtin.h" #include "config.h" -#include "dir.h" #include "environment.h" #include "gettext.h" #include "hex.h" @@ -108,178 +107,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -struct existing_packs { - struct repository *repo; - struct string_list kept_packs; - struct string_list non_kept_packs; - struct string_list cruft_packs; -}; - -#define EXISTING_PACKS_INIT { \ - .kept_packs = STRING_LIST_INIT_DUP, \ - .non_kept_packs = STRING_LIST_INIT_DUP, \ - .cruft_packs = STRING_LIST_INIT_DUP, \ -} - -static int existing_packs_has_non_kept(const struct existing_packs *existing) -{ - return existing->non_kept_packs.nr || existing->cruft_packs.nr; -} - -static void existing_pack_mark_for_deletion(struct string_list_item *item) -{ - item->util = (void*)((uintptr_t)item->util | DELETE_PACK); -} - -static void existing_pack_unmark_for_deletion(struct string_list_item *item) -{ - item->util = (void*)((uintptr_t)item->util & ~DELETE_PACK); -} - -static int existing_pack_is_marked_for_deletion(struct string_list_item *item) -{ - return (uintptr_t)item->util & DELETE_PACK; -} - -static void existing_packs_mark_retained(struct string_list_item *item) -{ - item->util = (void*)((uintptr_t)item->util | RETAIN_PACK); -} - -static int existing_pack_is_retained(struct string_list_item *item) -{ - return (uintptr_t)item->util & RETAIN_PACK; -} - -static void existing_packs_mark_for_deletion_1(const struct git_hash_algo *algop, - struct string_list *names, - struct string_list *list) -{ - struct string_list_item *item; - const size_t hexsz = algop->hexsz; - - for_each_string_list_item(item, list) { - char *sha1; - size_t len = strlen(item->string); - if (len < hexsz) - continue; - sha1 = item->string + len - hexsz; - - if (existing_pack_is_retained(item)) { - existing_pack_unmark_for_deletion(item); - } else if (!string_list_has_string(names, sha1)) { - /* - * Mark this pack for deletion, which ensures - * that this pack won't be included in a MIDX - * (if `--write-midx` was given) and that we - * will actually delete this pack (if `-d` was - * given). - */ - existing_pack_mark_for_deletion(item); - } - } -} - -static void existing_packs_retain_cruft(struct existing_packs *existing, - struct packed_git *cruft) -{ - struct strbuf buf = STRBUF_INIT; - struct string_list_item *item; - - strbuf_addstr(&buf, pack_basename(cruft)); - strbuf_strip_suffix(&buf, ".pack"); - - item = string_list_lookup(&existing->cruft_packs, buf.buf); - if (!item) - BUG("could not find cruft pack '%s'", pack_basename(cruft)); - - existing_packs_mark_retained(item); - strbuf_release(&buf); -} - -static void existing_packs_mark_for_deletion(struct existing_packs *existing, - struct string_list *names) - -{ - const struct git_hash_algo *algop = existing->repo->hash_algo; - existing_packs_mark_for_deletion_1(algop, names, - &existing->non_kept_packs); - existing_packs_mark_for_deletion_1(algop, names, - &existing->cruft_packs); -} - -static void remove_redundant_packs_1(struct repository *repo, - struct string_list *packs, - const char *packdir) -{ - struct string_list_item *item; - for_each_string_list_item(item, packs) { - if (!existing_pack_is_marked_for_deletion(item)) - continue; - repack_remove_redundant_pack(repo, packdir, item->string); - } -} - -static void existing_packs_remove_redundant(struct existing_packs *existing, - const char *packdir) -{ - remove_redundant_packs_1(existing->repo, &existing->non_kept_packs, - packdir); - remove_redundant_packs_1(existing->repo, &existing->cruft_packs, - packdir); -} - -static void existing_packs_release(struct existing_packs *existing) -{ - string_list_clear(&existing->kept_packs, 0); - string_list_clear(&existing->non_kept_packs, 0); - string_list_clear(&existing->cruft_packs, 0); -} - -/* - * Adds all packs hex strings (pack-$HASH) to either packs->non_kept - * or packs->kept based on whether each pack has a corresponding - * .keep file or not. Packs without a .keep file are not to be kept - * if we are going to pack everything into one file. - */ -static void existing_packs_collect(struct existing_packs *existing, - const struct string_list *extra_keep) -{ - struct packfile_store *packs = existing->repo->objects->packfiles; - struct packed_git *p; - struct strbuf buf = STRBUF_INIT; - - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { - size_t i; - const char *base; - - if (!p->pack_local) - continue; - - base = pack_basename(p); - - for (i = 0; i < extra_keep->nr; i++) - if (!fspathcmp(base, extra_keep->items[i].string)) - break; - - strbuf_reset(&buf); - strbuf_addstr(&buf, base); - strbuf_strip_suffix(&buf, ".pack"); - - if ((extra_keep->nr > 0 && i < extra_keep->nr) || p->pack_keep) - string_list_append(&existing->kept_packs, buf.buf); - else if (p->is_cruft) - string_list_append(&existing->cruft_packs, buf.buf); - else - string_list_append(&existing->non_kept_packs, buf.buf); - } - - string_list_sort(&existing->kept_packs); - string_list_sort(&existing->non_kept_packs); - string_list_sort(&existing->cruft_packs); - strbuf_release(&buf); -} - struct write_oid_context { struct child_process *cmd; const struct git_hash_algo *algop; diff --git a/repack.c b/repack.c index 3aaa351b5b5..9182e1c50bc 100644 --- a/repack.c +++ b/repack.c @@ -1,4 +1,5 @@ #include "git-compat-util.h" +#include "dir.h" #include "midx.h" #include "odb.h" #include "packfile.h" @@ -62,3 +63,159 @@ void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, unlink_pack_path(buf.buf, 1); strbuf_release(&buf); } + +#define DELETE_PACK 1 +#define RETAIN_PACK 2 + +void existing_packs_collect(struct existing_packs *existing, + const struct string_list *extra_keep) +{ + struct packfile_store *packs = existing->repo->objects->packfiles; + struct packed_git *p; + struct strbuf buf = STRBUF_INIT; + + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + size_t i; + const char *base; + + if (!p->pack_local) + continue; + + base = pack_basename(p); + + for (i = 0; i < extra_keep->nr; i++) + if (!fspathcmp(base, extra_keep->items[i].string)) + break; + + strbuf_reset(&buf); + strbuf_addstr(&buf, base); + strbuf_strip_suffix(&buf, ".pack"); + + if ((extra_keep->nr > 0 && i < extra_keep->nr) || p->pack_keep) + string_list_append(&existing->kept_packs, buf.buf); + else if (p->is_cruft) + string_list_append(&existing->cruft_packs, buf.buf); + else + string_list_append(&existing->non_kept_packs, buf.buf); + } + + string_list_sort(&existing->kept_packs); + string_list_sort(&existing->non_kept_packs); + string_list_sort(&existing->cruft_packs); + strbuf_release(&buf); +} + +int existing_packs_has_non_kept(const struct existing_packs *existing) +{ + return existing->non_kept_packs.nr || existing->cruft_packs.nr; +} + +static void existing_pack_mark_for_deletion(struct string_list_item *item) +{ + item->util = (void*)((uintptr_t)item->util | DELETE_PACK); +} + +static void existing_pack_unmark_for_deletion(struct string_list_item *item) +{ + item->util = (void*)((uintptr_t)item->util & ~DELETE_PACK); +} + +int existing_pack_is_marked_for_deletion(struct string_list_item *item) +{ + return (uintptr_t)item->util & DELETE_PACK; +} + +static void existing_packs_mark_retained(struct string_list_item *item) +{ + item->util = (void*)((uintptr_t)item->util | RETAIN_PACK); +} + +static int existing_pack_is_retained(struct string_list_item *item) +{ + return (uintptr_t)item->util & RETAIN_PACK; +} + +static void existing_packs_mark_for_deletion_1(const struct git_hash_algo *algop, + struct string_list *names, + struct string_list *list) +{ + struct string_list_item *item; + const size_t hexsz = algop->hexsz; + + for_each_string_list_item(item, list) { + char *sha1; + size_t len = strlen(item->string); + if (len < hexsz) + continue; + sha1 = item->string + len - hexsz; + + if (existing_pack_is_retained(item)) { + existing_pack_unmark_for_deletion(item); + } else if (!string_list_has_string(names, sha1)) { + /* + * Mark this pack for deletion, which ensures + * that this pack won't be included in a MIDX + * (if `--write-midx` was given) and that we + * will actually delete this pack (if `-d` was + * given). + */ + existing_pack_mark_for_deletion(item); + } + } +} + +void existing_packs_retain_cruft(struct existing_packs *existing, + struct packed_git *cruft) +{ + struct strbuf buf = STRBUF_INIT; + struct string_list_item *item; + + strbuf_addstr(&buf, pack_basename(cruft)); + strbuf_strip_suffix(&buf, ".pack"); + + item = string_list_lookup(&existing->cruft_packs, buf.buf); + if (!item) + BUG("could not find cruft pack '%s'", pack_basename(cruft)); + + existing_packs_mark_retained(item); + strbuf_release(&buf); +} + +void existing_packs_mark_for_deletion(struct existing_packs *existing, + struct string_list *names) + +{ + const struct git_hash_algo *algop = existing->repo->hash_algo; + existing_packs_mark_for_deletion_1(algop, names, + &existing->non_kept_packs); + existing_packs_mark_for_deletion_1(algop, names, + &existing->cruft_packs); +} + +static void remove_redundant_packs_1(struct repository *repo, + struct string_list *packs, + const char *packdir) +{ + struct string_list_item *item; + for_each_string_list_item(item, packs) { + if (!existing_pack_is_marked_for_deletion(item)) + continue; + repack_remove_redundant_pack(repo, packdir, item->string); + } +} + +void existing_packs_remove_redundant(struct existing_packs *existing, + const char *packdir) +{ + remove_redundant_packs_1(existing->repo, &existing->non_kept_packs, + packdir); + remove_redundant_packs_1(existing->repo, &existing->cruft_packs, + packdir); +} + +void existing_packs_release(struct existing_packs *existing) +{ + string_list_clear(&existing->kept_packs, 0); + string_list_clear(&existing->non_kept_packs, 0); + string_list_clear(&existing->cruft_packs, 0); +} diff --git a/repack.h b/repack.h index a62bfa2ff97..19796e22432 100644 --- a/repack.h +++ b/repack.h @@ -2,6 +2,7 @@ #define REPACK_H #include "list-objects-filter-options.h" +#include "string-list.h" struct pack_objects_args { char *window; @@ -31,4 +32,38 @@ void pack_objects_args_release(struct pack_objects_args *args); void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, const char *base_name); +struct repository; +struct packed_git; + +struct existing_packs { + struct repository *repo; + struct string_list kept_packs; + struct string_list non_kept_packs; + struct string_list cruft_packs; +}; + +#define EXISTING_PACKS_INIT { \ + .kept_packs = STRING_LIST_INIT_DUP, \ + .non_kept_packs = STRING_LIST_INIT_DUP, \ + .cruft_packs = STRING_LIST_INIT_DUP, \ +} + +/* + * Adds all packs hex strings (pack-$HASH) to either packs->non_kept + * or packs->kept based on whether each pack has a corresponding + * .keep file or not. Packs without a .keep file are not to be kept + * if we are going to pack everything into one file. + */ +void existing_packs_collect(struct existing_packs *existing, + const struct string_list *extra_keep); +int existing_packs_has_non_kept(const struct existing_packs *existing); +int existing_pack_is_marked_for_deletion(struct string_list_item *item); +void existing_packs_retain_cruft(struct existing_packs *existing, + struct packed_git *cruft); +void existing_packs_mark_for_deletion(struct existing_packs *existing, + struct string_list *names); +void existing_packs_remove_redundant(struct existing_packs *existing, + const char *packdir); +void existing_packs_release(struct existing_packs *existing); + #endif /* REPACK_H */ -- GitLab From 93876709017308bfe5fbdd026778425018aa3a7f Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:27 -0400 Subject: [PATCH 20/60] builtin/repack.c: rename "struct generated_pack_data" The name "generated_pack_data" is somewhat redundant, since the contents of the struct *is* the data associated with the generated pack. Rename the structure to just "generated_pack", resulting in less awkward function names, like "generated_pack_has_ext()" which is preferable to "generated_pack_data_has_ext()". Rename a few related functions to align with the convention that functions to do with a struct "S" should be prefixed with "S_". Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index a168c887911..a4d80b6b044 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -146,15 +146,15 @@ static struct { {".idx"}, }; -struct generated_pack_data { +struct generated_pack { struct tempfile *tempfiles[ARRAY_SIZE(exts)]; }; -static struct generated_pack_data *populate_pack_exts(const char *name) +static struct generated_pack *generated_pack_populate(const char *name) { struct stat statbuf; struct strbuf path = STRBUF_INIT; - struct generated_pack_data *data = xcalloc(1, sizeof(*data)); + struct generated_pack *pack = xcalloc(1, sizeof(*pack)); int i; for (i = 0; i < ARRAY_SIZE(exts); i++) { @@ -164,21 +164,21 @@ static struct generated_pack_data *populate_pack_exts(const char *name) if (stat(path.buf, &statbuf)) continue; - data->tempfiles[i] = register_tempfile(path.buf); + pack->tempfiles[i] = register_tempfile(path.buf); } strbuf_release(&path); - return data; + return pack; } -static int has_pack_ext(const struct generated_pack_data *data, - const char *ext) +static int generated_pack_has_ext(const struct generated_pack *pack, + const char *ext) { int i; for (i = 0; i < ARRAY_SIZE(exts); i++) { if (strcmp(exts[i].name, ext)) continue; - return !!data->tempfiles[i]; + return !!pack->tempfiles[i]; } BUG("unknown pack extension: '%s'", ext); } @@ -239,7 +239,7 @@ static void repack_promisor_objects(struct repository *repo, line.buf); write_promisor_file(promisor_name, NULL, 0); - item->util = populate_pack_exts(item->string); + item->util = generated_pack_populate(item->string); free(promisor_name); } @@ -780,8 +780,8 @@ static int write_midx_included_packs(struct string_list *include, * will suffice, so pick the first one.) */ for_each_string_list_item(item, names) { - struct generated_pack_data *data = item->util; - if (has_pack_ext(data, ".mtimes")) + struct generated_pack *pack = item->util; + if (generated_pack_has_ext(pack, ".mtimes")) continue; strvec_pushf(&cmd.args, "--preferred-pack=pack-%s.pack", @@ -864,7 +864,7 @@ static int finish_pack_objects_cmd(const struct git_hash_algo *algop, */ if (local) { item = string_list_append(names, line.buf); - item->util = populate_pack_exts(line.buf); + item->util = generated_pack_populate(line.buf); } } fclose(out); @@ -1435,7 +1435,7 @@ int cmd_repack(int argc, * Ok we have prepared all new packfiles. */ for_each_string_list_item(item, &names) { - struct generated_pack_data *data = item->util; + struct generated_pack *pack = item->util; for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { char *fname; @@ -1443,8 +1443,8 @@ int cmd_repack(int argc, fname = mkpathdup("%s/pack-%s%s", packdir, item->string, exts[ext].name); - if (data->tempfiles[ext]) { - const char *fname_old = get_tempfile_path(data->tempfiles[ext]); + if (pack->tempfiles[ext]) { + const char *fname_old = get_tempfile_path(pack->tempfiles[ext]); struct stat statbuffer; if (!stat(fname_old, &statbuffer)) { @@ -1452,7 +1452,7 @@ int cmd_repack(int argc, chmod(fname_old, statbuffer.st_mode); } - if (rename_tempfile(&data->tempfiles[ext], fname)) + if (rename_tempfile(&pack->tempfiles[ext], fname)) die_errno(_("renaming pack to '%s' failed"), fname); } else if (!exts[ext].optional) die(_("pack-objects did not write a '%s' file for pack %s-%s"), -- GitLab From 59f0759d260d705a0e3ad0397c2412d2c16cddb3 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:31 -0400 Subject: [PATCH 21/60] builtin/repack.c: factor our "generated_pack_install" Once all new packs are known to exist, 'repack' installs their contents from their temporary location into their permanent one. This is a semi-involved procedure for each pack, since for each extension (e.g., ".idx", ".pack", ".mtimes", and so on) we have to either: - adjust the filemode of the temporary file before renaming it into place, or - die() if we are missing a non-optional extension, or - unlink() any existing file for extensions that we did not generate (e.g., if a non-cruft pack we generated was identical to, say, a cruft pack which existed at the beginning of the process, we have to remove the ".mtimes" file). Extract this procedure into its own function, and call it "generated_pack_install"(). This will set us up for pulling this function out of the builtin entirely and making it part of the repack.h API, which will be done in a future commit. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 66 ++++++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 30 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index a4d80b6b044..8c3a5f4f80b 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -183,6 +183,38 @@ static int generated_pack_has_ext(const struct generated_pack *pack, BUG("unknown pack extension: '%s'", ext); } +static void generated_pack_install(struct generated_pack *pack, + const char *name) +{ + int ext; + for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { + char *fname; + + fname = mkpathdup("%s/pack-%s%s", packdir, name, + exts[ext].name); + + if (pack->tempfiles[ext]) { + const char *fname_old = get_tempfile_path(pack->tempfiles[ext]); + struct stat statbuffer; + + if (!stat(fname_old, &statbuffer)) { + statbuffer.st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); + chmod(fname_old, statbuffer.st_mode); + } + + if (rename_tempfile(&pack->tempfiles[ext], fname)) + die_errno(_("renaming pack to '%s' failed"), + fname); + } else if (!exts[ext].optional) + die(_("pack-objects did not write a '%s' file for pack %s-%s"), + exts[ext].name, packtmp, name); + else if (unlink(fname) < 0 && errno != ENOENT) + die_errno(_("could not unlink: %s"), fname); + + free(fname); + } +} + static void repack_promisor_objects(struct repository *repo, const struct pack_objects_args *args, struct string_list *names) @@ -1045,7 +1077,7 @@ int cmd_repack(int argc, struct existing_packs existing = EXISTING_PACKS_INIT; struct pack_geometry geometry = { 0 }; struct tempfile *refs_snapshot = NULL; - int i, ext, ret; + int i, ret; int show_progress; char **midx_pack_names = NULL; size_t midx_pack_names_nr = 0; @@ -1434,35 +1466,9 @@ int cmd_repack(int argc, /* * Ok we have prepared all new packfiles. */ - for_each_string_list_item(item, &names) { - struct generated_pack *pack = item->util; - - for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { - char *fname; - - fname = mkpathdup("%s/pack-%s%s", - packdir, item->string, exts[ext].name); - - if (pack->tempfiles[ext]) { - const char *fname_old = get_tempfile_path(pack->tempfiles[ext]); - struct stat statbuffer; - - if (!stat(fname_old, &statbuffer)) { - statbuffer.st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); - chmod(fname_old, statbuffer.st_mode); - } - - if (rename_tempfile(&pack->tempfiles[ext], fname)) - die_errno(_("renaming pack to '%s' failed"), fname); - } else if (!exts[ext].optional) - die(_("pack-objects did not write a '%s' file for pack %s-%s"), - exts[ext].name, packtmp, item->string); - else if (unlink(fname) < 0 && errno != ENOENT) - die_errno(_("could not unlink: %s"), fname); - - free(fname); - } - } + for_each_string_list_item(item, &names) + generated_pack_install((struct generated_pack *)item->util, + item->string); /* End of pack replacement. */ if (delete_redundant && pack_everything & ALL_INTO_ONE) -- GitLab From d99a1faa9d99d9c6d3be80fd52cb59c136b477bb Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:35 -0400 Subject: [PATCH 22/60] builtin/repack.c: pass "packtmp" to `generated_pack_populate()` In a similar spirit as previous commits, this function needs to know the temporary pack prefix, which it currently accesses through the static "packtmp" variable within builtin/repack.c. Pass it explicitly as a function parameter to facilitate moving this function out of builtin/repack.c entirely. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 8c3a5f4f80b..2141c43bd27 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -150,7 +150,8 @@ struct generated_pack { struct tempfile *tempfiles[ARRAY_SIZE(exts)]; }; -static struct generated_pack *generated_pack_populate(const char *name) +static struct generated_pack *generated_pack_populate(const char *name, + const char *packtmp) { struct stat statbuf; struct strbuf path = STRBUF_INIT; @@ -271,7 +272,7 @@ static void repack_promisor_objects(struct repository *repo, line.buf); write_promisor_file(promisor_name, NULL, 0); - item->util = generated_pack_populate(item->string); + item->util = generated_pack_populate(item->string, packtmp); free(promisor_name); } @@ -896,7 +897,7 @@ static int finish_pack_objects_cmd(const struct git_hash_algo *algop, */ if (local) { item = string_list_append(names, line.buf); - item->util = generated_pack_populate(line.buf); + item->util = generated_pack_populate(line.buf, packtmp); } } fclose(out); -- GitLab From 4bf53de93eedaf33fd9937da68b6e9d487a3b6d7 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:39 -0400 Subject: [PATCH 23/60] builtin/repack.c: provide pack locations to `generated_pack_install()` Repeat what was done in the preceding commit for the `generated_pack_install()` function, which needs both "packdir" and "packtmp". (As an aside, it is somewhat unfortunate that the final three parameters to this function are all "const char *", making errors like passing "packdir" and "packtmp" in the wrong order easy. We could define a new structure here, but that may be too heavy-handed.) Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 2141c43bd27..a4f0a194534 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -185,7 +185,8 @@ static int generated_pack_has_ext(const struct generated_pack *pack, } static void generated_pack_install(struct generated_pack *pack, - const char *name) + const char *name, + const char *packdir, const char *packtmp) { int ext; for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { @@ -1469,7 +1470,7 @@ int cmd_repack(int argc, */ for_each_string_list_item(item, &names) generated_pack_install((struct generated_pack *)item->util, - item->string); + item->string, packdir, packtmp); /* End of pack replacement. */ if (delete_redundant && pack_everything & ALL_INTO_ONE) -- GitLab From 494fee74dc4ea9561d5bbda63026bb1eb641c85c Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:43 -0400 Subject: [PATCH 24/60] repack: remove 'generated_pack' API from the builtin Now that we have factored the "generated_pack" API, we can move it to repack.ch, further slimming down builtin/repack.c. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 83 ------------------------------------------------ repack.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++ repack.h | 8 +++++ 3 files changed, 91 insertions(+), 83 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index a4f0a194534..b7826e676b3 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -134,89 +134,6 @@ static int write_oid(const struct object_id *oid, return 0; } -static struct { - const char *name; - unsigned optional:1; -} exts[] = { - {".pack"}, - {".rev", 1}, - {".mtimes", 1}, - {".bitmap", 1}, - {".promisor", 1}, - {".idx"}, -}; - -struct generated_pack { - struct tempfile *tempfiles[ARRAY_SIZE(exts)]; -}; - -static struct generated_pack *generated_pack_populate(const char *name, - const char *packtmp) -{ - struct stat statbuf; - struct strbuf path = STRBUF_INIT; - struct generated_pack *pack = xcalloc(1, sizeof(*pack)); - int i; - - for (i = 0; i < ARRAY_SIZE(exts); i++) { - strbuf_reset(&path); - strbuf_addf(&path, "%s-%s%s", packtmp, name, exts[i].name); - - if (stat(path.buf, &statbuf)) - continue; - - pack->tempfiles[i] = register_tempfile(path.buf); - } - - strbuf_release(&path); - return pack; -} - -static int generated_pack_has_ext(const struct generated_pack *pack, - const char *ext) -{ - int i; - for (i = 0; i < ARRAY_SIZE(exts); i++) { - if (strcmp(exts[i].name, ext)) - continue; - return !!pack->tempfiles[i]; - } - BUG("unknown pack extension: '%s'", ext); -} - -static void generated_pack_install(struct generated_pack *pack, - const char *name, - const char *packdir, const char *packtmp) -{ - int ext; - for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { - char *fname; - - fname = mkpathdup("%s/pack-%s%s", packdir, name, - exts[ext].name); - - if (pack->tempfiles[ext]) { - const char *fname_old = get_tempfile_path(pack->tempfiles[ext]); - struct stat statbuffer; - - if (!stat(fname_old, &statbuffer)) { - statbuffer.st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); - chmod(fname_old, statbuffer.st_mode); - } - - if (rename_tempfile(&pack->tempfiles[ext], fname)) - die_errno(_("renaming pack to '%s' failed"), - fname); - } else if (!exts[ext].optional) - die(_("pack-objects did not write a '%s' file for pack %s-%s"), - exts[ext].name, packtmp, name); - else if (unlink(fname) < 0 && errno != ENOENT) - die_errno(_("could not unlink: %s"), fname); - - free(fname); - } -} - static void repack_promisor_objects(struct repository *repo, const struct pack_objects_args *args, struct string_list *names) diff --git a/repack.c b/repack.c index 9182e1c50bc..d8afdd352d4 100644 --- a/repack.c +++ b/repack.c @@ -3,9 +3,11 @@ #include "midx.h" #include "odb.h" #include "packfile.h" +#include "path.h" #include "repack.h" #include "repository.h" #include "run-command.h" +#include "tempfile.h" void prepare_pack_objects(struct child_process *cmd, const struct pack_objects_args *args, @@ -219,3 +221,84 @@ void existing_packs_release(struct existing_packs *existing) string_list_clear(&existing->non_kept_packs, 0); string_list_clear(&existing->cruft_packs, 0); } + +static struct { + const char *name; + unsigned optional:1; +} exts[] = { + {".pack"}, + {".rev", 1}, + {".mtimes", 1}, + {".bitmap", 1}, + {".promisor", 1}, + {".idx"}, +}; + +struct generated_pack { + struct tempfile *tempfiles[ARRAY_SIZE(exts)]; +}; + +struct generated_pack *generated_pack_populate(const char *name, + const char *packtmp) +{ + struct stat statbuf; + struct strbuf path = STRBUF_INIT; + struct generated_pack *pack = xcalloc(1, sizeof(*pack)); + size_t i; + + for (i = 0; i < ARRAY_SIZE(exts); i++) { + strbuf_reset(&path); + strbuf_addf(&path, "%s-%s%s", packtmp, name, exts[i].name); + + if (stat(path.buf, &statbuf)) + continue; + + pack->tempfiles[i] = register_tempfile(path.buf); + } + + strbuf_release(&path); + return pack; +} + +int generated_pack_has_ext(const struct generated_pack *pack, const char *ext) +{ + size_t i; + for (i = 0; i < ARRAY_SIZE(exts); i++) { + if (strcmp(exts[i].name, ext)) + continue; + return !!pack->tempfiles[i]; + } + BUG("unknown pack extension: '%s'", ext); +} + +void generated_pack_install(struct generated_pack *pack, const char *name, + const char *packdir, const char *packtmp) +{ + size_t ext; + for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { + char *fname; + + fname = mkpathdup("%s/pack-%s%s", packdir, name, + exts[ext].name); + + if (pack->tempfiles[ext]) { + const char *fname_old = get_tempfile_path(pack->tempfiles[ext]); + struct stat statbuffer; + + if (!stat(fname_old, &statbuffer)) { + statbuffer.st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); + chmod(fname_old, statbuffer.st_mode); + } + + if (rename_tempfile(&pack->tempfiles[ext], fname)) + die_errno(_("renaming pack to '%s' failed"), + fname); + } else if (!exts[ext].optional) + die(_("pack-objects did not write a '%s' file for pack %s-%s"), + exts[ext].name, packtmp, name); + else if (unlink(fname) < 0 && errno != ENOENT) + die_errno(_("could not unlink: %s"), fname); + + free(fname); + } +} diff --git a/repack.h b/repack.h index 19796e22432..f37eb495249 100644 --- a/repack.h +++ b/repack.h @@ -66,4 +66,12 @@ void existing_packs_remove_redundant(struct existing_packs *existing, const char *packdir); void existing_packs_release(struct existing_packs *existing); +struct generated_pack; + +struct generated_pack *generated_pack_populate(const char *name, + const char *packtmp); +int generated_pack_has_ext(const struct generated_pack *pack, const char *ext); +void generated_pack_install(struct generated_pack *pack, const char *name, + const char *packdir, const char *packtmp); + #endif /* REPACK_H */ -- GitLab From f4f36b2990f15806d499fa6aaaed5459694e87a9 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:47 -0400 Subject: [PATCH 25/60] builtin/repack.c: pass "packtmp" to `repack_promisor_objects()` In a similar spirit as previous commit(s), pass the "packtmp" variable to "repack_promisor_objects()" as an explicit parameter of the function, preparing us to move this function in a following commit. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index b7826e676b3..ca7658e38fa 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -136,7 +136,8 @@ static int write_oid(const struct object_id *oid, static void repack_promisor_objects(struct repository *repo, const struct pack_objects_args *args, - struct string_list *names) + struct string_list *names, + const char *packtmp) { struct write_oid_context ctx; struct child_process cmd = CHILD_PROCESS_INIT; @@ -1199,7 +1200,7 @@ int cmd_repack(int argc, strvec_push(&cmd.args, "--delta-islands"); if (pack_everything & ALL_INTO_ONE) { - repack_promisor_objects(repo, &po_args, &names); + repack_promisor_objects(repo, &po_args, &names, packtmp); if (existing_packs_has_non_kept(&existing) && delete_redundant && -- GitLab From 9b063b13e28a1ac153c1c54a283164900bb09624 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:51 -0400 Subject: [PATCH 26/60] builtin/repack.c: remove "repack_promisor_objects()" from the builtin Now that we have properly factored the portion of the builtin which is responsible for repacking promisor objects, we can move that function (and associated dependencies) out of the builtin entirely. Similar to previous extractions, this function is declared in repack.h, but implemented in a separate repack-promisor.c file. This is done to separate promisor-specific repacking functionality from generic repack utilities (like "existing_packs", and "generated_pack" APIs). Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Makefile | 1 + builtin/repack.c | 95 ------------------------------------------ meson.build | 1 + repack-promisor.c | 102 ++++++++++++++++++++++++++++++++++++++++++++++ repack.h | 4 ++ 5 files changed, 108 insertions(+), 95 deletions(-) create mode 100644 repack-promisor.c diff --git a/Makefile b/Makefile index c0df6da2371..2a01bd92dc7 100644 --- a/Makefile +++ b/Makefile @@ -1137,6 +1137,7 @@ LIB_OBJS += refs/ref-cache.o LIB_OBJS += refspec.o LIB_OBJS += remote.o LIB_OBJS += repack.o +LIB_OBJS += repack-promisor.o LIB_OBJS += replace-object.o LIB_OBJS += repo-settings.o LIB_OBJS += repository.o diff --git a/builtin/repack.c b/builtin/repack.c index ca7658e38fa..18c3df72001 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -107,101 +107,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -struct write_oid_context { - struct child_process *cmd; - const struct git_hash_algo *algop; -}; - -/* - * Write oid to the given struct child_process's stdin, starting it first if - * necessary. - */ -static int write_oid(const struct object_id *oid, - struct packed_git *pack UNUSED, - uint32_t pos UNUSED, void *data) -{ - struct write_oid_context *ctx = data; - struct child_process *cmd = ctx->cmd; - - if (cmd->in == -1) { - if (start_command(cmd)) - die(_("could not start pack-objects to repack promisor objects")); - } - - if (write_in_full(cmd->in, oid_to_hex(oid), ctx->algop->hexsz) < 0 || - write_in_full(cmd->in, "\n", 1) < 0) - die(_("failed to feed promisor objects to pack-objects")); - return 0; -} - -static void repack_promisor_objects(struct repository *repo, - const struct pack_objects_args *args, - struct string_list *names, - const char *packtmp) -{ - struct write_oid_context ctx; - struct child_process cmd = CHILD_PROCESS_INIT; - FILE *out; - struct strbuf line = STRBUF_INIT; - - prepare_pack_objects(&cmd, args, packtmp); - cmd.in = -1; - - /* - * NEEDSWORK: Giving pack-objects only the OIDs without any ordering - * hints may result in suboptimal deltas in the resulting pack. See if - * the OIDs can be sent with fake paths such that pack-objects can use a - * {type -> existing pack order} ordering when computing deltas instead - * of a {type -> size} ordering, which may produce better deltas. - */ - ctx.cmd = &cmd; - ctx.algop = repo->hash_algo; - for_each_packed_object(repo, write_oid, &ctx, - FOR_EACH_OBJECT_PROMISOR_ONLY); - - if (cmd.in == -1) { - /* No packed objects; cmd was never started */ - child_process_clear(&cmd); - return; - } - - close(cmd.in); - - out = xfdopen(cmd.out, "r"); - while (strbuf_getline_lf(&line, out) != EOF) { - struct string_list_item *item; - char *promisor_name; - - if (line.len != repo->hash_algo->hexsz) - die(_("repack: Expecting full hex object ID lines only from pack-objects.")); - item = string_list_append(names, line.buf); - - /* - * pack-objects creates the .pack and .idx files, but not the - * .promisor file. Create the .promisor file, which is empty. - * - * NEEDSWORK: fetch-pack sometimes generates non-empty - * .promisor files containing the ref names and associated - * hashes at the point of generation of the corresponding - * packfile, but this would not preserve their contents. Maybe - * concatenate the contents of all .promisor files instead of - * just creating a new empty file. - */ - promisor_name = mkpathdup("%s-%s.promisor", packtmp, - line.buf); - write_promisor_file(promisor_name, NULL, 0); - - item->util = generated_pack_populate(item->string, packtmp); - - free(promisor_name); - } - - fclose(out); - if (finish_command(&cmd)) - die(_("could not finish pack-objects to repack promisor objects")); - strbuf_release(&line); -} - struct pack_geometry { struct packed_git **pack; uint32_t pack_nr, pack_alloc; diff --git a/meson.build b/meson.build index 993e8f368ff..1fbb8c52a64 100644 --- a/meson.build +++ b/meson.build @@ -463,6 +463,7 @@ libgit_sources = [ 'reftable/writer.c', 'remote.c', 'repack.c', + 'repack-promisor.c', 'replace-object.c', 'repo-settings.c', 'repository.c', diff --git a/repack-promisor.c b/repack-promisor.c new file mode 100644 index 00000000000..8bf42fc7157 --- /dev/null +++ b/repack-promisor.c @@ -0,0 +1,102 @@ +#include "git-compat-util.h" +#include "repack.h" +#include "run-command.h" +#include "hex.h" +#include "repository.h" +#include "packfile.h" +#include "path.h" +#include "pack.h" + +struct write_oid_context { + struct child_process *cmd; + const struct git_hash_algo *algop; +}; + +/* + * Write oid to the given struct child_process's stdin, starting it first if + * necessary. + */ +static int write_oid(const struct object_id *oid, + struct packed_git *pack UNUSED, + uint32_t pos UNUSED, void *data) +{ + struct write_oid_context *ctx = data; + struct child_process *cmd = ctx->cmd; + + if (cmd->in == -1) { + if (start_command(cmd)) + die(_("could not start pack-objects to repack promisor objects")); + } + + if (write_in_full(cmd->in, oid_to_hex(oid), ctx->algop->hexsz) < 0 || + write_in_full(cmd->in, "\n", 1) < 0) + die(_("failed to feed promisor objects to pack-objects")); + return 0; +} + +void repack_promisor_objects(struct repository *repo, + const struct pack_objects_args *args, + struct string_list *names, const char *packtmp) +{ + struct write_oid_context ctx; + struct child_process cmd = CHILD_PROCESS_INIT; + FILE *out; + struct strbuf line = STRBUF_INIT; + + prepare_pack_objects(&cmd, args, packtmp); + cmd.in = -1; + + /* + * NEEDSWORK: Giving pack-objects only the OIDs without any ordering + * hints may result in suboptimal deltas in the resulting pack. See if + * the OIDs can be sent with fake paths such that pack-objects can use a + * {type -> existing pack order} ordering when computing deltas instead + * of a {type -> size} ordering, which may produce better deltas. + */ + ctx.cmd = &cmd; + ctx.algop = repo->hash_algo; + for_each_packed_object(repo, write_oid, &ctx, + FOR_EACH_OBJECT_PROMISOR_ONLY); + + if (cmd.in == -1) { + /* No packed objects; cmd was never started */ + child_process_clear(&cmd); + return; + } + + close(cmd.in); + + out = xfdopen(cmd.out, "r"); + while (strbuf_getline_lf(&line, out) != EOF) { + struct string_list_item *item; + char *promisor_name; + + if (line.len != repo->hash_algo->hexsz) + die(_("repack: Expecting full hex object ID lines only from pack-objects.")); + item = string_list_append(names, line.buf); + + /* + * pack-objects creates the .pack and .idx files, but not the + * .promisor file. Create the .promisor file, which is empty. + * + * NEEDSWORK: fetch-pack sometimes generates non-empty + * .promisor files containing the ref names and associated + * hashes at the point of generation of the corresponding + * packfile, but this would not preserve their contents. Maybe + * concatenate the contents of all .promisor files instead of + * just creating a new empty file. + */ + promisor_name = mkpathdup("%s-%s.promisor", packtmp, + line.buf); + write_promisor_file(promisor_name, NULL, 0); + + item->util = generated_pack_populate(item->string, packtmp); + + free(promisor_name); + } + + fclose(out); + if (finish_command(&cmd)) + die(_("could not finish pack-objects to repack promisor objects")); + strbuf_release(&line); +} diff --git a/repack.h b/repack.h index f37eb495249..19dc4fd738d 100644 --- a/repack.h +++ b/repack.h @@ -74,4 +74,8 @@ int generated_pack_has_ext(const struct generated_pack *pack, const char *ext); void generated_pack_install(struct generated_pack *pack, const char *name, const char *packdir, const char *packtmp); +void repack_promisor_objects(struct repository *repo, + const struct pack_objects_args *args, + struct string_list *names, const char *packtmp); + #endif /* REPACK_H */ -- GitLab From ee590bc1f23df2a0034c09bb0779c3c0e8fe25b5 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:54 -0400 Subject: [PATCH 27/60] builtin/repack.c: rename various pack_geometry functions Rename functions which work with 'struct pack_geometry' to begin with "pack_geometry_". Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 52 ++++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 18c3df72001..2ce1ae33642 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -115,17 +115,17 @@ struct pack_geometry { int split_factor; }; -static uint32_t geometry_pack_weight(struct packed_git *p) +static uint32_t pack_geometry_weight(struct packed_git *p) { if (open_pack_index(p)) die(_("cannot open index for %s"), p->pack_name); return p->num_objects; } -static int geometry_cmp(const void *va, const void *vb) +static int pack_geometry_cmp(const void *va, const void *vb) { - uint32_t aw = geometry_pack_weight(*(struct packed_git **)va), - bw = geometry_pack_weight(*(struct packed_git **)vb); + uint32_t aw = pack_geometry_weight(*(struct packed_git **)va), + bw = pack_geometry_weight(*(struct packed_git **)vb); if (aw < bw) return -1; @@ -134,7 +134,7 @@ static int geometry_cmp(const void *va, const void *vb) return 0; } -static void init_pack_geometry(struct pack_geometry *geometry, +static void pack_geometry_init(struct pack_geometry *geometry, struct existing_packs *existing, const struct pack_objects_args *args) { @@ -184,11 +184,11 @@ static void init_pack_geometry(struct pack_geometry *geometry, geometry->pack_nr++; } - QSORT(geometry->pack, geometry->pack_nr, geometry_cmp); + QSORT(geometry->pack, geometry->pack_nr, pack_geometry_cmp); strbuf_release(&buf); } -static void split_pack_geometry(struct pack_geometry *geometry) +static void pack_geometry_split(struct pack_geometry *geometry) { uint32_t i; uint32_t split; @@ -208,13 +208,13 @@ static void split_pack_geometry(struct pack_geometry *geometry) struct packed_git *prev = geometry->pack[i - 1]; if (unsigned_mult_overflows(geometry->split_factor, - geometry_pack_weight(prev))) + pack_geometry_weight(prev))) die(_("pack %s too large to consider in geometric " "progression"), prev->pack_name); - if (geometry_pack_weight(ours) < - geometry->split_factor * geometry_pack_weight(prev)) + if (pack_geometry_weight(ours) < + geometry->split_factor * pack_geometry_weight(prev)) break; } @@ -242,9 +242,9 @@ static void split_pack_geometry(struct pack_geometry *geometry) for (i = 0; i < split; i++) { struct packed_git *p = geometry->pack[i]; - if (unsigned_add_overflows(total_size, geometry_pack_weight(p))) + if (unsigned_add_overflows(total_size, pack_geometry_weight(p))) die(_("pack %s too large to roll up"), p->pack_name); - total_size += geometry_pack_weight(p); + total_size += pack_geometry_weight(p); } for (i = split; i < geometry->pack_nr; i++) { struct packed_git *ours = geometry->pack[i]; @@ -253,15 +253,15 @@ static void split_pack_geometry(struct pack_geometry *geometry) total_size)) die(_("pack %s too large to roll up"), ours->pack_name); - if (geometry_pack_weight(ours) < + if (pack_geometry_weight(ours) < geometry->split_factor * total_size) { if (unsigned_add_overflows(total_size, - geometry_pack_weight(ours))) + pack_geometry_weight(ours))) die(_("pack %s too large to roll up"), ours->pack_name); split++; - total_size += geometry_pack_weight(ours); + total_size += pack_geometry_weight(ours); } else break; } @@ -269,7 +269,7 @@ static void split_pack_geometry(struct pack_geometry *geometry) geometry->split = split; } -static struct packed_git *get_preferred_pack(struct pack_geometry *geometry) +static struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry) { uint32_t i; @@ -304,9 +304,9 @@ static struct packed_git *get_preferred_pack(struct pack_geometry *geometry) return NULL; } -static void geometry_remove_redundant_packs(struct pack_geometry *geometry, - struct string_list *names, - struct existing_packs *existing) +static void pack_geometry_remove_redundant(struct pack_geometry *geometry, + struct string_list *names, + struct existing_packs *existing) { const struct git_hash_algo *algop = existing->repo->hash_algo; struct strbuf buf = STRBUF_INIT; @@ -332,7 +332,7 @@ static void geometry_remove_redundant_packs(struct pack_geometry *geometry, strbuf_release(&buf); } -static void free_pack_geometry(struct pack_geometry *geometry) +static void pack_geometry_release(struct pack_geometry *geometry) { if (!geometry) return; @@ -599,7 +599,7 @@ static int write_midx_included_packs(struct string_list *include, { struct child_process cmd = CHILD_PROCESS_INIT; struct string_list_item *item; - struct packed_git *preferred = get_preferred_pack(geometry); + struct packed_git *preferred = pack_geometry_preferred_pack(geometry); FILE *in; int ret; @@ -1063,8 +1063,8 @@ int cmd_repack(int argc, if (geometry.split_factor) { if (pack_everything) die(_("options '%s' and '%s' cannot be used together"), "--geometric", "-A/-a"); - init_pack_geometry(&geometry, &existing, &po_args); - split_pack_geometry(&geometry); + pack_geometry_init(&geometry, &existing, &po_args); + pack_geometry_split(&geometry); } prepare_pack_objects(&cmd, &po_args, packtmp); @@ -1324,8 +1324,8 @@ int cmd_repack(int argc, existing_packs_remove_redundant(&existing, packdir); if (geometry.split_factor) - geometry_remove_redundant_packs(&geometry, &names, - &existing); + pack_geometry_remove_redundant(&geometry, &names, + &existing); if (show_progress) opts |= PRUNE_PACKED_VERBOSE; prune_packed_objects(opts); @@ -1352,7 +1352,7 @@ int cmd_repack(int argc, string_list_clear(&keep_pack_list, 0); string_list_clear(&names, 1); existing_packs_release(&existing); - free_pack_geometry(&geometry); + pack_geometry_release(&geometry); for (size_t i = 0; i < midx_pack_names_nr; i++) free(midx_pack_names[i]); free(midx_pack_names); -- GitLab From ce852b95800c81c17c08f7a45425cac52d208279 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:58 -0400 Subject: [PATCH 28/60] builtin/repack.c: pass 'pack_kept_objects' to `pack_geometry_init()` Prepare to move pack_geometry-related APIs to their own compilation unit by passing in the static "pack_kept_objects" variable directly as a parameter to the 'pack_geometry_init()' function. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 2ce1ae33642..60dce455567 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -136,7 +136,8 @@ static int pack_geometry_cmp(const void *va, const void *vb) static void pack_geometry_init(struct pack_geometry *geometry, struct existing_packs *existing, - const struct pack_objects_args *args) + const struct pack_objects_args *args, + int pack_kept_objects) { struct packfile_store *packs = existing->repo->objects->packfiles; struct packed_git *p; @@ -1063,7 +1064,8 @@ int cmd_repack(int argc, if (geometry.split_factor) { if (pack_everything) die(_("options '%s' and '%s' cannot be used together"), "--geometric", "-A/-a"); - pack_geometry_init(&geometry, &existing, &po_args); + pack_geometry_init(&geometry, &existing, &po_args, + pack_kept_objects); pack_geometry_split(&geometry); } -- GitLab From 4738f57237927d34c7e13a1ff29c1000eda2ffca Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:02 -0400 Subject: [PATCH 29/60] builtin/repack.c: pass 'packdir' to `pack_geometry_remove_redundant()` For similar reasons as the preceding commit, pass the "packdir" variable directly to `pack_geometry_remove_redundant()` as a parameter to the function. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 60dce455567..9e2523d9483 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -307,7 +307,8 @@ static struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geo static void pack_geometry_remove_redundant(struct pack_geometry *geometry, struct string_list *names, - struct existing_packs *existing) + struct existing_packs *existing, + const char *packdir) { const struct git_hash_algo *algop = existing->repo->hash_algo; struct strbuf buf = STRBUF_INIT; @@ -1327,7 +1328,7 @@ int cmd_repack(int argc, if (geometry.split_factor) pack_geometry_remove_redundant(&geometry, &names, - &existing); + &existing, packdir); if (show_progress) opts |= PRUNE_PACKED_VERBOSE; prune_packed_objects(opts); -- GitLab From eab133767233eaeb9bd2c78108f20059e4ae7675 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:06 -0400 Subject: [PATCH 30/60] repack: remove pack_geometry API from the builtin Now that the pack_geometry API is fully factored and isolated from the rest of the builtin, declare it within repack.h and move its implementation to "repack-geometry.c" as a separate component. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Makefile | 1 + builtin/repack.c | 235 ---------------------------------------------- meson.build | 1 + repack-geometry.c | 233 +++++++++++++++++++++++++++++++++++++++++++++ repack.h | 20 ++++ 5 files changed, 255 insertions(+), 235 deletions(-) create mode 100644 repack-geometry.c diff --git a/Makefile b/Makefile index 2a01bd92dc7..3ee8d27dba2 100644 --- a/Makefile +++ b/Makefile @@ -1137,6 +1137,7 @@ LIB_OBJS += refs/ref-cache.o LIB_OBJS += refspec.o LIB_OBJS += remote.o LIB_OBJS += repack.o +LIB_OBJS += repack-geometry.o LIB_OBJS += repack-promisor.o LIB_OBJS += replace-object.o LIB_OBJS += repo-settings.o diff --git a/builtin/repack.c b/builtin/repack.c index 9e2523d9483..b49e2dab9a6 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -107,241 +107,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -struct pack_geometry { - struct packed_git **pack; - uint32_t pack_nr, pack_alloc; - uint32_t split; - - int split_factor; -}; - -static uint32_t pack_geometry_weight(struct packed_git *p) -{ - if (open_pack_index(p)) - die(_("cannot open index for %s"), p->pack_name); - return p->num_objects; -} - -static int pack_geometry_cmp(const void *va, const void *vb) -{ - uint32_t aw = pack_geometry_weight(*(struct packed_git **)va), - bw = pack_geometry_weight(*(struct packed_git **)vb); - - if (aw < bw) - return -1; - if (aw > bw) - return 1; - return 0; -} - -static void pack_geometry_init(struct pack_geometry *geometry, - struct existing_packs *existing, - const struct pack_objects_args *args, - int pack_kept_objects) -{ - struct packfile_store *packs = existing->repo->objects->packfiles; - struct packed_git *p; - struct strbuf buf = STRBUF_INIT; - - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { - if (args->local && !p->pack_local) - /* - * When asked to only repack local packfiles we skip - * over any packfiles that are borrowed from alternate - * object directories. - */ - continue; - - if (!pack_kept_objects) { - /* - * Any pack that has its pack_keep bit set will - * appear in existing->kept_packs below, but - * this saves us from doing a more expensive - * check. - */ - if (p->pack_keep) - continue; - - /* - * The pack may be kept via the --keep-pack - * option; check 'existing->kept_packs' to - * determine whether to ignore it. - */ - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(p)); - strbuf_strip_suffix(&buf, ".pack"); - - if (string_list_has_string(&existing->kept_packs, buf.buf)) - continue; - } - if (p->is_cruft) - continue; - - ALLOC_GROW(geometry->pack, - geometry->pack_nr + 1, - geometry->pack_alloc); - - geometry->pack[geometry->pack_nr] = p; - geometry->pack_nr++; - } - - QSORT(geometry->pack, geometry->pack_nr, pack_geometry_cmp); - strbuf_release(&buf); -} - -static void pack_geometry_split(struct pack_geometry *geometry) -{ - uint32_t i; - uint32_t split; - off_t total_size = 0; - - if (!geometry->pack_nr) { - geometry->split = geometry->pack_nr; - return; - } - - /* - * First, count the number of packs (in descending order of size) which - * already form a geometric progression. - */ - for (i = geometry->pack_nr - 1; i > 0; i--) { - struct packed_git *ours = geometry->pack[i]; - struct packed_git *prev = geometry->pack[i - 1]; - - if (unsigned_mult_overflows(geometry->split_factor, - pack_geometry_weight(prev))) - die(_("pack %s too large to consider in geometric " - "progression"), - prev->pack_name); - - if (pack_geometry_weight(ours) < - geometry->split_factor * pack_geometry_weight(prev)) - break; - } - - split = i; - - if (split) { - /* - * Move the split one to the right, since the top element in the - * last-compared pair can't be in the progression. Only do this - * when we split in the middle of the array (otherwise if we got - * to the end, then the split is in the right place). - */ - split++; - } - - /* - * Then, anything to the left of 'split' must be in a new pack. But, - * creating that new pack may cause packs in the heavy half to no longer - * form a geometric progression. - * - * Compute an expected size of the new pack, and then determine how many - * packs in the heavy half need to be joined into it (if any) to restore - * the geometric progression. - */ - for (i = 0; i < split; i++) { - struct packed_git *p = geometry->pack[i]; - - if (unsigned_add_overflows(total_size, pack_geometry_weight(p))) - die(_("pack %s too large to roll up"), p->pack_name); - total_size += pack_geometry_weight(p); - } - for (i = split; i < geometry->pack_nr; i++) { - struct packed_git *ours = geometry->pack[i]; - - if (unsigned_mult_overflows(geometry->split_factor, - total_size)) - die(_("pack %s too large to roll up"), ours->pack_name); - - if (pack_geometry_weight(ours) < - geometry->split_factor * total_size) { - if (unsigned_add_overflows(total_size, - pack_geometry_weight(ours))) - die(_("pack %s too large to roll up"), - ours->pack_name); - - split++; - total_size += pack_geometry_weight(ours); - } else - break; - } - - geometry->split = split; -} - -static struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry) -{ - uint32_t i; - - if (!geometry) { - /* - * No geometry means either an all-into-one repack (in which - * case there is only one pack left and it is the largest) or an - * incremental one. - * - * If repacking incrementally, then we could check the size of - * all packs to determine which should be preferred, but leave - * this for later. - */ - return NULL; - } - if (geometry->split == geometry->pack_nr) - return NULL; - - /* - * The preferred pack is the largest pack above the split line. In - * other words, it is the largest pack that does not get rolled up in - * the geometric repack. - */ - for (i = geometry->pack_nr; i > geometry->split; i--) - /* - * A pack that is not local would never be included in a - * multi-pack index. We thus skip over any non-local packs. - */ - if (geometry->pack[i - 1]->pack_local) - return geometry->pack[i - 1]; - - return NULL; -} - -static void pack_geometry_remove_redundant(struct pack_geometry *geometry, - struct string_list *names, - struct existing_packs *existing, - const char *packdir) -{ - const struct git_hash_algo *algop = existing->repo->hash_algo; - struct strbuf buf = STRBUF_INIT; - uint32_t i; - - for (i = 0; i < geometry->split; i++) { - struct packed_git *p = geometry->pack[i]; - if (string_list_has_string(names, hash_to_hex_algop(p->hash, - algop))) - continue; - - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(p)); - strbuf_strip_suffix(&buf, ".pack"); - - if ((p->pack_keep) || - (string_list_has_string(&existing->kept_packs, buf.buf))) - continue; - - repack_remove_redundant_pack(existing->repo, packdir, buf.buf); - } - - strbuf_release(&buf); -} - -static void pack_geometry_release(struct pack_geometry *geometry) -{ - if (!geometry) - return; - - free(geometry->pack); -} - static int midx_has_unknown_packs(char **midx_pack_names, size_t midx_pack_names_nr, struct string_list *include, diff --git a/meson.build b/meson.build index 1fbb8c52a64..47b05089eec 100644 --- a/meson.build +++ b/meson.build @@ -463,6 +463,7 @@ libgit_sources = [ 'reftable/writer.c', 'remote.c', 'repack.c', + 'repack-geometry.c', 'repack-promisor.c', 'replace-object.c', 'repo-settings.c', diff --git a/repack-geometry.c b/repack-geometry.c new file mode 100644 index 00000000000..a879f2fe49c --- /dev/null +++ b/repack-geometry.c @@ -0,0 +1,233 @@ +#define DISABLE_SIGN_COMPARE_WARNINGS + +#include "git-compat-util.h" +#include "repack.h" +#include "hex.h" +#include "packfile.h" + +static uint32_t pack_geometry_weight(struct packed_git *p) +{ + if (open_pack_index(p)) + die(_("cannot open index for %s"), p->pack_name); + return p->num_objects; +} + +static int pack_geometry_cmp(const void *va, const void *vb) +{ + uint32_t aw = pack_geometry_weight(*(struct packed_git **)va), + bw = pack_geometry_weight(*(struct packed_git **)vb); + + if (aw < bw) + return -1; + if (aw > bw) + return 1; + return 0; +} + +void pack_geometry_init(struct pack_geometry *geometry, + struct existing_packs *existing, + const struct pack_objects_args *args, + int pack_kept_objects) +{ + struct packfile_store *packs = existing->repo->objects->packfiles; + struct packed_git *p; + struct strbuf buf = STRBUF_INIT; + + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + if (args->local && !p->pack_local) + /* + * When asked to only repack local packfiles we skip + * over any packfiles that are borrowed from alternate + * object directories. + */ + continue; + + if (!pack_kept_objects) { + /* + * Any pack that has its pack_keep bit set will + * appear in existing->kept_packs below, but + * this saves us from doing a more expensive + * check. + */ + if (p->pack_keep) + continue; + + /* + * The pack may be kept via the --keep-pack + * option; check 'existing->kept_packs' to + * determine whether to ignore it. + */ + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + + if (string_list_has_string(&existing->kept_packs, buf.buf)) + continue; + } + if (p->is_cruft) + continue; + + ALLOC_GROW(geometry->pack, + geometry->pack_nr + 1, + geometry->pack_alloc); + + geometry->pack[geometry->pack_nr] = p; + geometry->pack_nr++; + } + + QSORT(geometry->pack, geometry->pack_nr, pack_geometry_cmp); + strbuf_release(&buf); +} + +void pack_geometry_split(struct pack_geometry *geometry) +{ + uint32_t i; + uint32_t split; + off_t total_size = 0; + + if (!geometry->pack_nr) { + geometry->split = geometry->pack_nr; + return; + } + + /* + * First, count the number of packs (in descending order of size) which + * already form a geometric progression. + */ + for (i = geometry->pack_nr - 1; i > 0; i--) { + struct packed_git *ours = geometry->pack[i]; + struct packed_git *prev = geometry->pack[i - 1]; + + if (unsigned_mult_overflows(geometry->split_factor, + pack_geometry_weight(prev))) + die(_("pack %s too large to consider in geometric " + "progression"), + prev->pack_name); + + if (pack_geometry_weight(ours) < + geometry->split_factor * pack_geometry_weight(prev)) + break; + } + + split = i; + + if (split) { + /* + * Move the split one to the right, since the top element in the + * last-compared pair can't be in the progression. Only do this + * when we split in the middle of the array (otherwise if we got + * to the end, then the split is in the right place). + */ + split++; + } + + /* + * Then, anything to the left of 'split' must be in a new pack. But, + * creating that new pack may cause packs in the heavy half to no longer + * form a geometric progression. + * + * Compute an expected size of the new pack, and then determine how many + * packs in the heavy half need to be joined into it (if any) to restore + * the geometric progression. + */ + for (i = 0; i < split; i++) { + struct packed_git *p = geometry->pack[i]; + + if (unsigned_add_overflows(total_size, pack_geometry_weight(p))) + die(_("pack %s too large to roll up"), p->pack_name); + total_size += pack_geometry_weight(p); + } + for (i = split; i < geometry->pack_nr; i++) { + struct packed_git *ours = geometry->pack[i]; + + if (unsigned_mult_overflows(geometry->split_factor, + total_size)) + die(_("pack %s too large to roll up"), ours->pack_name); + + if (pack_geometry_weight(ours) < + geometry->split_factor * total_size) { + if (unsigned_add_overflows(total_size, + pack_geometry_weight(ours))) + die(_("pack %s too large to roll up"), + ours->pack_name); + + split++; + total_size += pack_geometry_weight(ours); + } else + break; + } + + geometry->split = split; +} + +struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry) +{ + uint32_t i; + + if (!geometry) { + /* + * No geometry means either an all-into-one repack (in which + * case there is only one pack left and it is the largest) or an + * incremental one. + * + * If repacking incrementally, then we could check the size of + * all packs to determine which should be preferred, but leave + * this for later. + */ + return NULL; + } + if (geometry->split == geometry->pack_nr) + return NULL; + + /* + * The preferred pack is the largest pack above the split line. In + * other words, it is the largest pack that does not get rolled up in + * the geometric repack. + */ + for (i = geometry->pack_nr; i > geometry->split; i--) + /* + * A pack that is not local would never be included in a + * multi-pack index. We thus skip over any non-local packs. + */ + if (geometry->pack[i - 1]->pack_local) + return geometry->pack[i - 1]; + + return NULL; +} + +void pack_geometry_remove_redundant(struct pack_geometry *geometry, + struct string_list *names, + struct existing_packs *existing, + const char *packdir) +{ + const struct git_hash_algo *algop = existing->repo->hash_algo; + struct strbuf buf = STRBUF_INIT; + uint32_t i; + + for (i = 0; i < geometry->split; i++) { + struct packed_git *p = geometry->pack[i]; + if (string_list_has_string(names, hash_to_hex_algop(p->hash, + algop))) + continue; + + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + + if ((p->pack_keep) || + (string_list_has_string(&existing->kept_packs, buf.buf))) + continue; + + repack_remove_redundant_pack(existing->repo, packdir, buf.buf); + } + + strbuf_release(&buf); +} + +void pack_geometry_release(struct pack_geometry *geometry) +{ + if (!geometry) + return; + + free(geometry->pack); +} diff --git a/repack.h b/repack.h index 19dc4fd738d..cea7969ae47 100644 --- a/repack.h +++ b/repack.h @@ -78,4 +78,24 @@ void repack_promisor_objects(struct repository *repo, const struct pack_objects_args *args, struct string_list *names, const char *packtmp); +struct pack_geometry { + struct packed_git **pack; + uint32_t pack_nr, pack_alloc; + uint32_t split; + + int split_factor; +}; + +void pack_geometry_init(struct pack_geometry *geometry, + struct existing_packs *existing, + const struct pack_objects_args *args, + int pack_kept_objects); +void pack_geometry_split(struct pack_geometry *geometry); +struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry); +void pack_geometry_remove_redundant(struct pack_geometry *geometry, + struct string_list *names, + struct existing_packs *existing, + const char *packdir); +void pack_geometry_release(struct pack_geometry *geometry); + #endif /* REPACK_H */ -- GitLab From d5adcba5ee1e78d85ff60520862c2c4a3dfb3d9c Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:10 -0400 Subject: [PATCH 31/60] builtin/repack.c: remove ref snapshotting from builtin When writing a MIDX, 'git repack' takes a snapshot of the repository's references and writes the result out to a file, which it then passes to 'git multi-pack-index write' via the '--refs-snapshot'. This is done in order to make bitmap selections with respect to what we are packing, thus avoiding a race where an incoming reference update causes us to try and write a bitmap for a commit not present in the MIDX. Extract this functionality out into a new repack-midx.c compilation unit, and expose the necessary functions via the repack.h API. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Makefile | 1 + builtin/repack.c | 68 ------------------------------------------ meson.build | 1 + repack-midx.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++ repack.h | 4 +++ 5 files changed, 83 insertions(+), 68 deletions(-) create mode 100644 repack-midx.c diff --git a/Makefile b/Makefile index 3ee8d27dba2..b2142771635 100644 --- a/Makefile +++ b/Makefile @@ -1138,6 +1138,7 @@ LIB_OBJS += refspec.o LIB_OBJS += remote.o LIB_OBJS += repack.o LIB_OBJS += repack-geometry.o +LIB_OBJS += repack-midx.o LIB_OBJS += repack-promisor.o LIB_OBJS += replace-object.o LIB_OBJS += repo-settings.o diff --git a/builtin/repack.c b/builtin/repack.c index b49e2dab9a6..5a1f1cb5629 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -178,74 +178,6 @@ static int midx_has_unknown_packs(char **midx_pack_names, return 0; } -struct midx_snapshot_ref_data { - struct repository *repo; - struct tempfile *f; - struct oidset seen; - int preferred; -}; - -static int midx_snapshot_ref_one(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flag UNUSED, void *_data) -{ - struct midx_snapshot_ref_data *data = _data; - struct object_id peeled; - - if (!peel_iterated_oid(data->repo, oid, &peeled)) - oid = &peeled; - - if (oidset_insert(&data->seen, oid)) - return 0; /* already seen */ - - if (odb_read_object_info(data->repo->objects, oid, NULL) != OBJ_COMMIT) - return 0; - - fprintf(data->f->fp, "%s%s\n", data->preferred ? "+" : "", - oid_to_hex(oid)); - - return 0; -} - -static void midx_snapshot_refs(struct repository *repo, struct tempfile *f) -{ - struct midx_snapshot_ref_data data; - const struct string_list *preferred = bitmap_preferred_tips(repo); - - data.repo = repo; - data.f = f; - data.preferred = 0; - oidset_init(&data.seen, 0); - - if (!fdopen_tempfile(f, "w")) - die(_("could not open tempfile %s for writing"), - get_tempfile_path(f)); - - if (preferred) { - struct string_list_item *item; - - data.preferred = 1; - for_each_string_list_item(item, preferred) - refs_for_each_ref_in(get_main_ref_store(repo), - item->string, - midx_snapshot_ref_one, &data); - data.preferred = 0; - } - - refs_for_each_ref(get_main_ref_store(repo), - midx_snapshot_ref_one, &data); - - if (close_tempfile_gently(f)) { - int save_errno = errno; - delete_tempfile(&f); - errno = save_errno; - die_errno(_("could not close refs snapshot tempfile")); - } - - oidset_clear(&data.seen); -} - static void midx_included_packs(struct string_list *include, struct existing_packs *existing, char **midx_pack_names, diff --git a/meson.build b/meson.build index 47b05089eec..0423ed30c41 100644 --- a/meson.build +++ b/meson.build @@ -464,6 +464,7 @@ libgit_sources = [ 'remote.c', 'repack.c', 'repack-geometry.c', + 'repack-midx.c', 'repack-promisor.c', 'replace-object.c', 'repo-settings.c', diff --git a/repack-midx.c b/repack-midx.c new file mode 100644 index 00000000000..354df729a5e --- /dev/null +++ b/repack-midx.c @@ -0,0 +1,77 @@ +#include "git-compat-util.h" +#include "repack.h" +#include "hash.h" +#include "hex.h" +#include "odb.h" +#include "oidset.h" +#include "pack-bitmap.h" +#include "refs.h" +#include "tempfile.h" + +struct midx_snapshot_ref_data { + struct repository *repo; + struct tempfile *f; + struct oidset seen; + int preferred; +}; + +static int midx_snapshot_ref_one(const char *refname UNUSED, + const char *referent UNUSED, + const struct object_id *oid, + int flag UNUSED, void *_data) +{ + struct midx_snapshot_ref_data *data = _data; + struct object_id peeled; + + if (!peel_iterated_oid(data->repo, oid, &peeled)) + oid = &peeled; + + if (oidset_insert(&data->seen, oid)) + return 0; /* already seen */ + + if (odb_read_object_info(data->repo->objects, oid, NULL) != OBJ_COMMIT) + return 0; + + fprintf(data->f->fp, "%s%s\n", data->preferred ? "+" : "", + oid_to_hex(oid)); + + return 0; +} + +void midx_snapshot_refs(struct repository *repo, struct tempfile *f) +{ + struct midx_snapshot_ref_data data; + const struct string_list *preferred = bitmap_preferred_tips(repo); + + data.repo = repo; + data.f = f; + data.preferred = 0; + oidset_init(&data.seen, 0); + + if (!fdopen_tempfile(f, "w")) + die(_("could not open tempfile %s for writing"), + get_tempfile_path(f)); + + if (preferred) { + struct string_list_item *item; + + data.preferred = 1; + for_each_string_list_item(item, preferred) + refs_for_each_ref_in(get_main_ref_store(repo), + item->string, + midx_snapshot_ref_one, &data); + data.preferred = 0; + } + + refs_for_each_ref(get_main_ref_store(repo), + midx_snapshot_ref_one, &data); + + if (close_tempfile_gently(f)) { + int save_errno = errno; + delete_tempfile(&f); + errno = save_errno; + die_errno(_("could not close refs snapshot tempfile")); + } + + oidset_clear(&data.seen); +} diff --git a/repack.h b/repack.h index cea7969ae47..803e1292240 100644 --- a/repack.h +++ b/repack.h @@ -98,4 +98,8 @@ void pack_geometry_remove_redundant(struct pack_geometry *geometry, const char *packdir); void pack_geometry_release(struct pack_geometry *geometry); +struct tempfile; + +void midx_snapshot_refs(struct repository *repo, struct tempfile *f); + #endif /* REPACK_H */ -- GitLab From 3fc50b4f4f667f2c47d0581d879a3953a51cc592 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:14 -0400 Subject: [PATCH 32/60] builtin/repack.c: extract opts struct for 'write_midx_included_packs()' The function 'write_midx_included_packs()', which is responsible for writing a new MIDX with a given set of included packs, currently takes a list of six arguments. In order to extract this function out of the builtin, we have to pass in a few additional parameters, like 'midx_must_contain_cruft' and 'packdir', which are currently declared as static variables within the builtin/repack.c compilation unit. Instead of adding additional parameters to `write_midx_included_packs()` extract out an "opts" struct that names these parameters, and pass a pointer to that, making it less cumbersome to add additional parameters. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 52 +++++++++++++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 5a1f1cb5629..e4b8aa2c6f0 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -107,6 +107,17 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } +struct repack_write_midx_opts { + struct string_list *include; + struct pack_geometry *geometry; + struct string_list *names; + const char *refs_snapshot; + const char *packdir; + int show_progress; + int write_bitmaps; + int midx_must_contain_cruft; +}; + static int midx_has_unknown_packs(char **midx_pack_names, size_t midx_pack_names_nr, struct string_list *include, @@ -290,19 +301,15 @@ static void midx_included_packs(struct string_list *include, strbuf_release(&buf); } -static int write_midx_included_packs(struct string_list *include, - struct pack_geometry *geometry, - struct string_list *names, - const char *refs_snapshot, - int show_progress, int write_bitmaps) +static int write_midx_included_packs(struct repack_write_midx_opts *opts) { struct child_process cmd = CHILD_PROCESS_INIT; struct string_list_item *item; - struct packed_git *preferred = pack_geometry_preferred_pack(geometry); + struct packed_git *preferred = pack_geometry_preferred_pack(opts->geometry); FILE *in; int ret; - if (!include->nr) + if (!opts->include->nr) return 0; cmd.in = -1; @@ -311,18 +318,18 @@ static int write_midx_included_packs(struct string_list *include, strvec_push(&cmd.args, "multi-pack-index"); strvec_pushl(&cmd.args, "write", "--stdin-packs", NULL); - if (show_progress) + if (opts->show_progress) strvec_push(&cmd.args, "--progress"); else strvec_push(&cmd.args, "--no-progress"); - if (write_bitmaps) + if (opts->write_bitmaps) strvec_push(&cmd.args, "--bitmap"); if (preferred) strvec_pushf(&cmd.args, "--preferred-pack=%s", pack_basename(preferred)); - else if (names->nr) { + else if (opts->names->nr) { /* The largest pack was repacked, meaning that either * one or two packs exist depending on whether the * repository has a cruft pack or not. @@ -335,7 +342,7 @@ static int write_midx_included_packs(struct string_list *include, * `--max-pack-size` was given, but any one of them * will suffice, so pick the first one.) */ - for_each_string_list_item(item, names) { + for_each_string_list_item(item, opts->names) { struct generated_pack *pack = item->util; if (generated_pack_has_ext(pack, ".mtimes")) continue; @@ -355,15 +362,16 @@ static int write_midx_included_packs(struct string_list *include, ; } - if (refs_snapshot) - strvec_pushf(&cmd.args, "--refs-snapshot=%s", refs_snapshot); + if (opts->refs_snapshot) + strvec_pushf(&cmd.args, "--refs-snapshot=%s", + opts->refs_snapshot); ret = start_command(&cmd); if (ret) return ret; in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, include) + for_each_string_list_item(item, opts->include) fprintf(in, "%s\n", item->string); fclose(in); @@ -1001,15 +1009,23 @@ int cmd_repack(int argc, if (write_midx) { struct string_list include = STRING_LIST_INIT_DUP; + struct repack_write_midx_opts opts = { + .include = &include, + .geometry = &geometry, + .names = &names, + .refs_snapshot = refs_snapshot ? get_tempfile_path(refs_snapshot) : NULL, + .packdir = packdir, + .show_progress = show_progress, + .write_bitmaps = write_bitmaps > 0, + .midx_must_contain_cruft = midx_must_contain_cruft + }; midx_included_packs(&include, &existing, midx_pack_names, midx_pack_names_nr, &names, &geometry); - ret = write_midx_included_packs(&include, &geometry, &names, - refs_snapshot ? get_tempfile_path(refs_snapshot) : NULL, - show_progress, write_bitmaps > 0); + ret = write_midx_included_packs(&opts); if (!ret && write_bitmaps) - remove_redundant_bitmaps(&include, packdir); + remove_redundant_bitmaps(&include, opts.packdir); string_list_clear(&include, 0); -- GitLab From c64f873571b85878d0451d74c19d96109fbb3e0c Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:18 -0400 Subject: [PATCH 33/60] builtin/repack.c: use a string_list for 'midx_pack_names' When writing a new MIDX, repack must determine whether or not there are any packs in the MIDX it is replacing (if one exists) that are not somehow represented in the new MIDX (e.g., either by preserving the pack verbatim, or rolling it up as part of a geometric repack, etc.). In order to do this, it keeps track of a list of pack names from the MIDX present in the repository at the start of the repack operation. Since we manipulate and close the object store, we cannot rely on the repository's in-core representation of the MIDX, since this is subject to change and/or go away. When this behavior was introduced in 5ee86c273b (repack: exclude cruft pack(s) from the MIDX where possible, 2025-06-23), we maintained an array of character pointers instead of using a convenience API, such as string-list.h. Store the list of MIDX pack names in a string_list, thereby reducing the number of parameters we have to pass to `midx_has_unknown_packs()`. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 40 +++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index e4b8aa2c6f0..8ae56b05e96 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -118,18 +118,17 @@ struct repack_write_midx_opts { int midx_must_contain_cruft; }; -static int midx_has_unknown_packs(char **midx_pack_names, - size_t midx_pack_names_nr, +static int midx_has_unknown_packs(struct string_list *midx_pack_names, struct string_list *include, struct pack_geometry *geometry, struct existing_packs *existing) { - size_t i; + struct string_list_item *item; string_list_sort(include); - for (i = 0; i < midx_pack_names_nr; i++) { - const char *pack_name = midx_pack_names[i]; + for_each_string_list_item(item, midx_pack_names) { + const char *pack_name = item->string; /* * Determine whether or not each MIDX'd pack from the existing @@ -191,8 +190,7 @@ static int midx_has_unknown_packs(char **midx_pack_names, static void midx_included_packs(struct string_list *include, struct existing_packs *existing, - char **midx_pack_names, - size_t midx_pack_names_nr, + struct string_list *midx_pack_names, struct string_list *names, struct pack_geometry *geometry) { @@ -247,8 +245,8 @@ static void midx_included_packs(struct string_list *include, } if (midx_must_contain_cruft || - midx_has_unknown_packs(midx_pack_names, midx_pack_names_nr, - include, geometry, existing)) { + midx_has_unknown_packs(midx_pack_names, include, geometry, + existing)) { /* * If there are one or more unknown pack(s) present (see * midx_has_unknown_packs() for what makes a pack @@ -606,13 +604,12 @@ int cmd_repack(int argc, struct child_process cmd = CHILD_PROCESS_INIT; struct string_list_item *item; struct string_list names = STRING_LIST_INIT_DUP; + struct string_list midx_pack_names = STRING_LIST_INIT_DUP; struct existing_packs existing = EXISTING_PACKS_INIT; struct pack_geometry geometry = { 0 }; struct tempfile *refs_snapshot = NULL; int i, ret; int show_progress; - char **midx_pack_names = NULL; - size_t midx_pack_names_nr = 0; /* variables to be filled by option parsing */ struct repack_config_ctx config_ctx; @@ -985,13 +982,12 @@ int cmd_repack(int argc, struct multi_pack_index *m = get_multi_pack_index(repo->objects->sources); - ALLOC_ARRAY(midx_pack_names, - m->num_packs + m->num_packs_in_base); - - for (; m; m = m->base_midx) - for (uint32_t i = 0; i < m->num_packs; i++) - midx_pack_names[midx_pack_names_nr++] = - xstrdup(m->pack_names[i]); + for (; m; m = m->base_midx) { + for (uint32_t i = 0; i < m->num_packs; i++) { + string_list_append(&midx_pack_names, + m->pack_names[i]); + } + } } close_object_store(repo->objects); @@ -1019,8 +1015,8 @@ int cmd_repack(int argc, .write_bitmaps = write_bitmaps > 0, .midx_must_contain_cruft = midx_must_contain_cruft }; - midx_included_packs(&include, &existing, midx_pack_names, - midx_pack_names_nr, &names, &geometry); + midx_included_packs(&include, &existing, &midx_pack_names, + &names, &geometry); ret = write_midx_included_packs(&opts); @@ -1067,11 +1063,9 @@ int cmd_repack(int argc, cleanup: string_list_clear(&keep_pack_list, 0); string_list_clear(&names, 1); + string_list_clear(&midx_pack_names, 0); existing_packs_release(&existing); pack_geometry_release(&geometry); - for (size_t i = 0; i < midx_pack_names_nr; i++) - free(midx_pack_names[i]); - free(midx_pack_names); pack_objects_args_release(&po_args); pack_objects_args_release(&cruft_po_args); -- GitLab From 3e4f895ce8853f859d3b15633a8f8a64f1f52ba8 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:22 -0400 Subject: [PATCH 34/60] repack: keep track of MIDX pack names using existing_packs Instead of storing the list of MIDX pack names separately, let's inline it into the existing_packs struct, further reducing the number of parameters we have to pass around. This amounts to adding a new string_list to the existing_packs struct, and populating it via `existing_packs_collect()`. This is fairly straightforward to do, since we are already looping over all packs, all we need to do is: if (p->multi_pack_index) string_list_append(&existing->midx_packs, pack_basename(p)); Note, however, that this check *must* come before other conditions where we discard and do not keep track of a pack, including the condition "if (!p->pack_local)" immediately below. This is because the existing routine which collects MIDX pack names does so blindly, and does not discard, for example, non-local packs. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 26 ++++---------------------- repack.c | 5 +++++ repack.h | 1 + 3 files changed, 10 insertions(+), 22 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 8ae56b05e96..251dd08b0ad 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -118,8 +118,7 @@ struct repack_write_midx_opts { int midx_must_contain_cruft; }; -static int midx_has_unknown_packs(struct string_list *midx_pack_names, - struct string_list *include, +static int midx_has_unknown_packs(struct string_list *include, struct pack_geometry *geometry, struct existing_packs *existing) { @@ -127,7 +126,7 @@ static int midx_has_unknown_packs(struct string_list *midx_pack_names, string_list_sort(include); - for_each_string_list_item(item, midx_pack_names) { + for_each_string_list_item(item, &existing->midx_packs) { const char *pack_name = item->string; /* @@ -190,7 +189,6 @@ static int midx_has_unknown_packs(struct string_list *midx_pack_names, static void midx_included_packs(struct string_list *include, struct existing_packs *existing, - struct string_list *midx_pack_names, struct string_list *names, struct pack_geometry *geometry) { @@ -245,8 +243,7 @@ static void midx_included_packs(struct string_list *include, } if (midx_must_contain_cruft || - midx_has_unknown_packs(midx_pack_names, include, geometry, - existing)) { + midx_has_unknown_packs(include, geometry, existing)) { /* * If there are one or more unknown pack(s) present (see * midx_has_unknown_packs() for what makes a pack @@ -604,7 +601,6 @@ int cmd_repack(int argc, struct child_process cmd = CHILD_PROCESS_INIT; struct string_list_item *item; struct string_list names = STRING_LIST_INIT_DUP; - struct string_list midx_pack_names = STRING_LIST_INIT_DUP; struct existing_packs existing = EXISTING_PACKS_INIT; struct pack_geometry geometry = { 0 }; struct tempfile *refs_snapshot = NULL; @@ -978,18 +974,6 @@ int cmd_repack(int argc, string_list_sort(&names); - if (get_multi_pack_index(repo->objects->sources)) { - struct multi_pack_index *m = - get_multi_pack_index(repo->objects->sources); - - for (; m; m = m->base_midx) { - for (uint32_t i = 0; i < m->num_packs; i++) { - string_list_append(&midx_pack_names, - m->pack_names[i]); - } - } - } - close_object_store(repo->objects); /* @@ -1015,8 +999,7 @@ int cmd_repack(int argc, .write_bitmaps = write_bitmaps > 0, .midx_must_contain_cruft = midx_must_contain_cruft }; - midx_included_packs(&include, &existing, &midx_pack_names, - &names, &geometry); + midx_included_packs(&include, &existing, &names, &geometry); ret = write_midx_included_packs(&opts); @@ -1063,7 +1046,6 @@ int cmd_repack(int argc, cleanup: string_list_clear(&keep_pack_list, 0); string_list_clear(&names, 1); - string_list_clear(&midx_pack_names, 0); existing_packs_release(&existing); pack_geometry_release(&geometry); pack_objects_args_release(&po_args); diff --git a/repack.c b/repack.c index d8afdd352d4..1d485e01124 100644 --- a/repack.c +++ b/repack.c @@ -80,6 +80,9 @@ void existing_packs_collect(struct existing_packs *existing, size_t i; const char *base; + if (p->multi_pack_index) + string_list_append(&existing->midx_packs, + pack_basename(p)); if (!p->pack_local) continue; @@ -104,6 +107,7 @@ void existing_packs_collect(struct existing_packs *existing, string_list_sort(&existing->kept_packs); string_list_sort(&existing->non_kept_packs); string_list_sort(&existing->cruft_packs); + string_list_sort(&existing->midx_packs); strbuf_release(&buf); } @@ -220,6 +224,7 @@ void existing_packs_release(struct existing_packs *existing) string_list_clear(&existing->kept_packs, 0); string_list_clear(&existing->non_kept_packs, 0); string_list_clear(&existing->cruft_packs, 0); + string_list_clear(&existing->midx_packs, 0); } static struct { diff --git a/repack.h b/repack.h index 803e1292240..6aa5b4e0f0f 100644 --- a/repack.h +++ b/repack.h @@ -40,6 +40,7 @@ struct existing_packs { struct string_list kept_packs; struct string_list non_kept_packs; struct string_list cruft_packs; + struct string_list midx_packs; }; #define EXISTING_PACKS_INIT { \ -- GitLab From d13c9522c1683c21a4aa485c8a6aa40e4b9c06af Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:26 -0400 Subject: [PATCH 35/60] builtin/repack.c: reorder `remove_redundant_bitmaps()` The next commit will inline the call to `remove_redundant_bitmaps()` into `write_midx_included_packs()`. Reorder these two functions to avoid a forward declaration to `remove_redundant_bitmaps()`. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 58 ++++++++++++++++++++++++------------------------ 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 251dd08b0ad..957a9b5f9e8 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -296,6 +296,35 @@ static void midx_included_packs(struct string_list *include, strbuf_release(&buf); } +static void remove_redundant_bitmaps(struct string_list *include, + const char *packdir) +{ + struct strbuf path = STRBUF_INIT; + struct string_list_item *item; + size_t packdir_len; + + strbuf_addstr(&path, packdir); + strbuf_addch(&path, '/'); + packdir_len = path.len; + + /* + * Remove any pack bitmaps corresponding to packs which are now + * included in the MIDX. + */ + for_each_string_list_item(item, include) { + strbuf_addstr(&path, item->string); + strbuf_strip_suffix(&path, ".idx"); + strbuf_addstr(&path, ".bitmap"); + + if (unlink(path.buf) && errno != ENOENT) + warning_errno(_("could not remove stale bitmap: %s"), + path.buf); + + strbuf_setlen(&path, packdir_len); + } + strbuf_release(&path); +} + static int write_midx_included_packs(struct repack_write_midx_opts *opts) { struct child_process cmd = CHILD_PROCESS_INIT; @@ -373,35 +402,6 @@ static int write_midx_included_packs(struct repack_write_midx_opts *opts) return finish_command(&cmd); } -static void remove_redundant_bitmaps(struct string_list *include, - const char *packdir) -{ - struct strbuf path = STRBUF_INIT; - struct string_list_item *item; - size_t packdir_len; - - strbuf_addstr(&path, packdir); - strbuf_addch(&path, '/'); - packdir_len = path.len; - - /* - * Remove any pack bitmaps corresponding to packs which are now - * included in the MIDX. - */ - for_each_string_list_item(item, include) { - strbuf_addstr(&path, item->string); - strbuf_strip_suffix(&path, ".idx"); - strbuf_addstr(&path, ".bitmap"); - - if (unlink(path.buf) && errno != ENOENT) - warning_errno(_("could not remove stale bitmap: %s"), - path.buf); - - strbuf_setlen(&path, packdir_len); - } - strbuf_release(&path); -} - static int finish_pack_objects_cmd(const struct git_hash_algo *algop, struct child_process *cmd, struct string_list *names, -- GitLab From 168c70c6899debf1eace0bd7e94ef8ad20c8521f Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:30 -0400 Subject: [PATCH 36/60] builtin/repack.c: inline `remove_redundant_bitmaps()` After writing a new MIDX, the repack command removes any bitmaps belonging to packs which were written into the MIDX. This is currently done in a separate function outside of `write_midx_included_packs()`, which forces the caller to keep track of the set of packs written into the MIDX. Prepare to no longer require the caller to keep track of such information by inlining the clean-up into `write_midx_included_packs()`. Future commits will make the caller oblivious to the set of packs included in the MIDX altogether. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 957a9b5f9e8..b55c8934e88 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -331,10 +331,10 @@ static int write_midx_included_packs(struct repack_write_midx_opts *opts) struct string_list_item *item; struct packed_git *preferred = pack_geometry_preferred_pack(opts->geometry); FILE *in; - int ret; + int ret = 0; if (!opts->include->nr) - return 0; + goto done; cmd.in = -1; cmd.git_cmd = 1; @@ -392,14 +392,18 @@ static int write_midx_included_packs(struct repack_write_midx_opts *opts) ret = start_command(&cmd); if (ret) - return ret; + goto done; in = xfdopen(cmd.in, "w"); for_each_string_list_item(item, opts->include) fprintf(in, "%s\n", item->string); fclose(in); - return finish_command(&cmd); + ret = finish_command(&cmd); +done: + if (!ret && opts->write_bitmaps) + remove_redundant_bitmaps(opts->include, opts->packdir); + return ret; } static int finish_pack_objects_cmd(const struct git_hash_algo *algop, @@ -1003,9 +1007,6 @@ int cmd_repack(int argc, ret = write_midx_included_packs(&opts); - if (!ret && write_bitmaps) - remove_redundant_bitmaps(&include, opts.packdir); - string_list_clear(&include, 0); if (ret) -- GitLab From e500d06af50bc3c879fda8139fcb23177d4318dc Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:35 -0400 Subject: [PATCH 37/60] builtin/repack.c: pass `repack_write_midx_opts` to `midx_included_packs` Instead of passing individual parameters (in this case, "existing", "names", and "geometry") to `midx_included_packs()`, pass a pointer to a `repack_write_midx_opts` structure instead. Besides reducing the number of parameters necessary to call the `midx_included_packs` function, this refactoring sets us up nicely to inline the call to `midx_included_packs()` into `write_midx_included_packs()`, thus making the caller (in this case, `cmd_repack()`) oblivious to the set of packs being written into the MIDX. In order to do this, `repack_write_midx_opts` has to keep track of the set of existing packs, so add an additional field to point to that set. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index b55c8934e88..bd3034a4f07 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -108,6 +108,7 @@ static int repack_config(const char *var, const char *value, } struct repack_write_midx_opts { + struct existing_packs *existing; struct string_list *include; struct pack_geometry *geometry; struct string_list *names; @@ -188,10 +189,11 @@ static int midx_has_unknown_packs(struct string_list *include, } static void midx_included_packs(struct string_list *include, - struct existing_packs *existing, - struct string_list *names, - struct pack_geometry *geometry) + struct repack_write_midx_opts *opts) { + struct existing_packs *existing = opts->existing; + struct pack_geometry *geometry = opts->geometry; + struct string_list *names = opts->names; struct string_list_item *item; struct strbuf buf = STRBUF_INIT; @@ -242,7 +244,7 @@ static void midx_included_packs(struct string_list *include, } } - if (midx_must_contain_cruft || + if (opts->midx_must_contain_cruft || midx_has_unknown_packs(include, geometry, existing)) { /* * If there are one or more unknown pack(s) present (see @@ -994,6 +996,7 @@ int cmd_repack(int argc, if (write_midx) { struct string_list include = STRING_LIST_INIT_DUP; struct repack_write_midx_opts opts = { + .existing = &existing, .include = &include, .geometry = &geometry, .names = &names, @@ -1003,7 +1006,7 @@ int cmd_repack(int argc, .write_bitmaps = write_bitmaps > 0, .midx_must_contain_cruft = midx_must_contain_cruft }; - midx_included_packs(&include, &existing, &names, &geometry); + midx_included_packs(&include, &opts); ret = write_midx_included_packs(&opts); -- GitLab From fff699f45d590619939d16d36c44e4a5a099744b Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:40 -0400 Subject: [PATCH 38/60] builtin/repack.c: inline packs within `write_midx_included_packs()` To write a MIDX at the end of a repack operation, 'git repack' presently computes the set of packs to write into the MIDX, before invoking `write_midx_included_packs()` with a `string_list` containing those packs. The logic for computing which packs are supposed to appear in the resulting MIDX is within `midx_included_packs()`, where it is aware of details like which cruft pack(s) were written/combined, if/how we did a geometric repack, etc. Computing this list ourselves before providing it to the sole function to make use of that list `write_midx_included_packs()` is somewhat awkward. In the future, repack will learn how to write incremental MIDXs, which will use a very different pack selection routine. Instead of doing something like: struct string_list included_packs = STRING_LIST_INIT_DUP; if (incremental) { midx_incremental_included_packs(&included_packs, ...): write_midx_incremental_included_packs(&included_packs, ...); } else { midx_included_packs(&included_packs, ...): write_midx_included_packs(&included_packs, ...); } in the future, let's have each function which writes a MIDX be responsible for itself computing the list of included packs. Inline the declaration and initialization of `included_packs` into the `write_midx_included_packs()` function itself, and repeat that pattern in the future when we introduce new ways to write MIDXs. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index bd3034a4f07..e2acba63124 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -109,7 +109,6 @@ static int repack_config(const char *var, const char *value, struct repack_write_midx_opts { struct existing_packs *existing; - struct string_list *include; struct pack_geometry *geometry; struct string_list *names; const char *refs_snapshot; @@ -330,12 +329,14 @@ static void remove_redundant_bitmaps(struct string_list *include, static int write_midx_included_packs(struct repack_write_midx_opts *opts) { struct child_process cmd = CHILD_PROCESS_INIT; + struct string_list include = STRING_LIST_INIT_DUP; struct string_list_item *item; struct packed_git *preferred = pack_geometry_preferred_pack(opts->geometry); FILE *in; int ret = 0; - if (!opts->include->nr) + midx_included_packs(&include, opts); + if (!include.nr) goto done; cmd.in = -1; @@ -397,14 +398,17 @@ static int write_midx_included_packs(struct repack_write_midx_opts *opts) goto done; in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, opts->include) + for_each_string_list_item(item, &include) fprintf(in, "%s\n", item->string); fclose(in); ret = finish_command(&cmd); done: if (!ret && opts->write_bitmaps) - remove_redundant_bitmaps(opts->include, opts->packdir); + remove_redundant_bitmaps(&include, opts->packdir); + + string_list_clear(&include, 0); + return ret; } @@ -994,10 +998,8 @@ int cmd_repack(int argc, existing_packs_mark_for_deletion(&existing, &names); if (write_midx) { - struct string_list include = STRING_LIST_INIT_DUP; struct repack_write_midx_opts opts = { .existing = &existing, - .include = &include, .geometry = &geometry, .names = &names, .refs_snapshot = refs_snapshot ? get_tempfile_path(refs_snapshot) : NULL, @@ -1006,12 +1008,9 @@ int cmd_repack(int argc, .write_bitmaps = write_bitmaps > 0, .midx_must_contain_cruft = midx_must_contain_cruft }; - midx_included_packs(&include, &opts); ret = write_midx_included_packs(&opts); - string_list_clear(&include, 0); - if (ret) goto cleanup; } -- GitLab From 2f8fb7d1da04f09b3b66ebffe9405db8587f8eae Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:46 -0400 Subject: [PATCH 39/60] repack: 'write_midx_included_packs' API from the builtin Now that we have sufficiently cleaned up the write_midx_included_packs() function, we can move it (along with the struct repack_write_midx_opts) out of the builtin, and into the repack.h header. Since this function (and the static ones that it depends on) are MIDX-specific details of the repacking process, move them to the repack-midx.c compilation unit instead of the general repack.c one. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 305 ----------------------------------------------- repack-midx.c | 295 +++++++++++++++++++++++++++++++++++++++++++++ repack.h | 12 ++ 3 files changed, 307 insertions(+), 305 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index e2acba63124..5fed79e8262 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -107,311 +107,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -struct repack_write_midx_opts { - struct existing_packs *existing; - struct pack_geometry *geometry; - struct string_list *names; - const char *refs_snapshot; - const char *packdir; - int show_progress; - int write_bitmaps; - int midx_must_contain_cruft; -}; - -static int midx_has_unknown_packs(struct string_list *include, - struct pack_geometry *geometry, - struct existing_packs *existing) -{ - struct string_list_item *item; - - string_list_sort(include); - - for_each_string_list_item(item, &existing->midx_packs) { - const char *pack_name = item->string; - - /* - * Determine whether or not each MIDX'd pack from the existing - * MIDX (if any) is represented in the new MIDX. For each pack - * in the MIDX, it must either be: - * - * - In the "include" list of packs to be included in the new - * MIDX. Note this function is called before the include - * list is populated with any cruft pack(s). - * - * - Below the geometric split line (if using pack geometry), - * indicating that the pack won't be included in the new - * MIDX, but its contents were rolled up as part of the - * geometric repack. - * - * - In the existing non-kept packs list (if not using pack - * geometry), and marked as non-deleted. - */ - if (string_list_has_string(include, pack_name)) { - continue; - } else if (geometry) { - struct strbuf buf = STRBUF_INIT; - uint32_t j; - - for (j = 0; j < geometry->split; j++) { - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(geometry->pack[j])); - strbuf_strip_suffix(&buf, ".pack"); - strbuf_addstr(&buf, ".idx"); - - if (!strcmp(pack_name, buf.buf)) { - strbuf_release(&buf); - break; - } - } - - strbuf_release(&buf); - - if (j < geometry->split) - continue; - } else { - struct string_list_item *item; - - item = string_list_lookup(&existing->non_kept_packs, - pack_name); - if (item && !existing_pack_is_marked_for_deletion(item)) - continue; - } - - /* - * If we got to this point, the MIDX includes some pack that we - * don't know about. - */ - return 1; - } - - return 0; -} - -static void midx_included_packs(struct string_list *include, - struct repack_write_midx_opts *opts) -{ - struct existing_packs *existing = opts->existing; - struct pack_geometry *geometry = opts->geometry; - struct string_list *names = opts->names; - struct string_list_item *item; - struct strbuf buf = STRBUF_INIT; - - for_each_string_list_item(item, &existing->kept_packs) { - strbuf_reset(&buf); - strbuf_addf(&buf, "%s.idx", item->string); - string_list_insert(include, buf.buf); - } - - for_each_string_list_item(item, names) { - strbuf_reset(&buf); - strbuf_addf(&buf, "pack-%s.idx", item->string); - string_list_insert(include, buf.buf); - } - - if (geometry->split_factor) { - uint32_t i; - - for (i = geometry->split; i < geometry->pack_nr; i++) { - struct packed_git *p = geometry->pack[i]; - - /* - * The multi-pack index never refers to packfiles part - * of an alternate object database, so we skip these. - * While git-multi-pack-index(1) would silently ignore - * them anyway, this allows us to skip executing the - * command completely when we have only non-local - * packfiles. - */ - if (!p->pack_local) - continue; - - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(p)); - strbuf_strip_suffix(&buf, ".pack"); - strbuf_addstr(&buf, ".idx"); - - string_list_insert(include, buf.buf); - } - } else { - for_each_string_list_item(item, &existing->non_kept_packs) { - if (existing_pack_is_marked_for_deletion(item)) - continue; - - strbuf_reset(&buf); - strbuf_addf(&buf, "%s.idx", item->string); - string_list_insert(include, buf.buf); - } - } - - if (opts->midx_must_contain_cruft || - midx_has_unknown_packs(include, geometry, existing)) { - /* - * If there are one or more unknown pack(s) present (see - * midx_has_unknown_packs() for what makes a pack - * "unknown") in the MIDX before the repack, keep them - * as they may be required to form a reachability - * closure if the MIDX is bitmapped. - * - * For example, a cruft pack can be required to form a - * reachability closure if the MIDX is bitmapped and one - * or more of the bitmap's selected commits reaches a - * once-cruft object that was later made reachable. - */ - for_each_string_list_item(item, &existing->cruft_packs) { - /* - * When doing a --geometric repack, there is no - * need to check for deleted packs, since we're - * by definition not doing an ALL_INTO_ONE - * repack (hence no packs will be deleted). - * Otherwise we must check for and exclude any - * packs which are enqueued for deletion. - * - * So we could omit the conditional below in the - * --geometric case, but doing so is unnecessary - * since no packs are marked as pending - * deletion (since we only call - * `existing_packs_mark_for_deletion()` when - * doing an all-into-one repack). - */ - if (existing_pack_is_marked_for_deletion(item)) - continue; - - strbuf_reset(&buf); - strbuf_addf(&buf, "%s.idx", item->string); - string_list_insert(include, buf.buf); - } - } else { - /* - * Modern versions of Git (with the appropriate - * configuration setting) will write new copies of - * once-cruft objects when doing a --geometric repack. - * - * If the MIDX has no cruft pack, new packs written - * during a --geometric repack will not rely on the - * cruft pack to form a reachability closure, so we can - * avoid including them in the MIDX in that case. - */ - ; - } - - strbuf_release(&buf); -} - -static void remove_redundant_bitmaps(struct string_list *include, - const char *packdir) -{ - struct strbuf path = STRBUF_INIT; - struct string_list_item *item; - size_t packdir_len; - - strbuf_addstr(&path, packdir); - strbuf_addch(&path, '/'); - packdir_len = path.len; - - /* - * Remove any pack bitmaps corresponding to packs which are now - * included in the MIDX. - */ - for_each_string_list_item(item, include) { - strbuf_addstr(&path, item->string); - strbuf_strip_suffix(&path, ".idx"); - strbuf_addstr(&path, ".bitmap"); - - if (unlink(path.buf) && errno != ENOENT) - warning_errno(_("could not remove stale bitmap: %s"), - path.buf); - - strbuf_setlen(&path, packdir_len); - } - strbuf_release(&path); -} - -static int write_midx_included_packs(struct repack_write_midx_opts *opts) -{ - struct child_process cmd = CHILD_PROCESS_INIT; - struct string_list include = STRING_LIST_INIT_DUP; - struct string_list_item *item; - struct packed_git *preferred = pack_geometry_preferred_pack(opts->geometry); - FILE *in; - int ret = 0; - - midx_included_packs(&include, opts); - if (!include.nr) - goto done; - - cmd.in = -1; - cmd.git_cmd = 1; - - strvec_push(&cmd.args, "multi-pack-index"); - strvec_pushl(&cmd.args, "write", "--stdin-packs", NULL); - - if (opts->show_progress) - strvec_push(&cmd.args, "--progress"); - else - strvec_push(&cmd.args, "--no-progress"); - - if (opts->write_bitmaps) - strvec_push(&cmd.args, "--bitmap"); - - if (preferred) - strvec_pushf(&cmd.args, "--preferred-pack=%s", - pack_basename(preferred)); - else if (opts->names->nr) { - /* The largest pack was repacked, meaning that either - * one or two packs exist depending on whether the - * repository has a cruft pack or not. - * - * Select the non-cruft one as preferred to encourage - * pack-reuse among packs containing reachable objects - * over unreachable ones. - * - * (Note we could write multiple packs here if - * `--max-pack-size` was given, but any one of them - * will suffice, so pick the first one.) - */ - for_each_string_list_item(item, opts->names) { - struct generated_pack *pack = item->util; - if (generated_pack_has_ext(pack, ".mtimes")) - continue; - - strvec_pushf(&cmd.args, "--preferred-pack=pack-%s.pack", - item->string); - break; - } - } else { - /* - * No packs were kept, and no packs were written. The - * only thing remaining are .keep packs (unless - * --pack-kept-objects was given). - * - * Set the `--preferred-pack` arbitrarily here. - */ - ; - } - - if (opts->refs_snapshot) - strvec_pushf(&cmd.args, "--refs-snapshot=%s", - opts->refs_snapshot); - - ret = start_command(&cmd); - if (ret) - goto done; - - in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, &include) - fprintf(in, "%s\n", item->string); - fclose(in); - - ret = finish_command(&cmd); -done: - if (!ret && opts->write_bitmaps) - remove_redundant_bitmaps(&include, opts->packdir); - - string_list_clear(&include, 0); - - return ret; -} - static int finish_pack_objects_cmd(const struct git_hash_algo *algop, struct child_process *cmd, struct string_list *names, diff --git a/repack-midx.c b/repack-midx.c index 354df729a5e..6f6202c5bcc 100644 --- a/repack-midx.c +++ b/repack-midx.c @@ -6,6 +6,7 @@ #include "oidset.h" #include "pack-bitmap.h" #include "refs.h" +#include "run-command.h" #include "tempfile.h" struct midx_snapshot_ref_data { @@ -75,3 +76,297 @@ void midx_snapshot_refs(struct repository *repo, struct tempfile *f) oidset_clear(&data.seen); } + +static int midx_has_unknown_packs(struct string_list *include, + struct pack_geometry *geometry, + struct existing_packs *existing) +{ + struct string_list_item *item; + + string_list_sort(include); + + for_each_string_list_item(item, &existing->midx_packs) { + const char *pack_name = item->string; + + /* + * Determine whether or not each MIDX'd pack from the existing + * MIDX (if any) is represented in the new MIDX. For each pack + * in the MIDX, it must either be: + * + * - In the "include" list of packs to be included in the new + * MIDX. Note this function is called before the include + * list is populated with any cruft pack(s). + * + * - Below the geometric split line (if using pack geometry), + * indicating that the pack won't be included in the new + * MIDX, but its contents were rolled up as part of the + * geometric repack. + * + * - In the existing non-kept packs list (if not using pack + * geometry), and marked as non-deleted. + */ + if (string_list_has_string(include, pack_name)) { + continue; + } else if (geometry) { + struct strbuf buf = STRBUF_INIT; + uint32_t j; + + for (j = 0; j < geometry->split; j++) { + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(geometry->pack[j])); + strbuf_strip_suffix(&buf, ".pack"); + strbuf_addstr(&buf, ".idx"); + + if (!strcmp(pack_name, buf.buf)) { + strbuf_release(&buf); + break; + } + } + + strbuf_release(&buf); + + if (j < geometry->split) + continue; + } else { + struct string_list_item *item; + + item = string_list_lookup(&existing->non_kept_packs, + pack_name); + if (item && !existing_pack_is_marked_for_deletion(item)) + continue; + } + + /* + * If we got to this point, the MIDX includes some pack that we + * don't know about. + */ + return 1; + } + + return 0; +} + +static void midx_included_packs(struct string_list *include, + struct repack_write_midx_opts *opts) +{ + struct existing_packs *existing = opts->existing; + struct pack_geometry *geometry = opts->geometry; + struct string_list *names = opts->names; + struct string_list_item *item; + struct strbuf buf = STRBUF_INIT; + + for_each_string_list_item(item, &existing->kept_packs) { + strbuf_reset(&buf); + strbuf_addf(&buf, "%s.idx", item->string); + string_list_insert(include, buf.buf); + } + + for_each_string_list_item(item, names) { + strbuf_reset(&buf); + strbuf_addf(&buf, "pack-%s.idx", item->string); + string_list_insert(include, buf.buf); + } + + if (geometry->split_factor) { + uint32_t i; + + for (i = geometry->split; i < geometry->pack_nr; i++) { + struct packed_git *p = geometry->pack[i]; + + /* + * The multi-pack index never refers to packfiles part + * of an alternate object database, so we skip these. + * While git-multi-pack-index(1) would silently ignore + * them anyway, this allows us to skip executing the + * command completely when we have only non-local + * packfiles. + */ + if (!p->pack_local) + continue; + + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + strbuf_addstr(&buf, ".idx"); + + string_list_insert(include, buf.buf); + } + } else { + for_each_string_list_item(item, &existing->non_kept_packs) { + if (existing_pack_is_marked_for_deletion(item)) + continue; + + strbuf_reset(&buf); + strbuf_addf(&buf, "%s.idx", item->string); + string_list_insert(include, buf.buf); + } + } + + if (opts->midx_must_contain_cruft || + midx_has_unknown_packs(include, geometry, existing)) { + /* + * If there are one or more unknown pack(s) present (see + * midx_has_unknown_packs() for what makes a pack + * "unknown") in the MIDX before the repack, keep them + * as they may be required to form a reachability + * closure if the MIDX is bitmapped. + * + * For example, a cruft pack can be required to form a + * reachability closure if the MIDX is bitmapped and one + * or more of the bitmap's selected commits reaches a + * once-cruft object that was later made reachable. + */ + for_each_string_list_item(item, &existing->cruft_packs) { + /* + * When doing a --geometric repack, there is no + * need to check for deleted packs, since we're + * by definition not doing an ALL_INTO_ONE + * repack (hence no packs will be deleted). + * Otherwise we must check for and exclude any + * packs which are enqueued for deletion. + * + * So we could omit the conditional below in the + * --geometric case, but doing so is unnecessary + * since no packs are marked as pending + * deletion (since we only call + * `existing_packs_mark_for_deletion()` when + * doing an all-into-one repack). + */ + if (existing_pack_is_marked_for_deletion(item)) + continue; + + strbuf_reset(&buf); + strbuf_addf(&buf, "%s.idx", item->string); + string_list_insert(include, buf.buf); + } + } else { + /* + * Modern versions of Git (with the appropriate + * configuration setting) will write new copies of + * once-cruft objects when doing a --geometric repack. + * + * If the MIDX has no cruft pack, new packs written + * during a --geometric repack will not rely on the + * cruft pack to form a reachability closure, so we can + * avoid including them in the MIDX in that case. + */ + ; + } + + strbuf_release(&buf); +} + +static void remove_redundant_bitmaps(struct string_list *include, + const char *packdir) +{ + struct strbuf path = STRBUF_INIT; + struct string_list_item *item; + size_t packdir_len; + + strbuf_addstr(&path, packdir); + strbuf_addch(&path, '/'); + packdir_len = path.len; + + /* + * Remove any pack bitmaps corresponding to packs which are now + * included in the MIDX. + */ + for_each_string_list_item(item, include) { + strbuf_addstr(&path, item->string); + strbuf_strip_suffix(&path, ".idx"); + strbuf_addstr(&path, ".bitmap"); + + if (unlink(path.buf) && errno != ENOENT) + warning_errno(_("could not remove stale bitmap: %s"), + path.buf); + + strbuf_setlen(&path, packdir_len); + } + strbuf_release(&path); +} + +int write_midx_included_packs(struct repack_write_midx_opts *opts) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + struct string_list include = STRING_LIST_INIT_DUP; + struct string_list_item *item; + struct packed_git *preferred = pack_geometry_preferred_pack(opts->geometry); + FILE *in; + int ret = 0; + + midx_included_packs(&include, opts); + if (!include.nr) + goto done; + + cmd.in = -1; + cmd.git_cmd = 1; + + strvec_push(&cmd.args, "multi-pack-index"); + strvec_pushl(&cmd.args, "write", "--stdin-packs", NULL); + + if (opts->show_progress) + strvec_push(&cmd.args, "--progress"); + else + strvec_push(&cmd.args, "--no-progress"); + + if (opts->write_bitmaps) + strvec_push(&cmd.args, "--bitmap"); + + if (preferred) + strvec_pushf(&cmd.args, "--preferred-pack=%s", + pack_basename(preferred)); + else if (opts->names->nr) { + /* The largest pack was repacked, meaning that either + * one or two packs exist depending on whether the + * repository has a cruft pack or not. + * + * Select the non-cruft one as preferred to encourage + * pack-reuse among packs containing reachable objects + * over unreachable ones. + * + * (Note we could write multiple packs here if + * `--max-pack-size` was given, but any one of them + * will suffice, so pick the first one.) + */ + for_each_string_list_item(item, opts->names) { + struct generated_pack *pack = item->util; + if (generated_pack_has_ext(pack, ".mtimes")) + continue; + + strvec_pushf(&cmd.args, "--preferred-pack=pack-%s.pack", + item->string); + break; + } + } else { + /* + * No packs were kept, and no packs were written. The + * only thing remaining are .keep packs (unless + * --pack-kept-objects was given). + * + * Set the `--preferred-pack` arbitrarily here. + */ + ; + } + + if (opts->refs_snapshot) + strvec_pushf(&cmd.args, "--refs-snapshot=%s", + opts->refs_snapshot); + + ret = start_command(&cmd); + if (ret) + goto done; + + in = xfdopen(cmd.in, "w"); + for_each_string_list_item(item, &include) + fprintf(in, "%s\n", item->string); + fclose(in); + + ret = finish_command(&cmd); +done: + if (!ret && opts->write_bitmaps) + remove_redundant_bitmaps(&include, opts->packdir); + + string_list_clear(&include, 0); + + return ret; +} diff --git a/repack.h b/repack.h index 6aa5b4e0f0f..25a31ac0a05 100644 --- a/repack.h +++ b/repack.h @@ -101,6 +101,18 @@ void pack_geometry_release(struct pack_geometry *geometry); struct tempfile; +struct repack_write_midx_opts { + struct existing_packs *existing; + struct pack_geometry *geometry; + struct string_list *names; + const char *refs_snapshot; + const char *packdir; + int show_progress; + int write_bitmaps; + int midx_must_contain_cruft; +}; + void midx_snapshot_refs(struct repository *repo, struct tempfile *f); +int write_midx_included_packs(struct repack_write_midx_opts *opts); #endif /* REPACK_H */ -- GitLab From 9251edd257129ad3b77c63260a77b5bbd27e674a Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:51 -0400 Subject: [PATCH 40/60] builtin/repack.c: introduce `struct write_pack_opts` There are various functions within the 'repack' builtin which are responsible for writing different kinds of packs. They include: - `static int write_filtered_pack(...)` - `static int write_cruft_pack(...)` as well as the function `finish_pack_objects_cmd()`, which is responsible for finalizing a new pack write, and recording the checksum of its contents in the 'names' list. Both of these `write_` functions have a few things in common. They both take a pointer to the 'pack_objects_args' struct, as well as a pair of character pointers for `destination` and `pack_prefix`. Instead of repeating those arguments for each function, let's extract an options struct called "write_pack_opts" which has these three parameters as member fields. While we're at it, add fields for "packdir," and "packtmp", both of which are static variables within the builtin, and need to be read from within these two functions. This will shorten the list of parameters that callers have to provide to `write_filtered_pack()`, avoid ambiguity when passing multiple variables of the same type, and provide a unified interface for the two functions mentioned earlier. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 30 ++++++++++++++++-------------- repack.h | 8 ++++++++ 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 5fed79e8262..6df7c880853 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -138,9 +138,7 @@ static int finish_pack_objects_cmd(const struct git_hash_algo *algop, return finish_command(cmd); } -static int write_filtered_pack(const struct pack_objects_args *args, - const char *destination, - const char *pack_prefix, +static int write_filtered_pack(struct write_pack_opts *opts, struct existing_packs *existing, struct string_list *names) { @@ -150,9 +148,9 @@ static int write_filtered_pack(const struct pack_objects_args *args, int ret; const char *caret; const char *scratch; - int local = skip_prefix(destination, packdir, &scratch); + int local = skip_prefix(opts->destination, opts->packdir, &scratch); - prepare_pack_objects(&cmd, args, destination); + prepare_pack_objects(&cmd, opts->po_args, opts->destination); strvec_push(&cmd.args, "--stdin-packs"); @@ -175,7 +173,7 @@ static int write_filtered_pack(const struct pack_objects_args *args, */ in = xfdopen(cmd.in, "w"); for_each_string_list_item(item, names) - fprintf(in, "^%s-%s.pack\n", pack_prefix, item->string); + fprintf(in, "^%s-%s.pack\n", opts->pack_prefix, item->string); for_each_string_list_item(item, &existing->non_kept_packs) fprintf(in, "%s.pack\n", item->string); for_each_string_list_item(item, &existing->cruft_packs) @@ -665,14 +663,18 @@ int cmd_repack(int argc, } if (po_args.filter_options.choice) { - if (!filter_to) - filter_to = packtmp; - - ret = write_filtered_pack(&po_args, - filter_to, - find_pack_prefix(packdir, packtmp), - &existing, - &names); + struct write_pack_opts opts = { + .po_args = &po_args, + .destination = filter_to, + .pack_prefix = find_pack_prefix(packdir, packtmp), + .packdir = packdir, + .packtmp = packtmp, + }; + + if (!opts.destination) + opts.destination = packtmp; + + ret = write_filtered_pack(&opts, &existing, &names); if (ret) goto cleanup; } diff --git a/repack.h b/repack.h index 25a31ac0a05..6ef503f6237 100644 --- a/repack.h +++ b/repack.h @@ -32,6 +32,14 @@ void pack_objects_args_release(struct pack_objects_args *args); void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, const char *base_name); +struct write_pack_opts { + struct pack_objects_args *po_args; + const char *destination; + const char *pack_prefix; + const char *packdir; + const char *packtmp; +}; + struct repository; struct packed_git; -- GitLab From f9568ff63ff2770a44cea7a996f9d19ca1de414c Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:57 -0400 Subject: [PATCH 41/60] builtin/repack.c: use `write_pack_opts` within `write_cruft_pack()` Similar to the changes made in the previous commit to `write_filtered_pack()`, teach `write_cruft_pack()` to take a `write_pack_opts` struct and use that where possible. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 6df7c880853..501359c580b 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -221,9 +221,7 @@ static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, strbuf_release(&buf); } -static int write_cruft_pack(const struct pack_objects_args *args, - const char *destination, - const char *pack_prefix, +static int write_cruft_pack(struct write_pack_opts *opts, const char *cruft_expiration, unsigned long combine_cruft_below_size, struct string_list *names, @@ -234,9 +232,9 @@ static int write_cruft_pack(const struct pack_objects_args *args, FILE *in; int ret; const char *scratch; - int local = skip_prefix(destination, packdir, &scratch); + int local = skip_prefix(opts->destination, opts->packdir, &scratch); - prepare_pack_objects(&cmd, args, destination); + prepare_pack_objects(&cmd, opts->po_args, opts->destination); strvec_push(&cmd.args, "--cruft"); if (cruft_expiration) @@ -267,7 +265,7 @@ static int write_cruft_pack(const struct pack_objects_args *args, */ in = xfdopen(cmd.in, "w"); for_each_string_list_item(item, names) - fprintf(in, "%s-%s.pack\n", pack_prefix, item->string); + fprintf(in, "%s-%s.pack\n", opts->pack_prefix, item->string); if (combine_cruft_below_size && !cruft_expiration) { combine_small_cruft_packs(in, combine_cruft_below_size, existing); @@ -599,6 +597,13 @@ int cmd_repack(int argc, if (pack_everything & PACK_CRUFT) { const char *pack_prefix = find_pack_prefix(packdir, packtmp); + struct write_pack_opts opts = { + .po_args = &cruft_po_args, + .destination = packtmp, + .pack_prefix = pack_prefix, + .packtmp = packtmp, + .packdir = packdir, + }; if (!cruft_po_args.window) cruft_po_args.window = xstrdup_or_null(po_args.window); @@ -615,8 +620,7 @@ int cmd_repack(int argc, cruft_po_args.quiet = po_args.quiet; cruft_po_args.delta_base_offset = po_args.delta_base_offset; - ret = write_cruft_pack(&cruft_po_args, packtmp, pack_prefix, - cruft_expiration, + ret = write_cruft_pack(&opts, cruft_expiration, combine_cruft_below_size, &names, &existing); if (ret) @@ -651,11 +655,8 @@ int cmd_repack(int argc, * pack, but rather removing all cruft packs from the * main repository regardless of size. */ - ret = write_cruft_pack(&cruft_po_args, expire_to, - pack_prefix, - NULL, - 0ul, - &names, + opts.destination = expire_to; + ret = write_cruft_pack(&opts, NULL, 0ul, &names, &existing); if (ret) goto cleanup; -- GitLab From 3b7102b6ed9127730ec4e01bf9ee4c383b421081 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:10:02 -0400 Subject: [PATCH 42/60] repack: move `find_pack_prefix()` out of the builtin Both callers within the repack builtin which call functions that take a 'write_pack_opts' structure have the following pattern: struct write_pack_opts opts = { .packdir = packdir, .packtmp = packtmp, .pack_prefix = find_pack_prefix(packdir, packtmp), /* ... */ }; int ret = write_some_kind_of_pack(&opts, /* ... */); , but both "packdir" and "packtmp" are fields within the write_pack_opts struct itself! Instead of also computing the pack_prefix ahead of time, let's have the callees compute it themselves by moving `find_pack_prefix()` out of the repack builtin, and have it take a write_pack_opts pointer instead of the "packdir" and "packtmp" fields directly. This avoids the callers having to do some prep work that is common between the two of them, but also avoids the potential pitfall of accidentally writing: .pack_prefix = find_pack_prefix(packtmp, packdir), (which is well-typed) when the caller meant to instead write: .pack_prefix = find_pack_prefix(packdir, packtmp), Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 20 ++++---------------- repack.c | 11 +++++++++++ repack.h | 3 ++- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 501359c580b..4d869206187 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -149,6 +149,7 @@ static int write_filtered_pack(struct write_pack_opts *opts, const char *caret; const char *scratch; int local = skip_prefix(opts->destination, opts->packdir, &scratch); + const char *pack_prefix = write_pack_opts_pack_prefix(opts); prepare_pack_objects(&cmd, opts->po_args, opts->destination); @@ -173,7 +174,7 @@ static int write_filtered_pack(struct write_pack_opts *opts, */ in = xfdopen(cmd.in, "w"); for_each_string_list_item(item, names) - fprintf(in, "^%s-%s.pack\n", opts->pack_prefix, item->string); + fprintf(in, "^%s-%s.pack\n", pack_prefix, item->string); for_each_string_list_item(item, &existing->non_kept_packs) fprintf(in, "%s.pack\n", item->string); for_each_string_list_item(item, &existing->cruft_packs) @@ -233,6 +234,7 @@ static int write_cruft_pack(struct write_pack_opts *opts, int ret; const char *scratch; int local = skip_prefix(opts->destination, opts->packdir, &scratch); + const char *pack_prefix = write_pack_opts_pack_prefix(opts); prepare_pack_objects(&cmd, opts->po_args, opts->destination); @@ -265,7 +267,7 @@ static int write_cruft_pack(struct write_pack_opts *opts, */ in = xfdopen(cmd.in, "w"); for_each_string_list_item(item, names) - fprintf(in, "%s-%s.pack\n", opts->pack_prefix, item->string); + fprintf(in, "%s-%s.pack\n", pack_prefix, item->string); if (combine_cruft_below_size && !cruft_expiration) { combine_small_cruft_packs(in, combine_cruft_below_size, existing); @@ -283,17 +285,6 @@ static int write_cruft_pack(struct write_pack_opts *opts, local); } -static const char *find_pack_prefix(const char *packdir, const char *packtmp) -{ - const char *pack_prefix; - if (!skip_prefix(packtmp, packdir, &pack_prefix)) - die(_("pack prefix %s does not begin with objdir %s"), - packtmp, packdir); - if (*pack_prefix == '/') - pack_prefix++; - return pack_prefix; -} - int cmd_repack(int argc, const char **argv, const char *prefix, @@ -596,11 +587,9 @@ int cmd_repack(int argc, } if (pack_everything & PACK_CRUFT) { - const char *pack_prefix = find_pack_prefix(packdir, packtmp); struct write_pack_opts opts = { .po_args = &cruft_po_args, .destination = packtmp, - .pack_prefix = pack_prefix, .packtmp = packtmp, .packdir = packdir, }; @@ -667,7 +656,6 @@ int cmd_repack(int argc, struct write_pack_opts opts = { .po_args = &po_args, .destination = filter_to, - .pack_prefix = find_pack_prefix(packdir, packtmp), .packdir = packdir, .packtmp = packtmp, }; diff --git a/repack.c b/repack.c index 1d485e01124..c4326a532df 100644 --- a/repack.c +++ b/repack.c @@ -66,6 +66,17 @@ void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, strbuf_release(&buf); } +const char *write_pack_opts_pack_prefix(struct write_pack_opts *opts) +{ + const char *pack_prefix; + if (!skip_prefix(opts->packtmp, opts->packdir, &pack_prefix)) + die(_("pack prefix %s does not begin with objdir %s"), + opts->packtmp, opts->packdir); + if (*pack_prefix == '/') + pack_prefix++; + return pack_prefix; +} + #define DELETE_PACK 1 #define RETAIN_PACK 2 diff --git a/repack.h b/repack.h index 6ef503f6237..46d2312fa96 100644 --- a/repack.h +++ b/repack.h @@ -35,11 +35,12 @@ void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, struct write_pack_opts { struct pack_objects_args *po_args; const char *destination; - const char *pack_prefix; const char *packdir; const char *packtmp; }; +const char *write_pack_opts_pack_prefix(struct write_pack_opts *opts); + struct repository; struct packed_git; -- GitLab From cbd91c8d15ca552604c9489371dd77c652b009ef Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:10:08 -0400 Subject: [PATCH 43/60] repack: extract `write_pack_opts_is_local()` Similar to the previous commit, the functions `write_cruft_pack()` and `write_filtered_pack()` both compute a "local" variable via the exact same mechanism: const char *scratch; int local = skip_prefix(opts->destination, opts->packdir, &scratch); Not only does this cause us to repeat the same pair of lines, it also introduces an unnecessary "scratch" variable that is common between both functions. Instead of repeating ourselves, let's extract that functionality into a new function in the repack.h API called "write_pack_opts_is_local()". That function takes a pointer to a "struct write_pack_opts" (which has as fields both "destination" and "packdir"), and can encapsulate the dangling "scratch" field. Extract that function and make it visible within the repack.h API, and use it within both `write_cruft_pack()` and `write_filtered_pack()`. The remaining duplication (that is, that both `write_cruft_pack()` and `write_filtered_pack()` still both call `write_pack_opts_is_local()`) will be addressed in the following commit. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 6 ++---- repack.c | 6 ++++++ repack.h | 1 + 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 4d869206187..be8e6689fc0 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -147,8 +147,7 @@ static int write_filtered_pack(struct write_pack_opts *opts, FILE *in; int ret; const char *caret; - const char *scratch; - int local = skip_prefix(opts->destination, opts->packdir, &scratch); + int local = write_pack_opts_is_local(opts); const char *pack_prefix = write_pack_opts_pack_prefix(opts); prepare_pack_objects(&cmd, opts->po_args, opts->destination); @@ -232,8 +231,7 @@ static int write_cruft_pack(struct write_pack_opts *opts, struct string_list_item *item; FILE *in; int ret; - const char *scratch; - int local = skip_prefix(opts->destination, opts->packdir, &scratch); + int local = write_pack_opts_is_local(opts); const char *pack_prefix = write_pack_opts_pack_prefix(opts); prepare_pack_objects(&cmd, opts->po_args, opts->destination); diff --git a/repack.c b/repack.c index c4326a532df..7af297ae48a 100644 --- a/repack.c +++ b/repack.c @@ -77,6 +77,12 @@ const char *write_pack_opts_pack_prefix(struct write_pack_opts *opts) return pack_prefix; } +int write_pack_opts_is_local(struct write_pack_opts *opts) +{ + const char *scratch; + return skip_prefix(opts->destination, opts->packdir, &scratch); +} + #define DELETE_PACK 1 #define RETAIN_PACK 2 diff --git a/repack.h b/repack.h index 46d2312fa96..16f2de2ea9a 100644 --- a/repack.h +++ b/repack.h @@ -40,6 +40,7 @@ struct write_pack_opts { }; const char *write_pack_opts_pack_prefix(struct write_pack_opts *opts); +int write_pack_opts_is_local(struct write_pack_opts *opts); struct repository; struct packed_git; -- GitLab From 6c5b90171fae963581ecfd9e30ed2e04bd0b02c3 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:10:14 -0400 Subject: [PATCH 44/60] builtin/repack.c: pass `write_pack_opts` to `finish_pack_objects_cmd()` To prepare to move the `finish_pack_objects_cmd()` function out of the builtin and into the repack.h API, there are a couple of things we need to do first: - First, let's take advantage of `write_pack_opts_is_local()` function introduced in the previous commit instead of passing "local" explicitly. - Let's also avoid referring to the static 'packtmp' field within builtin/repack.c by instead accessing it through the write_pack_opts argument. There are three callers which need to adjust themselves in order to account for this change. The callers which reside in write_cruft_pack() and write_filtered_pack() both already have an "opts" in scope, so they can pass it through transparently. The other call (at the bottom of `cmd_repack()`) needs to initialize its own write_pack_opts to pass the necessary fields over to the direct call to `finish_pack_objects_cmd()`. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index be8e6689fc0..8db95305c8d 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -108,11 +108,12 @@ static int repack_config(const char *var, const char *value, } static int finish_pack_objects_cmd(const struct git_hash_algo *algop, + struct write_pack_opts *opts, struct child_process *cmd, - struct string_list *names, - int local) + struct string_list *names) { FILE *out; + int local = write_pack_opts_is_local(opts); struct strbuf line = STRBUF_INIT; out = xfdopen(cmd->out, "r"); @@ -128,7 +129,8 @@ static int finish_pack_objects_cmd(const struct git_hash_algo *algop, */ if (local) { item = string_list_append(names, line.buf); - item->util = generated_pack_populate(line.buf, packtmp); + item->util = generated_pack_populate(line.buf, + opts->packtmp); } } fclose(out); @@ -147,7 +149,6 @@ static int write_filtered_pack(struct write_pack_opts *opts, FILE *in; int ret; const char *caret; - int local = write_pack_opts_is_local(opts); const char *pack_prefix = write_pack_opts_pack_prefix(opts); prepare_pack_objects(&cmd, opts->po_args, opts->destination); @@ -183,8 +184,8 @@ static int write_filtered_pack(struct write_pack_opts *opts, fprintf(in, "%s%s.pack\n", caret, item->string); fclose(in); - return finish_pack_objects_cmd(existing->repo->hash_algo, &cmd, names, - local); + return finish_pack_objects_cmd(existing->repo->hash_algo, opts, &cmd, + names); } static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, @@ -231,7 +232,6 @@ static int write_cruft_pack(struct write_pack_opts *opts, struct string_list_item *item; FILE *in; int ret; - int local = write_pack_opts_is_local(opts); const char *pack_prefix = write_pack_opts_pack_prefix(opts); prepare_pack_objects(&cmd, opts->po_args, opts->destination); @@ -279,8 +279,8 @@ static int write_cruft_pack(struct write_pack_opts *opts, fprintf(in, "%s.pack\n", item->string); fclose(in); - return finish_pack_objects_cmd(existing->repo->hash_algo, &cmd, names, - local); + return finish_pack_objects_cmd(existing->repo->hash_algo, opts, &cmd, + names); } int cmd_repack(int argc, @@ -294,6 +294,7 @@ int cmd_repack(int argc, struct existing_packs existing = EXISTING_PACKS_INIT; struct pack_geometry geometry = { 0 }; struct tempfile *refs_snapshot = NULL; + struct write_pack_opts opts = { 0 }; int i, ret; int show_progress; @@ -560,7 +561,10 @@ int cmd_repack(int argc, fclose(in); } - ret = finish_pack_objects_cmd(repo->hash_algo, &cmd, &names, 1); + opts.packdir = packdir; + opts.destination = packdir; + opts.packtmp = packtmp; + ret = finish_pack_objects_cmd(repo->hash_algo, &opts, &cmd, &names); if (ret) goto cleanup; -- GitLab From f6cc2aed8a9b9124719ba4bbf4f779eda298d2d8 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:10:19 -0400 Subject: [PATCH 45/60] repack: move `finish_pack_objects_cmd()` out of the builtin In a similar spirit as the previous commit(s), now that the function `finish_pack_objects_cmd()` has no explicit dependencies within the repack builtin, let's extract it. This prepares us to extract the remaining two functions within the repack builtin that explicitly write packfiles, which are `write_cruft_pack()` and `write_filtered_pack()`, which will be done in the future commits. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 33 --------------------------------- repack.c | 33 +++++++++++++++++++++++++++++++++ repack.h | 5 +++++ 3 files changed, 38 insertions(+), 33 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 8db95305c8d..836a0066072 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -107,39 +107,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -static int finish_pack_objects_cmd(const struct git_hash_algo *algop, - struct write_pack_opts *opts, - struct child_process *cmd, - struct string_list *names) -{ - FILE *out; - int local = write_pack_opts_is_local(opts); - struct strbuf line = STRBUF_INIT; - - out = xfdopen(cmd->out, "r"); - while (strbuf_getline_lf(&line, out) != EOF) { - struct string_list_item *item; - - if (line.len != algop->hexsz) - die(_("repack: Expecting full hex object ID lines only " - "from pack-objects.")); - /* - * Avoid putting packs written outside of the repository in the - * list of names. - */ - if (local) { - item = string_list_append(names, line.buf); - item->util = generated_pack_populate(line.buf, - opts->packtmp); - } - } - fclose(out); - - strbuf_release(&line); - - return finish_command(cmd); -} - static int write_filtered_pack(struct write_pack_opts *opts, struct existing_packs *existing, struct string_list *names) diff --git a/repack.c b/repack.c index 7af297ae48a..8a0e4789fad 100644 --- a/repack.c +++ b/repack.c @@ -83,6 +83,39 @@ int write_pack_opts_is_local(struct write_pack_opts *opts) return skip_prefix(opts->destination, opts->packdir, &scratch); } +int finish_pack_objects_cmd(const struct git_hash_algo *algop, + struct write_pack_opts *opts, + struct child_process *cmd, + struct string_list *names) +{ + FILE *out; + int local = write_pack_opts_is_local(opts); + struct strbuf line = STRBUF_INIT; + + out = xfdopen(cmd->out, "r"); + while (strbuf_getline_lf(&line, out) != EOF) { + struct string_list_item *item; + + if (line.len != algop->hexsz) + die(_("repack: Expecting full hex object ID lines only " + "from pack-objects.")); + /* + * Avoid putting packs written outside of the repository in the + * list of names. + */ + if (local) { + item = string_list_append(names, line.buf); + item->util = generated_pack_populate(line.buf, + opts->packtmp); + } + } + fclose(out); + + strbuf_release(&line); + + return finish_command(cmd); +} + #define DELETE_PACK 1 #define RETAIN_PACK 2 diff --git a/repack.h b/repack.h index 16f2de2ea9a..9351293233c 100644 --- a/repack.h +++ b/repack.h @@ -42,6 +42,11 @@ struct write_pack_opts { const char *write_pack_opts_pack_prefix(struct write_pack_opts *opts); int write_pack_opts_is_local(struct write_pack_opts *opts); +int finish_pack_objects_cmd(const struct git_hash_algo *algop, + struct write_pack_opts *opts, + struct child_process *cmd, + struct string_list *names); + struct repository; struct packed_git; -- GitLab From 7db10535cc5ed1eb18a07f9ce3639bbc7706610e Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:10:25 -0400 Subject: [PATCH 46/60] repack: move `pack_kept_objects` to `struct pack_objects_args` The "pack_kept_objects" variable is defined as static to the repack builtin, but is inherently related to the pack-objects arguments that the builtin uses when generating new packs. Move that field into the "struct pack_objects_args", and shuffle around where we append the corresponding command-line option when preparing a pack-objects process. Specifically: - `write_cruft_pack()` always wants to pass "--honor-pack-keep", so explicitly set the `pack_kept_objects` field to "0" when initializing the `write_pack_opts` struct before calling `write_cruft_pack()`. - `write_filtered_pack()` no longer needs to handle writing the command-line option "--honor-pack-keep" when preparing a pack-objects process, since its call to `prepare_pack_objects()` will have already taken care of that. `write_filtered_pack()` also reads the `pack_kept_objects` field to determine whether to write the existing kept packs with a leading "^" character, so update that to read through the `po_args` pointer instead. - `cmd_repack()` also no longer has to write the "--honor-pack-keep" flag explicitly, since this is also handled via its call to `prepare_pack_objects()`. Since there is a default value for "pack_kept_objects" that relies on whether or not we are writing a bitmap (and not writing a MIDX), extract a default initializer for `struct pack_objects_args` that keeps this conditional default behavior. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 20 +++++++------------- repack-geometry.c | 5 ++--- repack.c | 2 ++ repack.h | 9 ++++++--- 4 files changed, 17 insertions(+), 19 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 836a0066072..9d89217b772 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -33,7 +33,6 @@ #define RETAIN_PACK 2 static int pack_everything; -static int pack_kept_objects = -1; static int write_bitmaps = -1; static int use_delta_islands; static int run_update_server_info = 1; @@ -68,7 +67,7 @@ static int repack_config(const char *var, const char *value, return 0; } if (!strcmp(var, "repack.packkeptobjects")) { - pack_kept_objects = git_config_bool(var, value); + po_args->pack_kept_objects = git_config_bool(var, value); return 0; } if (!strcmp(var, "repack.writebitmaps") || @@ -122,8 +121,6 @@ static int write_filtered_pack(struct write_pack_opts *opts, strvec_push(&cmd.args, "--stdin-packs"); - if (!pack_kept_objects) - strvec_push(&cmd.args, "--honor-pack-keep"); for_each_string_list_item(item, &existing->kept_packs) strvec_pushf(&cmd.args, "--keep-pack=%s", item->string); @@ -146,7 +143,7 @@ static int write_filtered_pack(struct write_pack_opts *opts, fprintf(in, "%s.pack\n", item->string); for_each_string_list_item(item, &existing->cruft_packs) fprintf(in, "%s.pack\n", item->string); - caret = pack_kept_objects ? "" : "^"; + caret = opts->po_args->pack_kept_objects ? "" : "^"; for_each_string_list_item(item, &existing->kept_packs) fprintf(in, "%s%s.pack\n", caret, item->string); fclose(in); @@ -208,7 +205,6 @@ static int write_cruft_pack(struct write_pack_opts *opts, strvec_pushf(&cmd.args, "--cruft-expiration=%s", cruft_expiration); - strvec_push(&cmd.args, "--honor-pack-keep"); strvec_push(&cmd.args, "--non-empty"); cmd.in = -1; @@ -333,7 +329,7 @@ int cmd_repack(int argc, OPT_UNSIGNED(0, "max-pack-size", &po_args.max_pack_size, N_("maximum size of each packfile")), OPT_PARSE_LIST_OBJECTS_FILTER(&po_args.filter_options), - OPT_BOOL(0, "pack-kept-objects", &pack_kept_objects, + OPT_BOOL(0, "pack-kept-objects", &po_args.pack_kept_objects, N_("repack objects in packs marked with .keep")), OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"), N_("do not repack this pack")), @@ -379,8 +375,8 @@ int cmd_repack(int argc, (!(pack_everything & ALL_INTO_ONE) || !is_bare_repository())) write_bitmaps = 0; } - if (pack_kept_objects < 0) - pack_kept_objects = write_bitmaps > 0 && !write_midx; + if (po_args.pack_kept_objects < 0) + po_args.pack_kept_objects = write_bitmaps > 0 && !write_midx; if (write_bitmaps && !(pack_everything & ALL_INTO_ONE) && !write_midx) die(_(incremental_bitmap_conflict_error)); @@ -421,8 +417,7 @@ int cmd_repack(int argc, if (geometry.split_factor) { if (pack_everything) die(_("options '%s' and '%s' cannot be used together"), "--geometric", "-A/-a"); - pack_geometry_init(&geometry, &existing, &po_args, - pack_kept_objects); + pack_geometry_init(&geometry, &existing, &po_args); pack_geometry_split(&geometry); } @@ -431,8 +426,6 @@ int cmd_repack(int argc, show_progress = !po_args.quiet && isatty(2); strvec_push(&cmd.args, "--keep-true-parents"); - if (!pack_kept_objects) - strvec_push(&cmd.args, "--honor-pack-keep"); for (i = 0; i < keep_pack_list.nr; i++) strvec_pushf(&cmd.args, "--keep-pack=%s", keep_pack_list.items[i].string); @@ -577,6 +570,7 @@ int cmd_repack(int argc, cruft_po_args.local = po_args.local; cruft_po_args.quiet = po_args.quiet; cruft_po_args.delta_base_offset = po_args.delta_base_offset; + cruft_po_args.pack_kept_objects = 0; ret = write_cruft_pack(&opts, cruft_expiration, combine_cruft_below_size, &names, diff --git a/repack-geometry.c b/repack-geometry.c index a879f2fe49c..fd4a89a1159 100644 --- a/repack-geometry.c +++ b/repack-geometry.c @@ -26,8 +26,7 @@ static int pack_geometry_cmp(const void *va, const void *vb) void pack_geometry_init(struct pack_geometry *geometry, struct existing_packs *existing, - const struct pack_objects_args *args, - int pack_kept_objects) + const struct pack_objects_args *args) { struct packfile_store *packs = existing->repo->objects->packfiles; struct packed_git *p; @@ -42,7 +41,7 @@ void pack_geometry_init(struct pack_geometry *geometry, */ continue; - if (!pack_kept_objects) { + if (!args->pack_kept_objects) { /* * Any pack that has its pack_keep bit set will * appear in existing->kept_packs below, but diff --git a/repack.c b/repack.c index 8a0e4789fad..1982a481650 100644 --- a/repack.c +++ b/repack.c @@ -38,6 +38,8 @@ void prepare_pack_objects(struct child_process *cmd, strvec_push(&cmd->args, "--quiet"); if (args->delta_base_offset) strvec_push(&cmd->args, "--delta-base-offset"); + if (!args->pack_kept_objects) + strvec_push(&cmd->args, "--honor-pack-keep"); strvec_push(&cmd->args, out); cmd->git_cmd = 1; cmd->out = -1; diff --git a/repack.h b/repack.h index 9351293233c..4a1c4eb6066 100644 --- a/repack.h +++ b/repack.h @@ -17,10 +17,14 @@ struct pack_objects_args { int name_hash_version; int path_walk; int delta_base_offset; + int pack_kept_objects; struct list_objects_filter_options filter_options; }; -#define PACK_OBJECTS_ARGS_INIT { .delta_base_offset = 1 } +#define PACK_OBJECTS_ARGS_INIT { \ + .delta_base_offset = 1, \ + .pack_kept_objects = -1, \ +} struct child_process; @@ -104,8 +108,7 @@ struct pack_geometry { void pack_geometry_init(struct pack_geometry *geometry, struct existing_packs *existing, - const struct pack_objects_args *args, - int pack_kept_objects); + const struct pack_objects_args *args); void pack_geometry_split(struct pack_geometry *geometry); struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry); void pack_geometry_remove_redundant(struct pack_geometry *geometry, -- GitLab From d13115475862d9f699ffd75a661227e198bdf805 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:10:30 -0400 Subject: [PATCH 47/60] repack: move `write_filtered_pack()` out of the builtin In a similar fashion as in previous commits, move the function `write_filtered_pack()` out of the builtin and into its own compilation unit. This function is now part of the repack.h API, but implemented in its own "repack-filtered.c" unit as it is a separate component from other kinds of repacking operations. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Makefile | 1 + builtin/repack.c | 46 ------------------------------------------ meson.build | 1 + repack-filtered.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++ repack.h | 4 ++++ 5 files changed, 57 insertions(+), 46 deletions(-) create mode 100644 repack-filtered.c diff --git a/Makefile b/Makefile index b2142771635..ba4f3bbfa27 100644 --- a/Makefile +++ b/Makefile @@ -1137,6 +1137,7 @@ LIB_OBJS += refs/ref-cache.o LIB_OBJS += refspec.o LIB_OBJS += remote.o LIB_OBJS += repack.o +LIB_OBJS += repack-filtered.o LIB_OBJS += repack-geometry.o LIB_OBJS += repack-midx.o LIB_OBJS += repack-promisor.o diff --git a/builtin/repack.c b/builtin/repack.c index 9d89217b772..a9fc09a24d0 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -106,52 +106,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -static int write_filtered_pack(struct write_pack_opts *opts, - struct existing_packs *existing, - struct string_list *names) -{ - struct child_process cmd = CHILD_PROCESS_INIT; - struct string_list_item *item; - FILE *in; - int ret; - const char *caret; - const char *pack_prefix = write_pack_opts_pack_prefix(opts); - - prepare_pack_objects(&cmd, opts->po_args, opts->destination); - - strvec_push(&cmd.args, "--stdin-packs"); - - for_each_string_list_item(item, &existing->kept_packs) - strvec_pushf(&cmd.args, "--keep-pack=%s", item->string); - - cmd.in = -1; - - ret = start_command(&cmd); - if (ret) - return ret; - - /* - * Here 'names' contains only the pack(s) that were just - * written, which is exactly the packs we want to keep. Also - * 'existing_kept_packs' already contains the packs in - * 'keep_pack_list'. - */ - in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, names) - fprintf(in, "^%s-%s.pack\n", pack_prefix, item->string); - for_each_string_list_item(item, &existing->non_kept_packs) - fprintf(in, "%s.pack\n", item->string); - for_each_string_list_item(item, &existing->cruft_packs) - fprintf(in, "%s.pack\n", item->string); - caret = opts->po_args->pack_kept_objects ? "" : "^"; - for_each_string_list_item(item, &existing->kept_packs) - fprintf(in, "%s%s.pack\n", caret, item->string); - fclose(in); - - return finish_pack_objects_cmd(existing->repo->hash_algo, opts, &cmd, - names); -} - static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, struct existing_packs *existing) { diff --git a/meson.build b/meson.build index 0423ed30c41..7124b158ae8 100644 --- a/meson.build +++ b/meson.build @@ -463,6 +463,7 @@ libgit_sources = [ 'reftable/writer.c', 'remote.c', 'repack.c', + 'repack-filtered.c', 'repack-geometry.c', 'repack-midx.c', 'repack-promisor.c', diff --git a/repack-filtered.c b/repack-filtered.c new file mode 100644 index 00000000000..96c7b02bb6c --- /dev/null +++ b/repack-filtered.c @@ -0,0 +1,51 @@ +#include "git-compat-util.h" +#include "repack.h" +#include "repository.h" +#include "run-command.h" +#include "string-list.h" + +int write_filtered_pack(struct write_pack_opts *opts, + struct existing_packs *existing, + struct string_list *names) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + struct string_list_item *item; + FILE *in; + int ret; + const char *caret; + const char *pack_prefix = write_pack_opts_pack_prefix(opts); + + prepare_pack_objects(&cmd, opts->po_args, opts->destination); + + strvec_push(&cmd.args, "--stdin-packs"); + + for_each_string_list_item(item, &existing->kept_packs) + strvec_pushf(&cmd.args, "--keep-pack=%s", item->string); + + cmd.in = -1; + + ret = start_command(&cmd); + if (ret) + return ret; + + /* + * Here 'names' contains only the pack(s) that were just + * written, which is exactly the packs we want to keep. Also + * 'existing_kept_packs' already contains the packs in + * 'keep_pack_list'. + */ + in = xfdopen(cmd.in, "w"); + for_each_string_list_item(item, names) + fprintf(in, "^%s-%s.pack\n", pack_prefix, item->string); + for_each_string_list_item(item, &existing->non_kept_packs) + fprintf(in, "%s.pack\n", item->string); + for_each_string_list_item(item, &existing->cruft_packs) + fprintf(in, "%s.pack\n", item->string); + caret = opts->po_args->pack_kept_objects ? "" : "^"; + for_each_string_list_item(item, &existing->kept_packs) + fprintf(in, "%s%s.pack\n", caret, item->string); + fclose(in); + + return finish_pack_objects_cmd(existing->repo->hash_algo, opts, &cmd, + names); +} diff --git a/repack.h b/repack.h index 4a1c4eb6066..a7ddbe784b9 100644 --- a/repack.h +++ b/repack.h @@ -133,4 +133,8 @@ struct repack_write_midx_opts { void midx_snapshot_refs(struct repository *repo, struct tempfile *f); int write_midx_included_packs(struct repack_write_midx_opts *opts); +int write_filtered_pack(struct write_pack_opts *opts, + struct existing_packs *existing, + struct string_list *names); + #endif /* REPACK_H */ -- GitLab From 6f8838e96ba9ccdacbbe4cb009f2e9beef43ca88 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:10:34 -0400 Subject: [PATCH 48/60] repack: move `write_cruft_pack()` out of the builtin In an identical fashion as the previous commit, move the function `write_cruft_pack()` into its own compilation unit, and make the function visible through the repack.h API. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Makefile | 1 + builtin/repack.c | 94 --------------------------------------------- meson.build | 1 + repack-cruft.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++ repack.h | 6 +++ 5 files changed, 107 insertions(+), 94 deletions(-) create mode 100644 repack-cruft.c diff --git a/Makefile b/Makefile index ba4f3bbfa27..e3c4bf1b4aa 100644 --- a/Makefile +++ b/Makefile @@ -1137,6 +1137,7 @@ LIB_OBJS += refs/ref-cache.o LIB_OBJS += refspec.o LIB_OBJS += remote.o LIB_OBJS += repack.o +LIB_OBJS += repack-cruft.o LIB_OBJS += repack-filtered.o LIB_OBJS += repack-geometry.o LIB_OBJS += repack-midx.o diff --git a/builtin/repack.c b/builtin/repack.c index a9fc09a24d0..9171ca66a73 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -106,100 +106,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, - struct existing_packs *existing) -{ - struct packfile_store *packs = existing->repo->objects->packfiles; - struct packed_git *p; - struct strbuf buf = STRBUF_INIT; - size_t i; - - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { - if (!(p->is_cruft && p->pack_local)) - continue; - - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(p)); - strbuf_strip_suffix(&buf, ".pack"); - - if (!string_list_has_string(&existing->cruft_packs, buf.buf)) - continue; - - if (p->pack_size < combine_cruft_below_size) { - fprintf(in, "-%s\n", pack_basename(p)); - } else { - existing_packs_retain_cruft(existing, p); - fprintf(in, "%s\n", pack_basename(p)); - } - } - - for (i = 0; i < existing->non_kept_packs.nr; i++) - fprintf(in, "-%s.pack\n", - existing->non_kept_packs.items[i].string); - - strbuf_release(&buf); -} - -static int write_cruft_pack(struct write_pack_opts *opts, - const char *cruft_expiration, - unsigned long combine_cruft_below_size, - struct string_list *names, - struct existing_packs *existing) -{ - struct child_process cmd = CHILD_PROCESS_INIT; - struct string_list_item *item; - FILE *in; - int ret; - const char *pack_prefix = write_pack_opts_pack_prefix(opts); - - prepare_pack_objects(&cmd, opts->po_args, opts->destination); - - strvec_push(&cmd.args, "--cruft"); - if (cruft_expiration) - strvec_pushf(&cmd.args, "--cruft-expiration=%s", - cruft_expiration); - - strvec_push(&cmd.args, "--non-empty"); - - cmd.in = -1; - - ret = start_command(&cmd); - if (ret) - return ret; - - /* - * names has a confusing double use: it both provides the list - * of just-written new packs, and accepts the name of the cruft - * pack we are writing. - * - * By the time it is read here, it contains only the pack(s) - * that were just written, which is exactly the set of packs we - * want to consider kept. - * - * If `--expire-to` is given, the double-use served by `names` - * ensures that the pack written to `--expire-to` excludes any - * objects contained in the cruft pack. - */ - in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, names) - fprintf(in, "%s-%s.pack\n", pack_prefix, item->string); - if (combine_cruft_below_size && !cruft_expiration) { - combine_small_cruft_packs(in, combine_cruft_below_size, - existing); - } else { - for_each_string_list_item(item, &existing->non_kept_packs) - fprintf(in, "-%s.pack\n", item->string); - for_each_string_list_item(item, &existing->cruft_packs) - fprintf(in, "-%s.pack\n", item->string); - } - for_each_string_list_item(item, &existing->kept_packs) - fprintf(in, "%s.pack\n", item->string); - fclose(in); - - return finish_pack_objects_cmd(existing->repo->hash_algo, opts, &cmd, - names); -} - int cmd_repack(int argc, const char **argv, const char *prefix, diff --git a/meson.build b/meson.build index 7124b158ae8..39152b37ba1 100644 --- a/meson.build +++ b/meson.build @@ -463,6 +463,7 @@ libgit_sources = [ 'reftable/writer.c', 'remote.c', 'repack.c', + 'repack-cruft.c', 'repack-filtered.c', 'repack-geometry.c', 'repack-midx.c', diff --git a/repack-cruft.c b/repack-cruft.c new file mode 100644 index 00000000000..accb98bcdbc --- /dev/null +++ b/repack-cruft.c @@ -0,0 +1,99 @@ +#include "git-compat-util.h" +#include "repack.h" +#include "packfile.h" +#include "repository.h" +#include "run-command.h" + +static void combine_small_cruft_packs(FILE *in, off_t combine_cruft_below_size, + struct existing_packs *existing) +{ + struct packfile_store *packs = existing->repo->objects->packfiles; + struct packed_git *p; + struct strbuf buf = STRBUF_INIT; + size_t i; + + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + if (!(p->is_cruft && p->pack_local)) + continue; + + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + + if (!string_list_has_string(&existing->cruft_packs, buf.buf)) + continue; + + if (p->pack_size < combine_cruft_below_size) { + fprintf(in, "-%s\n", pack_basename(p)); + } else { + existing_packs_retain_cruft(existing, p); + fprintf(in, "%s\n", pack_basename(p)); + } + } + + for (i = 0; i < existing->non_kept_packs.nr; i++) + fprintf(in, "-%s.pack\n", + existing->non_kept_packs.items[i].string); + + strbuf_release(&buf); +} + +int write_cruft_pack(struct write_pack_opts *opts, + const char *cruft_expiration, + unsigned long combine_cruft_below_size, + struct string_list *names, + struct existing_packs *existing) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + struct string_list_item *item; + FILE *in; + int ret; + const char *pack_prefix = write_pack_opts_pack_prefix(opts); + + prepare_pack_objects(&cmd, opts->po_args, opts->destination); + + strvec_push(&cmd.args, "--cruft"); + if (cruft_expiration) + strvec_pushf(&cmd.args, "--cruft-expiration=%s", + cruft_expiration); + + strvec_push(&cmd.args, "--non-empty"); + + cmd.in = -1; + + ret = start_command(&cmd); + if (ret) + return ret; + + /* + * names has a confusing double use: it both provides the list + * of just-written new packs, and accepts the name of the cruft + * pack we are writing. + * + * By the time it is read here, it contains only the pack(s) + * that were just written, which is exactly the set of packs we + * want to consider kept. + * + * If `--expire-to` is given, the double-use served by `names` + * ensures that the pack written to `--expire-to` excludes any + * objects contained in the cruft pack. + */ + in = xfdopen(cmd.in, "w"); + for_each_string_list_item(item, names) + fprintf(in, "%s-%s.pack\n", pack_prefix, item->string); + if (combine_cruft_below_size && !cruft_expiration) { + combine_small_cruft_packs(in, combine_cruft_below_size, + existing); + } else { + for_each_string_list_item(item, &existing->non_kept_packs) + fprintf(in, "-%s.pack\n", item->string); + for_each_string_list_item(item, &existing->cruft_packs) + fprintf(in, "-%s.pack\n", item->string); + } + for_each_string_list_item(item, &existing->kept_packs) + fprintf(in, "%s.pack\n", item->string); + fclose(in); + + return finish_pack_objects_cmd(existing->repo->hash_algo, opts, &cmd, + names); +} diff --git a/repack.h b/repack.h index a7ddbe784b9..15886c34886 100644 --- a/repack.h +++ b/repack.h @@ -137,4 +137,10 @@ int write_filtered_pack(struct write_pack_opts *opts, struct existing_packs *existing, struct string_list *names); +int write_cruft_pack(struct write_pack_opts *opts, + const char *cruft_expiration, + unsigned long combine_cruft_below_size, + struct string_list *names, + struct existing_packs *existing); + #endif /* REPACK_H */ -- GitLab From e056dec3d6d09bba280b85d47ac1cd589c013e9f Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:10:38 -0400 Subject: [PATCH 49/60] builtin/repack.c: clean up unused `#include`s Over the past several dozen commits, we have moved a large amount of functionality out of the repack builtin and into other files like repack.c, repack-cruft.c, repack-filtered.c, repack-midx.c, and repack-promisor.c. These files specify the minimal set of `#include`s that they need to compile successfully, but we did not change the set of `#include`s in the repack builtin itself. Now that the code movement is complete, let's clean up that set of `#include`s and trim down the builtin to include the minimal amount of external headers necessary to compile. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 9171ca66a73..ad60c4290d4 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -4,26 +4,17 @@ #include "builtin.h" #include "config.h" #include "environment.h" -#include "gettext.h" -#include "hex.h" #include "parse-options.h" #include "path.h" #include "run-command.h" #include "server-info.h" -#include "strbuf.h" #include "string-list.h" -#include "strvec.h" #include "midx.h" #include "packfile.h" #include "prune-packed.h" -#include "odb.h" #include "promisor-remote.h" #include "repack.h" #include "shallow.h" -#include "pack.h" -#include "pack-bitmap.h" -#include "refs.h" -#include "list-objects-filter-options.h" #define ALL_INTO_ONE 1 #define LOOSEN_UNREACHABLE 2 -- GitLab From c886af90f8d25bd73c841a9925b8f03e1b232910 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 29 Sep 2025 15:40:59 -0700 Subject: [PATCH 50/60] SQUASH??? play well with other topics by preemptively including "repository.h" --- repack-geometry.c | 1 + 1 file changed, 1 insertion(+) diff --git a/repack-geometry.c b/repack-geometry.c index fd4a89a1159..56d651c9cee 100644 --- a/repack-geometry.c +++ b/repack-geometry.c @@ -1,6 +1,7 @@ #define DISABLE_SIGN_COMPARE_WARNINGS #include "git-compat-util.h" +#include #include "repack.h" #include "hex.h" #include "packfile.h" -- GitLab From dd22ddf44490be52c99be25d2c316ef0e487d726 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 15 Oct 2025 14:07:18 +0200 Subject: [PATCH 51/60] builtin/maintenance: introduce "geometric" strategy Hi, by default, git-maintenance(1) uses git-gc(1) to perform repository housekeeping. This tool has a couple of shortcomings, most importantly that it regularly does all-into-one repacks. This doesn't really work all that well in the context of monorepos, where you really want to avoid repacking all objects regularly. An alternative maintenance strategy is the "incremental" strategy, but this strategy has two downsides: - Strategies in general only apply to scheduled maintenance. So if you run git-maintenance(1), you still end up with git-gc(1). - The strategy is designed to not ever delete any data, but a full replacment for git-gc(1) needs to also prune reflogs, rereree caches and vanished worktrees. This patch series aims to fix both of these issues. First, the series introduces a new "geometric" maintenance task, which makes use of geometric repacking as exposed by git-repack(1) in the general case. In the case where a geometric repack ends up merging all packfiles into one we instead do an all-into-one repack with cruft packs so that we can still phase out objects over time. Second, the series extends maintenance strategies to also cover normal maintenance. If the user has configured the "geometric" strategy, we'll thus use it for both manual and scheduled maintenance. For backwards compatibility, the "incremental" strategy is changed so that it uses git-gc(1) for manual maintenance and the other tasks for scheduled maintenance. The series is built on top of b660e2dcb9 (Sync with 'maint', 2025-10-14) with tb/incremental-midx-part-3.1 at c886af90f8 (SQUASH??? play well with other topics by preemptively including "repository.h", 2025-09-29) merged into it. Changes in v2: - Make the geometric factor configurable via "maintenance.geometric-repack.splitFactor". - Wrap some overly long lines in our tests. - Link to v1: https://lore.kernel.org/r/20251016-pks-maintenance-geometric-strategy-v1-0-18943d474203@pks.im Thanks! Patrick To: git@vger.kernel.org Cc: Derrick Stolee Cc: Taylor Blau --- b4-submit-tracking --- # This section is used internally by b4 prep for tracking purposes. { "series": { "revision": 2, "change-id": "20251015-pks-maintenance-geometric-strategy-580c58581b01", "prefixes": [], "history": { "v1": [ "20251016-pks-maintenance-geometric-strategy-v1-0-18943d474203@pks.im" ] } } } -- GitLab From f14cf90529dbc8f432432a1965faffe8666fe7d4 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 14 Oct 2025 09:19:12 +0200 Subject: [PATCH 52/60] builtin/gc: remove global `repack` variable The global `repack` variable is used to store all command line arguments that we eventually want to pass to git-repack(1). It is being appended to from multiple different functions, which makes it hard to follow the logic. Besides being hard to follow, it also makes it unnecessarily hard to reuse this infrastructure in new code. Refactor the code so that we store this variable on the stack and pass a pointer to it around as needed. This is done so that we can reuse `add_repack_all_options()` in a subsequent commit. The refactoring itself is straight-forward. One function that deserves attention though is `need_to_gc()`: this function determines whether or not we need to execute garbage collection for `git gc --auto`, but also for `git maintenance run --auto`. But besides figuring out whether we have to perform GC, the function also sets up the `repack` arguments. For `git gc --auto` it's trivial to adapt, as we already have the on-stack variable at our fingertips. But for the maintenance condition it's less obvious what to do. As it turns out, we can just use another temporary variable there that we then immediately discard. If we need to perform GC we execute a child git-gc(1) process to repack objects for us, and that process will have to recompute the arguments anyway. Signed-off-by: Patrick Steinhardt --- builtin/gc.c | 74 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 29 deletions(-) diff --git a/builtin/gc.c b/builtin/gc.c index e19e13d9788..e9772eb3a30 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -55,7 +55,6 @@ static const char * const builtin_gc_usage[] = { }; static timestamp_t gc_log_expire_time; -static struct strvec repack = STRVEC_INIT; static struct tempfile *pidfile; static struct lock_file log_lock; static struct string_list pack_garbage = STRING_LIST_INIT_DUP; @@ -618,48 +617,50 @@ static uint64_t estimate_repack_memory(struct gc_config *cfg, return os_cache + heap; } -static int keep_one_pack(struct string_list_item *item, void *data UNUSED) +static int keep_one_pack(struct string_list_item *item, void *data) { - strvec_pushf(&repack, "--keep-pack=%s", basename(item->string)); + struct strvec *args = data; + strvec_pushf(args, "--keep-pack=%s", basename(item->string)); return 0; } static void add_repack_all_option(struct gc_config *cfg, - struct string_list *keep_pack) + struct string_list *keep_pack, + struct strvec *args) { if (cfg->prune_expire && !strcmp(cfg->prune_expire, "now") && !(cfg->cruft_packs && cfg->repack_expire_to)) - strvec_push(&repack, "-a"); + strvec_push(args, "-a"); else if (cfg->cruft_packs) { - strvec_push(&repack, "--cruft"); + strvec_push(args, "--cruft"); if (cfg->prune_expire) - strvec_pushf(&repack, "--cruft-expiration=%s", cfg->prune_expire); + strvec_pushf(args, "--cruft-expiration=%s", cfg->prune_expire); if (cfg->max_cruft_size) - strvec_pushf(&repack, "--max-cruft-size=%lu", + strvec_pushf(args, "--max-cruft-size=%lu", cfg->max_cruft_size); if (cfg->repack_expire_to) - strvec_pushf(&repack, "--expire-to=%s", cfg->repack_expire_to); + strvec_pushf(args, "--expire-to=%s", cfg->repack_expire_to); } else { - strvec_push(&repack, "-A"); + strvec_push(args, "-A"); if (cfg->prune_expire) - strvec_pushf(&repack, "--unpack-unreachable=%s", cfg->prune_expire); + strvec_pushf(args, "--unpack-unreachable=%s", cfg->prune_expire); } if (keep_pack) - for_each_string_list(keep_pack, keep_one_pack, NULL); + for_each_string_list(keep_pack, keep_one_pack, args); if (cfg->repack_filter && *cfg->repack_filter) - strvec_pushf(&repack, "--filter=%s", cfg->repack_filter); + strvec_pushf(args, "--filter=%s", cfg->repack_filter); if (cfg->repack_filter_to && *cfg->repack_filter_to) - strvec_pushf(&repack, "--filter-to=%s", cfg->repack_filter_to); + strvec_pushf(args, "--filter-to=%s", cfg->repack_filter_to); } -static void add_repack_incremental_option(void) +static void add_repack_incremental_option(struct strvec *args) { - strvec_push(&repack, "--no-write-bitmap-index"); + strvec_push(args, "--no-write-bitmap-index"); } -static int need_to_gc(struct gc_config *cfg) +static int need_to_gc(struct gc_config *cfg, struct strvec *repack_args) { /* * Setting gc.auto to 0 or negative can disable the @@ -700,10 +701,10 @@ static int need_to_gc(struct gc_config *cfg) string_list_clear(&keep_pack, 0); } - add_repack_all_option(cfg, &keep_pack); + add_repack_all_option(cfg, &keep_pack, repack_args); string_list_clear(&keep_pack, 0); } else if (too_many_loose_objects(cfg)) - add_repack_incremental_option(); + add_repack_incremental_option(repack_args); else return 0; @@ -852,6 +853,7 @@ int cmd_gc(int argc, int keep_largest_pack = -1; int skip_foreground_tasks = 0; timestamp_t dummy; + struct strvec repack_args = STRVEC_INIT; struct maintenance_run_opts opts = MAINTENANCE_RUN_OPTS_INIT; struct gc_config cfg = GC_CONFIG_INIT; const char *prune_expire_sentinel = "sentinel"; @@ -891,7 +893,7 @@ int cmd_gc(int argc, show_usage_with_options_if_asked(argc, argv, builtin_gc_usage, builtin_gc_options); - strvec_pushl(&repack, "repack", "-d", "-l", NULL); + strvec_pushl(&repack_args, "repack", "-d", "-l", NULL); gc_config(&cfg); @@ -914,14 +916,14 @@ int cmd_gc(int argc, die(_("failed to parse prune expiry value %s"), cfg.prune_expire); if (aggressive) { - strvec_push(&repack, "-f"); + strvec_push(&repack_args, "-f"); if (cfg.aggressive_depth > 0) - strvec_pushf(&repack, "--depth=%d", cfg.aggressive_depth); + strvec_pushf(&repack_args, "--depth=%d", cfg.aggressive_depth); if (cfg.aggressive_window > 0) - strvec_pushf(&repack, "--window=%d", cfg.aggressive_window); + strvec_pushf(&repack_args, "--window=%d", cfg.aggressive_window); } if (opts.quiet) - strvec_push(&repack, "-q"); + strvec_push(&repack_args, "-q"); if (opts.auto_flag) { if (cfg.detach_auto && opts.detach < 0) @@ -930,7 +932,7 @@ int cmd_gc(int argc, /* * Auto-gc should be least intrusive as possible. */ - if (!need_to_gc(&cfg)) { + if (!need_to_gc(&cfg, &repack_args)) { ret = 0; goto out; } @@ -952,7 +954,7 @@ int cmd_gc(int argc, find_base_packs(&keep_pack, cfg.big_pack_threshold); } - add_repack_all_option(&cfg, &keep_pack); + add_repack_all_option(&cfg, &keep_pack, &repack_args); string_list_clear(&keep_pack, 0); } @@ -1014,9 +1016,9 @@ int cmd_gc(int argc, repack_cmd.git_cmd = 1; repack_cmd.close_object_store = 1; - strvec_pushv(&repack_cmd.args, repack.v); + strvec_pushv(&repack_cmd.args, repack_args.v); if (run_command(&repack_cmd)) - die(FAILED_RUN, repack.v[0]); + die(FAILED_RUN, repack_args.v[0]); if (cfg.prune_expire) { struct child_process prune_cmd = CHILD_PROCESS_INIT; @@ -1067,6 +1069,7 @@ int cmd_gc(int argc, out: maintenance_run_opts_release(&opts); + strvec_clear(&repack_args); gc_config_release(&cfg); return 0; } @@ -1269,6 +1272,19 @@ static int maintenance_task_gc_background(struct maintenance_run_opts *opts, return run_command(&child); } +static int gc_condition(struct gc_config *cfg) +{ + /* + * Note that it's fine to drop the repack arguments here, as we execute + * git-gc(1) as a separate child process anyway. So it knows to compute + * these arguments again. + */ + struct strvec repack_args = STRVEC_INIT; + int ret = need_to_gc(cfg, &repack_args); + strvec_clear(&repack_args); + return ret; +} + static int prune_packed(struct maintenance_run_opts *opts) { struct child_process child = CHILD_PROCESS_INIT; @@ -1596,7 +1612,7 @@ static const struct maintenance_task tasks[] = { .name = "gc", .foreground = maintenance_task_gc_foreground, .background = maintenance_task_gc_background, - .auto_condition = need_to_gc, + .auto_condition = gc_condition, }, [TASK_COMMIT_GRAPH] = { .name = "commit-graph", -- GitLab From 64fde2d3fb048ec0520fcce40e894e807be5b35a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 15 Oct 2025 10:07:13 +0200 Subject: [PATCH 53/60] builtin/gc: make `too_many_loose_objects()` reusable without GC config To decide whether or not a repository needs to be repacked we estimate the number of loose objects. If the number exceeds a certain threshold we perform the repack, otherwise we don't. This is done via `too_many_loose_objects()`, which takes as parameter the `struct gc_config`. This configuration is only used to determine the threshold. In a subsequent commit we'll add another caller of this function that wants to pass a different limit than the one stored in that structure. Refactor the function accordingly so that we only take the limit as parameter instead of the whole structure. Signed-off-by: Patrick Steinhardt --- builtin/gc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/builtin/gc.c b/builtin/gc.c index e9772eb3a30..026d3a1d714 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -447,7 +447,7 @@ static int rerere_gc_condition(struct gc_config *cfg UNUSED) return should_gc; } -static int too_many_loose_objects(struct gc_config *cfg) +static int too_many_loose_objects(int limit) { /* * Quickly check if a "gc" is needed, by estimating how @@ -469,7 +469,7 @@ static int too_many_loose_objects(struct gc_config *cfg) if (!dir) return 0; - auto_threshold = DIV_ROUND_UP(cfg->gc_auto_threshold, 256); + auto_threshold = DIV_ROUND_UP(limit, 256); while ((ent = readdir(dir)) != NULL) { if (strspn(ent->d_name, "0123456789abcdef") != hexsz_loose || ent->d_name[hexsz_loose] != '\0') @@ -703,7 +703,7 @@ static int need_to_gc(struct gc_config *cfg, struct strvec *repack_args) add_repack_all_option(cfg, &keep_pack, repack_args); string_list_clear(&keep_pack, 0); - } else if (too_many_loose_objects(cfg)) + } else if (too_many_loose_objects(cfg->gc_auto_threshold)) add_repack_incremental_option(repack_args); else return 0; @@ -1057,7 +1057,7 @@ int cmd_gc(int argc, !opts.quiet && !daemonized ? COMMIT_GRAPH_WRITE_PROGRESS : 0, NULL); - if (opts.auto_flag && too_many_loose_objects(&cfg)) + if (opts.auto_flag && too_many_loose_objects(cfg.gc_auto_threshold)) warning(_("There are too many unreachable loose objects; " "run 'git prune' to remove them.")); -- GitLab From 9ba245402385ecb0a2187f73451a31a9a1c068e6 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 14 Oct 2025 08:55:50 +0200 Subject: [PATCH 54/60] builtin/maintenance: introduce "geometric-repack" task Introduce a new "geometric-repack" task. This task uses our geometric repack infrastructure as provided by git-repack(1) itself, which is a strategy that especially hosting providers tend to use to amortize the costs of repacking objects. There is one issue though with geometric repacks, namely that they unconditionally pack all loose objects, regardless of whether or not they are reachable. This is done because it means that we can completely skip the reachability step, which significantly speeds up the operation. But it has the big downside that we are unable to expire objects over time. To address this issue we thus use a split strategy in this new task: whenever a geometric repack would merge together all packs, we instead do an all-into-one repack. By default, these all-into-one repacks have cruft packs enabled, so unreachable objects would now be written into their own pack. Consequently, they won't be soaked up during geometric repacking anymore and can be expired with the next full repack, assuming that their expiry date has surpassed. Signed-off-by: Patrick Steinhardt --- Documentation/config/maintenance.adoc | 11 ++ builtin/gc.c | 102 +++++++++++++++++++ t/t7900-maintenance.sh | 139 ++++++++++++++++++++++++++ 3 files changed, 252 insertions(+) diff --git a/Documentation/config/maintenance.adoc b/Documentation/config/maintenance.adoc index 2f719342183..26dc5de423f 100644 --- a/Documentation/config/maintenance.adoc +++ b/Documentation/config/maintenance.adoc @@ -75,6 +75,17 @@ maintenance.incremental-repack.auto:: number of pack-files not in the multi-pack-index is at least the value of `maintenance.incremental-repack.auto`. The default value is 10. +maintenance.geometric-repack.auto:: + This integer config option controls how often the `geometric-repack` + task should be run as part of `git maintenance run --auto`. If zero, + then the `geometric-repack` task will not run with the `--auto` + option. A negative value will force the task to run every time. + Otherwise, a positive value implies the command should run either when + there are packfiles that need to be merged together to retain the + geometric progression, or when there are at least this many loose + objects that would be written into a new packfile. The default value is + 100. + maintenance.reflog-expire.auto:: This integer config option controls how often the `reflog-expire` task should be run as part of `git maintenance run --auto`. If zero, then diff --git a/builtin/gc.c b/builtin/gc.c index 026d3a1d714..2c9ecd464d2 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -34,6 +34,7 @@ #include "pack-objects.h" #include "path.h" #include "reflog.h" +#include "repack.h" #include "rerere.h" #include "blob.h" #include "tree.h" @@ -254,6 +255,7 @@ enum maintenance_task_label { TASK_PREFETCH, TASK_LOOSE_OBJECTS, TASK_INCREMENTAL_REPACK, + TASK_GEOMETRIC_REPACK, TASK_GC, TASK_COMMIT_GRAPH, TASK_PACK_REFS, @@ -1566,6 +1568,101 @@ static int maintenance_task_incremental_repack(struct maintenance_run_opts *opts return 0; } +static int maintenance_task_geometric_repack(struct maintenance_run_opts *opts, + struct gc_config *cfg) +{ + struct pack_geometry geometry = { + .split_factor = 2, + }; + struct pack_objects_args po_args = { + .local = 1, + }; + struct existing_packs existing_packs = EXISTING_PACKS_INIT; + struct string_list kept_packs = STRING_LIST_INIT_DUP; + struct child_process child = CHILD_PROCESS_INIT; + int ret; + + existing_packs.repo = the_repository; + existing_packs_collect(&existing_packs, &kept_packs); + pack_geometry_init(&geometry, &existing_packs, &po_args); + pack_geometry_split(&geometry); + + child.git_cmd = 1; + + strvec_pushl(&child.args, "repack", "-d", "-l", NULL); + if (geometry.split < geometry.pack_nr) + strvec_push(&child.args, "--geometric=2"); + else + add_repack_all_option(cfg, NULL, &child.args); + if (opts->quiet) + strvec_push(&child.args, "--quiet"); + if (the_repository->settings.core_multi_pack_index) + strvec_push(&child.args, "--write-midx"); + + if (run_command(&child)) { + ret = error(_("failed to perform geometric repack")); + goto out; + } + + ret = 0; + +out: + existing_packs_release(&existing_packs); + pack_geometry_release(&geometry); + return ret; +} + +static int geometric_repack_auto_condition(struct gc_config *cfg UNUSED) +{ + struct pack_geometry geometry = { + .split_factor = 2, + }; + struct pack_objects_args po_args = { + .local = 1, + }; + struct existing_packs existing_packs = EXISTING_PACKS_INIT; + struct string_list kept_packs = STRING_LIST_INIT_DUP; + int auto_value = 100; + int ret; + + repo_config_get_int(the_repository, "maintenance.geometric-repack.auto", + &auto_value); + if (!auto_value) + return 0; + if (auto_value < 0) + return 1; + + existing_packs.repo = the_repository; + existing_packs_collect(&existing_packs, &kept_packs); + pack_geometry_init(&geometry, &existing_packs, &po_args); + pack_geometry_split(&geometry); + + /* + * When we'd merge at least two packs with one another we always + * perform the repack. + */ + if (geometry.split) { + ret = 1; + goto out; + } + + /* + * Otherwise, we estimate the number of loose objects to determine + * whether we want to create a new packfile or not. + */ + if (too_many_loose_objects(auto_value)) { + ret = 1; + goto out; + } + + ret = 0; + +out: + existing_packs_release(&existing_packs); + pack_geometry_release(&geometry); + return ret; +} + typedef int (*maintenance_task_fn)(struct maintenance_run_opts *opts, struct gc_config *cfg); typedef int (*maintenance_auto_fn)(struct gc_config *cfg); @@ -1608,6 +1705,11 @@ static const struct maintenance_task tasks[] = { .background = maintenance_task_incremental_repack, .auto_condition = incremental_repack_auto_condition, }, + [TASK_GEOMETRIC_REPACK] = { + .name = "geometric-repack", + .background = maintenance_task_geometric_repack, + .auto_condition = geometric_repack_auto_condition, + }, [TASK_GC] = { .name = "gc", .foreground = maintenance_task_gc_foreground, diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh index ddd273d8dc2..60029a65a35 100755 --- a/t/t7900-maintenance.sh +++ b/t/t7900-maintenance.sh @@ -465,6 +465,145 @@ test_expect_success 'maintenance.incremental-repack.auto (when config is unset)' ) ' +run_and_verify_geometric_pack () { + EXPECTED_PACKS="$1" && + + # Verify that we perform a geometric repack. + rm -f "trace2.txt" && + GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \ + git maintenance run --task=geometric-repack 2>/dev/null && + test_subcommand git repack -d -l --geometric=2 \ + --quiet --write-midx packfiles && + test_line_count = "$EXPECTED_PACKS" packfiles && + + # And verify that there are no loose objects anymore. + cat >expect <<-\EOF && + info + pack + EOF + ls .git/objects >actual && + test_cmp expect actual +} + +test_expect_success 'geometric repacking task' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + git config set maintenance.auto false && + test_commit initial && + + # The initial repack causes an all-into-one repack. + GIT_TRACE2_EVENT="$(pwd)/initial-repack.txt" \ + git maintenance run --task=geometric-repack 2>/dev/null && + test_subcommand git repack -d -l --cruft --cruft-expiration=2.weeks.ago --quiet --write-midx before && + run_and_verify_geometric_pack 1 && + ls -l .git/objects/pack >after && + test_cmp before after && + + # This incremental change creates a new packfile that only + # soaks up loose objects. The packfiles are not getting merged + # at this point. + test_commit loose && + run_and_verify_geometric_pack 2 && + + # Both packfiles have 3 objects, so the next run would cause us + # to merge all packfiles together. This should be turned into + # an all-into-one-repack. + GIT_TRACE2_EVENT="$(pwd)/all-into-one-repack.txt" \ + git maintenance run --task=geometric-repack 2>/dev/null && + test_subcommand git repack -d -l --cruft --cruft-expiration=2.weeks.ago --quiet --write-midx /dev/null && + test_subcommand git repack -d -l --cruft --cruft-expiration=2.weeks.ago --quiet --write-midx packs && + test_line_count = 2 packs && + ls .git/objects/pack/*.mtimes >cruft && + test_line_count = 1 cruft + ) +' + +test_geometric_repack_needed () { + NEEDED="$1" + GEOMETRIC_CONFIG="$2" && + rm -f trace2.txt && + GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \ + git ${GEOMETRIC_CONFIG:+-c maintenance.geometric-repack.$GEOMETRIC_CONFIG} \ + maintenance run --auto --task=geometric-repack 2>/dev/null && + case "$NEEDED" in + true) + test_grep "\[\"git\",\"repack\"," trace2.txt;; + false) + ! test_grep "\[\"git\",\"repack\"," trace2.txt;; + *) + BUG "invalid parameter: $NEEDED";; + esac +} + +test_expect_success 'geometric repacking with --auto' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + + # An empty repository does not need repacking, except when + # explicitly told to do it. + test_geometric_repack_needed false && + test_geometric_repack_needed false auto=0 && + test_geometric_repack_needed false auto=1 && + test_geometric_repack_needed true auto=-1 && + + test_oid_init && + + # Loose objects cause a repack when crossing the limit. Note + # that the number of objects gets extrapolated by having a look + # at the "objects/17/" shard. + test_commit "$(test_oid blob17_1)" && + test_geometric_repack_needed false && + test_commit "$(test_oid blob17_2)" && + test_geometric_repack_needed false auto=257 && + test_geometric_repack_needed true auto=256 && + + # Force another repack. + test_commit first && + test_commit second && + test_geometric_repack_needed true auto=-1 && + + # We now have two packfiles that would be merged together. As + # such, the repack should always happen unless the user has + # disabled the auto task. + test_geometric_repack_needed false auto=0 && + test_geometric_repack_needed true auto=9000 + ) +' + test_expect_success 'pack-refs task' ' for n in $(test_seq 1 5) do -- GitLab From d1b805004b9897041a89618b34f01e4dc92e6c4c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 21 Oct 2025 14:56:00 +0200 Subject: [PATCH 55/60] builtin/maintenance: make the geometric factor configurable The geometric repacking task uses a factor of two for its geometric sequence, meaning that each next pack must contain at least twice as many objects as the next-smaller one. In some cases it may be helpful to configure this factor though to reduce the number of packfile merges even further, e.g. in very big repositories. But while git-repack(1) itself supports doing this, the maintenance task does not give us a way to tune it. Introduce a new "maintenance.geometric-repack.splitFactor" configuration to plug this gap. Signed-off-by: Patrick Steinhardt --- Documentation/config/maintenance.adoc | 5 +++++ builtin/gc.c | 9 +++++++- t/t7900-maintenance.sh | 32 +++++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 1 deletion(-) diff --git a/Documentation/config/maintenance.adoc b/Documentation/config/maintenance.adoc index 26dc5de423f..45fdafc2c63 100644 --- a/Documentation/config/maintenance.adoc +++ b/Documentation/config/maintenance.adoc @@ -86,6 +86,11 @@ maintenance.geometric-repack.auto:: objects that would be written into a new packfile. The default value is 100. +maintenance.geometric-repack.splitFactor:: + This integer config option controls the factor used for the geometric + sequence. See the `--geometric=` option in linkgit:git-repack[1] for + more details. Defaults to `2`. + maintenance.reflog-expire.auto:: This integer config option controls how often the `reflog-expire` task should be run as part of `git maintenance run --auto`. If zero, then diff --git a/builtin/gc.c b/builtin/gc.c index 2c9ecd464d2..fb1a82e0304 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1582,6 +1582,9 @@ static int maintenance_task_geometric_repack(struct maintenance_run_opts *opts, struct child_process child = CHILD_PROCESS_INIT; int ret; + repo_config_get_int(the_repository, "maintenance.geometric-repack.splitFactor", + &geometry.split_factor); + existing_packs.repo = the_repository; existing_packs_collect(&existing_packs, &kept_packs); pack_geometry_init(&geometry, &existing_packs, &po_args); @@ -1591,7 +1594,8 @@ static int maintenance_task_geometric_repack(struct maintenance_run_opts *opts, strvec_pushl(&child.args, "repack", "-d", "-l", NULL); if (geometry.split < geometry.pack_nr) - strvec_push(&child.args, "--geometric=2"); + strvec_pushf(&child.args, "--geometric=%d", + geometry.split_factor); else add_repack_all_option(cfg, NULL, &child.args); if (opts->quiet) @@ -1632,6 +1636,9 @@ static int geometric_repack_auto_condition(struct gc_config *cfg UNUSED) if (auto_value < 0) return 1; + repo_config_get_int(the_repository, "maintenance.geometric-repack.splitFactor", + &geometry.split_factor); + existing_packs.repo = the_repository; existing_packs_collect(&existing_packs, &kept_packs); pack_geometry_init(&geometry, &existing_packs, &po_args); diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh index 60029a65a35..8f332e7fbbe 100755 --- a/t/t7900-maintenance.sh +++ b/t/t7900-maintenance.sh @@ -604,6 +604,38 @@ test_expect_success 'geometric repacking with --auto' ' ) ' +test_expect_success 'geometric repacking honors configured split factor' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + git config set maintenance.auto false && + + # Create three different packs with 9, 2 and 1 object, respectively. + # This is done so that only a subset of packs would be merged + # together so that we can verify that `git repack` receives the + # correct geometric factor. + for i in $(test_seq 9) + do + echo first-$i | git hash-object -w --stdin -t blob || return 1 + done && + git repack --geometric=2 -d && + + for i in $(test_seq 2) + do + echo second-$i | git hash-object -w --stdin -t blob || return 1 + done && + git repack --geometric=2 -d && + + echo third | git hash-object -w --stdin -t blob && + git repack --geometric=2 -d && + + test_geometric_repack_needed false splitFactor=2 && + test_geometric_repack_needed true splitFactor=3 && + test_subcommand git repack -d -l --geometric=3 --quiet --write-midx Date: Tue, 14 Oct 2025 08:42:40 +0200 Subject: [PATCH 56/60] builtin/maintenance: don't silently ignore invalid strategy When parsing maintenance strategies we completely ignore the user-configured value in case it is unknown to us. This makes it basically undiscoverable to the user that scheduled maintenance is devolving into a no-op. Change this to instead die when seeing an unknown maintenance strategy. While at it, pull out the parsing logic into a separate function so that we can reuse it in a subsequent commit. Signed-off-by: Patrick Steinhardt --- builtin/gc.c | 17 +++++++++++------ t/t7900-maintenance.sh | 5 +++++ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/builtin/gc.c b/builtin/gc.c index fb1a82e0304..726d944d3bd 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1855,6 +1855,13 @@ static const struct maintenance_strategy incremental_strategy = { }, }; +static struct maintenance_strategy parse_maintenance_strategy(const char *name) +{ + if (!strcasecmp(name, "incremental")) + return incremental_strategy; + die(_("unknown maintenance strategy: '%s'"), name); +} + static void initialize_task_config(struct maintenance_run_opts *opts, const struct string_list *selected_tasks) { @@ -1890,12 +1897,10 @@ static void initialize_task_config(struct maintenance_run_opts *opts, * override specific aspects of our strategy. */ if (opts->schedule) { - strategy = none_strategy; - - if (!repo_config_get_string_tmp(the_repository, "maintenance.strategy", &config_str)) { - if (!strcasecmp(config_str, "incremental")) - strategy = incremental_strategy; - } + if (!repo_config_get_string_tmp(the_repository, "maintenance.strategy", &config_str)) + strategy = parse_maintenance_strategy(config_str); + else + strategy = none_strategy; } else { strategy = default_strategy; } diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh index 8f332e7fbbe..69fb6e9ee2e 100755 --- a/t/t7900-maintenance.sh +++ b/t/t7900-maintenance.sh @@ -1264,6 +1264,11 @@ test_expect_success 'fails when running outside of a repository' ' nongit test_must_fail git maintenance unregister ' +test_expect_success 'fails when configured to use an invalid strategy' ' + test_must_fail git -c maintenance.strategy=invalid maintenance run --schedule=hourly 2>err && + test_grep "unknown maintenance strategy: .invalid." err +' + test_expect_success 'register and unregister bare repo' ' test_when_finished "git config --global --unset-all maintenance.repo || :" && test_might_fail git config --global --unset-all maintenance.repo && -- GitLab From 37f7793dab9c2cd1cb527d7d9f92bb8c7969b0ac Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 14 Oct 2025 10:06:09 +0200 Subject: [PATCH 57/60] builtin/maintenance: run maintenance tasks depending on type We basically have three different ways to execute repository maintenance: 1. Manual maintenance via `git maintenance run`. 2. Automatic maintenance via `git maintenance run --auto`. 3. Scheduled maintenance via `git maintenance run --schedule=`. At the moment, maintenance strategies only have an effect for the last type of maintenance. This is about to change in subsequent commits, but to do so we need to be able to skip some tasks depending on how exactly maintenance was invoked. Introduce a new maintenance type that discern between manual (1 & 2) and scheduled (3) maintenance. Convert the `enabled` field into a bitset so that it becomes possible to specifiy which tasks exactly should run in a specific context. The types picked for existing strategies match the status quo: - The default strategy is only ever executed as part of a manual maintenance run. It is not possible to use it for scheduled maintenance. - The incremental strategy is only ever executed as part of a scheduled maintenance run. It is not possible to use it for manual maintenance. The strategies will be tweaked in subsequent commits to make use of this new infrastructure. Signed-off-by: Patrick Steinhardt --- builtin/gc.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/builtin/gc.c b/builtin/gc.c index 726d944d3bd..eff4e4886a8 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1827,30 +1827,39 @@ static int maintenance_run_tasks(struct maintenance_run_opts *opts, return result; } +enum maintenance_type { + /* As invoked via `git maintenance run --schedule=`. */ + MAINTENANCE_TYPE_SCHEDULED = (1 << 0), + /* As invoked via `git maintenance run` and with `--auto`. */ + MAINTENANCE_TYPE_MANUAL = (1 << 1), +}; + struct maintenance_strategy { struct { - int enabled; + unsigned type; enum schedule_priority schedule; } tasks[TASK__COUNT]; }; static const struct maintenance_strategy none_strategy = { 0 }; + static const struct maintenance_strategy default_strategy = { .tasks = { - [TASK_GC].enabled = 1, + [TASK_GC].type = MAINTENANCE_TYPE_MANUAL, }, }; + static const struct maintenance_strategy incremental_strategy = { .tasks = { - [TASK_COMMIT_GRAPH].enabled = 1, + [TASK_COMMIT_GRAPH].type = MAINTENANCE_TYPE_SCHEDULED, [TASK_COMMIT_GRAPH].schedule = SCHEDULE_HOURLY, - [TASK_PREFETCH].enabled = 1, + [TASK_PREFETCH].type = MAINTENANCE_TYPE_SCHEDULED, [TASK_PREFETCH].schedule = SCHEDULE_HOURLY, - [TASK_INCREMENTAL_REPACK].enabled = 1, + [TASK_INCREMENTAL_REPACK].type = MAINTENANCE_TYPE_SCHEDULED, [TASK_INCREMENTAL_REPACK].schedule = SCHEDULE_DAILY, - [TASK_LOOSE_OBJECTS].enabled = 1, + [TASK_LOOSE_OBJECTS].type = MAINTENANCE_TYPE_SCHEDULED, [TASK_LOOSE_OBJECTS].schedule = SCHEDULE_DAILY, - [TASK_PACK_REFS].enabled = 1, + [TASK_PACK_REFS].type = MAINTENANCE_TYPE_SCHEDULED, [TASK_PACK_REFS].schedule = SCHEDULE_WEEKLY, }, }; @@ -1867,6 +1876,7 @@ static void initialize_task_config(struct maintenance_run_opts *opts, { struct strbuf config_name = STRBUF_INIT; struct maintenance_strategy strategy; + enum maintenance_type type; const char *config_str; /* @@ -1901,8 +1911,10 @@ static void initialize_task_config(struct maintenance_run_opts *opts, strategy = parse_maintenance_strategy(config_str); else strategy = none_strategy; + type = MAINTENANCE_TYPE_SCHEDULED; } else { strategy = default_strategy; + type = MAINTENANCE_TYPE_MANUAL; } for (size_t i = 0; i < TASK__COUNT; i++) { @@ -1912,8 +1924,8 @@ static void initialize_task_config(struct maintenance_run_opts *opts, strbuf_addf(&config_name, "maintenance.%s.enabled", tasks[i].name); if (!repo_config_get_bool(the_repository, config_name.buf, &config_value)) - strategy.tasks[i].enabled = config_value; - if (!strategy.tasks[i].enabled) + strategy.tasks[i].type = config_value ? type : 0; + if (!(strategy.tasks[i].type & type)) continue; if (opts->schedule) { -- GitLab From 4b15eac845c10c4558cfbc3d4b97f3bc56bcfa6b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 14 Oct 2025 10:06:33 +0200 Subject: [PATCH 58/60] builtin/maintenance: extend "maintenance.strategy" to manual maintenance The "maintenance.strategy" configuration allows users to configure how Git is supposed to perform repository maintenance. The idea is that we provide a set of high-level strategies that may be useful in different contexts, like for example when handling a large monorepo. Furthermore, the strategy can be tweaked by the user by overriding specific tasks. In its current form though, the strategy only applies to scheduled maintenance. This creates something of a gap, as scheduled and manual maintenance will now use _different_ strategies as the latter would continue to use git-gc(1) by default. This makes the strategies way less useful than they could be on the one hand. But even more importantly, the two different strategies might clash with one another, where one of the strategies performs maintenance in such a way that it discards benefits from the other strategy. So ideally, it should be possible to pick one strategy that then applies globally to all the different ways that we perform maintenance. This doesn't necessarily mean that the strategy always does the _same_ thing for every maintenance type. But it means that the strategy can configure the different types to work in tandem with each other. Change the meaning of "maintenance.strategy" accordingly so that the strategy is applied to both types, manual and scheduled. As preceding commits have introduced logic to run maintenance tasks depending on this type we can tweak strategies so that they perform those tasks depending on the context. Note that this raises the question of backwards compatibility: when the user has configured the "incremental" strategy we would have ignored that strategy beforehand. Instead, repository maintenance would have continued to use git-gc(1) by default. But luckily, we can match that behaviour by: - Keeping all current tasks of the incremental strategy as `MAINTENANCE_TYPE_SCHEDULED`. This ensures that those tasks will not run during manual maintenance. - Configuring the "gc" task so that it is invoked during manual maintenance. Like this, the user shouldn't observe any difference in behaviour. Signed-off-by: Patrick Steinhardt --- Documentation/config/maintenance.adoc | 22 +++++++++------ builtin/gc.c | 24 ++++++++++++---- t/t7900-maintenance.sh | 40 +++++++++++++++++++++++++++ 3 files changed, 73 insertions(+), 13 deletions(-) diff --git a/Documentation/config/maintenance.adoc b/Documentation/config/maintenance.adoc index 45fdafc2c63..b7e90a71a3d 100644 --- a/Documentation/config/maintenance.adoc +++ b/Documentation/config/maintenance.adoc @@ -16,19 +16,25 @@ detach. maintenance.strategy:: This string config option provides a way to specify one of a few - recommended schedules for background maintenance. This only affects - which tasks are run during `git maintenance run --schedule=X` - commands, provided no `--task=` arguments are provided. - Further, if a `maintenance..schedule` config value is set, - then that value is used instead of the one provided by - `maintenance.strategy`. The possible strategy strings are: + recommended strategies for repository maintenance. This affects + which tasks are run during `git maintenance run`, provided no + `--task=` arguments are provided. This setting impacts manual + maintenance, auto-maintenance as well as scheduled maintenance. The + tasks that run may be different depending on the maintenance type. + -* `none`: This default setting implies no tasks are run at any schedule. +The maintenance strategy can be further tweaked by setting +`maintenance..enabled` and `maintenance..schedule`. If set, these +values are used instead of the defaults provided by `maintenance.strategy`. ++ +The possible strategies are: ++ +* `none`: This strategy implies no tasks are run at all. This is the default + strategy for scheduled maintenance. * `incremental`: This setting optimizes for performing small maintenance activities that do not delete any data. This does not schedule the `gc` task, but runs the `prefetch` and `commit-graph` tasks hourly, the `loose-objects` and `incremental-repack` tasks daily, and the `pack-refs` - task weekly. + task weekly. Manual repository maintenance uses the `gc` task. maintenance..enabled:: This boolean config option controls whether the maintenance task diff --git a/builtin/gc.c b/builtin/gc.c index eff4e4886a8..9c05905b9a7 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1861,6 +1861,19 @@ static const struct maintenance_strategy incremental_strategy = { [TASK_LOOSE_OBJECTS].schedule = SCHEDULE_DAILY, [TASK_PACK_REFS].type = MAINTENANCE_TYPE_SCHEDULED, [TASK_PACK_REFS].schedule = SCHEDULE_WEEKLY, + + /* + * Historically, the "incremental" strategy was only available + * in the context of scheduled maintenance when set up via + * "maintenance.strategy". We have later expanded that config + * to also cover manual maintenance. + * + * To retain backwards compatibility with the previous status + * quo we thus run git-gc(1) in case manual maintenance was + * requested. This is the same as the default strategy, which + * would have been in use beforehand. + */ + [TASK_GC].type = MAINTENANCE_TYPE_MANUAL, }, }; @@ -1904,19 +1917,20 @@ static void initialize_task_config(struct maintenance_run_opts *opts, * - Unscheduled maintenance uses our default strategy. * * Both of these are affected by the gitconfig though, which may - * override specific aspects of our strategy. + * override specific aspects of our strategy. Furthermore, both + * strategies can be overridden by setting "maintenance.strategy". */ if (opts->schedule) { - if (!repo_config_get_string_tmp(the_repository, "maintenance.strategy", &config_str)) - strategy = parse_maintenance_strategy(config_str); - else - strategy = none_strategy; + strategy = none_strategy; type = MAINTENANCE_TYPE_SCHEDULED; } else { strategy = default_strategy; type = MAINTENANCE_TYPE_MANUAL; } + if (!repo_config_get_string_tmp(the_repository, "maintenance.strategy", &config_str)) + strategy = parse_maintenance_strategy(config_str); + for (size_t i = 0; i < TASK__COUNT; i++) { int config_value; diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh index 69fb6e9ee2e..3530895bfb1 100755 --- a/t/t7900-maintenance.sh +++ b/t/t7900-maintenance.sh @@ -887,6 +887,46 @@ test_expect_success 'maintenance.strategy inheritance' ' expect && + rm -f trace2.txt && + GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \ + git -c maintenance.strategy=$STRATEGY maintenance run --quiet "$@" && + sed -n 's/{"event":"child_start","sid":"[^/"]*",.*,"argv":\["\(.*\)\"]}/\1/p' actual + test_cmp expect actual +} + +test_expect_success 'maintenance.strategy is respected' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit initial && + + test_must_fail git -c maintenance.strategy=unknown maintenance run 2>err && + test_grep "unknown maintenance strategy: .unknown." err && + + test_strategy incremental <<-\EOF && + git pack-refs --all --prune + git reflog expire --all + git gc --quiet --no-detach --skip-foreground-tasks + EOF + + test_strategy incremental --schedule=weekly <<-\EOF + git pack-refs --all --prune + git prune-packed --quiet + git multi-pack-index write --no-progress + git multi-pack-index expire --no-progress + git multi-pack-index repack --no-progress --batch-size=1 + git commit-graph write --split --reachable --no-progress + EOF + ) +' + test_expect_success 'register and unregister' ' test_when_finished git config --global --unset-all maintenance.repo && -- GitLab From eb75881b6ae8ae39adade12b0584378a433c4a00 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 14 Oct 2025 10:14:51 +0200 Subject: [PATCH 59/60] builtin/maintenance: make "gc" strategy accessible While the user can pick the "incremental" maintenance strategy, it is not possible to explicitly use the "gc" strategy. This has two downsides: - It is impossible to use the default "gc" strategy for a specific repository when the strategy was globally set to a different strategy. - It is not possible to use git-gc(1) for scheduled maintenance. Address these issues by making making the "gc" strategy configurable. Furthermore, extend the strategy so that git-gc(1) runs for both manual and scheduled maintenance. Signed-off-by: Patrick Steinhardt --- Documentation/config/maintenance.adoc | 2 ++ builtin/gc.c | 9 ++++++--- t/t7900-maintenance.sh | 14 +++++++++++++- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/Documentation/config/maintenance.adoc b/Documentation/config/maintenance.adoc index b7e90a71a3d..b2bacdc8220 100644 --- a/Documentation/config/maintenance.adoc +++ b/Documentation/config/maintenance.adoc @@ -30,6 +30,8 @@ The possible strategies are: + * `none`: This strategy implies no tasks are run at all. This is the default strategy for scheduled maintenance. +* `gc`: This strategy runs the `gc` task. This is the default strategy for + manual maintenance. * `incremental`: This setting optimizes for performing small maintenance activities that do not delete any data. This does not schedule the `gc` task, but runs the `prefetch` and `commit-graph` tasks hourly, the diff --git a/builtin/gc.c b/builtin/gc.c index 9c05905b9a7..aaff0bae15f 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1843,9 +1843,10 @@ struct maintenance_strategy { static const struct maintenance_strategy none_strategy = { 0 }; -static const struct maintenance_strategy default_strategy = { +static const struct maintenance_strategy gc_strategy = { .tasks = { - [TASK_GC].type = MAINTENANCE_TYPE_MANUAL, + [TASK_GC].type = MAINTENANCE_TYPE_MANUAL | MAINTENANCE_TYPE_SCHEDULED, + [TASK_GC].schedule = SCHEDULE_DAILY, }, }; @@ -1881,6 +1882,8 @@ static struct maintenance_strategy parse_maintenance_strategy(const char *name) { if (!strcasecmp(name, "incremental")) return incremental_strategy; + if (!strcasecmp(name, "gc")) + return gc_strategy; die(_("unknown maintenance strategy: '%s'"), name); } @@ -1924,7 +1927,7 @@ static void initialize_task_config(struct maintenance_run_opts *opts, strategy = none_strategy; type = MAINTENANCE_TYPE_SCHEDULED; } else { - strategy = default_strategy; + strategy = gc_strategy; type = MAINTENANCE_TYPE_MANUAL; } diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh index 3530895bfb1..2770148fd10 100755 --- a/t/t7900-maintenance.sh +++ b/t/t7900-maintenance.sh @@ -916,7 +916,7 @@ test_expect_success 'maintenance.strategy is respected' ' git gc --quiet --no-detach --skip-foreground-tasks EOF - test_strategy incremental --schedule=weekly <<-\EOF + test_strategy incremental --schedule=weekly <<-\EOF && git pack-refs --all --prune git prune-packed --quiet git multi-pack-index write --no-progress @@ -924,6 +924,18 @@ test_expect_success 'maintenance.strategy is respected' ' git multi-pack-index repack --no-progress --batch-size=1 git commit-graph write --split --reachable --no-progress EOF + + test_strategy gc <<-\EOF && + git pack-refs --all --prune + git reflog expire --all + git gc --quiet --no-detach --skip-foreground-tasks + EOF + + test_strategy gc --schedule=weekly <<-\EOF + git pack-refs --all --prune + git reflog expire --all + git gc --quiet --no-detach --skip-foreground-tasks + EOF ) ' -- GitLab From 5c011e7a7e205225feeb31ac4c8aeac640369953 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 14 Oct 2025 10:23:17 +0200 Subject: [PATCH 60/60] builtin/maintenance: introduce "geometric" strategy We have two different repacking strategies in Git: - The "gc" strategy uses git-gc(1). - The "incremental" strategy uses multi-pack indices and `git multi-pack-index repack` to merge together smaller packfiles as determined by a specific batch size. The former strategy is our old and trusted default, whereas the latter has historically been used for our scheduled maintenance. But both strategies have their shortcomings: - The "gc" strategy performs regular all-into-one repacks. Furthermore it is rather inflexible, as it is not easily possible for a user to enable or disable specific subtasks. - The "incremental" strategy is not a full replacement for the "gc" strategy as it doesn't know to prune stale data. So today, we don't have a strategy that is well-suited for large repos while being a full replacement for the "gc" strategy. Introduce a new "geometric" strategy that aims to fill this gap. This strategy invokes all the usual cleanup tasks that git-gc(1) does like pruning reflogs and rerere caches as well as stale worktrees. But where it differs from both the "gc" and "incremental" strategy is that it uses our geometric repacking infrastructure exposed by git-repack(1) to repack packfiles. The advantage of geometric repacking is that we only need to perform an all-into-one repack when the object count in a repo has grown significantly. One downside of this strategy is that pruning of unreferenced objects is not going to happen regularly anymore. Every geometric repack knows to soak up all loose objects regardless of their reachability, and merging two or more packs doesn't consider reachability, either. Consequently, the number of unreachable objects will grow over time. This is remedied by doing an all-into-one repack instead of a geometric repack whenever we determine that the geometric repack would end up merging all packfiles anyway. This all-into-one repack then performs our usual reachability checks and writes unreachable objects into a cruft pack. As cruft packs won't ever be merged during geometric repacks we can thus phase out these objects over time. Of course, this still means that we retain unreachable objects for far longer than with the "gc" strategy. But the maintenance strategy is intended especially for large repositories, where the basic assumption is that the set of unreachable objects will be significantly dwarfed by the number of reachable objects. If this assumption is ever proven to be too disadvantageous we could for example introduce a time-based strategy: if the largest packfile has not been touched for longer than $T, we perform an all-into-one repack. But for now, such a mechanism is deferred into the future as it is not clear yet whether it is needed in the first place. Signed-off-by: Patrick Steinhardt --- Documentation/config/maintenance.adoc | 9 +++++++++ builtin/gc.c | 19 +++++++++++++++++++ t/t7900-maintenance.sh | 20 +++++++++++++++++++- 3 files changed, 47 insertions(+), 1 deletion(-) diff --git a/Documentation/config/maintenance.adoc b/Documentation/config/maintenance.adoc index b2bacdc8220..d0c38f03fab 100644 --- a/Documentation/config/maintenance.adoc +++ b/Documentation/config/maintenance.adoc @@ -32,6 +32,15 @@ The possible strategies are: strategy for scheduled maintenance. * `gc`: This strategy runs the `gc` task. This is the default strategy for manual maintenance. +* `geometric`: This strategy performs geometric repacking of packfiles and + keeps auxiliary data structures up-to-date. The strategy expires data in the + reflog and removes worktrees that cannot be located anymore. When the + geometric repacking strategy would decide to do an all-into-one repack, then + the strategy generates a cruft pack for all unreachable objects. Objects that + are already part of a cruft pack will be expired. ++ +This repacking strategy is a full replacement for the `gc` strategy and is +recommended for large repositories. * `incremental`: This setting optimizes for performing small maintenance activities that do not delete any data. This does not schedule the `gc` task, but runs the `prefetch` and `commit-graph` tasks hourly, the diff --git a/builtin/gc.c b/builtin/gc.c index aaff0bae15f..9739bb0ea24 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1878,12 +1878,31 @@ static const struct maintenance_strategy incremental_strategy = { }, }; +static const struct maintenance_strategy geometric_strategy = { + .tasks = { + [TASK_COMMIT_GRAPH].type = MAINTENANCE_TYPE_SCHEDULED | MAINTENANCE_TYPE_MANUAL, + [TASK_COMMIT_GRAPH].schedule = SCHEDULE_HOURLY, + [TASK_GEOMETRIC_REPACK].type = MAINTENANCE_TYPE_SCHEDULED | MAINTENANCE_TYPE_MANUAL, + [TASK_GEOMETRIC_REPACK].schedule = SCHEDULE_DAILY, + [TASK_PACK_REFS].type = MAINTENANCE_TYPE_SCHEDULED | MAINTENANCE_TYPE_MANUAL, + [TASK_PACK_REFS].schedule = SCHEDULE_DAILY, + [TASK_RERERE_GC].type = MAINTENANCE_TYPE_SCHEDULED | MAINTENANCE_TYPE_MANUAL, + [TASK_RERERE_GC].schedule = SCHEDULE_WEEKLY, + [TASK_REFLOG_EXPIRE].type = MAINTENANCE_TYPE_SCHEDULED | MAINTENANCE_TYPE_MANUAL, + [TASK_REFLOG_EXPIRE].schedule = SCHEDULE_WEEKLY, + [TASK_WORKTREE_PRUNE].type = MAINTENANCE_TYPE_SCHEDULED | MAINTENANCE_TYPE_MANUAL, + [TASK_WORKTREE_PRUNE].schedule = SCHEDULE_WEEKLY, + }, +}; + static struct maintenance_strategy parse_maintenance_strategy(const char *name) { if (!strcasecmp(name, "incremental")) return incremental_strategy; if (!strcasecmp(name, "gc")) return gc_strategy; + if (!strcasecmp(name, "geometric")) + return geometric_strategy; die(_("unknown maintenance strategy: '%s'"), name); } diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh index 2770148fd10..aedb9e7e8e3 100755 --- a/t/t7900-maintenance.sh +++ b/t/t7900-maintenance.sh @@ -931,11 +931,29 @@ test_expect_success 'maintenance.strategy is respected' ' git gc --quiet --no-detach --skip-foreground-tasks EOF - test_strategy gc --schedule=weekly <<-\EOF + test_strategy gc --schedule=weekly <<-\EOF && git pack-refs --all --prune git reflog expire --all git gc --quiet --no-detach --skip-foreground-tasks EOF + + test_strategy geometric <<-\EOF && + git pack-refs --all --prune + git reflog expire --all + git repack -d -l --geometric=2 --quiet --write-midx + git commit-graph write --split --reachable --no-progress + git worktree prune --expire 3.months.ago + git rerere gc + EOF + + test_strategy geometric --schedule=weekly <<-\EOF + git pack-refs --all --prune + git reflog expire --all + git repack -d -l --geometric=2 --quiet --write-midx + git commit-graph write --split --reachable --no-progress + git worktree prune --expire 3.months.ago + git rerere gc + EOF ) ' -- GitLab