From 595bef7180b57889a4dec4b675a7fc6084c863ac Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 11 Aug 2025 15:46:41 +0200 Subject: [PATCH 01/26] odb: store locality in object database sources Object database sources are classified either as: - Local, which means that the source is the repository's primary source. This is typically ".git/objects". - Non-local, which is everything else. Most importantly this includes alternates and quarantine directories. This locality is often computed ad-hoc by checking whether a given object source is the first one. This works, but it is quite roundabout. Refactor the code so that we store locality when creating the sources in the first place. This makes it both more accessible and robust. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- midx.c | 5 +++-- midx.h | 2 +- odb.c | 1 + odb.h | 8 ++++++++ packfile.c | 9 ++++----- repository.c | 1 + 6 files changed, 18 insertions(+), 8 deletions(-) diff --git a/midx.c b/midx.c index 7d407682e60..b9ca0915a67 100644 --- a/midx.c +++ b/midx.c @@ -723,7 +723,7 @@ int midx_preferred_pack(struct multi_pack_index *m, uint32_t *pack_int_id) return 0; } -int prepare_multi_pack_index_one(struct odb_source *source, int local) +int prepare_multi_pack_index_one(struct odb_source *source) { struct repository *r = source->odb->repo; @@ -734,7 +734,8 @@ int prepare_multi_pack_index_one(struct odb_source *source, int local) if (source->midx) return 1; - source->midx = load_multi_pack_index(r, source->path, local); + source->midx = load_multi_pack_index(r, source->path, + source->local); return !!source->midx; } diff --git a/midx.h b/midx.h index 076382de8ac..28c426a8232 100644 --- a/midx.h +++ b/midx.h @@ -122,7 +122,7 @@ int fill_midx_entry(struct repository *r, const struct object_id *oid, struct pa int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name); int midx_preferred_pack(struct multi_pack_index *m, uint32_t *pack_int_id); -int prepare_multi_pack_index_one(struct odb_source *source, int local); +int prepare_multi_pack_index_one(struct odb_source *source); /* * Variant of write_midx_file which writes a MIDX containing only the packs diff --git a/odb.c b/odb.c index 1f48a0448e3..1761a50840d 100644 --- a/odb.c +++ b/odb.c @@ -176,6 +176,7 @@ static int link_alt_odb_entry(struct object_database *odb, CALLOC_ARRAY(alternate, 1); alternate->odb = odb; + alternate->local = false; /* pathbuf.buf is already in r->objects->source_by_path */ alternate->path = strbuf_detach(&pathbuf, NULL); diff --git a/odb.h b/odb.h index 09177bf430d..f9300439bab 100644 --- a/odb.h +++ b/odb.h @@ -63,6 +63,14 @@ struct odb_source { */ struct multi_pack_index *midx; + /* + * Figure out whether this is the local source of the owning + * repository, which would typically be its ".git/objects" directory. + * This local object directory is usually where objects would be + * written to. + */ + bool local; + /* * This is a temporary object store created by the tmp_objdir * facility. Disable ref updates since the objects in the store diff --git a/packfile.c b/packfile.c index 5d73932f50c..a38544b87bf 100644 --- a/packfile.c +++ b/packfile.c @@ -935,14 +935,14 @@ static void prepare_pack(const char *full_name, size_t full_name_len, report_garbage(PACKDIR_FILE_GARBAGE, full_name); } -static void prepare_packed_git_one(struct odb_source *source, int local) +static void prepare_packed_git_one(struct odb_source *source) { struct string_list garbage = STRING_LIST_INIT_DUP; struct prepare_pack_data data = { .m = source->midx, .r = source->odb->repo, .garbage = &garbage, - .local = local, + .local = source->local, }; for_each_file_in_pack_dir(source->path, prepare_pack, &data); @@ -1037,9 +1037,8 @@ static void prepare_packed_git(struct repository *r) odb_prepare_alternates(r->objects); for (source = r->objects->sources; source; source = source->next) { - int local = (source == r->objects->sources); - prepare_multi_pack_index_one(source, local); - prepare_packed_git_one(source, local); + prepare_multi_pack_index_one(source); + prepare_packed_git_one(source); } rearrange_packed_git(r); diff --git a/repository.c b/repository.c index ecd691181fc..97f0578381d 100644 --- a/repository.c +++ b/repository.c @@ -168,6 +168,7 @@ void repo_set_gitdir(struct repository *repo, if (!repo->objects->sources) { CALLOC_ARRAY(repo->objects->sources, 1); repo->objects->sources->odb = repo->objects; + repo->objects->sources->local = true; repo->objects->sources_tail = &repo->objects->sources->next; } expand_base_dir(&repo->objects->sources->path, o->object_dir, -- GitLab From 0d61933b8f9a0392310196578e1374283496843c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 11 Aug 2025 15:46:42 +0200 Subject: [PATCH 02/26] odb: allow `odb_find_source()` to fail When trying to locate a source for an unknown object directory we will die right away. In subsequent patches we will add new callsites though that want to handle this situation gracefully instead. Refactor the function to return a `NULL` pointer if the source could not be found and adapt the callsites to die instead. Introduce a new wrapper `odb_find_source_or_die()` that continues to die in case the source could not be found. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/commit-graph.c | 4 ++-- midx-write.c | 2 +- odb.c | 6 ++++++ odb.h | 7 +++++-- 4 files changed, 14 insertions(+), 5 deletions(-) diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 25018a0b9df..33fb7a5145c 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -101,7 +101,7 @@ static int graph_verify(int argc, const char **argv, const char *prefix, if (opts.progress) flags |= COMMIT_GRAPH_WRITE_PROGRESS; - source = odb_find_source(the_repository->objects, opts.obj_dir); + source = odb_find_source_or_die(the_repository->objects, opts.obj_dir); graph_name = get_commit_graph_filename(source); chain_name = get_commit_graph_chain_filename(source); if (open_commit_graph(graph_name, &fd, &st)) @@ -289,7 +289,7 @@ static int graph_write(int argc, const char **argv, const char *prefix, git_env_bool(GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS, 0)) flags |= COMMIT_GRAPH_WRITE_BLOOM_FILTERS; - source = odb_find_source(the_repository->objects, opts.obj_dir); + source = odb_find_source_or_die(the_repository->objects, opts.obj_dir); if (opts.reachable) { if (write_commit_graph_reachable(source, flags, &write_opts)) diff --git a/midx-write.c b/midx-write.c index c1ae62d3549..d38caceadb4 100644 --- a/midx-write.c +++ b/midx-write.c @@ -916,7 +916,7 @@ static int write_midx_bitmap(struct write_midx_context *ctx, static struct multi_pack_index *lookup_multi_pack_index(struct repository *r, const char *object_dir) { - struct odb_source *source = odb_find_source(r->objects, object_dir); + struct odb_source *source = odb_find_source_or_die(r->objects, object_dir); return get_multi_pack_index(source); } diff --git a/odb.c b/odb.c index 1761a50840d..4e7f14be4a0 100644 --- a/odb.c +++ b/odb.c @@ -464,6 +464,12 @@ struct odb_source *odb_find_source(struct object_database *odb, const char *obj_ free(obj_dir_real); strbuf_release(&odb_path_real); + return source; +} + +struct odb_source *odb_find_source_or_die(struct object_database *odb, const char *obj_dir) +{ + struct odb_source *source = odb_find_source(odb, obj_dir); if (!source) die(_("could not find object directory matching %s"), obj_dir); return source; diff --git a/odb.h b/odb.h index f9300439bab..312921077b8 100644 --- a/odb.h +++ b/odb.h @@ -186,11 +186,14 @@ struct object_database *odb_new(struct repository *repo); void odb_clear(struct object_database *o); /* - * Find source by its object directory path. Dies in case the source couldn't - * be found. + * Find source by its object directory path. Returns a `NULL` pointer in case + * the source could not be found. */ struct odb_source *odb_find_source(struct object_database *odb, const char *obj_dir); +/* Same as `odb_find_source()`, but dies in case the source doesn't exist. */ +struct odb_source *odb_find_source_or_die(struct object_database *odb, const char *obj_dir); + /* * Replace the current writable object directory with the specified temporary * object directory; returns the former primary source. -- GitLab From 25c532f6e0797ef501ce43835fb4af4bd9c33de5 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 11 Aug 2025 15:46:43 +0200 Subject: [PATCH 03/26] odb: consistently use "dir" to refer to alternate's directory The functions that add an alternate object directory to the object database are somewhat inconsistent in how they call the paramater that refers to the directory path: in our headers we refer to it as "dir", whereas in the implementation we often call it "reference" or "entry". Unify this and consistently call the parameter "dir". While at it, refactor `link_alt_odb_entry()` to accept a C string instead of a `struct strbuf` as parameter to clarify that we really only need the path and nothing else. Suggested-by: Taylor Blau Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/odb.c b/odb.c index 4e7f14be4a0..e41e3952ea0 100644 --- a/odb.c +++ b/odb.c @@ -140,7 +140,7 @@ static void read_info_alternates(struct object_database *odb, int depth); static int link_alt_odb_entry(struct object_database *odb, - const struct strbuf *entry, + const char *dir, const char *relative_base, int depth, const char *normalized_objdir) @@ -151,11 +151,11 @@ static int link_alt_odb_entry(struct object_database *odb, khiter_t pos; int ret = -1; - if (!is_absolute_path(entry->buf) && relative_base) { + if (!is_absolute_path(dir) && relative_base) { strbuf_realpath(&pathbuf, relative_base, 1); strbuf_addch(&pathbuf, '/'); } - strbuf_addbuf(&pathbuf, entry); + strbuf_addstr(&pathbuf, dir); if (!strbuf_realpath(&tmp, pathbuf.buf, 0)) { error(_("unable to normalize alternate object path: %s"), @@ -229,7 +229,7 @@ static void link_alt_odb_entries(struct object_database *odb, const char *alt, int sep, const char *relative_base, int depth) { struct strbuf objdirbuf = STRBUF_INIT; - struct strbuf entry = STRBUF_INIT; + struct strbuf dir = STRBUF_INIT; if (!alt || !*alt) return; @@ -243,13 +243,13 @@ static void link_alt_odb_entries(struct object_database *odb, const char *alt, strbuf_realpath(&objdirbuf, odb->sources->path, 1); while (*alt) { - alt = parse_alt_odb_entry(alt, sep, &entry); - if (!entry.len) + alt = parse_alt_odb_entry(alt, sep, &dir); + if (!dir.len) continue; - link_alt_odb_entry(odb, &entry, + link_alt_odb_entry(odb, dir.buf, relative_base, depth, objdirbuf.buf); } - strbuf_release(&entry); + strbuf_release(&dir); strbuf_release(&objdirbuf); } @@ -273,7 +273,7 @@ static void read_info_alternates(struct object_database *odb, } void odb_add_to_alternates_file(struct object_database *odb, - const char *reference) + const char *dir) { struct lock_file lock = LOCK_INIT; char *alts = repo_git_path(odb->repo, "objects/info/alternates"); @@ -290,7 +290,7 @@ void odb_add_to_alternates_file(struct object_database *odb, struct strbuf line = STRBUF_INIT; while (strbuf_getline(&line, in) != EOF) { - if (!strcmp(reference, line.buf)) { + if (!strcmp(dir, line.buf)) { found = 1; break; } @@ -306,18 +306,17 @@ void odb_add_to_alternates_file(struct object_database *odb, if (found) { rollback_lock_file(&lock); } else { - fprintf_or_die(out, "%s\n", reference); + fprintf_or_die(out, "%s\n", dir); if (commit_lock_file(&lock)) die_errno(_("unable to move new alternates file into place")); if (odb->loaded_alternates) - link_alt_odb_entries(odb, reference, - '\n', NULL, 0); + link_alt_odb_entries(odb, dir, '\n', NULL, 0); } free(alts); } void odb_add_to_alternates_memory(struct object_database *odb, - const char *reference) + const char *dir) { /* * Make sure alternates are initialized, or else our entry may be @@ -325,8 +324,7 @@ void odb_add_to_alternates_memory(struct object_database *odb, */ odb_prepare_alternates(odb); - link_alt_odb_entries(odb, reference, - '\n', NULL, 0); + link_alt_odb_entries(odb, dir, '\n', NULL, 0); } struct odb_source *odb_set_temporary_primary_source(struct object_database *odb, -- GitLab From a59d44ff3f0f308f9577b05c858c063d2466b061 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 11 Aug 2025 15:46:44 +0200 Subject: [PATCH 04/26] odb: return newly created in-memory sources Callers have no trivial way to obtain the newly created object database source when adding it to the in-memory list of alternates. While not yet needed anywhere, a subsequent commit will want to obtain that pointer. Refactor the function to return the source to make it easily accessible. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 30 ++++++++++++++++++------------ odb.h | 4 ++-- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/odb.c b/odb.c index e41e3952ea0..0c808bb288f 100644 --- a/odb.c +++ b/odb.c @@ -139,17 +139,16 @@ static void read_info_alternates(struct object_database *odb, const char *relative_base, int depth); -static int link_alt_odb_entry(struct object_database *odb, - const char *dir, - const char *relative_base, - int depth, - const char *normalized_objdir) +static struct odb_source *link_alt_odb_entry(struct object_database *odb, + const char *dir, + const char *relative_base, + int depth, + const char *normalized_objdir) { - struct odb_source *alternate; + struct odb_source *alternate = NULL; struct strbuf pathbuf = STRBUF_INIT; struct strbuf tmp = STRBUF_INIT; khiter_t pos; - int ret = -1; if (!is_absolute_path(dir) && relative_base) { strbuf_realpath(&pathbuf, relative_base, 1); @@ -189,11 +188,11 @@ static int link_alt_odb_entry(struct object_database *odb, /* recursively add alternates */ read_info_alternates(odb, alternate->path, depth + 1); - ret = 0; + error: strbuf_release(&tmp); strbuf_release(&pathbuf); - return ret; + return alternate; } static const char *parse_alt_odb_entry(const char *string, @@ -315,16 +314,23 @@ void odb_add_to_alternates_file(struct object_database *odb, free(alts); } -void odb_add_to_alternates_memory(struct object_database *odb, - const char *dir) +struct odb_source *odb_add_to_alternates_memory(struct object_database *odb, + const char *dir) { + struct odb_source *alternate; + char *objdir; + /* * Make sure alternates are initialized, or else our entry may be * overwritten when they are. */ odb_prepare_alternates(odb); - link_alt_odb_entries(odb, dir, '\n', NULL, 0); + objdir = real_pathdup(odb->sources->path, 1); + alternate = link_alt_odb_entry(odb, dir, NULL, 0, objdir); + + free(objdir); + return alternate; } struct odb_source *odb_set_temporary_primary_source(struct object_database *odb, diff --git a/odb.h b/odb.h index 312921077b8..d7691326997 100644 --- a/odb.h +++ b/odb.h @@ -268,8 +268,8 @@ void odb_add_to_alternates_file(struct object_database *odb, * recursive alternates it points to), but do not modify the on-disk alternates * file. */ -void odb_add_to_alternates_memory(struct object_database *odb, - const char *dir); +struct odb_source *odb_add_to_alternates_memory(struct object_database *odb, + const char *dir); /* * Read an object from the database. Returns the object data and assigns object -- GitLab From 57363dfa0dce05aac735d5cfd626e6aac8cb706c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 11 Aug 2025 15:46:45 +0200 Subject: [PATCH 05/26] odb: simplify calling `link_alt_odb_entry()` Callers of `link_alt_odb_entry()` are expected to pass in three different paths: - The (potentially relative) path of the object directory that we're about to add. - The base that should be used to resolve a relative object directory path. - The resolved path to the object database's objects directory. Juggling those three paths makes the calling convention somewhat hard to grok at first. As it turns out, the third parameter is redundant: we always pass in the resolved path of the object database's primary source, and we already pass in the database itself. So instead, we can resolve that path in the function itself. One downside of this is that one caller of `link_alt_odb_entry()` calls this function in a loop, so we were able to resolve the directory a single time, only. But ultimately, we only ever end up with a rather limited number of alternates anyway, so the extra couple of cycles we save feels more like a micro optimization. Refactor the code accordingly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/odb.c b/odb.c index 0c808bb288f..4f884e3b509 100644 --- a/odb.c +++ b/odb.c @@ -142,8 +142,7 @@ static void read_info_alternates(struct object_database *odb, static struct odb_source *link_alt_odb_entry(struct object_database *odb, const char *dir, const char *relative_base, - int depth, - const char *normalized_objdir) + int depth) { struct odb_source *alternate = NULL; struct strbuf pathbuf = STRBUF_INIT; @@ -170,7 +169,10 @@ static struct odb_source *link_alt_odb_entry(struct object_database *odb, while (pathbuf.len && pathbuf.buf[pathbuf.len - 1] == '/') strbuf_setlen(&pathbuf, pathbuf.len - 1); - if (!alt_odb_usable(odb, &pathbuf, normalized_objdir, &pos)) + strbuf_reset(&tmp); + strbuf_realpath(&tmp, odb->sources->path, 1); + + if (!alt_odb_usable(odb, &pathbuf, tmp.buf, &pos)) goto error; CALLOC_ARRAY(alternate, 1); @@ -227,7 +229,6 @@ static const char *parse_alt_odb_entry(const char *string, static void link_alt_odb_entries(struct object_database *odb, const char *alt, int sep, const char *relative_base, int depth) { - struct strbuf objdirbuf = STRBUF_INIT; struct strbuf dir = STRBUF_INIT; if (!alt || !*alt) @@ -239,17 +240,13 @@ static void link_alt_odb_entries(struct object_database *odb, const char *alt, return; } - strbuf_realpath(&objdirbuf, odb->sources->path, 1); - while (*alt) { alt = parse_alt_odb_entry(alt, sep, &dir); if (!dir.len) continue; - link_alt_odb_entry(odb, dir.buf, - relative_base, depth, objdirbuf.buf); + link_alt_odb_entry(odb, dir.buf, relative_base, depth); } strbuf_release(&dir); - strbuf_release(&objdirbuf); } static void read_info_alternates(struct object_database *odb, @@ -317,20 +314,12 @@ void odb_add_to_alternates_file(struct object_database *odb, struct odb_source *odb_add_to_alternates_memory(struct object_database *odb, const char *dir) { - struct odb_source *alternate; - char *objdir; - /* * Make sure alternates are initialized, or else our entry may be * overwritten when they are. */ odb_prepare_alternates(odb); - - objdir = real_pathdup(odb->sources->path, 1); - alternate = link_alt_odb_entry(odb, dir, NULL, 0, objdir); - - free(objdir); - return alternate; + return link_alt_odb_entry(odb, dir, NULL, 0); } struct odb_source *odb_set_temporary_primary_source(struct object_database *odb, -- GitLab From 9ff212961506679c1e2c1541b17ab2bd8563ff15 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 11 Aug 2025 15:46:46 +0200 Subject: [PATCH 06/26] midx: drop redundant `struct repository` parameter There are a couple of functions that take both a `struct repository` and a `struct multi_pack_index`. This provides redundant information though without much benefit given that the multi-pack index already has a pointer to its owning repository. Drop the `struct repository` parameter from such functions. While at it, reorder the list of parameters of `fill_midx_entry()` so that the MIDX comes first to better align with our coding guidelines. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 2 +- midx-write.c | 16 +++++++--------- midx.c | 18 +++++++++--------- midx.h | 6 +++--- pack-bitmap.c | 4 ++-- packfile.c | 4 ++-- t/helper/test-read-midx.c | 4 ++-- 7 files changed, 26 insertions(+), 28 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 3dd84495b86..b9fd685b8fc 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1733,7 +1733,7 @@ static int want_object_in_pack_mtime(const struct object_id *oid, struct multi_pack_index *m = get_multi_pack_index(source); struct pack_entry e; - if (m && fill_midx_entry(the_repository, oid, &e, m)) { + if (m && fill_midx_entry(m, oid, &e)) { want = want_object_in_pack_one(e.p, oid, exclude, found_pack, found_offset, found_mtime); if (want != -1) return want; diff --git a/midx-write.c b/midx-write.c index d38caceadb4..b858be475fc 100644 --- a/midx-write.c +++ b/midx-write.c @@ -942,8 +942,7 @@ static int fill_packs_from_midx(struct write_midx_context *ctx, */ if (flags & MIDX_WRITE_REV_INDEX || preferred_pack_name) { - if (prepare_midx_pack(ctx->repo, m, - m->num_packs_in_base + i)) { + if (prepare_midx_pack(m, m->num_packs_in_base + i)) { error(_("could not load pack")); return 1; } @@ -1566,7 +1565,7 @@ int expire_midx_packs(struct repository *r, const char *object_dir, unsigned fla if (count[i]) continue; - if (prepare_midx_pack(r, m, i)) + if (prepare_midx_pack(m, i)) continue; if (m->packs[i]->pack_keep || m->packs[i]->is_cruft) @@ -1612,13 +1611,12 @@ static int compare_by_mtime(const void *a_, const void *b_) return 0; } -static int want_included_pack(struct repository *r, - struct multi_pack_index *m, +static int want_included_pack(struct multi_pack_index *m, int pack_kept_objects, uint32_t pack_int_id) { struct packed_git *p; - if (prepare_midx_pack(r, m, pack_int_id)) + if (prepare_midx_pack(m, pack_int_id)) return 0; p = m->packs[pack_int_id]; if (!pack_kept_objects && p->pack_keep) @@ -1640,7 +1638,7 @@ static void fill_included_packs_all(struct repository *r, repo_config_get_bool(r, "repack.packkeptobjects", &pack_kept_objects); for (i = 0; i < m->num_packs; i++) { - if (!want_included_pack(r, m, pack_kept_objects, i)) + if (!want_included_pack(m, pack_kept_objects, i)) continue; include_pack[i] = 1; @@ -1664,7 +1662,7 @@ static void fill_included_packs_batch(struct repository *r, for (i = 0; i < m->num_packs; i++) { pack_info[i].pack_int_id = i; - if (prepare_midx_pack(r, m, i)) + if (prepare_midx_pack(m, i)) continue; pack_info[i].mtime = m->packs[i]->mtime; @@ -1683,7 +1681,7 @@ static void fill_included_packs_batch(struct repository *r, struct packed_git *p = m->packs[pack_int_id]; uint64_t expected_size; - if (!want_included_pack(r, m, pack_kept_objects, pack_int_id)) + if (!want_included_pack(m, pack_kept_objects, pack_int_id)) continue; /* diff --git a/midx.c b/midx.c index b9ca0915a67..8459dda8c9e 100644 --- a/midx.c +++ b/midx.c @@ -450,9 +450,10 @@ static uint32_t midx_for_pack(struct multi_pack_index **_m, return pack_int_id - m->num_packs_in_base; } -int prepare_midx_pack(struct repository *r, struct multi_pack_index *m, +int prepare_midx_pack(struct multi_pack_index *m, uint32_t pack_int_id) { + struct repository *r = m->repo; struct strbuf pack_name = STRBUF_INIT; struct strbuf key = STRBUF_INIT; struct packed_git *p; @@ -507,7 +508,7 @@ struct packed_git *nth_midxed_pack(struct multi_pack_index *m, #define MIDX_CHUNK_BITMAPPED_PACKS_WIDTH (2 * sizeof(uint32_t)) -int nth_bitmapped_pack(struct repository *r, struct multi_pack_index *m, +int nth_bitmapped_pack(struct multi_pack_index *m, struct bitmapped_pack *bp, uint32_t pack_int_id) { uint32_t local_pack_int_id = midx_for_pack(&m, pack_int_id); @@ -515,7 +516,7 @@ int nth_bitmapped_pack(struct repository *r, struct multi_pack_index *m, if (!m->chunk_bitmapped_packs) return error(_("MIDX does not contain the BTMP chunk")); - if (prepare_midx_pack(r, m, pack_int_id)) + if (prepare_midx_pack(m, pack_int_id)) return error(_("could not load bitmapped pack %"PRIu32), pack_int_id); bp->p = m->packs[local_pack_int_id]; @@ -600,10 +601,9 @@ uint32_t nth_midxed_pack_int_id(struct multi_pack_index *m, uint32_t pos) (off_t)pos * MIDX_CHUNK_OFFSET_WIDTH); } -int fill_midx_entry(struct repository *r, +int fill_midx_entry(struct multi_pack_index *m, const struct object_id *oid, - struct pack_entry *e, - struct multi_pack_index *m) + struct pack_entry *e) { uint32_t pos; uint32_t pack_int_id; @@ -615,7 +615,7 @@ int fill_midx_entry(struct repository *r, midx_for_object(&m, pos); pack_int_id = nth_midxed_pack_int_id(m, pos); - if (prepare_midx_pack(r, m, pack_int_id)) + if (prepare_midx_pack(m, pack_int_id)) return 0; p = m->packs[pack_int_id - m->num_packs_in_base]; @@ -912,7 +912,7 @@ int verify_midx_file(struct repository *r, const char *object_dir, unsigned flag _("Looking for referenced packfiles"), m->num_packs + m->num_packs_in_base); for (i = 0; i < m->num_packs + m->num_packs_in_base; i++) { - if (prepare_midx_pack(r, m, i)) + if (prepare_midx_pack(m, i)) midx_report("failed to load pack in position %d", i); display_progress(progress, i + 1); @@ -989,7 +989,7 @@ int verify_midx_file(struct repository *r, const char *object_dir, unsigned flag nth_midxed_object_oid(&oid, m, pairs[i].pos); - if (!fill_midx_entry(r, &oid, &e, m)) { + if (!fill_midx_entry(m, &oid, &e)) { midx_report(_("failed to load pack entry for oid[%d] = %s"), pairs[i].pos, oid_to_hex(&oid)); continue; diff --git a/midx.h b/midx.h index 28c426a8232..f7e07083e1f 100644 --- a/midx.h +++ b/midx.h @@ -103,10 +103,10 @@ void get_split_midx_filename_ext(const struct git_hash_algo *hash_algo, struct multi_pack_index *load_multi_pack_index(struct repository *r, const char *object_dir, int local); -int prepare_midx_pack(struct repository *r, struct multi_pack_index *m, uint32_t pack_int_id); +int prepare_midx_pack(struct multi_pack_index *m, uint32_t pack_int_id); struct packed_git *nth_midxed_pack(struct multi_pack_index *m, uint32_t pack_int_id); -int nth_bitmapped_pack(struct repository *r, struct multi_pack_index *m, +int nth_bitmapped_pack(struct multi_pack_index *m, struct bitmapped_pack *bp, uint32_t pack_int_id); int bsearch_one_midx(const struct object_id *oid, struct multi_pack_index *m, uint32_t *result); @@ -118,7 +118,7 @@ uint32_t nth_midxed_pack_int_id(struct multi_pack_index *m, uint32_t pos); struct object_id *nth_midxed_object_oid(struct object_id *oid, struct multi_pack_index *m, uint32_t n); -int fill_midx_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e, struct multi_pack_index *m); +int fill_midx_entry(struct multi_pack_index *m, const struct object_id *oid, struct pack_entry *e); int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name); int midx_preferred_pack(struct multi_pack_index *m, uint32_t *pack_int_id); diff --git a/pack-bitmap.c b/pack-bitmap.c index d14421ee204..fb0b11ca073 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -493,7 +493,7 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git, } for (i = 0; i < bitmap_git->midx->num_packs + bitmap_git->midx->num_packs_in_base; i++) { - if (prepare_midx_pack(bitmap_repo(bitmap_git), bitmap_git->midx, i)) { + if (prepare_midx_pack(bitmap_git->midx, i)) { warning(_("could not open pack %s"), bitmap_git->midx->pack_names[i]); goto cleanup; @@ -2466,7 +2466,7 @@ void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, struct multi_pack_index *m = bitmap_git->midx; for (i = 0; i < m->num_packs + m->num_packs_in_base; i++) { struct bitmapped_pack pack; - if (nth_bitmapped_pack(r, bitmap_git->midx, &pack, i) < 0) { + if (nth_bitmapped_pack(bitmap_git->midx, &pack, i) < 0) { warning(_("unable to load pack: '%s', disabling pack-reuse"), bitmap_git->midx->pack_names[i]); free(packs); diff --git a/packfile.c b/packfile.c index a38544b87bf..acb680966da 100644 --- a/packfile.c +++ b/packfile.c @@ -1091,7 +1091,7 @@ struct packed_git *get_all_packs(struct repository *r) if (!m) continue; for (uint32_t i = 0; i < m->num_packs + m->num_packs_in_base; i++) - prepare_midx_pack(r, m, i); + prepare_midx_pack(m, i); } return r->objects->packed_git; @@ -2077,7 +2077,7 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa prepare_packed_git(r); for (struct odb_source *source = r->objects->sources; source; source = source->next) - if (source->midx && fill_midx_entry(r, oid, e, source->midx)) + if (source->midx && fill_midx_entry(source->midx, oid, e)) return 1; if (!r->objects->packed_git) diff --git a/t/helper/test-read-midx.c b/t/helper/test-read-midx.c index da2aa036b57..e430aa247c6 100644 --- a/t/helper/test-read-midx.c +++ b/t/helper/test-read-midx.c @@ -65,7 +65,7 @@ static int read_midx_file(const char *object_dir, const char *checksum, for (i = 0; i < m->num_objects; i++) { nth_midxed_object_oid(&oid, m, i + m->num_objects_in_base); - fill_midx_entry(the_repository, &oid, &e, m); + fill_midx_entry(m, &oid, &e); printf("%s %"PRIu64"\t%s\n", oid_to_hex(&oid), e.offset, e.p->pack_name); @@ -126,7 +126,7 @@ static int read_midx_bitmapped_packs(const char *object_dir) return 1; for (i = 0; i < midx->num_packs + midx->num_packs_in_base; i++) { - if (nth_bitmapped_pack(the_repository, midx, &pack, i) < 0) { + if (nth_bitmapped_pack(midx, &pack, i) < 0) { close_midx(midx); return 1; } -- GitLab From 017db7bb14246dea55b678fc20e34ce91c28968a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 11 Aug 2025 15:46:47 +0200 Subject: [PATCH 07/26] midx: load multi-pack indices via their source To load a multi-pack index the caller is expected to pass both the repository and the object directory where the multi-pack index is located. While this works, this layout has a couple of downsides: - We need to pass in information reduntant with the owning source, namely its object directory and whether the source is local or not. - We don't have access to the source when loading the multi-pack index. If we had that access, we could store a pointer to the owning source in the MIDX and thus deduplicate some information. - Multi-pack indices are inherently specific to the object source and its format. With the goal of pluggable object backends in mind we will eventually want the backends to own the logic of reading and writing multi-pack indices. Making the logic work on top of object sources is a step into that direction. Refactor loading of multi-pack indices accordingly. This surfaces one small problem though: git-multi-pack-index(1) and our MIDX test helper both know to read and write multi-pack-indices located in a different object directory. This issue is addressed by adding the user-provided object directory as an in-memory alternate. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/multi-pack-index.c | 18 ++++++++++-- midx.c | 57 ++++++++++++++++--------------------- midx.h | 6 ++-- t/helper/test-read-midx.c | 25 +++++++++------- t/t5319-multi-pack-index.sh | 8 +++--- 5 files changed, 62 insertions(+), 52 deletions(-) diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index aa25b06f9d0..e4a9305af3a 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -64,12 +64,20 @@ static int parse_object_dir(const struct option *opt, const char *arg, char **value = opt->value; free(*value); if (unset) - *value = xstrdup(repo_get_object_directory(the_repository)); + *value = xstrdup(the_repository->objects->sources->path); else *value = real_pathdup(arg, 1); return 0; } +static struct odb_source *handle_object_dir_option(struct repository *repo) +{ + struct odb_source *source = odb_find_source(repo->objects, opts.object_dir); + if (!source) + source = odb_add_to_alternates_memory(repo->objects, opts.object_dir); + return source; +} + static struct option common_opts[] = { OPT_CALLBACK(0, "object-dir", &opts.object_dir, N_("directory"), @@ -157,6 +165,7 @@ static int cmd_multi_pack_index_write(int argc, const char **argv, if (argc) usage_with_options(builtin_multi_pack_index_write_usage, options); + handle_object_dir_option(repo); FREE_AND_NULL(options); @@ -193,6 +202,8 @@ static int cmd_multi_pack_index_verify(int argc, const char **argv, N_("force progress reporting"), MIDX_PROGRESS), OPT_END(), }; + struct odb_source *source; + options = add_common_options(builtin_multi_pack_index_verify_options); trace2_cmd_mode(argv[0]); @@ -205,10 +216,11 @@ static int cmd_multi_pack_index_verify(int argc, const char **argv, if (argc) usage_with_options(builtin_multi_pack_index_verify_usage, options); + source = handle_object_dir_option(the_repository); FREE_AND_NULL(options); - return verify_midx_file(the_repository, opts.object_dir, opts.flags); + return verify_midx_file(source, opts.flags); } static int cmd_multi_pack_index_expire(int argc, const char **argv, @@ -233,6 +245,7 @@ static int cmd_multi_pack_index_expire(int argc, const char **argv, if (argc) usage_with_options(builtin_multi_pack_index_expire_usage, options); + handle_object_dir_option(the_repository); FREE_AND_NULL(options); @@ -265,6 +278,7 @@ static int cmd_multi_pack_index_repack(int argc, const char **argv, if (argc) usage_with_options(builtin_multi_pack_index_repack_usage, options); + handle_object_dir_option(the_repository); FREE_AND_NULL(options); diff --git a/midx.c b/midx.c index 8459dda8c9e..831a7e9b5f2 100644 --- a/midx.c +++ b/midx.c @@ -95,11 +95,10 @@ static int midx_read_object_offsets(const unsigned char *chunk_start, return 0; } -static struct multi_pack_index *load_multi_pack_index_one(struct repository *r, - const char *object_dir, - const char *midx_name, - int local) +static struct multi_pack_index *load_multi_pack_index_one(struct odb_source *source, + const char *midx_name) { + struct repository *r = source->odb->repo; struct multi_pack_index *m = NULL; int fd; struct stat st; @@ -129,10 +128,10 @@ static struct multi_pack_index *load_multi_pack_index_one(struct repository *r, midx_map = xmmap(NULL, midx_size, PROT_READ, MAP_PRIVATE, fd, 0); close(fd); - FLEX_ALLOC_STR(m, object_dir, object_dir); + FLEX_ALLOC_STR(m, object_dir, source->path); m->data = midx_map; m->data_len = midx_size; - m->local = local; + m->local = source->local; m->repo = r; m->signature = get_be32(m->data); @@ -297,19 +296,18 @@ static int add_midx_to_chain(struct multi_pack_index *midx, return 1; } -static struct multi_pack_index *load_midx_chain_fd_st(struct repository *r, - const char *object_dir, - int local, +static struct multi_pack_index *load_midx_chain_fd_st(struct odb_source *source, int fd, struct stat *st, int *incomplete_chain) { + const struct git_hash_algo *hash_algo = source->odb->repo->hash_algo; struct multi_pack_index *midx_chain = NULL; struct strbuf buf = STRBUF_INIT; int valid = 1; uint32_t i, count; FILE *fp = xfdopen(fd, "r"); - count = st->st_size / (r->hash_algo->hexsz + 1); + count = st->st_size / (hash_algo->hexsz + 1); for (i = 0; i < count; i++) { struct multi_pack_index *m; @@ -318,7 +316,7 @@ static struct multi_pack_index *load_midx_chain_fd_st(struct repository *r, if (strbuf_getline_lf(&buf, fp) == EOF) break; - if (get_oid_hex_algop(buf.buf, &layer, r->hash_algo)) { + if (get_oid_hex_algop(buf.buf, &layer, hash_algo)) { warning(_("invalid multi-pack-index chain: line '%s' " "not a hash"), buf.buf); @@ -329,9 +327,9 @@ static struct multi_pack_index *load_midx_chain_fd_st(struct repository *r, valid = 0; strbuf_reset(&buf); - get_split_midx_filename_ext(r->hash_algo, &buf, object_dir, + get_split_midx_filename_ext(hash_algo, &buf, source->path, layer.hash, MIDX_EXT_MIDX); - m = load_multi_pack_index_one(r, object_dir, buf.buf, local); + m = load_multi_pack_index_one(source, buf.buf); if (m) { if (add_midx_to_chain(m, midx_chain)) { @@ -354,40 +352,35 @@ static struct multi_pack_index *load_midx_chain_fd_st(struct repository *r, return midx_chain; } -static struct multi_pack_index *load_multi_pack_index_chain(struct repository *r, - const char *object_dir, - int local) +static struct multi_pack_index *load_multi_pack_index_chain(struct odb_source *source) { struct strbuf chain_file = STRBUF_INIT; struct stat st; int fd; struct multi_pack_index *m = NULL; - get_midx_chain_filename(&chain_file, object_dir); - if (open_multi_pack_index_chain(r->hash_algo, chain_file.buf, &fd, &st)) { + get_midx_chain_filename(&chain_file, source->path); + if (open_multi_pack_index_chain(source->odb->repo->hash_algo, chain_file.buf, &fd, &st)) { int incomplete; /* ownership of fd is taken over by load function */ - m = load_midx_chain_fd_st(r, object_dir, local, fd, &st, - &incomplete); + m = load_midx_chain_fd_st(source, fd, &st, &incomplete); } strbuf_release(&chain_file); return m; } -struct multi_pack_index *load_multi_pack_index(struct repository *r, - const char *object_dir, - int local) +struct multi_pack_index *load_multi_pack_index(struct odb_source *source) { struct strbuf midx_name = STRBUF_INIT; struct multi_pack_index *m; - get_midx_filename(r->hash_algo, &midx_name, object_dir); + get_midx_filename(source->odb->repo->hash_algo, &midx_name, + source->path); - m = load_multi_pack_index_one(r, object_dir, - midx_name.buf, local); + m = load_multi_pack_index_one(source, midx_name.buf); if (!m) - m = load_multi_pack_index_chain(r, object_dir, local); + m = load_multi_pack_index_chain(source); strbuf_release(&midx_name); @@ -734,8 +727,7 @@ int prepare_multi_pack_index_one(struct odb_source *source) if (source->midx) return 1; - source->midx = load_multi_pack_index(r, source->path, - source->local); + source->midx = load_multi_pack_index(source); return !!source->midx; } @@ -880,12 +872,13 @@ static int compare_pair_pos_vs_id(const void *_a, const void *_b) display_progress(progress, _n); \ } while (0) -int verify_midx_file(struct repository *r, const char *object_dir, unsigned flags) +int verify_midx_file(struct odb_source *source, unsigned flags) { + struct repository *r = source->odb->repo; struct pair_pos_vs_id *pairs = NULL; uint32_t i; struct progress *progress = NULL; - struct multi_pack_index *m = load_multi_pack_index(r, object_dir, 1); + struct multi_pack_index *m = load_multi_pack_index(source); struct multi_pack_index *curr; verify_midx_error = 0; @@ -894,7 +887,7 @@ int verify_midx_file(struct repository *r, const char *object_dir, unsigned flag struct stat sb; struct strbuf filename = STRBUF_INIT; - get_midx_filename(r->hash_algo, &filename, object_dir); + get_midx_filename(r->hash_algo, &filename, source->path); if (!stat(filename.buf, &sb)) { error(_("multi-pack-index file exists, but failed to parse")); diff --git a/midx.h b/midx.h index f7e07083e1f..970d043989a 100644 --- a/midx.h +++ b/midx.h @@ -100,9 +100,7 @@ void get_split_midx_filename_ext(const struct git_hash_algo *hash_algo, struct strbuf *buf, const char *object_dir, const unsigned char *hash, const char *ext); -struct multi_pack_index *load_multi_pack_index(struct repository *r, - const char *object_dir, - int local); +struct multi_pack_index *load_multi_pack_index(struct odb_source *source); int prepare_midx_pack(struct multi_pack_index *m, uint32_t pack_int_id); struct packed_git *nth_midxed_pack(struct multi_pack_index *m, uint32_t pack_int_id); @@ -136,7 +134,7 @@ int write_midx_file_only(struct repository *r, const char *object_dir, const char *preferred_pack_name, const char *refs_snapshot, unsigned flags); void clear_midx_file(struct repository *r); -int verify_midx_file(struct repository *r, const char *object_dir, unsigned flags); +int verify_midx_file(struct odb_source *source, unsigned flags); int expire_midx_packs(struct repository *r, const char *object_dir, unsigned flags); int midx_repack(struct repository *r, const char *object_dir, size_t batch_size, unsigned flags); diff --git a/t/helper/test-read-midx.c b/t/helper/test-read-midx.c index e430aa247c6..bcb8ea76717 100644 --- a/t/helper/test-read-midx.c +++ b/t/helper/test-read-midx.c @@ -11,14 +11,24 @@ #include "gettext.h" #include "pack-revindex.h" +static struct multi_pack_index *setup_midx(const char *object_dir) +{ + struct odb_source *source; + setup_git_directory(); + source = odb_find_source(the_repository->objects, object_dir); + if (!source) + source = odb_add_to_alternates_memory(the_repository->objects, + object_dir); + return load_multi_pack_index(source); +} + static int read_midx_file(const char *object_dir, const char *checksum, int show_objects) { uint32_t i; struct multi_pack_index *m; - setup_git_directory(); - m = load_multi_pack_index(the_repository, object_dir, 1); + m = setup_midx(object_dir); if (!m) return 1; @@ -81,8 +91,7 @@ static int read_midx_checksum(const char *object_dir) { struct multi_pack_index *m; - setup_git_directory(); - m = load_multi_pack_index(the_repository, object_dir, 1); + m = setup_midx(object_dir); if (!m) return 1; printf("%s\n", hash_to_hex(get_midx_checksum(m))); @@ -96,9 +105,7 @@ static int read_midx_preferred_pack(const char *object_dir) struct multi_pack_index *midx = NULL; uint32_t preferred_pack; - setup_git_directory(); - - midx = load_multi_pack_index(the_repository, object_dir, 1); + midx = setup_midx(object_dir); if (!midx) return 1; @@ -119,9 +126,7 @@ static int read_midx_bitmapped_packs(const char *object_dir) struct bitmapped_pack pack; uint32_t i; - setup_git_directory(); - - midx = load_multi_pack_index(the_repository, object_dir, 1); + midx = setup_midx(object_dir); if (!midx) return 1; diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index bd75dea9501..4e5e882989f 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -28,11 +28,11 @@ midx_read_expect () { EOF if test $NUM_PACKS -ge 1 then - ls $OBJECT_DIR/pack/ | grep idx | sort + ls "$OBJECT_DIR"/pack/ | grep idx | sort fi && printf "object-dir: $OBJECT_DIR\n" } >expect && - test-tool read-midx $OBJECT_DIR >actual && + test-tool read-midx "$OBJECT_DIR" >actual && test_cmp expect actual } @@ -305,7 +305,7 @@ test_expect_success 'midx picks objects from preferred pack' ' ofs=$(git show-index expect && grep ^$b out >actual && @@ -639,7 +639,7 @@ test_expect_success 'force some 64-bit offsets with pack-objects' ' ( cd ../objects64 && pwd ) >.git/objects/info/alternates && midx64=$(git multi-pack-index --object-dir=../objects64 write) ) && - midx_read_expect 1 63 5 objects64 " large-offsets" + midx_read_expect 1 63 5 "$(pwd)/objects64" " large-offsets" ' test_expect_success 'verify multi-pack-index with 64-bit offsets' ' -- GitLab From c3f5d251469525a52074b0373671a588f0e5b972 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 11 Aug 2025 15:46:48 +0200 Subject: [PATCH 08/26] midx: write multi-pack indices via their source Similar to the preceding commit, refactor the writing side of multi-pack indices so that we pass in the object database source where the index should be written to. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/multi-pack-index.c | 19 ++++++----- builtin/repack.c | 2 +- midx-write.c | 67 ++++++++++++++++++-------------------- midx.h | 8 ++--- 4 files changed, 47 insertions(+), 49 deletions(-) diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index e4a9305af3a..b1e971e535d 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -147,6 +147,7 @@ static int cmd_multi_pack_index_write(int argc, const char **argv, N_("refs snapshot for selecting bitmap commits")), OPT_END(), }; + struct odb_source *source; int ret; opts.flags |= MIDX_WRITE_BITMAP_HASH_CACHE; @@ -165,7 +166,7 @@ static int cmd_multi_pack_index_write(int argc, const char **argv, if (argc) usage_with_options(builtin_multi_pack_index_write_usage, options); - handle_object_dir_option(repo); + source = handle_object_dir_option(repo); FREE_AND_NULL(options); @@ -174,7 +175,7 @@ static int cmd_multi_pack_index_write(int argc, const char **argv, read_packs_from_stdin(&packs); - ret = write_midx_file_only(repo, opts.object_dir, &packs, + ret = write_midx_file_only(source, &packs, opts.preferred_pack, opts.refs_snapshot, opts.flags); @@ -185,7 +186,7 @@ static int cmd_multi_pack_index_write(int argc, const char **argv, } - ret = write_midx_file(repo, opts.object_dir, opts.preferred_pack, + ret = write_midx_file(source, opts.preferred_pack, opts.refs_snapshot, opts.flags); free(opts.refs_snapshot); @@ -233,6 +234,8 @@ static int cmd_multi_pack_index_expire(int argc, const char **argv, N_("force progress reporting"), MIDX_PROGRESS), OPT_END(), }; + struct odb_source *source; + options = add_common_options(builtin_multi_pack_index_expire_options); trace2_cmd_mode(argv[0]); @@ -245,11 +248,11 @@ static int cmd_multi_pack_index_expire(int argc, const char **argv, if (argc) usage_with_options(builtin_multi_pack_index_expire_usage, options); - handle_object_dir_option(the_repository); + source = handle_object_dir_option(the_repository); FREE_AND_NULL(options); - return expire_midx_packs(the_repository, opts.object_dir, opts.flags); + return expire_midx_packs(source, opts.flags); } static int cmd_multi_pack_index_repack(int argc, const char **argv, @@ -264,6 +267,7 @@ static int cmd_multi_pack_index_repack(int argc, const char **argv, N_("force progress reporting"), MIDX_PROGRESS), OPT_END(), }; + struct odb_source *source; options = add_common_options(builtin_multi_pack_index_repack_options); @@ -278,12 +282,11 @@ static int cmd_multi_pack_index_repack(int argc, const char **argv, if (argc) usage_with_options(builtin_multi_pack_index_repack_usage, options); - handle_object_dir_option(the_repository); + source = handle_object_dir_option(the_repository); FREE_AND_NULL(options); - return midx_repack(the_repository, opts.object_dir, - (size_t)opts.batch_size, opts.flags); + return midx_repack(source, (size_t)opts.batch_size, opts.flags); } int cmd_multi_pack_index(int argc, diff --git a/builtin/repack.c b/builtin/repack.c index 21723866b9c..94dec26f185 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -1711,7 +1711,7 @@ int cmd_repack(int argc, unsigned flags = 0; if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX_WRITE_INCREMENTAL, 0)) flags |= MIDX_WRITE_INCREMENTAL; - write_midx_file(the_repository, repo_get_object_directory(the_repository), + write_midx_file(the_repository->objects->sources, NULL, NULL, flags); } diff --git a/midx-write.c b/midx-write.c index b858be475fc..bf7c01d4b1d 100644 --- a/midx-write.c +++ b/midx-write.c @@ -913,13 +913,6 @@ static int write_midx_bitmap(struct write_midx_context *ctx, return ret; } -static struct multi_pack_index *lookup_multi_pack_index(struct repository *r, - const char *object_dir) -{ - struct odb_source *source = odb_find_source_or_die(r->objects, object_dir); - return get_multi_pack_index(source); -} - static int fill_packs_from_midx(struct write_midx_context *ctx, const char *preferred_pack_name, uint32_t flags) { @@ -1010,7 +1003,7 @@ static int link_midx_to_chain(struct multi_pack_index *m) return ret; } -static void clear_midx_files(struct repository *r, const char *object_dir, +static void clear_midx_files(struct odb_source *source, const char **hashes, uint32_t hashes_nr, unsigned incremental) { @@ -1029,16 +1022,16 @@ static void clear_midx_files(struct repository *r, const char *object_dir, uint32_t i, j; for (i = 0; i < ARRAY_SIZE(exts); i++) { - clear_incremental_midx_files_ext(object_dir, exts[i], + clear_incremental_midx_files_ext(source->path, exts[i], hashes, hashes_nr); for (j = 0; j < hashes_nr; j++) - clear_midx_files_ext(object_dir, exts[i], hashes[j]); + clear_midx_files_ext(source->path, exts[i], hashes[j]); } if (incremental) - get_midx_filename(r->hash_algo, &buf, object_dir); + get_midx_filename(source->odb->repo->hash_algo, &buf, source->path); else - get_midx_chain_filename(&buf, object_dir); + get_midx_chain_filename(&buf, source->path); if (unlink(buf.buf) && errno != ENOENT) die_errno(_("failed to clear multi-pack-index at %s"), buf.buf); @@ -1046,13 +1039,14 @@ static void clear_midx_files(struct repository *r, const char *object_dir, strbuf_release(&buf); } -static int write_midx_internal(struct repository *r, const char *object_dir, +static int write_midx_internal(struct odb_source *source, struct string_list *packs_to_include, struct string_list *packs_to_drop, const char *preferred_pack_name, const char *refs_snapshot, unsigned flags) { + struct repository *r = source->odb->repo; struct strbuf midx_name = STRBUF_INIT; unsigned char midx_hash[GIT_MAX_RAWSZ]; uint32_t i, start_pack; @@ -1076,15 +1070,15 @@ static int write_midx_internal(struct repository *r, const char *object_dir, if (ctx.incremental) strbuf_addf(&midx_name, "%s/pack/multi-pack-index.d/tmp_midx_XXXXXX", - object_dir); + source->path); else - get_midx_filename(r->hash_algo, &midx_name, object_dir); + get_midx_filename(r->hash_algo, &midx_name, source->path); if (safe_create_leading_directories(r, midx_name.buf)) die_errno(_("unable to create leading directories of %s"), midx_name.buf); if (!packs_to_include || ctx.incremental) { - struct multi_pack_index *m = lookup_multi_pack_index(r, object_dir); + struct multi_pack_index *m = get_multi_pack_index(source); if (m && !midx_checksum_valid(m)) { warning(_("ignoring existing multi-pack-index; checksum mismatch")); m = NULL; @@ -1138,7 +1132,7 @@ static int write_midx_internal(struct repository *r, const char *object_dir, ctx.to_include = packs_to_include; - for_each_file_in_pack_dir(object_dir, add_pack_to_midx, &ctx); + for_each_file_in_pack_dir(source->path, add_pack_to_midx, &ctx); stop_progress(&ctx.progress); if ((ctx.m && ctx.nr == ctx.m->num_packs + ctx.m->num_packs_in_base) && @@ -1158,7 +1152,7 @@ static int write_midx_internal(struct repository *r, const char *object_dir, * corresponding bitmap (or one wasn't requested). */ if (!want_bitmap) - clear_midx_files_ext(object_dir, "bitmap", NULL); + clear_midx_files_ext(source->path, "bitmap", NULL); goto cleanup; } } @@ -1326,7 +1320,7 @@ static int write_midx_internal(struct repository *r, const char *object_dir, if (ctx.incremental) { struct strbuf lock_name = STRBUF_INIT; - get_midx_chain_filename(&lock_name, object_dir); + get_midx_chain_filename(&lock_name, source->path); hold_lock_file_for_update(&lk, lock_name.buf, LOCK_DIE_ON_ERROR); strbuf_release(&lock_name); @@ -1389,7 +1383,7 @@ static int write_midx_internal(struct repository *r, const char *object_dir, if (flags & MIDX_WRITE_REV_INDEX && git_env_bool("GIT_TEST_MIDX_WRITE_REV", 0)) - write_midx_reverse_index(&ctx, object_dir, midx_hash); + write_midx_reverse_index(&ctx, source->path, midx_hash); if (flags & MIDX_WRITE_BITMAP) { struct packing_data pdata; @@ -1412,7 +1406,7 @@ static int write_midx_internal(struct repository *r, const char *object_dir, FREE_AND_NULL(ctx.entries); ctx.entries_nr = 0; - if (write_midx_bitmap(&ctx, object_dir, + if (write_midx_bitmap(&ctx, source->path, midx_hash, &pdata, commits, commits_nr, flags) < 0) { error(_("could not write multi-pack bitmap")); @@ -1446,7 +1440,7 @@ static int write_midx_internal(struct repository *r, const char *object_dir, return -1; get_split_midx_filename_ext(r->hash_algo, &final_midx_name, - object_dir, midx_hash, MIDX_EXT_MIDX); + source->path, midx_hash, MIDX_EXT_MIDX); if (rename_tempfile(&incr, final_midx_name.buf) < 0) { error_errno(_("unable to rename new multi-pack-index layer")); @@ -1479,7 +1473,7 @@ static int write_midx_internal(struct repository *r, const char *object_dir, if (commit_lock_file(&lk) < 0) die_errno(_("could not write multi-pack-index")); - clear_midx_files(r, object_dir, keep_hashes, + clear_midx_files(source, keep_hashes, ctx.num_multi_pack_indexes_before + 1, ctx.incremental); @@ -1508,29 +1502,29 @@ static int write_midx_internal(struct repository *r, const char *object_dir, return result; } -int write_midx_file(struct repository *r, const char *object_dir, +int write_midx_file(struct odb_source *source, const char *preferred_pack_name, const char *refs_snapshot, unsigned flags) { - return write_midx_internal(r, object_dir, NULL, NULL, + return write_midx_internal(source, NULL, NULL, preferred_pack_name, refs_snapshot, flags); } -int write_midx_file_only(struct repository *r, const char *object_dir, +int write_midx_file_only(struct odb_source *source, struct string_list *packs_to_include, const char *preferred_pack_name, const char *refs_snapshot, unsigned flags) { - return write_midx_internal(r, object_dir, packs_to_include, NULL, + return write_midx_internal(source, packs_to_include, NULL, preferred_pack_name, refs_snapshot, flags); } -int expire_midx_packs(struct repository *r, const char *object_dir, unsigned flags) +int expire_midx_packs(struct odb_source *source, unsigned flags) { uint32_t i, *count, result = 0; struct string_list packs_to_drop = STRING_LIST_INIT_DUP; - struct multi_pack_index *m = lookup_multi_pack_index(r, object_dir); + struct multi_pack_index *m = get_multi_pack_index(source); struct progress *progress = NULL; if (!m) @@ -1543,7 +1537,7 @@ int expire_midx_packs(struct repository *r, const char *object_dir, unsigned fla if (flags & MIDX_PROGRESS) progress = start_delayed_progress( - r, + source->odb->repo, _("Counting referenced objects"), m->num_objects); for (i = 0; i < m->num_objects; i++) { @@ -1555,7 +1549,7 @@ int expire_midx_packs(struct repository *r, const char *object_dir, unsigned fla if (flags & MIDX_PROGRESS) progress = start_delayed_progress( - r, + source->odb->repo, _("Finding and deleting unreferenced packfiles"), m->num_packs); for (i = 0; i < m->num_packs; i++) { @@ -1583,7 +1577,7 @@ int expire_midx_packs(struct repository *r, const char *object_dir, unsigned fla free(count); if (packs_to_drop.nr) - result = write_midx_internal(r, object_dir, NULL, + result = write_midx_internal(source, NULL, &packs_to_drop, NULL, NULL, flags); string_list_clear(&packs_to_drop, 0); @@ -1708,14 +1702,15 @@ static void fill_included_packs_batch(struct repository *r, free(pack_info); } -int midx_repack(struct repository *r, const char *object_dir, size_t batch_size, unsigned flags) +int midx_repack(struct odb_source *source, size_t batch_size, unsigned flags) { + struct repository *r = source->odb->repo; int result = 0; uint32_t i, packs_to_repack = 0; unsigned char *include_pack; struct child_process cmd = CHILD_PROCESS_INIT; FILE *cmd_in; - struct multi_pack_index *m = lookup_multi_pack_index(r, object_dir); + struct multi_pack_index *m = get_multi_pack_index(source); /* * When updating the default for these configuration @@ -1749,7 +1744,7 @@ int midx_repack(struct repository *r, const char *object_dir, size_t batch_size, strvec_push(&cmd.args, "pack-objects"); - strvec_pushf(&cmd.args, "%s/pack/pack", object_dir); + strvec_pushf(&cmd.args, "%s/pack/pack", source->path); if (delta_base_offset) strvec_push(&cmd.args, "--delta-base-offset"); @@ -1790,7 +1785,7 @@ int midx_repack(struct repository *r, const char *object_dir, size_t batch_size, goto cleanup; } - result = write_midx_internal(r, object_dir, NULL, NULL, NULL, NULL, + result = write_midx_internal(source, NULL, NULL, NULL, NULL, flags); cleanup: diff --git a/midx.h b/midx.h index 970d043989a..d162001fbbe 100644 --- a/midx.h +++ b/midx.h @@ -126,17 +126,17 @@ int prepare_multi_pack_index_one(struct odb_source *source); * Variant of write_midx_file which writes a MIDX containing only the packs * specified in packs_to_include. */ -int write_midx_file(struct repository *r, const char *object_dir, +int write_midx_file(struct odb_source *source, const char *preferred_pack_name, const char *refs_snapshot, unsigned flags); -int write_midx_file_only(struct repository *r, const char *object_dir, +int write_midx_file_only(struct odb_source *source, struct string_list *packs_to_include, const char *preferred_pack_name, const char *refs_snapshot, unsigned flags); void clear_midx_file(struct repository *r); int verify_midx_file(struct odb_source *source, unsigned flags); -int expire_midx_packs(struct repository *r, const char *object_dir, unsigned flags); -int midx_repack(struct repository *r, const char *object_dir, size_t batch_size, unsigned flags); +int expire_midx_packs(struct odb_source *source, unsigned flags); +int midx_repack(struct odb_source *source, size_t batch_size, unsigned flags); void close_midx(struct multi_pack_index *m); -- GitLab From 7744936f374308d6fa3c6e317fb8fe0b685d0ef2 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 11 Aug 2025 15:46:49 +0200 Subject: [PATCH 09/26] midx: stop duplicating info redundant with its owning source Multi-pack indices store some information that is redundant with their owning source: - The locality bit that tracks whether the source is the primary object source or an alternate. - The object directory path the multi-pack index is located in. - The pointer to the owning parent directory. All of this information is already contained in `struct odb_source`. So now that we always have that struct available when loading a multi-pack index we have it readily accessible. Drop the redundant information and instead store a pointer to the object source. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/repack.c | 5 +++-- midx-write.c | 9 +++++---- midx.c | 21 +++++++++++---------- midx.h | 7 ++----- pack-bitmap.c | 13 +++++++------ pack-revindex.c | 14 +++++++------- t/helper/test-read-midx.c | 2 +- 7 files changed, 36 insertions(+), 35 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 94dec26f185..5af3e27357c 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -223,9 +223,10 @@ static void mark_packs_for_deletion(struct existing_packs *existing, static void remove_redundant_pack(const char *dir_name, const char *base_name) { struct strbuf buf = STRBUF_INIT; - struct multi_pack_index *m = get_multi_pack_index(the_repository->objects->sources); + struct odb_source *source = the_repository->objects->sources; + struct multi_pack_index *m = get_multi_pack_index(source); strbuf_addf(&buf, "%s.pack", base_name); - if (m && m->local && midx_contains_pack(m, buf.buf)) + if (m && source->local && midx_contains_pack(m, buf.buf)) clear_midx_file(the_repository); strbuf_insertf(&buf, 0, "%s/", dir_name); unlink_pack_path(buf.buf, 1); diff --git a/midx-write.c b/midx-write.c index bf7c01d4b1d..84f76856d67 100644 --- a/midx-write.c +++ b/midx-write.c @@ -981,10 +981,11 @@ static int link_midx_to_chain(struct multi_pack_index *m) for (i = 0; i < ARRAY_SIZE(midx_exts); i++) { const unsigned char *hash = get_midx_checksum(m); - get_midx_filename_ext(m->repo->hash_algo, &from, m->object_dir, + get_midx_filename_ext(m->source->odb->repo->hash_algo, &from, + m->source->path, hash, midx_exts[i].non_split); - get_split_midx_filename_ext(m->repo->hash_algo, &to, - m->object_dir, hash, + get_split_midx_filename_ext(m->source->odb->repo->hash_algo, &to, + m->source->path, hash, midx_exts[i].split); if (link(from.buf, to.buf) < 0 && errno != ENOENT) { @@ -1109,7 +1110,7 @@ static int write_midx_internal(struct odb_source *source, if (flags & MIDX_WRITE_BITMAP && load_midx_revindex(m)) { error(_("could not load reverse index for MIDX %s"), hash_to_hex_algop(get_midx_checksum(m), - m->repo->hash_algo)); + m->source->odb->repo->hash_algo)); result = 1; goto cleanup; } diff --git a/midx.c b/midx.c index 831a7e9b5f2..81bf3c4d5f3 100644 --- a/midx.c +++ b/midx.c @@ -26,7 +26,7 @@ int cmp_idx_or_pack_name(const char *idx_or_pack_name, const unsigned char *get_midx_checksum(struct multi_pack_index *m) { - return m->data + m->data_len - m->repo->hash_algo->rawsz; + return m->data + m->data_len - m->source->odb->repo->hash_algo->rawsz; } void get_midx_filename(const struct git_hash_algo *hash_algo, @@ -128,11 +128,10 @@ static struct multi_pack_index *load_multi_pack_index_one(struct odb_source *sou midx_map = xmmap(NULL, midx_size, PROT_READ, MAP_PRIVATE, fd, 0); close(fd); - FLEX_ALLOC_STR(m, object_dir, source->path); + CALLOC_ARRAY(m, 1); m->data = midx_map; m->data_len = midx_size; - m->local = source->local; - m->repo = r; + m->source = source; m->signature = get_be32(m->data); if (m->signature != MIDX_SIGNATURE) @@ -446,7 +445,7 @@ static uint32_t midx_for_pack(struct multi_pack_index **_m, int prepare_midx_pack(struct multi_pack_index *m, uint32_t pack_int_id) { - struct repository *r = m->repo; + struct repository *r = m->source->odb->repo; struct strbuf pack_name = STRBUF_INIT; struct strbuf key = STRBUF_INIT; struct packed_git *p; @@ -458,7 +457,7 @@ int prepare_midx_pack(struct multi_pack_index *m, if (m->packs[pack_int_id]) return 0; - strbuf_addf(&pack_name, "%s/pack/%s", m->object_dir, + strbuf_addf(&pack_name, "%s/pack/%s", m->source->path, m->pack_names[pack_int_id]); /* pack_map holds the ".pack" name, but we have the .idx */ @@ -469,7 +468,8 @@ int prepare_midx_pack(struct multi_pack_index *m, strhash(key.buf), key.buf, struct packed_git, packmap_ent); if (!p) { - p = add_packed_git(r, pack_name.buf, pack_name.len, m->local); + p = add_packed_git(r, pack_name.buf, pack_name.len, + m->source->local); if (p) { install_packed_git(r, p); list_add_tail(&p->mru, &r->objects->packed_git_mru); @@ -528,7 +528,8 @@ int bsearch_one_midx(const struct object_id *oid, struct multi_pack_index *m, uint32_t *result) { int ret = bsearch_hash(oid->hash, m->chunk_oid_fanout, - m->chunk_oid_lookup, m->repo->hash_algo->rawsz, + m->chunk_oid_lookup, + m->source->odb->repo->hash_algo->rawsz, result); if (result) *result += m->num_objects_in_base; @@ -559,7 +560,7 @@ struct object_id *nth_midxed_object_oid(struct object_id *oid, n = midx_for_object(&m, n); oidread(oid, m->chunk_oid_lookup + st_mult(m->hash_len, n), - m->repo->hash_algo); + m->source->odb->repo->hash_algo); return oid; } @@ -734,7 +735,7 @@ int prepare_multi_pack_index_one(struct odb_source *source) int midx_checksum_valid(struct multi_pack_index *m) { - return hashfile_checksum_valid(m->repo->hash_algo, + return hashfile_checksum_valid(m->source->odb->repo->hash_algo, m->data, m->data_len); } diff --git a/midx.h b/midx.h index d162001fbbe..71dbdec66ef 100644 --- a/midx.h +++ b/midx.h @@ -35,6 +35,8 @@ struct odb_source; "GIT_TEST_MULTI_PACK_INDEX_WRITE_INCREMENTAL" struct multi_pack_index { + struct odb_source *source; + const unsigned char *data; size_t data_len; @@ -50,7 +52,6 @@ struct multi_pack_index { uint32_t num_objects; int preferred_pack_idx; - int local; int has_chain; const unsigned char *chunk_pack_names; @@ -71,10 +72,6 @@ struct multi_pack_index { const char **pack_names; struct packed_git **packs; - - struct repository *repo; - - char object_dir[FLEX_ARRAY]; }; #define MIDX_PROGRESS (1 << 0) diff --git a/pack-bitmap.c b/pack-bitmap.c index fb0b11ca073..01e14c34bd0 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -216,7 +216,7 @@ static uint32_t bitmap_num_objects(struct bitmap_index *index) static struct repository *bitmap_repo(struct bitmap_index *bitmap_git) { if (bitmap_is_midx(bitmap_git)) - return bitmap_git->midx->repo; + return bitmap_git->midx->source->odb->repo; return bitmap_git->pack->repo; } @@ -418,13 +418,13 @@ char *midx_bitmap_filename(struct multi_pack_index *midx) { struct strbuf buf = STRBUF_INIT; if (midx->has_chain) - get_split_midx_filename_ext(midx->repo->hash_algo, &buf, - midx->object_dir, + get_split_midx_filename_ext(midx->source->odb->repo->hash_algo, &buf, + midx->source->path, get_midx_checksum(midx), MIDX_EXT_BITMAP); else - get_midx_filename_ext(midx->repo->hash_algo, &buf, - midx->object_dir, get_midx_checksum(midx), + get_midx_filename_ext(midx->source->odb->repo->hash_algo, &buf, + midx->source->path, get_midx_checksum(midx), MIDX_EXT_BITMAP); return strbuf_detach(&buf, NULL); @@ -463,7 +463,8 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git, if (bitmap_git->pack || bitmap_git->midx) { struct strbuf buf = STRBUF_INIT; - get_midx_filename(midx->repo->hash_algo, &buf, midx->object_dir); + get_midx_filename(midx->source->odb->repo->hash_algo, &buf, + midx->source->path); trace2_data_string("bitmap", bitmap_repo(bitmap_git), "ignoring extra midx bitmap file", buf.buf); close(fd); diff --git a/pack-revindex.c b/pack-revindex.c index 0cc422a1e67..b206518dcb5 100644 --- a/pack-revindex.c +++ b/pack-revindex.c @@ -379,25 +379,25 @@ int load_midx_revindex(struct multi_pack_index *m) * not want to accidentally call munmap() in the middle of the * MIDX. */ - trace2_data_string("load_midx_revindex", m->repo, + trace2_data_string("load_midx_revindex", m->source->odb->repo, "source", "midx"); m->revindex_data = (const uint32_t *)m->chunk_revindex; return 0; } - trace2_data_string("load_midx_revindex", m->repo, + trace2_data_string("load_midx_revindex", m->source->odb->repo, "source", "rev"); if (m->has_chain) - get_split_midx_filename_ext(m->repo->hash_algo, &revindex_name, - m->object_dir, get_midx_checksum(m), + get_split_midx_filename_ext(m->source->odb->repo->hash_algo, &revindex_name, + m->source->path, get_midx_checksum(m), MIDX_EXT_REV); else - get_midx_filename_ext(m->repo->hash_algo, &revindex_name, - m->object_dir, get_midx_checksum(m), + get_midx_filename_ext(m->source->odb->repo->hash_algo, &revindex_name, + m->source->path, get_midx_checksum(m), MIDX_EXT_REV); - ret = load_revindex_from_disk(m->repo->hash_algo, + ret = load_revindex_from_disk(m->source->odb->repo->hash_algo, revindex_name.buf, m->num_objects, &m->revindex_map, diff --git a/t/helper/test-read-midx.c b/t/helper/test-read-midx.c index bcb8ea76717..6de5d1665af 100644 --- a/t/helper/test-read-midx.c +++ b/t/helper/test-read-midx.c @@ -66,7 +66,7 @@ static int read_midx_file(const char *object_dir, const char *checksum, for (i = 0; i < m->num_packs; i++) printf("%s\n", m->pack_names[i]); - printf("object-dir: %s\n", m->object_dir); + printf("object-dir: %s\n", m->source->path); if (show_objects) { struct object_id oid; -- GitLab From 13296ac909d53e14712f89a7f4fda94dd0465479 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 11 Aug 2025 15:46:50 +0200 Subject: [PATCH 10/26] midx: compute paths via their source With the preceding commits we started to always have the object database source available when we load, write or access multi-pack indices. With this in place we can change how MIDX paths are computed so that we don't have to pass in the combination of a hash algorithm and object directory anymore, but only the object database source. Refactor the code accordingly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- midx-write.c | 52 ++++++++++++++++++++++------------------------- midx.c | 54 +++++++++++++++++++++++-------------------------- midx.h | 13 +++++------- pack-bitmap.c | 10 ++++----- pack-revindex.c | 8 ++++---- 5 files changed, 62 insertions(+), 75 deletions(-) diff --git a/midx-write.c b/midx-write.c index 84f76856d67..1dcdf3dc0f1 100644 --- a/midx-write.c +++ b/midx-write.c @@ -26,9 +26,9 @@ #define MIDX_CHUNK_LARGE_OFFSET_WIDTH (sizeof(uint64_t)) extern int midx_checksum_valid(struct multi_pack_index *m); -extern void clear_midx_files_ext(const char *object_dir, const char *ext, +extern void clear_midx_files_ext(struct odb_source *source, const char *ext, const char *keep_hash); -extern void clear_incremental_midx_files_ext(const char *object_dir, +extern void clear_incremental_midx_files_ext(struct odb_source *source, const char *ext, const char **keep_hashes, uint32_t hashes_nr); @@ -112,6 +112,7 @@ struct write_midx_context { struct string_list *to_include; struct repository *repo; + struct odb_source *source; }; static int should_include_pack(const struct write_midx_context *ctx, @@ -648,7 +649,6 @@ static uint32_t *midx_pack_order(struct write_midx_context *ctx) } static void write_midx_reverse_index(struct write_midx_context *ctx, - const char *object_dir, unsigned char *midx_hash) { struct strbuf buf = STRBUF_INIT; @@ -657,11 +657,10 @@ static void write_midx_reverse_index(struct write_midx_context *ctx, trace2_region_enter("midx", "write_midx_reverse_index", ctx->repo); if (ctx->incremental) - get_split_midx_filename_ext(ctx->repo->hash_algo, &buf, - object_dir, midx_hash, - MIDX_EXT_REV); + get_split_midx_filename_ext(ctx->source, &buf, + midx_hash, MIDX_EXT_REV); else - get_midx_filename_ext(ctx->repo->hash_algo, &buf, object_dir, + get_midx_filename_ext(ctx->source, &buf, midx_hash, MIDX_EXT_REV); tmp_file = write_rev_file_order(ctx->repo, NULL, ctx->pack_order, @@ -836,7 +835,6 @@ static struct commit **find_commits_for_midx_bitmap(uint32_t *indexed_commits_nr } static int write_midx_bitmap(struct write_midx_context *ctx, - const char *object_dir, const unsigned char *midx_hash, struct packing_data *pdata, struct commit **commits, @@ -852,12 +850,11 @@ static int write_midx_bitmap(struct write_midx_context *ctx, trace2_region_enter("midx", "write_midx_bitmap", ctx->repo); if (ctx->incremental) - get_split_midx_filename_ext(ctx->repo->hash_algo, &bitmap_name, - object_dir, midx_hash, - MIDX_EXT_BITMAP); + get_split_midx_filename_ext(ctx->source, &bitmap_name, + midx_hash, MIDX_EXT_BITMAP); else - get_midx_filename_ext(ctx->repo->hash_algo, &bitmap_name, - object_dir, midx_hash, MIDX_EXT_BITMAP); + get_midx_filename_ext(ctx->source, &bitmap_name, + midx_hash, MIDX_EXT_BITMAP); if (flags & MIDX_WRITE_BITMAP_HASH_CACHE) options |= BITMAP_OPT_HASH_CACHE; @@ -981,11 +978,9 @@ static int link_midx_to_chain(struct multi_pack_index *m) for (i = 0; i < ARRAY_SIZE(midx_exts); i++) { const unsigned char *hash = get_midx_checksum(m); - get_midx_filename_ext(m->source->odb->repo->hash_algo, &from, - m->source->path, + get_midx_filename_ext(m->source, &from, hash, midx_exts[i].non_split); - get_split_midx_filename_ext(m->source->odb->repo->hash_algo, &to, - m->source->path, hash, + get_split_midx_filename_ext(m->source, &to, hash, midx_exts[i].split); if (link(from.buf, to.buf) < 0 && errno != ENOENT) { @@ -1023,16 +1018,16 @@ static void clear_midx_files(struct odb_source *source, uint32_t i, j; for (i = 0; i < ARRAY_SIZE(exts); i++) { - clear_incremental_midx_files_ext(source->path, exts[i], + clear_incremental_midx_files_ext(source, exts[i], hashes, hashes_nr); for (j = 0; j < hashes_nr; j++) - clear_midx_files_ext(source->path, exts[i], hashes[j]); + clear_midx_files_ext(source, exts[i], hashes[j]); } if (incremental) - get_midx_filename(source->odb->repo->hash_algo, &buf, source->path); + get_midx_filename(source, &buf); else - get_midx_chain_filename(&buf, source->path); + get_midx_chain_filename(source, &buf); if (unlink(buf.buf) && errno != ENOENT) die_errno(_("failed to clear multi-pack-index at %s"), buf.buf); @@ -1065,6 +1060,7 @@ static int write_midx_internal(struct odb_source *source, trace2_region_enter("midx", "write_midx_internal", r); ctx.repo = r; + ctx.source = source; ctx.incremental = !!(flags & MIDX_WRITE_INCREMENTAL); @@ -1073,7 +1069,7 @@ static int write_midx_internal(struct odb_source *source, "%s/pack/multi-pack-index.d/tmp_midx_XXXXXX", source->path); else - get_midx_filename(r->hash_algo, &midx_name, source->path); + get_midx_filename(source, &midx_name); if (safe_create_leading_directories(r, midx_name.buf)) die_errno(_("unable to create leading directories of %s"), midx_name.buf); @@ -1153,7 +1149,7 @@ static int write_midx_internal(struct odb_source *source, * corresponding bitmap (or one wasn't requested). */ if (!want_bitmap) - clear_midx_files_ext(source->path, "bitmap", NULL); + clear_midx_files_ext(source, "bitmap", NULL); goto cleanup; } } @@ -1321,7 +1317,7 @@ static int write_midx_internal(struct odb_source *source, if (ctx.incremental) { struct strbuf lock_name = STRBUF_INIT; - get_midx_chain_filename(&lock_name, source->path); + get_midx_chain_filename(source, &lock_name); hold_lock_file_for_update(&lk, lock_name.buf, LOCK_DIE_ON_ERROR); strbuf_release(&lock_name); @@ -1384,7 +1380,7 @@ static int write_midx_internal(struct odb_source *source, if (flags & MIDX_WRITE_REV_INDEX && git_env_bool("GIT_TEST_MIDX_WRITE_REV", 0)) - write_midx_reverse_index(&ctx, source->path, midx_hash); + write_midx_reverse_index(&ctx, midx_hash); if (flags & MIDX_WRITE_BITMAP) { struct packing_data pdata; @@ -1407,7 +1403,7 @@ static int write_midx_internal(struct odb_source *source, FREE_AND_NULL(ctx.entries); ctx.entries_nr = 0; - if (write_midx_bitmap(&ctx, source->path, + if (write_midx_bitmap(&ctx, midx_hash, &pdata, commits, commits_nr, flags) < 0) { error(_("could not write multi-pack bitmap")); @@ -1440,8 +1436,8 @@ static int write_midx_internal(struct odb_source *source, if (link_midx_to_chain(ctx.base_midx) < 0) return -1; - get_split_midx_filename_ext(r->hash_algo, &final_midx_name, - source->path, midx_hash, MIDX_EXT_MIDX); + get_split_midx_filename_ext(source, &final_midx_name, + midx_hash, MIDX_EXT_MIDX); if (rename_tempfile(&incr, final_midx_name.buf) < 0) { error_errno(_("unable to rename new multi-pack-index layer")); diff --git a/midx.c b/midx.c index 81bf3c4d5f3..7726c13d7e7 100644 --- a/midx.c +++ b/midx.c @@ -16,9 +16,9 @@ #define MIDX_PACK_ERROR ((void *)(intptr_t)-1) int midx_checksum_valid(struct multi_pack_index *m); -void clear_midx_files_ext(const char *object_dir, const char *ext, +void clear_midx_files_ext(struct odb_source *source, const char *ext, const char *keep_hash); -void clear_incremental_midx_files_ext(const char *object_dir, const char *ext, +void clear_incremental_midx_files_ext(struct odb_source *source, const char *ext, char **keep_hashes, uint32_t hashes_nr); int cmp_idx_or_pack_name(const char *idx_or_pack_name, @@ -29,19 +29,17 @@ const unsigned char *get_midx_checksum(struct multi_pack_index *m) return m->data + m->data_len - m->source->odb->repo->hash_algo->rawsz; } -void get_midx_filename(const struct git_hash_algo *hash_algo, - struct strbuf *out, const char *object_dir) +void get_midx_filename(struct odb_source *source, struct strbuf *out) { - get_midx_filename_ext(hash_algo, out, object_dir, NULL, NULL); + get_midx_filename_ext(source, out, NULL, NULL); } -void get_midx_filename_ext(const struct git_hash_algo *hash_algo, - struct strbuf *out, const char *object_dir, +void get_midx_filename_ext(struct odb_source *source, struct strbuf *out, const unsigned char *hash, const char *ext) { - strbuf_addf(out, "%s/pack/multi-pack-index", object_dir); + strbuf_addf(out, "%s/pack/multi-pack-index", source->path); if (ext) - strbuf_addf(out, "-%s.%s", hash_to_hex_algop(hash, hash_algo), ext); + strbuf_addf(out, "-%s.%s", hash_to_hex_algop(hash, source->odb->repo->hash_algo), ext); } static int midx_read_oid_fanout(const unsigned char *chunk_start, @@ -222,24 +220,23 @@ static struct multi_pack_index *load_multi_pack_index_one(struct odb_source *sou return NULL; } -void get_midx_chain_dirname(struct strbuf *buf, const char *object_dir) +void get_midx_chain_dirname(struct odb_source *source, struct strbuf *buf) { - strbuf_addf(buf, "%s/pack/multi-pack-index.d", object_dir); + strbuf_addf(buf, "%s/pack/multi-pack-index.d", source->path); } -void get_midx_chain_filename(struct strbuf *buf, const char *object_dir) +void get_midx_chain_filename(struct odb_source *source, struct strbuf *buf) { - get_midx_chain_dirname(buf, object_dir); + get_midx_chain_dirname(source, buf); strbuf_addstr(buf, "/multi-pack-index-chain"); } -void get_split_midx_filename_ext(const struct git_hash_algo *hash_algo, - struct strbuf *buf, const char *object_dir, +void get_split_midx_filename_ext(struct odb_source *source, struct strbuf *buf, const unsigned char *hash, const char *ext) { - get_midx_chain_dirname(buf, object_dir); + get_midx_chain_dirname(source, buf); strbuf_addf(buf, "/multi-pack-index-%s.%s", - hash_to_hex_algop(hash, hash_algo), ext); + hash_to_hex_algop(hash, source->odb->repo->hash_algo), ext); } static int open_multi_pack_index_chain(const struct git_hash_algo *hash_algo, @@ -326,7 +323,7 @@ static struct multi_pack_index *load_midx_chain_fd_st(struct odb_source *source, valid = 0; strbuf_reset(&buf); - get_split_midx_filename_ext(hash_algo, &buf, source->path, + get_split_midx_filename_ext(source, &buf, layer.hash, MIDX_EXT_MIDX); m = load_multi_pack_index_one(source, buf.buf); @@ -358,7 +355,7 @@ static struct multi_pack_index *load_multi_pack_index_chain(struct odb_source *s int fd; struct multi_pack_index *m = NULL; - get_midx_chain_filename(&chain_file, source->path); + get_midx_chain_filename(source, &chain_file); if (open_multi_pack_index_chain(source->odb->repo->hash_algo, chain_file.buf, &fd, &st)) { int incomplete; /* ownership of fd is taken over by load function */ @@ -374,8 +371,7 @@ struct multi_pack_index *load_multi_pack_index(struct odb_source *source) struct strbuf midx_name = STRBUF_INIT; struct multi_pack_index *m; - get_midx_filename(source->odb->repo->hash_algo, &midx_name, - source->path); + get_midx_filename(source, &midx_name); m = load_multi_pack_index_one(source, midx_name.buf); if (!m) @@ -762,7 +758,7 @@ static void clear_midx_file_ext(const char *full_path, size_t full_path_len UNUS die_errno(_("failed to remove %s"), full_path); } -void clear_midx_files_ext(const char *object_dir, const char *ext, +void clear_midx_files_ext(struct odb_source *source, const char *ext, const char *keep_hash) { struct clear_midx_data data; @@ -776,7 +772,7 @@ void clear_midx_files_ext(const char *object_dir, const char *ext, } data.ext = ext; - for_each_file_in_pack_dir(object_dir, + for_each_file_in_pack_dir(source->path, clear_midx_file_ext, &data); @@ -785,7 +781,7 @@ void clear_midx_files_ext(const char *object_dir, const char *ext, free(data.keep); } -void clear_incremental_midx_files_ext(const char *object_dir, const char *ext, +void clear_incremental_midx_files_ext(struct odb_source *source, const char *ext, char **keep_hashes, uint32_t hashes_nr) { @@ -801,7 +797,7 @@ void clear_incremental_midx_files_ext(const char *object_dir, const char *ext, data.keep_nr = hashes_nr; data.ext = ext; - for_each_file_in_pack_subdir(object_dir, "multi-pack-index.d", + for_each_file_in_pack_subdir(source->path, "multi-pack-index.d", clear_midx_file_ext, &data); for (i = 0; i < hashes_nr; i++) @@ -813,7 +809,7 @@ void clear_midx_file(struct repository *r) { struct strbuf midx = STRBUF_INIT; - get_midx_filename(r->hash_algo, &midx, r->objects->sources->path); + get_midx_filename(r->objects->sources, &midx); if (r->objects) { struct odb_source *source; @@ -828,8 +824,8 @@ void clear_midx_file(struct repository *r) if (remove_path(midx.buf)) die(_("failed to clear multi-pack-index at %s"), midx.buf); - clear_midx_files_ext(r->objects->sources->path, MIDX_EXT_BITMAP, NULL); - clear_midx_files_ext(r->objects->sources->path, MIDX_EXT_REV, NULL); + clear_midx_files_ext(r->objects->sources, MIDX_EXT_BITMAP, NULL); + clear_midx_files_ext(r->objects->sources, MIDX_EXT_REV, NULL); strbuf_release(&midx); } @@ -888,7 +884,7 @@ int verify_midx_file(struct odb_source *source, unsigned flags) struct stat sb; struct strbuf filename = STRBUF_INIT; - get_midx_filename(r->hash_algo, &filename, source->path); + get_midx_filename(source, &filename); if (!stat(filename.buf, &sb)) { error(_("multi-pack-index file exists, but failed to parse")); diff --git a/midx.h b/midx.h index 71dbdec66ef..e241d2d6900 100644 --- a/midx.h +++ b/midx.h @@ -86,15 +86,12 @@ struct multi_pack_index { #define MIDX_EXT_MIDX "midx" const unsigned char *get_midx_checksum(struct multi_pack_index *m); -void get_midx_filename(const struct git_hash_algo *hash_algo, - struct strbuf *out, const char *object_dir); -void get_midx_filename_ext(const struct git_hash_algo *hash_algo, - struct strbuf *out, const char *object_dir, +void get_midx_filename(struct odb_source *source, struct strbuf *out); +void get_midx_filename_ext(struct odb_source *source, struct strbuf *out, const unsigned char *hash, const char *ext); -void get_midx_chain_dirname(struct strbuf *buf, const char *object_dir); -void get_midx_chain_filename(struct strbuf *buf, const char *object_dir); -void get_split_midx_filename_ext(const struct git_hash_algo *hash_algo, - struct strbuf *buf, const char *object_dir, +void get_midx_chain_dirname(struct odb_source *source, struct strbuf *out); +void get_midx_chain_filename(struct odb_source *source, struct strbuf *out); +void get_split_midx_filename_ext(struct odb_source *source, struct strbuf *buf, const unsigned char *hash, const char *ext); struct multi_pack_index *load_multi_pack_index(struct odb_source *source); diff --git a/pack-bitmap.c b/pack-bitmap.c index 01e14c34bd0..058bdb5d7de 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -418,13 +418,12 @@ char *midx_bitmap_filename(struct multi_pack_index *midx) { struct strbuf buf = STRBUF_INIT; if (midx->has_chain) - get_split_midx_filename_ext(midx->source->odb->repo->hash_algo, &buf, - midx->source->path, + get_split_midx_filename_ext(midx->source, &buf, get_midx_checksum(midx), MIDX_EXT_BITMAP); else - get_midx_filename_ext(midx->source->odb->repo->hash_algo, &buf, - midx->source->path, get_midx_checksum(midx), + get_midx_filename_ext(midx->source, &buf, + get_midx_checksum(midx), MIDX_EXT_BITMAP); return strbuf_detach(&buf, NULL); @@ -463,8 +462,7 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git, if (bitmap_git->pack || bitmap_git->midx) { struct strbuf buf = STRBUF_INIT; - get_midx_filename(midx->source->odb->repo->hash_algo, &buf, - midx->source->path); + get_midx_filename(midx->source, &buf); trace2_data_string("bitmap", bitmap_repo(bitmap_git), "ignoring extra midx bitmap file", buf.buf); close(fd); diff --git a/pack-revindex.c b/pack-revindex.c index b206518dcb5..d0791cc4938 100644 --- a/pack-revindex.c +++ b/pack-revindex.c @@ -389,12 +389,12 @@ int load_midx_revindex(struct multi_pack_index *m) "source", "rev"); if (m->has_chain) - get_split_midx_filename_ext(m->source->odb->repo->hash_algo, &revindex_name, - m->source->path, get_midx_checksum(m), + get_split_midx_filename_ext(m->source, &revindex_name, + get_midx_checksum(m), MIDX_EXT_REV); else - get_midx_filename_ext(m->source->odb->repo->hash_algo, &revindex_name, - m->source->path, get_midx_checksum(m), + get_midx_filename_ext(m->source, &revindex_name, + get_midx_checksum(m), MIDX_EXT_REV); ret = load_revindex_from_disk(m->source->odb->repo->hash_algo, -- GitLab From d1e26fec756b498f1b1140a53abd69a89f1414b5 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 6 Aug 2025 14:54:20 +0200 Subject: [PATCH 11/26] packfile: carve out a new packfile store Hi, information about a object database's packfiles is currently distributed across two different structures: - `struct packed_git` contains the `next` pointer as well as the `mru_head`, both of which serve to store the list of packfiles. - `struct object_database` contains several fields that relate to the packfiles. So we don't really have a central data structure that tracks our packfiles, and consequently responsibilities aren't always clear cut. A consequence for the upcoming pluggable object databases is that this makes it very hard to move management of packfiles from the object database level down into the object database source. This patch series introduces a new `struct packfile_store`, which is about to become the single source of truth for managing packfiles, and carves out the packfile store subsystem. This is the first step to make packfiles work with pluggable object databases. Next steps will be to: - Move the `struct packed_git::next` and `struct packed::mru_head` pointers into the packfile store so that `struct packed_git` only tracks a single packfile. - Push the `struct packfile_store` down one level so that it's not hosted by the object database anymore, but instead by the object database source. Changes in v2: - Convert the `initialized` flag into a boolean. - Polish some commit messages. - Some smaller formatting changes to the layout of `struct object_database`. - Link to v1: https://lore.kernel.org/r/20250819-b4-pks-packfiles-store-v1-0-1660842e125a@pks.im Changes in v3: - Rebased on top of master at 6ad8021821 (The fifth batch, 2025-08-29) with ps/object-store-midx-dedup-info at 13296ac909 (midx: compute paths via their source, 2025-08-11) merged into it. This fixes various conflicts with "seen". There's still two conflicts: a trivial one with jt/de-global-bulk-checkin. And a more complex one with tb/prepare-midx-pack-cleanup. I don't think it's necessary to really address the first one, but I'm unsure how to proceed with the second one given that the patch series still seems to be cooking. - Set `struct object_database::packfiles` to `NULL` after free'ing it. - Add a comment to explain the kept cache. - Fix a missing `obj_read_lock()` call. - Drop the commit that always adds packfiles to the MRU. I've moved this into a subsequent patch series. - Avoid some overly long lines by storing the pointer to the packfile store on the stack. - Point out the difference between `get_all_packs()` and `get_packed_git()`. - Link to v2: https://lore.kernel.org/r/20250821-b4-pks-packfiles-store-v2-0-d10623355e9f@pks.im Changes in v4: - Small code style improvement as suggested by Junio. - Some commit message improvements as suggested by Karthik. - Link to v3: https://lore.kernel.org/r/20250902-b4-pks-packfiles-store-v3-0-6925278efeda@pks.im Changes in v5: - Stop closing packfiles when freeing the packfile store. - Don't drop `get_packed_git()` for now. This will be done in a subsequent patch series. - Link to v4: https://lore.kernel.org/r/20250909-b4-pks-packfiles-store-v4-0-151c4ba3619f@pks.im Changes in v6: - Fix a grammar issue in a commit message. - Update a comment to reflect that we access `struct packfile_store` in "midx.c" directly, as well. - Link to v5: https://lore.kernel.org/r/20250915-b4-pks-packfiles-store-v5-0-d6340350934f@pks.im Thanks! Patrick To: git@vger.kernel.org Cc: Karthik Nayak Cc: Jeff King Cc: Taylor Blau Cc: Junio C Hamano Cc: Justin Tobler --- b4-submit-tracking --- # This section is used internally by b4 prep for tracking purposes. { "series": { "revision": 6, "change-id": "20250806-b4-pks-packfiles-store-a44a608ca396", "prefixes": [], "history": { "v1": [ "20250819-b4-pks-packfiles-store-v1-0-1660842e125a@pks.im" ], "v2": [ "20250821-b4-pks-packfiles-store-v2-0-d10623355e9f@pks.im" ], "v3": [ "20250902-b4-pks-packfiles-store-v3-0-6925278efeda@pks.im" ], "v4": [ "20250909-b4-pks-packfiles-store-v4-0-151c4ba3619f@pks.im" ], "v5": [ "20250915-b4-pks-packfiles-store-v5-0-d6340350934f@pks.im" ] } } } -- GitLab From 976df0d77d47e85ddb1a64f349386cbb4dd816d3 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 6 Aug 2025 10:13:55 +0200 Subject: [PATCH 12/26] packfile: introduce a new `struct packfile_store` Information about an object database's packfiles is currently distributed across two different structures: - `struct packed_git` contains the `next` pointer as well as the `mru_head`, both of which serve to store the list of packfiles. - `struct object_database` contains several fields that relate to the packfiles. So we don't really have a central data structure that tracks our packfiles, and consequently responsibilities aren't always clear cut. A consequence for the upcoming pluggable object databases is that this makes it very hard to move management of packfiles from the object database level down into the object database source. Introduce a new `struct packfile_store` which is about to become the single source of truth for managing packfiles. Right now this data structure doesn't yet contain anything, but in subsequent patches we will move all data structures that relate to packfiles and that are currently contained in `struct object_database` into this new home. Note that this is only a first step: most importantly, we won't (yet) move the `struct packed_git::next` pointer around. This will happen in a subsequent patch series though so that `struct packed_git` will really only host information about the specific packfile it represents. Further note that the new structure still sits at the wrong level at the end of this patch series: as mentioned, it should eventually sit at the level of the object database source, not at the object database level. But introducing the packfile store now already makes it way easier to eventually push down the now-selfcontained data structure by one level. Signed-off-by: Patrick Steinhardt --- odb.c | 1 + odb.h | 3 ++- packfile.c | 13 +++++++++++++ packfile.h | 18 ++++++++++++++++++ 4 files changed, 34 insertions(+), 1 deletion(-) diff --git a/odb.c b/odb.c index 75c443fe665..a2289ea97df 100644 --- a/odb.c +++ b/odb.c @@ -996,6 +996,7 @@ struct object_database *odb_new(struct repository *repo) memset(o, 0, sizeof(*o)); o->repo = repo; + o->packfiles = packfile_store_new(o); INIT_LIST_HEAD(&o->packed_git_mru); hashmap_init(&o->pack_map, pack_map_entry_cmp, NULL, 0); pthread_mutex_init(&o->replace_mutex, NULL); diff --git a/odb.h b/odb.h index 51fe8a5a929..33034eaf2fe 100644 --- a/odb.h +++ b/odb.h @@ -91,6 +91,7 @@ struct odb_source { }; struct packed_git; +struct packfile_store; struct cached_object_entry; /* @@ -136,7 +137,7 @@ struct object_database { * * should only be accessed directly by packfile.c */ - + struct packfile_store *packfiles; struct packed_git *packed_git; /* A most-recently-used ordered version of the packed_git list. */ struct list_head packed_git_mru; diff --git a/packfile.c b/packfile.c index acb680966da..130d3e25073 100644 --- a/packfile.c +++ b/packfile.c @@ -2332,3 +2332,16 @@ int parse_pack_header_option(const char *in, unsigned char *out, unsigned int *l *len = hdr - out; return 0; } + +struct packfile_store *packfile_store_new(struct object_database *odb) +{ + struct packfile_store *store; + CALLOC_ARRAY(store, 1); + store->odb = odb; + return store; +} + +void packfile_store_free(struct packfile_store *store) +{ + free(store); +} diff --git a/packfile.h b/packfile.h index f16753f2a9b..8d31fd619ad 100644 --- a/packfile.h +++ b/packfile.h @@ -52,6 +52,24 @@ struct packed_git { char pack_name[FLEX_ARRAY]; /* more */ }; +/* + * A store that manages packfiles for a given object database. + */ +struct packfile_store { + struct object_database *odb; +}; + +/* + * Allocate and initialize a new empty packfile store for the given object + * database. + */ +struct packfile_store *packfile_store_new(struct object_database *odb); + +/* + * Free the packfile store and all its associated state. + */ +void packfile_store_free(struct packfile_store *store); + static inline int pack_map_entry_cmp(const void *cmp_data UNUSED, const struct hashmap_entry *entry, const struct hashmap_entry *entry2, -- GitLab From 0aa64a5a70f23aedc94c27247b4f194d16754aba Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 20 Aug 2025 06:42:20 +0200 Subject: [PATCH 13/26] odb: move list of packfiles into `struct packfile_store` The object database tracks the list of packfiles it currently knows about. With the introduction of the `struct packfile_store` we have a better place to host this list though. Move the list accordingly. Extract the logic from `odb_clear()` that knows to close all such packfiles and move it into the new subsystem, as well. Signed-off-by: Patrick Steinhardt --- odb.c | 12 ++---------- odb.h | 1 - packfile.c | 42 +++++++++++++++++++++++++----------------- packfile.h | 15 ++++++++++++++- 4 files changed, 41 insertions(+), 29 deletions(-) diff --git a/odb.c b/odb.c index a2289ea97df..7201d01406d 100644 --- a/odb.c +++ b/odb.c @@ -1038,16 +1038,8 @@ void odb_clear(struct object_database *o) INIT_LIST_HEAD(&o->packed_git_mru); close_object_store(o); - - /* - * `close_object_store()` only closes the packfiles, but doesn't free - * them. We thus have to do this manually. - */ - for (struct packed_git *p = o->packed_git, *next; p; p = next) { - next = p->next; - free(p); - } - o->packed_git = NULL; + packfile_store_free(o->packfiles); + o->packfiles = NULL; hashmap_clear(&o->pack_map); string_list_clear(&o->submodule_source_paths, 0); diff --git a/odb.h b/odb.h index 33034eaf2fe..22a170b434c 100644 --- a/odb.h +++ b/odb.h @@ -138,7 +138,6 @@ struct object_database { * should only be accessed directly by packfile.c */ struct packfile_store *packfiles; - struct packed_git *packed_git; /* A most-recently-used ordered version of the packed_git list. */ struct list_head packed_git_mru; diff --git a/packfile.c b/packfile.c index 130d3e25073..36bc240107b 100644 --- a/packfile.c +++ b/packfile.c @@ -278,7 +278,7 @@ static int unuse_one_window(struct packed_git *current) if (current) scan_windows(current, &lru_p, &lru_w, &lru_l); - for (p = current->repo->objects->packed_git; p; p = p->next) + for (p = current->repo->objects->packfiles->packs; p; p = p->next) scan_windows(p, &lru_p, &lru_w, &lru_l); if (lru_p) { munmap(lru_w->base, lru_w->len); @@ -362,13 +362,8 @@ void close_pack(struct packed_git *p) void close_object_store(struct object_database *o) { struct odb_source *source; - struct packed_git *p; - for (p = o->packed_git; p; p = p->next) - if (p->do_not_close) - BUG("want to close pack marked 'do-not-close'"); - else - close_pack(p); + packfile_store_close(o->packfiles); for (source = o->sources; source; source = source->next) { if (source->midx) @@ -468,7 +463,7 @@ static int close_one_pack(struct repository *r) struct pack_window *mru_w = NULL; int accept_windows_inuse = 1; - for (p = r->objects->packed_git; p; p = p->next) { + for (p = r->objects->packfiles->packs; p; p = p->next) { if (p->pack_fd == -1) continue; find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse); @@ -789,8 +784,8 @@ void install_packed_git(struct repository *r, struct packed_git *pack) if (pack->pack_fd != -1) pack_open_fds++; - pack->next = r->objects->packed_git; - r->objects->packed_git = pack; + pack->next = r->objects->packfiles->packs; + r->objects->packfiles->packs = pack; hashmap_entry_init(&pack->packmap_ent, strhash(pack->pack_name)); hashmap_add(&r->objects->pack_map, &pack->packmap_ent); @@ -974,7 +969,7 @@ unsigned long repo_approximate_object_count(struct repository *r) count += m->num_objects; } - for (p = r->objects->packed_git; p; p = p->next) { + for (p = r->objects->packfiles->packs; p; p = p->next) { if (open_pack_index(p)) continue; count += p->num_objects; @@ -1015,7 +1010,7 @@ static int sort_pack(const struct packed_git *a, const struct packed_git *b) static void rearrange_packed_git(struct repository *r) { - sort_packs(&r->objects->packed_git, sort_pack); + sort_packs(&r->objects->packfiles->packs, sort_pack); } static void prepare_packed_git_mru(struct repository *r) @@ -1024,7 +1019,7 @@ static void prepare_packed_git_mru(struct repository *r) INIT_LIST_HEAD(&r->objects->packed_git_mru); - for (p = r->objects->packed_git; p; p = p->next) + for (p = r->objects->packfiles->packs; p; p = p->next) list_add_tail(&p->mru, &r->objects->packed_git_mru); } @@ -1073,7 +1068,7 @@ void reprepare_packed_git(struct repository *r) struct packed_git *get_packed_git(struct repository *r) { prepare_packed_git(r); - return r->objects->packed_git; + return r->objects->packfiles->packs; } struct multi_pack_index *get_multi_pack_index(struct odb_source *source) @@ -1094,7 +1089,7 @@ struct packed_git *get_all_packs(struct repository *r) prepare_midx_pack(m, i); } - return r->objects->packed_git; + return r->objects->packfiles->packs; } struct list_head *get_packed_git_mru(struct repository *r) @@ -1219,7 +1214,7 @@ const struct packed_git *has_packed_and_bad(struct repository *r, { struct packed_git *p; - for (p = r->objects->packed_git; p; p = p->next) + for (p = r->objects->packfiles->packs; p; p = p->next) if (oidset_contains(&p->bad_objects, oid)) return p; return NULL; @@ -2080,7 +2075,7 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa if (source->midx && fill_midx_entry(source->midx, oid, e)) return 1; - if (!r->objects->packed_git) + if (!r->objects->packfiles->packs) return 0; list_for_each(pos, &r->objects->packed_git_mru) { @@ -2343,5 +2338,18 @@ struct packfile_store *packfile_store_new(struct object_database *odb) void packfile_store_free(struct packfile_store *store) { + for (struct packed_git *p = store->packs, *next; p; p = next) { + next = p->next; + free(p); + } free(store); } + +void packfile_store_close(struct packfile_store *store) +{ + for (struct packed_git *p = store->packs; p; p = p->next) { + if (p->do_not_close) + BUG("want to close pack marked 'do-not-close'"); + close_pack(p); + } +} diff --git a/packfile.h b/packfile.h index 8d31fd619ad..d7ac8d24b43 100644 --- a/packfile.h +++ b/packfile.h @@ -57,6 +57,12 @@ struct packed_git { */ struct packfile_store { struct object_database *odb; + + /* + * The list of packfiles in the order in which they are being added to + * the store. + */ + struct packed_git *packs; }; /* @@ -66,10 +72,17 @@ struct packfile_store { struct packfile_store *packfile_store_new(struct object_database *odb); /* - * Free the packfile store and all its associated state. + * Free the packfile store and all its associated state. All packfiles + * tracked by the store will be closed. */ void packfile_store_free(struct packfile_store *store); +/* + * Close all packfiles associated with this store. The packfiles won't be + * free'd, so they can be re-opened at a later point in time. + */ +void packfile_store_close(struct packfile_store *store); + static inline int pack_map_entry_cmp(const void *cmp_data UNUSED, const struct hashmap_entry *entry, const struct hashmap_entry *entry2, -- GitLab From 7681f7d72d098617db01b264a0f28900d4bb18a1 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 6 Aug 2025 10:24:49 +0200 Subject: [PATCH 14/26] odb: move initialization bit into `struct packfile_store` The object database knows to skip re-initializing the list of packfiles in case it's already been initialized. Whether or not that is the case is tracked via a separate `initialized` bit that is stored in the object database. With the introduction of the `struct packfile_store` we have a better place to host this bit though. Move it accordingly. While at it, convert the field into a boolean now that we're allowed to use them in our code base. Signed-off-by: Patrick Steinhardt --- odb.h | 6 ------ packfile.c | 6 +++--- packfile.h | 6 ++++++ 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/odb.h b/odb.h index 22a170b434c..bf1b4d46773 100644 --- a/odb.h +++ b/odb.h @@ -169,12 +169,6 @@ struct object_database { unsigned long approximate_object_count; unsigned approximate_object_count_valid : 1; - /* - * Whether packed_git has already been populated with this repository's - * packs. - */ - unsigned packed_git_initialized : 1; - /* * Submodule source paths that will be added as additional sources to * allow lookup of submodule objects via the main object database. diff --git a/packfile.c b/packfile.c index 36bc240107b..f37557eac54 100644 --- a/packfile.c +++ b/packfile.c @@ -1027,7 +1027,7 @@ static void prepare_packed_git(struct repository *r) { struct odb_source *source; - if (r->objects->packed_git_initialized) + if (r->objects->packfiles->initialized) return; odb_prepare_alternates(r->objects); @@ -1038,7 +1038,7 @@ static void prepare_packed_git(struct repository *r) rearrange_packed_git(r); prepare_packed_git_mru(r); - r->objects->packed_git_initialized = 1; + r->objects->packfiles->initialized = true; } void reprepare_packed_git(struct repository *r) @@ -1060,7 +1060,7 @@ void reprepare_packed_git(struct repository *r) odb_clear_loose_cache(source); r->objects->approximate_object_count_valid = 0; - r->objects->packed_git_initialized = 0; + r->objects->packfiles->initialized = false; prepare_packed_git(r); obj_read_unlock(); } diff --git a/packfile.h b/packfile.h index d7ac8d24b43..cf81091175f 100644 --- a/packfile.h +++ b/packfile.h @@ -63,6 +63,12 @@ struct packfile_store { * the store. */ struct packed_git *packs; + + /* + * Whether packfiles have already been populated with this store's + * packs. + */ + bool initialized; }; /* -- GitLab From b65334c5816dd63c5aeef7893b6f89597fc2dc19 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 6 Aug 2025 13:01:13 +0200 Subject: [PATCH 15/26] odb: move packfile map into `struct packfile_store` The object database tracks a map of packfiles by their respective paths, which is used to figure out whether a given packfile has already been loaded. With the introduction of the `struct packfile_store` we have a better place to host this list though. Move the map accordingly. `pack_map_entry_cmp()` isn't used anywhere but in "packfile.c" anymore after this change, so we convert it to a static function, as well. Note that we also drop the `inline` hint: the function is used as a callback function exclusively, and callbacks cannot be inlined. Signed-off-by: Patrick Steinhardt --- midx.c | 2 +- odb.c | 2 -- odb.h | 8 +------- packfile.c | 20 ++++++++++++++++++-- packfile.h | 20 ++++++-------------- 5 files changed, 26 insertions(+), 26 deletions(-) diff --git a/midx.c b/midx.c index 7726c13d7e7..e96970efbfb 100644 --- a/midx.c +++ b/midx.c @@ -460,7 +460,7 @@ int prepare_midx_pack(struct multi_pack_index *m, strbuf_addbuf(&key, &pack_name); strbuf_strip_suffix(&key, ".idx"); strbuf_addstr(&key, ".pack"); - p = hashmap_get_entry_from_hash(&r->objects->pack_map, + p = hashmap_get_entry_from_hash(&r->objects->packfiles->map, strhash(key.buf), key.buf, struct packed_git, packmap_ent); if (!p) { diff --git a/odb.c b/odb.c index 7201d01406d..737d98c9119 100644 --- a/odb.c +++ b/odb.c @@ -998,7 +998,6 @@ struct object_database *odb_new(struct repository *repo) o->repo = repo; o->packfiles = packfile_store_new(o); INIT_LIST_HEAD(&o->packed_git_mru); - hashmap_init(&o->pack_map, pack_map_entry_cmp, NULL, 0); pthread_mutex_init(&o->replace_mutex, NULL); string_list_init_dup(&o->submodule_source_paths); return o; @@ -1041,6 +1040,5 @@ void odb_clear(struct object_database *o) packfile_store_free(o->packfiles); o->packfiles = NULL; - hashmap_clear(&o->pack_map); string_list_clear(&o->submodule_source_paths, 0); } diff --git a/odb.h b/odb.h index bf1b4d46773..b79e7280c14 100644 --- a/odb.h +++ b/odb.h @@ -135,7 +135,7 @@ struct object_database { /* * private data * - * should only be accessed directly by packfile.c + * Should only be accessed directly by packfile.c and midx.c. */ struct packfile_store *packfiles; /* A most-recently-used ordered version of the packed_git list. */ @@ -155,12 +155,6 @@ struct object_database { struct cached_object_entry *cached_objects; size_t cached_object_nr, cached_object_alloc; - /* - * A map of packfiles to packed_git structs for tracking which - * packs have been loaded already. - */ - struct hashmap pack_map; - /* * A fast, rough count of the number of objects in the repository. * These two fields are not meant for direct access. Use diff --git a/packfile.c b/packfile.c index f37557eac54..17e0b8ab27e 100644 --- a/packfile.c +++ b/packfile.c @@ -788,7 +788,7 @@ void install_packed_git(struct repository *r, struct packed_git *pack) r->objects->packfiles->packs = pack; hashmap_entry_init(&pack->packmap_ent, strhash(pack->pack_name)); - hashmap_add(&r->objects->pack_map, &pack->packmap_ent); + hashmap_add(&r->objects->packfiles->map, &pack->packmap_ent); } void (*report_garbage)(unsigned seen_bits, const char *path); @@ -901,7 +901,7 @@ static void prepare_pack(const char *full_name, size_t full_name_len, hashmap_entry_init(&hent, hash); /* Don't reopen a pack we already have. */ - if (!hashmap_get(&data->r->objects->pack_map, &hent, pack_name)) { + if (!hashmap_get(&data->r->objects->packfiles->map, &hent, pack_name)) { p = add_packed_git(data->r, full_name, full_name_len, data->local); if (p) install_packed_git(data->r, p); @@ -2328,11 +2328,26 @@ int parse_pack_header_option(const char *in, unsigned char *out, unsigned int *l return 0; } +static int pack_map_entry_cmp(const void *cmp_data UNUSED, + const struct hashmap_entry *entry, + const struct hashmap_entry *entry2, + const void *keydata) +{ + const char *key = keydata; + const struct packed_git *pg1, *pg2; + + pg1 = container_of(entry, const struct packed_git, packmap_ent); + pg2 = container_of(entry2, const struct packed_git, packmap_ent); + + return strcmp(pg1->pack_name, key ? key : pg2->pack_name); +} + struct packfile_store *packfile_store_new(struct object_database *odb) { struct packfile_store *store; CALLOC_ARRAY(store, 1); store->odb = odb; + hashmap_init(&store->map, pack_map_entry_cmp, NULL, 0); return store; } @@ -2342,6 +2357,7 @@ void packfile_store_free(struct packfile_store *store) next = p->next; free(p); } + hashmap_clear(&store->map); free(store); } diff --git a/packfile.h b/packfile.h index cf81091175f..9bbef511647 100644 --- a/packfile.h +++ b/packfile.h @@ -64,6 +64,12 @@ struct packfile_store { */ struct packed_git *packs; + /* + * A map of packfile names to packed_git structs for tracking which + * packs have been loaded already. + */ + struct hashmap map; + /* * Whether packfiles have already been populated with this store's * packs. @@ -89,20 +95,6 @@ void packfile_store_free(struct packfile_store *store); */ void packfile_store_close(struct packfile_store *store); -static inline int pack_map_entry_cmp(const void *cmp_data UNUSED, - const struct hashmap_entry *entry, - const struct hashmap_entry *entry2, - const void *keydata) -{ - const char *key = keydata; - const struct packed_git *pg1, *pg2; - - pg1 = container_of(entry, const struct packed_git, packmap_ent); - pg2 = container_of(entry2, const struct packed_git, packmap_ent); - - return strcmp(pg1->pack_name, key ? key : pg2->pack_name); -} - struct pack_window { struct pack_window *next; unsigned char *base; -- GitLab From 5d067ef530edc57548ffd04c26ab0eb365a45126 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 6 Aug 2025 13:01:50 +0200 Subject: [PATCH 16/26] odb: move MRU list of packfiles into `struct packfile_store` The object database tracks the list of packfiles in most-recently-used order, which is mostly used to favor reading from packfiles that contain most of the objects that we're currently accessing. With the introduction of the `struct packfile_store` we have a better place to host this list though. Move the list accordingly. Signed-off-by: Patrick Steinhardt --- midx.c | 2 +- odb.c | 2 -- odb.h | 4 ---- packfile.c | 11 ++++++----- packfile.h | 3 +++ 5 files changed, 10 insertions(+), 12 deletions(-) diff --git a/midx.c b/midx.c index e96970efbfb..91c7b3917d6 100644 --- a/midx.c +++ b/midx.c @@ -468,7 +468,7 @@ int prepare_midx_pack(struct multi_pack_index *m, m->source->local); if (p) { install_packed_git(r, p); - list_add_tail(&p->mru, &r->objects->packed_git_mru); + list_add_tail(&p->mru, &r->objects->packfiles->mru); } } diff --git a/odb.c b/odb.c index 737d98c9119..32e982bf0b9 100644 --- a/odb.c +++ b/odb.c @@ -997,7 +997,6 @@ struct object_database *odb_new(struct repository *repo) memset(o, 0, sizeof(*o)); o->repo = repo; o->packfiles = packfile_store_new(o); - INIT_LIST_HEAD(&o->packed_git_mru); pthread_mutex_init(&o->replace_mutex, NULL); string_list_init_dup(&o->submodule_source_paths); return o; @@ -1035,7 +1034,6 @@ void odb_clear(struct object_database *o) free((char *) o->cached_objects[i].value.buf); FREE_AND_NULL(o->cached_objects); - INIT_LIST_HEAD(&o->packed_git_mru); close_object_store(o); packfile_store_free(o->packfiles); o->packfiles = NULL; diff --git a/odb.h b/odb.h index b79e7280c14..3044b6a6613 100644 --- a/odb.h +++ b/odb.h @@ -3,7 +3,6 @@ #include "hashmap.h" #include "object.h" -#include "list.h" #include "oidset.h" #include "oidmap.h" #include "string-list.h" @@ -138,9 +137,6 @@ struct object_database { * Should only be accessed directly by packfile.c and midx.c. */ struct packfile_store *packfiles; - /* A most-recently-used ordered version of the packed_git list. */ - struct list_head packed_git_mru; - struct { struct packed_git **packs; unsigned flags; diff --git a/packfile.c b/packfile.c index 17e0b8ab27e..861d7ffd6fa 100644 --- a/packfile.c +++ b/packfile.c @@ -1017,10 +1017,10 @@ static void prepare_packed_git_mru(struct repository *r) { struct packed_git *p; - INIT_LIST_HEAD(&r->objects->packed_git_mru); + INIT_LIST_HEAD(&r->objects->packfiles->mru); for (p = r->objects->packfiles->packs; p; p = p->next) - list_add_tail(&p->mru, &r->objects->packed_git_mru); + list_add_tail(&p->mru, &r->objects->packfiles->mru); } static void prepare_packed_git(struct repository *r) @@ -1095,7 +1095,7 @@ struct packed_git *get_all_packs(struct repository *r) struct list_head *get_packed_git_mru(struct repository *r) { prepare_packed_git(r); - return &r->objects->packed_git_mru; + return &r->objects->packfiles->mru; } unsigned long unpack_object_header_buffer(const unsigned char *buf, @@ -2078,10 +2078,10 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa if (!r->objects->packfiles->packs) return 0; - list_for_each(pos, &r->objects->packed_git_mru) { + list_for_each(pos, &r->objects->packfiles->mru) { struct packed_git *p = list_entry(pos, struct packed_git, mru); if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) { - list_move(&p->mru, &r->objects->packed_git_mru); + list_move(&p->mru, &r->objects->packfiles->mru); return 1; } } @@ -2347,6 +2347,7 @@ struct packfile_store *packfile_store_new(struct object_database *odb) struct packfile_store *store; CALLOC_ARRAY(store, 1); store->odb = odb; + INIT_LIST_HEAD(&store->mru); hashmap_init(&store->map, pack_map_entry_cmp, NULL, 0); return store; } diff --git a/packfile.h b/packfile.h index 9bbef511647..d48d46cc1bd 100644 --- a/packfile.h +++ b/packfile.h @@ -64,6 +64,9 @@ struct packfile_store { */ struct packed_git *packs; + /* A most-recently-used ordered version of the packs list. */ + struct list_head mru; + /* * A map of packfile names to packed_git structs for tracking which * packs have been loaded already. -- GitLab From 2d4cd7e1703b7046526c93333f5a0ba03d496e4a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 23 Sep 2025 11:32:09 +0200 Subject: [PATCH 17/26] odb: move kept cache into `struct packfile_store` The object database tracks a cache of "kept" packfiles, which is used by git-pack-objects(1) to handle cruft objects. With the introduction of the `struct packfile_store` we have a better place to host this cache though. Move the cache accordingly. This moves the last bit of packfile-related state from the object database into the packfile store. Adapt the comment for the `packfiles` pointer in `struct object_database` to reflect this. Signed-off-by: Patrick Steinhardt --- odb.h | 10 +--------- packfile.c | 16 ++++++++-------- packfile.h | 14 ++++++++++++++ 3 files changed, 23 insertions(+), 17 deletions(-) diff --git a/odb.h b/odb.h index 3044b6a6613..9dd7bb6bc3e 100644 --- a/odb.h +++ b/odb.h @@ -131,16 +131,8 @@ struct object_database { struct commit_graph *commit_graph; unsigned commit_graph_attempted : 1; /* if loading has been attempted */ - /* - * private data - * - * Should only be accessed directly by packfile.c and midx.c. - */ + /* Should only be accessed directly by packfile.c and midx.c. */ struct packfile_store *packfiles; - struct { - struct packed_git **packs; - unsigned flags; - } kept_pack_cache; /* * This is meant to hold a *small* number of objects that you would diff --git a/packfile.c b/packfile.c index 861d7ffd6fa..95a78f267f1 100644 --- a/packfile.c +++ b/packfile.c @@ -2091,19 +2091,19 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa static void maybe_invalidate_kept_pack_cache(struct repository *r, unsigned flags) { - if (!r->objects->kept_pack_cache.packs) + if (!r->objects->packfiles->kept_cache.packs) return; - if (r->objects->kept_pack_cache.flags == flags) + if (r->objects->packfiles->kept_cache.flags == flags) return; - FREE_AND_NULL(r->objects->kept_pack_cache.packs); - r->objects->kept_pack_cache.flags = 0; + FREE_AND_NULL(r->objects->packfiles->kept_cache.packs); + r->objects->packfiles->kept_cache.flags = 0; } struct packed_git **kept_pack_cache(struct repository *r, unsigned flags) { maybe_invalidate_kept_pack_cache(r, flags); - if (!r->objects->kept_pack_cache.packs) { + if (!r->objects->packfiles->kept_cache.packs) { struct packed_git **packs = NULL; size_t nr = 0, alloc = 0; struct packed_git *p; @@ -2126,11 +2126,11 @@ struct packed_git **kept_pack_cache(struct repository *r, unsigned flags) ALLOC_GROW(packs, nr + 1, alloc); packs[nr] = NULL; - r->objects->kept_pack_cache.packs = packs; - r->objects->kept_pack_cache.flags = flags; + r->objects->packfiles->kept_cache.packs = packs; + r->objects->packfiles->kept_cache.flags = flags; } - return r->objects->kept_pack_cache.packs; + return r->objects->packfiles->kept_cache.packs; } int find_kept_pack_entry(struct repository *r, diff --git a/packfile.h b/packfile.h index d48d46cc1bd..bf66211986e 100644 --- a/packfile.h +++ b/packfile.h @@ -64,6 +64,20 @@ struct packfile_store { */ struct packed_git *packs; + /* + * Cache of packfiles which are marked as "kept", either because there + * is an on-disk ".keep" file or because they are marked as "kept" in + * memory. + * + * Should not be accessed directly, but via `kept_pack_cache()`. The + * list of packs gets invalidated when the stored flags and the flags + * passed to `kept_pack_cache()` mismatch. + */ + struct { + struct packed_git **packs; + unsigned flags; + } kept_cache; + /* A most-recently-used ordered version of the packs list. */ struct list_head mru; -- GitLab From dd515d94dd71f556433b1f5e5e511fcd69c73a71 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 6 Aug 2025 10:45:22 +0200 Subject: [PATCH 18/26] packfile: reorder functions to avoid function declaration Reorder functions so that we can avoid a forward declaration of `prepare_packed_git()`. Signed-off-by: Patrick Steinhardt --- packfile.c | 67 +++++++++++++++++++++++++++--------------------------- 1 file changed, 33 insertions(+), 34 deletions(-) diff --git a/packfile.c b/packfile.c index 95a78f267f1..5588a7ad6df 100644 --- a/packfile.c +++ b/packfile.c @@ -946,40 +946,6 @@ static void prepare_packed_git_one(struct odb_source *source) string_list_clear(data.garbage, 0); } -static void prepare_packed_git(struct repository *r); -/* - * Give a fast, rough count of the number of objects in the repository. This - * ignores loose objects completely. If you have a lot of them, then either - * you should repack because your performance will be awful, or they are - * all unreachable objects about to be pruned, in which case they're not really - * interesting as a measure of repo size in the first place. - */ -unsigned long repo_approximate_object_count(struct repository *r) -{ - if (!r->objects->approximate_object_count_valid) { - struct odb_source *source; - unsigned long count = 0; - struct packed_git *p; - - prepare_packed_git(r); - - for (source = r->objects->sources; source; source = source->next) { - struct multi_pack_index *m = get_multi_pack_index(source); - if (m) - count += m->num_objects; - } - - for (p = r->objects->packfiles->packs; p; p = p->next) { - if (open_pack_index(p)) - continue; - count += p->num_objects; - } - r->objects->approximate_object_count = count; - r->objects->approximate_object_count_valid = 1; - } - return r->objects->approximate_object_count; -} - DEFINE_LIST_SORT(static, sort_packs, struct packed_git, next); static int sort_pack(const struct packed_git *a, const struct packed_git *b) @@ -1098,6 +1064,39 @@ struct list_head *get_packed_git_mru(struct repository *r) return &r->objects->packfiles->mru; } +/* + * Give a fast, rough count of the number of objects in the repository. This + * ignores loose objects completely. If you have a lot of them, then either + * you should repack because your performance will be awful, or they are + * all unreachable objects about to be pruned, in which case they're not really + * interesting as a measure of repo size in the first place. + */ +unsigned long repo_approximate_object_count(struct repository *r) +{ + if (!r->objects->approximate_object_count_valid) { + struct odb_source *source; + unsigned long count = 0; + struct packed_git *p; + + prepare_packed_git(r); + + for (source = r->objects->sources; source; source = source->next) { + struct multi_pack_index *m = get_multi_pack_index(source); + if (m) + count += m->num_objects; + } + + for (p = r->objects->packfiles->packs; p; p = p->next) { + if (open_pack_index(p)) + continue; + count += p->num_objects; + } + r->objects->approximate_object_count = count; + r->objects->approximate_object_count_valid = 1; + } + return r->objects->approximate_object_count; +} + unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep) { -- GitLab From 7c7d116314c2943263e9b2ac3e1b5e9a8895d9c7 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 2 Sep 2025 08:39:42 +0200 Subject: [PATCH 19/26] packfile: refactor `prepare_packed_git()` to work on packfile store The `prepare_packed_git()` function and its friends are responsible for loading packfiles as well as the multi-pack index for a given object database. Refactor these functions to accept a packfile store instead of a repository to clarify their scope. Signed-off-by: Patrick Steinhardt --- packfile.c | 41 ++++++++++++++++++----------------------- 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/packfile.c b/packfile.c index 5588a7ad6df..095c85919b6 100644 --- a/packfile.c +++ b/packfile.c @@ -974,37 +974,32 @@ static int sort_pack(const struct packed_git *a, const struct packed_git *b) return -1; } -static void rearrange_packed_git(struct repository *r) -{ - sort_packs(&r->objects->packfiles->packs, sort_pack); -} - -static void prepare_packed_git_mru(struct repository *r) +static void packfile_store_prepare_mru(struct packfile_store *store) { struct packed_git *p; - INIT_LIST_HEAD(&r->objects->packfiles->mru); + INIT_LIST_HEAD(&store->mru); - for (p = r->objects->packfiles->packs; p; p = p->next) - list_add_tail(&p->mru, &r->objects->packfiles->mru); + for (p = store->packs; p; p = p->next) + list_add_tail(&p->mru, &store->mru); } -static void prepare_packed_git(struct repository *r) +static void packfile_store_prepare(struct packfile_store *store) { struct odb_source *source; - if (r->objects->packfiles->initialized) + if (store->initialized) return; - odb_prepare_alternates(r->objects); - for (source = r->objects->sources; source; source = source->next) { + odb_prepare_alternates(store->odb); + for (source = store->odb->sources; source; source = source->next) { prepare_multi_pack_index_one(source); prepare_packed_git_one(source); } - rearrange_packed_git(r); + sort_packs(&store->packs, sort_pack); - prepare_packed_git_mru(r); - r->objects->packfiles->initialized = true; + packfile_store_prepare_mru(store); + store->initialized = true; } void reprepare_packed_git(struct repository *r) @@ -1027,25 +1022,25 @@ void reprepare_packed_git(struct repository *r) r->objects->approximate_object_count_valid = 0; r->objects->packfiles->initialized = false; - prepare_packed_git(r); + packfile_store_prepare(r->objects->packfiles); obj_read_unlock(); } struct packed_git *get_packed_git(struct repository *r) { - prepare_packed_git(r); + packfile_store_prepare(r->objects->packfiles); return r->objects->packfiles->packs; } struct multi_pack_index *get_multi_pack_index(struct odb_source *source) { - prepare_packed_git(source->odb->repo); + packfile_store_prepare(source->odb->packfiles); return source->midx; } struct packed_git *get_all_packs(struct repository *r) { - prepare_packed_git(r); + packfile_store_prepare(r->objects->packfiles); for (struct odb_source *source = r->objects->sources; source; source = source->next) { struct multi_pack_index *m = source->midx; @@ -1060,7 +1055,7 @@ struct packed_git *get_all_packs(struct repository *r) struct list_head *get_packed_git_mru(struct repository *r) { - prepare_packed_git(r); + packfile_store_prepare(r->objects->packfiles); return &r->objects->packfiles->mru; } @@ -1078,7 +1073,7 @@ unsigned long repo_approximate_object_count(struct repository *r) unsigned long count = 0; struct packed_git *p; - prepare_packed_git(r); + packfile_store_prepare(r->objects->packfiles); for (source = r->objects->sources; source; source = source->next) { struct multi_pack_index *m = get_multi_pack_index(source); @@ -2068,7 +2063,7 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa { struct list_head *pos; - prepare_packed_git(r); + packfile_store_prepare(r->objects->packfiles); for (struct odb_source *source = r->objects->sources; source; source = source->next) if (source->midx && fill_midx_entry(source->midx, oid, e)) -- GitLab From b75323ece845058d273c7bfb94222a4a928e0a8a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 2 Sep 2025 08:40:50 +0200 Subject: [PATCH 20/26] packfile: split up responsibilities of `reprepare_packed_git()` In `reprepare_packed_git()` we perform a couple of operations: - We reload alternate object directories. - We clear the loose object cache. - We reprepare packfiles. While the logic is hosted in "packfile.c", it clearly reaches into other subsystems that aren't related to packfiles. Split up the responsibility and introduce `odb_reprepare()` which now becomes responsible for repreparing the whole object database. The existing `reprepare_packed_git()` function is refactored accordingly and only cares about reloading the packfile store now. Signed-off-by: Patrick Steinhardt --- builtin/backfill.c | 2 +- builtin/gc.c | 4 ++-- builtin/receive-pack.c | 2 +- builtin/repack.c | 2 +- bulk-checkin.c | 2 +- connected.c | 2 +- fetch-pack.c | 4 ++-- object-name.c | 2 +- odb.c | 27 ++++++++++++++++++++++++++- odb.h | 6 ++++++ packfile.c | 26 ++++---------------------- packfile.h | 9 ++++++++- transport-helper.c | 2 +- 13 files changed, 55 insertions(+), 35 deletions(-) diff --git a/builtin/backfill.c b/builtin/backfill.c index 80056abe473..e80fc1b694d 100644 --- a/builtin/backfill.c +++ b/builtin/backfill.c @@ -53,7 +53,7 @@ static void download_batch(struct backfill_context *ctx) * We likely have a new packfile. Add it to the packed list to * avoid possible duplicate downloads of the same objects. */ - reprepare_packed_git(ctx->repo); + odb_reprepare(ctx->repo->objects); } static int fill_missing_blobs(const char *path UNUSED, diff --git a/builtin/gc.c b/builtin/gc.c index 03ae4926b20..aeca06a08be 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1042,7 +1042,7 @@ int cmd_gc(int argc, die(FAILED_RUN, "rerere"); report_garbage = report_pack_garbage; - reprepare_packed_git(the_repository); + odb_reprepare(the_repository->objects); if (pack_garbage.nr > 0) { close_object_store(the_repository->objects); clean_pack_garbage(); @@ -1491,7 +1491,7 @@ static off_t get_auto_pack_size(void) struct packed_git *p; struct repository *r = the_repository; - reprepare_packed_git(r); + odb_reprepare(r->objects); for (p = get_all_packs(r); p; p = p->next) { if (p->pack_size > max_size) { second_largest_size = max_size; diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index 1113137a6f0..c9288a9c7e3 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -2389,7 +2389,7 @@ static const char *unpack(int err_fd, struct shallow_info *si) status = finish_command(&child); if (status) return "index-pack abnormal exit"; - reprepare_packed_git(the_repository); + odb_reprepare(the_repository->objects); } return NULL; } diff --git a/builtin/repack.c b/builtin/repack.c index c490a51e919..5ff27fc8e29 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -1685,7 +1685,7 @@ int cmd_repack(int argc, goto cleanup; } - reprepare_packed_git(the_repository); + odb_reprepare(the_repository->objects); if (delete_redundant) { int opts = 0; diff --git a/bulk-checkin.c b/bulk-checkin.c index b2809ab0398..f65439a748a 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -90,7 +90,7 @@ static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state) strbuf_release(&packname); /* Make objects we just wrote available to ourselves */ - reprepare_packed_git(the_repository); + odb_reprepare(the_repository->objects); } /* diff --git a/connected.c b/connected.c index 18c13245d8e..d6e9682fd93 100644 --- a/connected.c +++ b/connected.c @@ -72,7 +72,7 @@ int check_connected(oid_iterate_fn fn, void *cb_data, * Before checking for promisor packs, be sure we have the * latest pack-files loaded into memory. */ - reprepare_packed_git(the_repository); + odb_reprepare(the_repository->objects); do { struct packed_git *p; diff --git a/fetch-pack.c b/fetch-pack.c index 6ed56629518..fe7a84bf2f9 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1983,7 +1983,7 @@ static void update_shallow(struct fetch_pack_args *args, * remote is shallow, but this is a clone, there are * no objects in repo to worry about. Accept any * shallow points that exist in the pack (iow in repo - * after get_pack() and reprepare_packed_git()) + * after get_pack() and odb_reprepare()) */ struct oid_array extra = OID_ARRAY_INIT; struct object_id *oid = si->shallow->oid; @@ -2108,7 +2108,7 @@ struct ref *fetch_pack(struct fetch_pack_args *args, ref_cpy = do_fetch_pack(args, fd, ref, sought, nr_sought, &si, pack_lockfiles); } - reprepare_packed_git(the_repository); + odb_reprepare(the_repository->objects); if (!args->cloning && args->deepen) { struct check_connected_options opt = CHECK_CONNECTED_INIT; diff --git a/object-name.c b/object-name.c index 732056ff5e3..df9e0c5f020 100644 --- a/object-name.c +++ b/object-name.c @@ -596,7 +596,7 @@ static enum get_oid_result get_short_oid(struct repository *r, * or migrated from loose to packed. */ if (status == MISSING_OBJECT) { - reprepare_packed_git(r); + odb_reprepare(r->objects); find_short_object_filename(&ds); find_short_packed_object(&ds); status = finish_object_disambiguation(&ds, oid); diff --git a/odb.c b/odb.c index 32e982bf0b9..65a6cc67b61 100644 --- a/odb.c +++ b/odb.c @@ -694,7 +694,7 @@ static int do_oid_object_info_extended(struct object_database *odb, /* Not a loose object; someone else may have just packed it. */ if (!(flags & OBJECT_INFO_QUICK)) { - reprepare_packed_git(odb->repo); + odb_reprepare(odb->repo->objects); if (find_pack_entry(odb->repo, real, &e)) break; } @@ -1040,3 +1040,28 @@ void odb_clear(struct object_database *o) string_list_clear(&o->submodule_source_paths, 0); } + +void odb_reprepare(struct object_database *o) +{ + struct odb_source *source; + + obj_read_lock(); + + /* + * Reprepare alt odbs, in case the alternates file was modified + * during the course of this process. This only _adds_ odbs to + * the linked list, so existing odbs will continue to exist for + * the lifetime of the process. + */ + o->loaded_alternates = 0; + odb_prepare_alternates(o); + + for (source = o->sources; source; source = source->next) + odb_clear_loose_cache(source); + + o->approximate_object_count_valid = 0; + + packfile_store_reprepare(o->packfiles); + + obj_read_unlock(); +} diff --git a/odb.h b/odb.h index 9dd7bb6bc3e..ab39e3605d5 100644 --- a/odb.h +++ b/odb.h @@ -161,6 +161,12 @@ struct object_database { struct object_database *odb_new(struct repository *repo); void odb_clear(struct object_database *o); +/* + * Clear caches, reload alternates and then reload object sources so that new + * objects may become accessible. + */ +void odb_reprepare(struct object_database *o); + /* * Find source by its object directory path. Returns a `NULL` pointer in case * the source could not be found. diff --git a/packfile.c b/packfile.c index 095c85919b6..950b98aac51 100644 --- a/packfile.c +++ b/packfile.c @@ -1002,28 +1002,10 @@ static void packfile_store_prepare(struct packfile_store *store) store->initialized = true; } -void reprepare_packed_git(struct repository *r) +void packfile_store_reprepare(struct packfile_store *store) { - struct odb_source *source; - - obj_read_lock(); - - /* - * Reprepare alt odbs, in case the alternates file was modified - * during the course of this process. This only _adds_ odbs to - * the linked list, so existing odbs will continue to exist for - * the lifetime of the process. - */ - r->objects->loaded_alternates = 0; - odb_prepare_alternates(r->objects); - - for (source = r->objects->sources; source; source = source->next) - odb_clear_loose_cache(source); - - r->objects->approximate_object_count_valid = 0; - r->objects->packfiles->initialized = false; - packfile_store_prepare(r->objects->packfiles); - obj_read_unlock(); + store->initialized = false; + packfile_store_prepare(store); } struct packed_git *get_packed_git(struct repository *r) @@ -1144,7 +1126,7 @@ unsigned long get_size_from_delta(struct packed_git *p, * * Other worrying sections could be the call to close_pack_fd(), * which can close packs even with in-use windows, and to - * reprepare_packed_git(). Regarding the former, mmap doc says: + * odb_reprepare(). Regarding the former, mmap doc says: * "closing the file descriptor does not unmap the region". And * for the latter, it won't re-open already available packs. */ diff --git a/packfile.h b/packfile.h index bf66211986e..a85ff607feb 100644 --- a/packfile.h +++ b/packfile.h @@ -112,6 +112,14 @@ void packfile_store_free(struct packfile_store *store); */ void packfile_store_close(struct packfile_store *store); +/* + * Clear the packfile caches and try to look up any new packfiles that have + * appeared since last preparing the packfiles store. + * + * This function must be called under the `odb_read_lock()`. + */ +void packfile_store_reprepare(struct packfile_store *store); + struct pack_window { struct pack_window *next; unsigned char *base; @@ -188,7 +196,6 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, #define PACKDIR_FILE_GARBAGE 4 extern void (*report_garbage)(unsigned seen_bits, const char *path); -void reprepare_packed_git(struct repository *r); void install_packed_git(struct repository *r, struct packed_git *pack); struct packed_git *get_packed_git(struct repository *r); diff --git a/transport-helper.c b/transport-helper.c index 0789e5bca53..4d95d84f9e4 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -450,7 +450,7 @@ static int fetch_with_fetch(struct transport *transport, } strbuf_release(&buf); - reprepare_packed_git(the_repository); + odb_reprepare(the_repository->objects); return 0; } -- GitLab From 7863cd6bae0b8229dbb934f80e4a39d7e834bbad Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 6 Aug 2025 11:19:38 +0200 Subject: [PATCH 21/26] packfile: refactor `install_packed_git()` to work on packfile store The `install_packed_git()` functions adds a packfile to a specific object store. Refactor it to accept a packfile store instead of a repository to clarify its scope. Signed-off-by: Patrick Steinhardt --- builtin/fast-import.c | 2 +- builtin/index-pack.c | 2 +- http.c | 2 +- http.h | 2 +- midx.c | 2 +- packfile.c | 11 ++++++----- packfile.h | 9 +++++++-- 7 files changed, 18 insertions(+), 12 deletions(-) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 2c35f9345d0..e9d82b31c39 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -901,7 +901,7 @@ static void end_packfile(void) if (!new_p) die("core git rejected index %s", idx_name); all_packs[pack_id] = new_p; - install_packed_git(the_repository, new_p); + packfile_store_add_pack(the_repository->objects->packfiles, new_p); free(idx_name); /* Print the boundary */ diff --git a/builtin/index-pack.c b/builtin/index-pack.c index f91c301bba9..ed490dfad47 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1645,7 +1645,7 @@ static void final(const char *final_pack_name, const char *curr_pack_name, p = add_packed_git(the_repository, final_index_name, strlen(final_index_name), 0); if (p) - install_packed_git(the_repository, p); + packfile_store_add_pack(the_repository->objects->packfiles, p); } if (!from_stdin) { diff --git a/http.c b/http.c index 98853d64834..af2120b64c7 100644 --- a/http.c +++ b/http.c @@ -2541,7 +2541,7 @@ void http_install_packfile(struct packed_git *p, lst = &((*lst)->next); *lst = (*lst)->next; - install_packed_git(the_repository, p); + packfile_store_add_pack(the_repository->objects->packfiles, p); } struct http_pack_request *new_http_pack_request( diff --git a/http.h b/http.h index 36202139f45..e5a5380c6c3 100644 --- a/http.h +++ b/http.h @@ -210,7 +210,7 @@ int finish_http_pack_request(struct http_pack_request *preq); void release_http_pack_request(struct http_pack_request *preq); /* - * Remove p from the given list, and invoke install_packed_git() on it. + * Remove p from the given list, and invoke packfile_store_add_pack() on it. * * This is a convenience function for users that have obtained a list of packs * from http_get_info_packs() and have chosen a specific pack to fetch. diff --git a/midx.c b/midx.c index 91c7b3917d6..69c44be71c5 100644 --- a/midx.c +++ b/midx.c @@ -467,7 +467,7 @@ int prepare_midx_pack(struct multi_pack_index *m, p = add_packed_git(r, pack_name.buf, pack_name.len, m->source->local); if (p) { - install_packed_git(r, p); + packfile_store_add_pack(r->objects->packfiles, p); list_add_tail(&p->mru, &r->objects->packfiles->mru); } } diff --git a/packfile.c b/packfile.c index 950b98aac51..af806aba093 100644 --- a/packfile.c +++ b/packfile.c @@ -779,16 +779,17 @@ struct packed_git *add_packed_git(struct repository *r, const char *path, return p; } -void install_packed_git(struct repository *r, struct packed_git *pack) +void packfile_store_add_pack(struct packfile_store *store, + struct packed_git *pack) { if (pack->pack_fd != -1) pack_open_fds++; - pack->next = r->objects->packfiles->packs; - r->objects->packfiles->packs = pack; + pack->next = store->packs; + store->packs = pack; hashmap_entry_init(&pack->packmap_ent, strhash(pack->pack_name)); - hashmap_add(&r->objects->packfiles->map, &pack->packmap_ent); + hashmap_add(&store->map, &pack->packmap_ent); } void (*report_garbage)(unsigned seen_bits, const char *path); @@ -904,7 +905,7 @@ static void prepare_pack(const char *full_name, size_t full_name_len, if (!hashmap_get(&data->r->objects->packfiles->map, &hent, pack_name)) { p = add_packed_git(data->r, full_name, full_name_len, data->local); if (p) - install_packed_git(data->r, p); + packfile_store_add_pack(data->r->objects->packfiles, p); } free(pack_name); } diff --git a/packfile.h b/packfile.h index a85ff607feb..ba4b0cef9cb 100644 --- a/packfile.h +++ b/packfile.h @@ -120,6 +120,13 @@ void packfile_store_close(struct packfile_store *store); */ void packfile_store_reprepare(struct packfile_store *store); +/* + * Add the pack to the store so that contained objects become accessible via + * the store. This moves ownership into the store. + */ +void packfile_store_add_pack(struct packfile_store *store, + struct packed_git *pack); + struct pack_window { struct pack_window *next; unsigned char *base; @@ -196,8 +203,6 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, #define PACKDIR_FILE_GARBAGE 4 extern void (*report_garbage)(unsigned seen_bits, const char *path); -void install_packed_git(struct repository *r, struct packed_git *pack); - struct packed_git *get_packed_git(struct repository *r); struct list_head *get_packed_git_mru(struct repository *r); struct multi_pack_index *get_multi_pack_index(struct odb_source *source); -- GitLab From f65025d54ef26d7cb407b9298c36a6da04efb02f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 2 Sep 2025 10:00:37 +0200 Subject: [PATCH 22/26] packfile: introduce function to load and add packfiles We have a recurring pattern where we essentially perform an upsert of a packfile in case it isn't yet known by the packfile store. The logic to do so is non-trivial as we have to reconstruct the packfile's key, check the map of packfiles, then create the new packfile and finally add it to the store. Introduce a new function that does this dance for us. Refactor callsites to use it. Signed-off-by: Patrick Steinhardt --- builtin/fast-import.c | 4 ++-- builtin/index-pack.c | 10 +++------- midx.c | 23 ++++------------------ packfile.c | 44 ++++++++++++++++++++++++++++++------------- packfile.h | 8 ++++++++ 5 files changed, 48 insertions(+), 41 deletions(-) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index e9d82b31c39..a26e79689d5 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -897,11 +897,11 @@ static void end_packfile(void) idx_name = keep_pack(create_index()); /* Register the packfile with core git's machinery. */ - new_p = add_packed_git(pack_data->repo, idx_name, strlen(idx_name), 1); + new_p = packfile_store_load_pack(pack_data->repo->objects->packfiles, + idx_name, 1); if (!new_p) die("core git rejected index %s", idx_name); all_packs[pack_id] = new_p; - packfile_store_add_pack(the_repository->objects->packfiles, new_p); free(idx_name); /* Print the boundary */ diff --git a/builtin/index-pack.c b/builtin/index-pack.c index ed490dfad47..2b78ba7fe4d 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1640,13 +1640,9 @@ static void final(const char *final_pack_name, const char *curr_pack_name, rename_tmp_packfile(&final_index_name, curr_index_name, &index_name, hash, "idx", 1); - if (do_fsck_object) { - struct packed_git *p; - p = add_packed_git(the_repository, final_index_name, - strlen(final_index_name), 0); - if (p) - packfile_store_add_pack(the_repository->objects->packfiles, p); - } + if (do_fsck_object) + packfile_store_load_pack(the_repository->objects->packfiles, + final_index_name, 0); if (!from_stdin) { printf("%s\n", hash_to_hex(hash)); diff --git a/midx.c b/midx.c index 69c44be71c5..3faeaf2f8fa 100644 --- a/midx.c +++ b/midx.c @@ -443,7 +443,6 @@ int prepare_midx_pack(struct multi_pack_index *m, { struct repository *r = m->source->odb->repo; struct strbuf pack_name = STRBUF_INIT; - struct strbuf key = STRBUF_INIT; struct packed_git *p; pack_int_id = midx_for_pack(&m, pack_int_id); @@ -455,25 +454,11 @@ int prepare_midx_pack(struct multi_pack_index *m, strbuf_addf(&pack_name, "%s/pack/%s", m->source->path, m->pack_names[pack_int_id]); - - /* pack_map holds the ".pack" name, but we have the .idx */ - strbuf_addbuf(&key, &pack_name); - strbuf_strip_suffix(&key, ".idx"); - strbuf_addstr(&key, ".pack"); - p = hashmap_get_entry_from_hash(&r->objects->packfiles->map, - strhash(key.buf), key.buf, - struct packed_git, packmap_ent); - if (!p) { - p = add_packed_git(r, pack_name.buf, pack_name.len, - m->source->local); - if (p) { - packfile_store_add_pack(r->objects->packfiles, p); - list_add_tail(&p->mru, &r->objects->packfiles->mru); - } - } - + p = packfile_store_load_pack(r->objects->packfiles, + pack_name.buf, m->source->local); + if (p) + list_add_tail(&p->mru, &r->objects->packfiles->mru); strbuf_release(&pack_name); - strbuf_release(&key); if (!p) { m->packs[pack_int_id] = MIDX_PACK_ERROR; diff --git a/packfile.c b/packfile.c index af806aba093..9224ca424c1 100644 --- a/packfile.c +++ b/packfile.c @@ -792,6 +792,33 @@ void packfile_store_add_pack(struct packfile_store *store, hashmap_add(&store->map, &pack->packmap_ent); } +struct packed_git *packfile_store_load_pack(struct packfile_store *store, + const char *idx_path, int local) +{ + struct strbuf key = STRBUF_INIT; + struct packed_git *p; + + /* + * We're being called with the path to the index file, but `pack_map` + * holds the path to the packfile itself. + */ + strbuf_addstr(&key, idx_path); + strbuf_strip_suffix(&key, ".idx"); + strbuf_addstr(&key, ".pack"); + + p = hashmap_get_entry_from_hash(&store->map, strhash(key.buf), key.buf, + struct packed_git, packmap_ent); + if (!p) { + p = add_packed_git(store->odb->repo, idx_path, + strlen(idx_path), local); + if (p) + packfile_store_add_pack(store, p); + } + + strbuf_release(&key); + return p; +} + void (*report_garbage)(unsigned seen_bits, const char *path); static void report_helper(const struct string_list *list, @@ -891,23 +918,14 @@ static void prepare_pack(const char *full_name, size_t full_name_len, const char *file_name, void *_data) { struct prepare_pack_data *data = (struct prepare_pack_data *)_data; - struct packed_git *p; size_t base_len = full_name_len; if (strip_suffix_mem(full_name, &base_len, ".idx") && !(data->m && midx_contains_pack(data->m, file_name))) { - struct hashmap_entry hent; - char *pack_name = xstrfmt("%.*s.pack", (int)base_len, full_name); - unsigned int hash = strhash(pack_name); - hashmap_entry_init(&hent, hash); - - /* Don't reopen a pack we already have. */ - if (!hashmap_get(&data->r->objects->packfiles->map, &hent, pack_name)) { - p = add_packed_git(data->r, full_name, full_name_len, data->local); - if (p) - packfile_store_add_pack(data->r->objects->packfiles, p); - } - free(pack_name); + char *trimmed_path = xstrndup(full_name, full_name_len); + packfile_store_load_pack(data->r->objects->packfiles, + trimmed_path, data->local); + free(trimmed_path); } if (!report_garbage) diff --git a/packfile.h b/packfile.h index ba4b0cef9cb..fcefcbbef65 100644 --- a/packfile.h +++ b/packfile.h @@ -127,6 +127,14 @@ void packfile_store_reprepare(struct packfile_store *store); void packfile_store_add_pack(struct packfile_store *store, struct packed_git *pack); +/* + * Open the packfile and add it to the store if it isn't yet known. Returns + * either the newly opened packfile or the preexisting packfile. Returns a + * `NULL` pointer in case the packfile could not be opened. + */ +struct packed_git *packfile_store_load_pack(struct packfile_store *store, + const char *idx_path, int local); + struct pack_window { struct pack_window *next; unsigned char *base; -- GitLab From 15e962daead4e6dd76a18f7a9123dadb88245f3d Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 2 Sep 2025 08:47:15 +0200 Subject: [PATCH 23/26] packfile: move `get_multi_pack_index()` into "midx.c" The `get_multi_pack_index()` function is declared and implemented in the packfile subsystem, even though it really belongs into the multi-pack index subsystem. The reason for this is likely that it needs to call `packfile_store_prepare()`, which is not exposed by the packfile system. In a subsequent commit we're about to add another caller outside of the packfile system though, so we'll have to expose the function anyway. Do so now already and move `get_multi_pack_index()` into the MIDX subsystem. Signed-off-by: Patrick Steinhardt --- midx.c | 6 ++++++ midx.h | 1 + packfile.c | 8 +------- packfile.h | 10 +++++++++- 4 files changed, 17 insertions(+), 8 deletions(-) diff --git a/midx.c b/midx.c index 3faeaf2f8fa..1d6269f957e 100644 --- a/midx.c +++ b/midx.c @@ -93,6 +93,12 @@ static int midx_read_object_offsets(const unsigned char *chunk_start, return 0; } +struct multi_pack_index *get_multi_pack_index(struct odb_source *source) +{ + packfile_store_prepare(source->odb->packfiles); + return source->midx; +} + static struct multi_pack_index *load_multi_pack_index_one(struct odb_source *source, const char *midx_name) { diff --git a/midx.h b/midx.h index e241d2d6900..6e54d73503d 100644 --- a/midx.h +++ b/midx.h @@ -94,6 +94,7 @@ void get_midx_chain_filename(struct odb_source *source, struct strbuf *out); void get_split_midx_filename_ext(struct odb_source *source, struct strbuf *buf, const unsigned char *hash, const char *ext); +struct multi_pack_index *get_multi_pack_index(struct odb_source *source); struct multi_pack_index *load_multi_pack_index(struct odb_source *source); int prepare_midx_pack(struct multi_pack_index *m, uint32_t pack_int_id); struct packed_git *nth_midxed_pack(struct multi_pack_index *m, diff --git a/packfile.c b/packfile.c index 9224ca424c1..7a9193e5ef4 100644 --- a/packfile.c +++ b/packfile.c @@ -1003,7 +1003,7 @@ static void packfile_store_prepare_mru(struct packfile_store *store) list_add_tail(&p->mru, &store->mru); } -static void packfile_store_prepare(struct packfile_store *store) +void packfile_store_prepare(struct packfile_store *store) { struct odb_source *source; @@ -1033,12 +1033,6 @@ struct packed_git *get_packed_git(struct repository *r) return r->objects->packfiles->packs; } -struct multi_pack_index *get_multi_pack_index(struct odb_source *source) -{ - packfile_store_prepare(source->odb->packfiles); - return source->midx; -} - struct packed_git *get_all_packs(struct repository *r) { packfile_store_prepare(r->objects->packfiles); diff --git a/packfile.h b/packfile.h index fcefcbbef65..a9e561ac394 100644 --- a/packfile.h +++ b/packfile.h @@ -112,6 +112,15 @@ void packfile_store_free(struct packfile_store *store); */ void packfile_store_close(struct packfile_store *store); +/* + * Prepare the packfile store by loading packfiles and multi-pack indices for + * all alternates. This becomes a no-op if the store is already prepared. + * + * It shouldn't typically be necessary to call this function directly, as + * functions that access the store know to prepare it. + */ +void packfile_store_prepare(struct packfile_store *store); + /* * Clear the packfile caches and try to look up any new packfiles that have * appeared since last preparing the packfiles store. @@ -213,7 +222,6 @@ extern void (*report_garbage)(unsigned seen_bits, const char *path); struct packed_git *get_packed_git(struct repository *r); struct list_head *get_packed_git_mru(struct repository *r); -struct multi_pack_index *get_multi_pack_index(struct odb_source *source); struct packed_git *get_all_packs(struct repository *r); /* -- GitLab From 7ba39efd3a7e240dd5d82b561c16725f028ae0cc Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 6 Aug 2025 13:08:53 +0200 Subject: [PATCH 24/26] packfile: refactor `get_packed_git()` to work on packfile store The `get_packed_git()` function prepares the packfile store and then returns its packfiles. Refactor it to accept a packfile store instead of a repository to clarify its scope. Signed-off-by: Patrick Steinhardt --- builtin/gc.c | 2 +- builtin/grep.c | 2 +- object-name.c | 4 ++-- packfile.c | 6 +++--- packfile.h | 7 ++++++- 5 files changed, 13 insertions(+), 8 deletions(-) diff --git a/builtin/gc.c b/builtin/gc.c index aeca06a08be..ec6735a540a 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1423,7 +1423,7 @@ static int incremental_repack_auto_condition(struct gc_config *cfg UNUSED) if (incremental_repack_auto_limit < 0) return 1; - for (p = get_packed_git(the_repository); + for (p = packfile_store_get_packs(the_repository->objects->packfiles); count < incremental_repack_auto_limit && p; p = p->next) { if (!p->multi_pack_index) diff --git a/builtin/grep.c b/builtin/grep.c index 5df65373337..63a4959568f 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1214,7 +1214,7 @@ int cmd_grep(int argc, if (recurse_submodules) repo_read_gitmodules(the_repository, 1); if (startup_info->have_repository) - (void)get_packed_git(the_repository); + (void)packfile_store_get_packs(the_repository->objects->packfiles); start_threads(&opt); } else { diff --git a/object-name.c b/object-name.c index df9e0c5f020..53356819a3d 100644 --- a/object-name.c +++ b/object-name.c @@ -213,7 +213,7 @@ static void find_short_packed_object(struct disambiguate_state *ds) unique_in_midx(m, ds); } - for (p = get_packed_git(ds->repo); p && !ds->ambiguous; + for (p = packfile_store_get_packs(ds->repo->objects->packfiles); p && !ds->ambiguous; p = p->next) unique_in_pack(p, ds); } @@ -806,7 +806,7 @@ static void find_abbrev_len_packed(struct min_abbrev_data *mad) find_abbrev_len_for_midx(m, mad); } - for (p = get_packed_git(mad->repo); p; p = p->next) + for (p = packfile_store_get_packs(mad->repo->objects->packfiles); p; p = p->next) find_abbrev_len_for_pack(p, mad); } diff --git a/packfile.c b/packfile.c index 7a9193e5ef4..b37f43afb58 100644 --- a/packfile.c +++ b/packfile.c @@ -1027,10 +1027,10 @@ void packfile_store_reprepare(struct packfile_store *store) packfile_store_prepare(store); } -struct packed_git *get_packed_git(struct repository *r) +struct packed_git *packfile_store_get_packs(struct packfile_store *store) { - packfile_store_prepare(r->objects->packfiles); - return r->objects->packfiles->packs; + packfile_store_prepare(store); + return store->packs; } struct packed_git *get_all_packs(struct repository *r) diff --git a/packfile.h b/packfile.h index a9e561ac394..0b691ded7ef 100644 --- a/packfile.h +++ b/packfile.h @@ -136,6 +136,12 @@ void packfile_store_reprepare(struct packfile_store *store); void packfile_store_add_pack(struct packfile_store *store, struct packed_git *pack); +/* + * Get packs managed by the given store. Does not load the MIDX or any packs + * referenced by it. + */ +struct packed_git *packfile_store_get_packs(struct packfile_store *store); + /* * Open the packfile and add it to the store if it isn't yet known. Returns * either the newly opened packfile or the preexisting packfile. Returns a @@ -220,7 +226,6 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, #define PACKDIR_FILE_GARBAGE 4 extern void (*report_garbage)(unsigned seen_bits, const char *path); -struct packed_git *get_packed_git(struct repository *r); struct list_head *get_packed_git_mru(struct repository *r); struct packed_git *get_all_packs(struct repository *r); -- GitLab From 505d32ea3577e8ec198636f2c043dc85ed8b97f2 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 15 Sep 2025 09:27:36 +0200 Subject: [PATCH 25/26] packfile: refactor `get_all_packs()` to work on packfile store The `get_all_packs()` function prepares the packfile store and then returns its packfiles. Refactor it to accept a packfile store instead of a repository to clarify its scope. Signed-off-by: Patrick Steinhardt --- builtin/cat-file.c | 3 ++- builtin/count-objects.c | 3 ++- builtin/fast-import.c | 6 ++++-- builtin/fsck.c | 11 +++++++---- builtin/gc.c | 8 +++++--- builtin/pack-objects.c | 28 +++++++++++++++++++--------- builtin/pack-redundant.c | 6 ++++-- builtin/repack.c | 9 ++++++--- connected.c | 3 ++- http-backend.c | 5 +++-- http.c | 3 ++- pack-bitmap.c | 4 ++-- pack-objects.c | 3 ++- packfile.c | 12 ++++++------ packfile.h | 7 ++++++- server-info.c | 3 ++- t/helper/test-find-pack.c | 2 +- t/helper/test-pack-mtimes.c | 2 +- 18 files changed, 76 insertions(+), 42 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index fce0b06451c..ee6715fa523 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -852,9 +852,10 @@ static void batch_each_object(struct batch_options *opt, if (bitmap && !for_each_bitmapped_object(bitmap, &opt->objects_filter, batch_one_object_bitmapped, &payload)) { + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *pack; - for (pack = get_all_packs(the_repository); pack; pack = pack->next) { + for (pack = packfile_store_get_all_packs(packs); pack; pack = pack->next) { if (bitmap_index_contains_pack(bitmap, pack) || open_pack_index(pack)) continue; diff --git a/builtin/count-objects.c b/builtin/count-objects.c index a61d3b46aac..f2f407c2a78 100644 --- a/builtin/count-objects.c +++ b/builtin/count-objects.c @@ -122,6 +122,7 @@ int cmd_count_objects(int argc, count_loose, count_cruft, NULL, NULL); if (verbose) { + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; unsigned long num_pack = 0; off_t size_pack = 0; @@ -129,7 +130,7 @@ int cmd_count_objects(int argc, struct strbuf pack_buf = STRBUF_INIT; struct strbuf garbage_buf = STRBUF_INIT; - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { if (!p->pack_local) continue; if (open_pack_index(p)) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index a26e79689d5..b1d5549815a 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -952,6 +952,7 @@ static int store_object( struct object_id *oidout, uintmax_t mark) { + struct packfile_store *packs = the_repository->objects->packfiles; void *out, *delta; struct object_entry *e; unsigned char hdr[96]; @@ -975,7 +976,7 @@ static int store_object( if (e->idx.offset) { duplicate_count_by_type[type]++; return 1; - } else if (find_oid_pack(&oid, get_all_packs(the_repository))) { + } else if (find_oid_pack(&oid, packfile_store_get_all_packs(packs))) { e->type = type; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! */ @@ -1092,6 +1093,7 @@ static void truncate_pack(struct hashfile_checkpoint *checkpoint) static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) { + struct packfile_store *packs = the_repository->objects->packfiles; size_t in_sz = 64 * 1024, out_sz = 64 * 1024; unsigned char *in_buf = xmalloc(in_sz); unsigned char *out_buf = xmalloc(out_sz); @@ -1175,7 +1177,7 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) duplicate_count_by_type[OBJ_BLOB]++; truncate_pack(&checkpoint); - } else if (find_oid_pack(&oid, get_all_packs(the_repository))) { + } else if (find_oid_pack(&oid, packfile_store_get_all_packs(packs))) { e->type = OBJ_BLOB; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! */ diff --git a/builtin/fsck.c b/builtin/fsck.c index d2eb9d4fbe9..8ee95e0d67c 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -867,19 +867,20 @@ static int mark_packed_for_connectivity(const struct object_id *oid, static int check_pack_rev_indexes(struct repository *r, int show_progress) { + struct packfile_store *packs = r->objects->packfiles; struct progress *progress = NULL; uint32_t pack_count = 0; int res = 0; if (show_progress) { - for (struct packed_git *p = get_all_packs(r); p; p = p->next) + for (struct packed_git *p = packfile_store_get_all_packs(packs); p; p = p->next) pack_count++; progress = start_delayed_progress(the_repository, "Verifying reverse pack-indexes", pack_count); pack_count = 0; } - for (struct packed_git *p = get_all_packs(r); p; p = p->next) { + for (struct packed_git *p = packfile_store_get_all_packs(packs); p; p = p->next) { int load_error = load_pack_revindex_from_disk(p); if (load_error < 0) { @@ -999,6 +1000,8 @@ int cmd_fsck(int argc, for_each_packed_object(the_repository, mark_packed_for_connectivity, NULL, 0); } else { + struct packfile_store *packs = the_repository->objects->packfiles; + odb_prepare_alternates(the_repository->objects); for (source = the_repository->objects->sources; source; source = source->next) fsck_source(source); @@ -1009,7 +1012,7 @@ int cmd_fsck(int argc, struct progress *progress = NULL; if (show_progress) { - for (p = get_all_packs(the_repository); p; + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { if (open_pack_index(p)) continue; @@ -1019,7 +1022,7 @@ int cmd_fsck(int argc, progress = start_progress(the_repository, _("Checking objects"), total); } - for (p = get_all_packs(the_repository); p; + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { /* verify gives error messages itself */ if (verify_pack(the_repository, diff --git a/builtin/gc.c b/builtin/gc.c index ec6735a540a..e19e13d9788 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -487,9 +487,10 @@ static int too_many_loose_objects(struct gc_config *cfg) static struct packed_git *find_base_packs(struct string_list *packs, unsigned long limit) { + struct packfile_store *packfiles = the_repository->objects->packfiles; struct packed_git *p, *base = NULL; - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packfiles); p; p = p->next) { if (!p->pack_local || p->is_cruft) continue; if (limit) { @@ -508,13 +509,14 @@ static struct packed_git *find_base_packs(struct string_list *packs, static int too_many_packs(struct gc_config *cfg) { + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; int cnt; if (cfg->gc_auto_pack_limit <= 0) return 0; - for (cnt = 0, p = get_all_packs(the_repository); p; p = p->next) { + for (cnt = 0, p = packfile_store_get_all_packs(packs); p; p = p->next) { if (!p->pack_local) continue; if (p->pack_keep) @@ -1492,7 +1494,7 @@ static off_t get_auto_pack_size(void) struct repository *r = the_repository; odb_reprepare(r->objects); - for (p = get_all_packs(r); p; p = p->next) { + for (p = packfile_store_get_all_packs(r->objects->packfiles); p; p = p->next) { if (p->pack_size > max_size) { second_largest_size = max_size; max_size = p->pack_size; diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 1494afcf3df..de351b757ae 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3831,6 +3831,7 @@ static int pack_mtime_cmp(const void *_a, const void *_b) static void read_packs_list_from_stdin(struct rev_info *revs) { + struct packfile_store *packs = the_repository->objects->packfiles; struct strbuf buf = STRBUF_INIT; struct string_list include_packs = STRING_LIST_INIT_DUP; struct string_list exclude_packs = STRING_LIST_INIT_DUP; @@ -3855,7 +3856,7 @@ static void read_packs_list_from_stdin(struct rev_info *revs) string_list_sort(&exclude_packs); string_list_remove_duplicates(&exclude_packs, 0); - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { const char *pack_name = pack_basename(p); if ((item = string_list_lookup(&include_packs, pack_name))) @@ -4076,6 +4077,7 @@ static void enumerate_cruft_objects(void) static void enumerate_and_traverse_cruft_objects(struct string_list *fresh_packs) { + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; struct rev_info revs; int ret; @@ -4105,7 +4107,7 @@ static void enumerate_and_traverse_cruft_objects(struct string_list *fresh_packs * Re-mark only the fresh packs as kept so that objects in * unknown packs do not halt the reachability traversal early. */ - for (p = get_all_packs(the_repository); p; p = p->next) + for (p = packfile_store_get_all_packs(packs); p; p = p->next) p->pack_keep_in_core = 0; mark_pack_kept_in_core(fresh_packs, 1); @@ -4122,6 +4124,7 @@ static void enumerate_and_traverse_cruft_objects(struct string_list *fresh_packs static void read_cruft_objects(void) { + struct packfile_store *packs = the_repository->objects->packfiles; struct strbuf buf = STRBUF_INIT; struct string_list discard_packs = STRING_LIST_INIT_DUP; struct string_list fresh_packs = STRING_LIST_INIT_DUP; @@ -4142,7 +4145,7 @@ static void read_cruft_objects(void) string_list_sort(&discard_packs); string_list_sort(&fresh_packs); - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { const char *pack_name = pack_basename(p); struct string_list_item *item; @@ -4390,11 +4393,12 @@ static void add_unreachable_loose_objects(struct rev_info *revs) static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid) { + struct packfile_store *packs = the_repository->objects->packfiles; static struct packed_git *last_found = (void *)1; struct packed_git *p; p = (last_found != (void *)1) ? last_found : - get_all_packs(the_repository); + packfile_store_get_all_packs(packs); while (p) { if ((!p->pack_local || p->pack_keep || @@ -4404,7 +4408,7 @@ static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid) return 1; } if (p == last_found) - p = get_all_packs(the_repository); + p = packfile_store_get_all_packs(packs); else p = p->next; if (p == last_found) @@ -4436,12 +4440,13 @@ static int loosened_object_can_be_discarded(const struct object_id *oid, static void loosen_unused_packed_objects(void) { + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; uint32_t i; uint32_t loosened_objects_nr = 0; struct object_id oid; - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { if (!p->pack_local || p->pack_keep || p->pack_keep_in_core) continue; @@ -4742,12 +4747,13 @@ static void get_object_list(struct rev_info *revs, int ac, const char **av) static void add_extra_kept_packs(const struct string_list *names) { + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; if (!names->nr) return; - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { const char *name = basename(p->pack_name); int i; @@ -5185,8 +5191,10 @@ int cmd_pack_objects(int argc, add_extra_kept_packs(&keep_pack_list); if (ignore_packed_keep_on_disk) { + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; - for (p = get_all_packs(the_repository); p; p = p->next) + + for (p = packfile_store_get_all_packs(packs); p; p = p->next) if (p->pack_local && p->pack_keep) break; if (!p) /* no keep-able packs found */ @@ -5198,8 +5206,10 @@ int cmd_pack_objects(int argc, * want to unset "local" based on looking at packs, as * it also covers non-local objects */ + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; - for (p = get_all_packs(the_repository); p; p = p->next) { + + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { if (!p->pack_local) { have_non_local_packs = 1; break; diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c index fe81c293e3a..dd28171f0a1 100644 --- a/builtin/pack-redundant.c +++ b/builtin/pack-redundant.c @@ -566,7 +566,8 @@ static struct pack_list * add_pack(struct packed_git *p) static struct pack_list * add_pack_file(const char *filename) { - struct packed_git *p = get_all_packs(the_repository); + struct packfile_store *packs = the_repository->objects->packfiles; + struct packed_git *p = packfile_store_get_all_packs(packs); if (strlen(filename) < 40) die("Bad pack filename: %s", filename); @@ -581,7 +582,8 @@ static struct pack_list * add_pack_file(const char *filename) static void load_all(void) { - struct packed_git *p = get_all_packs(the_repository); + struct packfile_store *packs = the_repository->objects->packfiles; + struct packed_git *p = packfile_store_get_all_packs(packs); while (p) { add_pack(p); diff --git a/builtin/repack.c b/builtin/repack.c index 5ff27fc8e29..e8730808c53 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -265,10 +265,11 @@ static void existing_packs_release(struct existing_packs *existing) static void collect_pack_filenames(struct existing_packs *existing, const struct string_list *extra_keep) { + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; struct strbuf buf = STRBUF_INIT; - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { int i; const char *base; @@ -497,10 +498,11 @@ static void init_pack_geometry(struct pack_geometry *geometry, struct existing_packs *existing, const struct pack_objects_args *args) { + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; struct strbuf buf = STRBUF_INIT; - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { if (args->local && !p->pack_local) /* * When asked to only repack local packfiles we skip @@ -1137,11 +1139,12 @@ static int write_filtered_pack(const struct pack_objects_args *args, static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, struct existing_packs *existing) { + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; struct strbuf buf = STRBUF_INIT; size_t i; - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { if (!(p->is_cruft && p->pack_local)) continue; diff --git a/connected.c b/connected.c index d6e9682fd93..b288a18b17c 100644 --- a/connected.c +++ b/connected.c @@ -74,9 +74,10 @@ int check_connected(oid_iterate_fn fn, void *cb_data, */ odb_reprepare(the_repository->objects); do { + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { if (!p->pack_promisor) continue; if (find_pack_entry_one(oid, p)) diff --git a/http-backend.c b/http-backend.c index d5dfe762bb5..9084058f1e9 100644 --- a/http-backend.c +++ b/http-backend.c @@ -603,18 +603,19 @@ static void get_head(struct strbuf *hdr, char *arg UNUSED) static void get_info_packs(struct strbuf *hdr, char *arg UNUSED) { size_t objdirlen = strlen(repo_get_object_directory(the_repository)); + struct packfile_store *packs = the_repository->objects->packfiles; struct strbuf buf = STRBUF_INIT; struct packed_git *p; size_t cnt = 0; select_getanyfile(hdr); - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { if (p->pack_local) cnt++; } strbuf_grow(&buf, cnt * 53 + 2); - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { if (p->pack_local) strbuf_addf(&buf, "P %s\n", p->pack_name + objdirlen + 6); } diff --git a/http.c b/http.c index af2120b64c7..077e879de9e 100644 --- a/http.c +++ b/http.c @@ -2408,6 +2408,7 @@ static char *fetch_pack_index(unsigned char *hash, const char *base_url) static int fetch_and_setup_pack_index(struct packed_git **packs_head, unsigned char *sha1, const char *base_url) { + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *new_pack, *p; char *tmp_idx = NULL; int ret; @@ -2416,7 +2417,7 @@ static int fetch_and_setup_pack_index(struct packed_git **packs_head, * If we already have the pack locally, no need to fetch its index or * even add it to list; we already have all of its objects. */ - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { if (hasheq(p->hash, sha1, the_repository->hash_algo)) return 0; } diff --git a/pack-bitmap.c b/pack-bitmap.c index 058bdb5d7de..ac71035d771 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -664,7 +664,7 @@ static int open_pack_bitmap(struct repository *r, struct packed_git *p; int ret = -1; - for (p = get_all_packs(r); p; p = p->next) { + for (p = packfile_store_get_all_packs(r->objects->packfiles); p; p = p->next) { if (open_pack_bitmap_1(bitmap_git, p) == 0) { ret = 0; /* @@ -3362,7 +3362,7 @@ int verify_bitmap_files(struct repository *r) free(midx_bitmap_name); } - for (struct packed_git *p = get_all_packs(r); + for (struct packed_git *p = packfile_store_get_all_packs(r->objects->packfiles); p; p = p->next) { char *pack_bitmap_name = pack_bitmap_filename(p); res |= verify_bitmap_file(r->hash_algo, pack_bitmap_name); diff --git a/pack-objects.c b/pack-objects.c index a9d9855063a..d8eb6797354 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -86,6 +86,7 @@ struct object_entry *packlist_find(struct packing_data *pdata, static void prepare_in_pack_by_idx(struct packing_data *pdata) { + struct packfile_store *packs = pdata->repo->objects->packfiles; struct packed_git **mapping, *p; int cnt = 0, nr = 1U << OE_IN_PACK_BITS; @@ -95,7 +96,7 @@ static void prepare_in_pack_by_idx(struct packing_data *pdata) * (i.e. in_pack_idx also zero) should return NULL. */ mapping[cnt++] = NULL; - for (p = get_all_packs(pdata->repo); p; p = p->next, cnt++) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next, cnt++) { if (cnt == nr) { free(mapping); return; diff --git a/packfile.c b/packfile.c index b37f43afb58..cd5431b6aa1 100644 --- a/packfile.c +++ b/packfile.c @@ -1033,11 +1033,11 @@ struct packed_git *packfile_store_get_packs(struct packfile_store *store) return store->packs; } -struct packed_git *get_all_packs(struct repository *r) +struct packed_git *packfile_store_get_all_packs(struct packfile_store *store) { - packfile_store_prepare(r->objects->packfiles); + packfile_store_prepare(store); - for (struct odb_source *source = r->objects->sources; source; source = source->next) { + for (struct odb_source *source = store->odb->sources; source; source = source->next) { struct multi_pack_index *m = source->midx; if (!m) continue; @@ -1045,7 +1045,7 @@ struct packed_git *get_all_packs(struct repository *r) prepare_midx_pack(m, i); } - return r->objects->packfiles->packs; + return store->packs; } struct list_head *get_packed_git_mru(struct repository *r) @@ -2105,7 +2105,7 @@ struct packed_git **kept_pack_cache(struct repository *r, unsigned flags) * covers, one kept and one not kept, but the midx returns only * the non-kept version. */ - for (p = get_all_packs(r); p; p = p->next) { + for (p = packfile_store_get_all_packs(r->objects->packfiles); p; p = p->next) { if ((p->pack_keep && (flags & ON_DISK_KEEP_PACKS)) || (p->pack_keep_in_core && (flags & IN_CORE_KEEP_PACKS))) { ALLOC_GROW(packs, nr + 1, alloc); @@ -2202,7 +2202,7 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, int r = 0; int pack_errors = 0; - for (p = get_all_packs(repo); p; p = p->next) { + for (p = packfile_store_get_all_packs(repo->objects->packfiles); p; p = p->next) { if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) continue; if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) && diff --git a/packfile.h b/packfile.h index 0b691ded7ef..1afb9cd6641 100644 --- a/packfile.h +++ b/packfile.h @@ -142,6 +142,12 @@ void packfile_store_add_pack(struct packfile_store *store, */ struct packed_git *packfile_store_get_packs(struct packfile_store *store); +/* + * Get all packs managed by the given store, including packfiles that are + * referenced by multi-pack indices. + */ +struct packed_git *packfile_store_get_all_packs(struct packfile_store *store); + /* * Open the packfile and add it to the store if it isn't yet known. Returns * either the newly opened packfile or the preexisting packfile. Returns a @@ -227,7 +233,6 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, extern void (*report_garbage)(unsigned seen_bits, const char *path); struct list_head *get_packed_git_mru(struct repository *r); -struct packed_git *get_all_packs(struct repository *r); /* * Give a rough count of objects in the repository. This sacrifices accuracy diff --git a/server-info.c b/server-info.c index 9bb30d9ab71..1d33de821e9 100644 --- a/server-info.c +++ b/server-info.c @@ -287,12 +287,13 @@ static int compare_info(const void *a_, const void *b_) static void init_pack_info(struct repository *r, const char *infofile, int force) { + struct packfile_store *packs = r->objects->packfiles; struct packed_git *p; int stale; int i; size_t alloc = 0; - for (p = get_all_packs(r); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { /* we ignore things on alternate path since they are * not available to the pullers in general. */ diff --git a/t/helper/test-find-pack.c b/t/helper/test-find-pack.c index 611a13a3261..e001dc3066d 100644 --- a/t/helper/test-find-pack.c +++ b/t/helper/test-find-pack.c @@ -39,7 +39,7 @@ int cmd__find_pack(int argc, const char **argv) if (repo_get_oid(the_repository, argv[0], &oid)) die("cannot parse %s as an object name", argv[0]); - for (p = get_all_packs(the_repository); p; p = p->next) + for (p = packfile_store_get_all_packs(the_repository->objects->packfiles); p; p = p->next) if (find_pack_entry_one(&oid, p)) { printf("%s\n", p->pack_name); actual_count++; diff --git a/t/helper/test-pack-mtimes.c b/t/helper/test-pack-mtimes.c index d51aaa3dc40..7c428c16011 100644 --- a/t/helper/test-pack-mtimes.c +++ b/t/helper/test-pack-mtimes.c @@ -37,7 +37,7 @@ int cmd__pack_mtimes(int argc, const char **argv) if (argc != 2) usage(pack_mtimes_usage); - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(the_repository->objects->packfiles); p; p = p->next) { strbuf_addstr(&buf, basename(p->pack_name)); strbuf_strip_suffix(&buf, ".pack"); strbuf_addstr(&buf, ".mtimes"); -- GitLab From c474d336be54a5f1f5661b3eeedf5145e4dca9cf Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 6 Aug 2025 13:08:53 +0200 Subject: [PATCH 26/26] packfile: refactor `get_packed_git_mru()` to work on packfile store The `get_packed_git_mru()` function prepares the packfile store and then returns its packfiles in most-recently-used order. Refactor it to accept a packfile store instead of a repository to clarify its scope. Signed-off-by: Patrick Steinhardt --- builtin/pack-objects.c | 4 ++-- packfile.c | 6 +++--- packfile.h | 7 +++++-- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index de351b757ae..61bbbdfb83f 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1748,12 +1748,12 @@ static int want_object_in_pack_mtime(const struct object_id *oid, } } - list_for_each(pos, get_packed_git_mru(the_repository)) { + list_for_each(pos, packfile_store_get_packs_mru(the_repository->objects->packfiles)) { struct packed_git *p = list_entry(pos, struct packed_git, mru); want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime); if (!exclude && want > 0) list_move(&p->mru, - get_packed_git_mru(the_repository)); + packfile_store_get_packs_mru(the_repository->objects->packfiles)); if (want != -1) return want; } diff --git a/packfile.c b/packfile.c index cd5431b6aa1..5a7caec2925 100644 --- a/packfile.c +++ b/packfile.c @@ -1048,10 +1048,10 @@ struct packed_git *packfile_store_get_all_packs(struct packfile_store *store) return store->packs; } -struct list_head *get_packed_git_mru(struct repository *r) +struct list_head *packfile_store_get_packs_mru(struct packfile_store *store) { - packfile_store_prepare(r->objects->packfiles); - return &r->objects->packfiles->mru; + packfile_store_prepare(store); + return &store->mru; } /* diff --git a/packfile.h b/packfile.h index 1afb9cd6641..e7a5792b6cf 100644 --- a/packfile.h +++ b/packfile.h @@ -148,6 +148,11 @@ struct packed_git *packfile_store_get_packs(struct packfile_store *store); */ struct packed_git *packfile_store_get_all_packs(struct packfile_store *store); +/* + * Get all packs in most-recently-used order. + */ +struct list_head *packfile_store_get_packs_mru(struct packfile_store *store); + /* * Open the packfile and add it to the store if it isn't yet known. Returns * either the newly opened packfile or the preexisting packfile. Returns a @@ -232,8 +237,6 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, #define PACKDIR_FILE_GARBAGE 4 extern void (*report_garbage)(unsigned seen_bits, const char *path); -struct list_head *get_packed_git_mru(struct repository *r); - /* * Give a rough count of objects in the repository. This sacrifices accuracy * for speed. -- GitLab