diff --git a/Makefile b/Makefile index 4c95affadb5e265a98ffcf485f8c13a6c3a7e167..e3c4bf1b4aaaaf58740ee6716d11a70505b921c4 100644 --- a/Makefile +++ b/Makefile @@ -1136,6 +1136,12 @@ LIB_OBJS += refs/packed-backend.o LIB_OBJS += refs/ref-cache.o LIB_OBJS += refspec.o LIB_OBJS += remote.o +LIB_OBJS += repack.o +LIB_OBJS += repack-cruft.o +LIB_OBJS += repack-filtered.o +LIB_OBJS += repack-geometry.o +LIB_OBJS += repack-midx.o +LIB_OBJS += repack-promisor.o LIB_OBJS += replace-object.o LIB_OBJS += repo-settings.o LIB_OBJS += repository.o diff --git a/builtin/backfill.c b/builtin/backfill.c index 80056abe4730ae6c81da5c6ad9e432be37f6dccc..e80fc1b694df616115ac4b6a538ed22a4e7a772b 100644 --- a/builtin/backfill.c +++ b/builtin/backfill.c @@ -53,7 +53,7 @@ static void download_batch(struct backfill_context *ctx) * We likely have a new packfile. Add it to the packed list to * avoid possible duplicate downloads of the same objects. */ - reprepare_packed_git(ctx->repo); + odb_reprepare(ctx->repo->objects); } static int fill_missing_blobs(const char *path UNUSED, diff --git a/builtin/cat-file.c b/builtin/cat-file.c index fce0b06451c5cdff3036797ab506dc30498bd68a..0ab076aeb30ad98e2e58dc7b444f1ea722202a89 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -854,7 +854,7 @@ static void batch_each_object(struct batch_options *opt, batch_one_object_bitmapped, &payload)) { struct packed_git *pack; - for (pack = get_all_packs(the_repository); pack; pack = pack->next) { + repo_for_each_pack(the_repository, pack) { if (bitmap_index_contains_pack(bitmap, pack) || open_pack_index(pack)) continue; diff --git a/builtin/count-objects.c b/builtin/count-objects.c index a61d3b46aac6273c3a7ad2c9ac5e5855c3c8bd22..18f6e33b6f913f9a5a4e4656bd9bdbf2c040a42a 100644 --- a/builtin/count-objects.c +++ b/builtin/count-objects.c @@ -129,7 +129,7 @@ int cmd_count_objects(int argc, struct strbuf pack_buf = STRBUF_INIT; struct strbuf garbage_buf = STRBUF_INIT; - for (p = get_all_packs(the_repository); p; p = p->next) { + repo_for_each_pack(the_repository, p) { if (!p->pack_local) continue; if (open_pack_index(p)) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 2c35f9345d02d7c711ee522de5973b205c85ef51..fea914cf9eb7f265195a9cc9135d92f507017154 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -897,11 +897,11 @@ static void end_packfile(void) idx_name = keep_pack(create_index()); /* Register the packfile with core git's machinery. */ - new_p = add_packed_git(pack_data->repo, idx_name, strlen(idx_name), 1); + new_p = packfile_store_load_pack(pack_data->repo->objects->packfiles, + idx_name, 1); if (!new_p) die("core git rejected index %s", idx_name); all_packs[pack_id] = new_p; - install_packed_git(the_repository, new_p); free(idx_name); /* Print the boundary */ @@ -952,6 +952,7 @@ static int store_object( struct object_id *oidout, uintmax_t mark) { + struct packfile_store *packs = the_repository->objects->packfiles; void *out, *delta; struct object_entry *e; unsigned char hdr[96]; @@ -975,7 +976,7 @@ static int store_object( if (e->idx.offset) { duplicate_count_by_type[type]++; return 1; - } else if (find_oid_pack(&oid, get_all_packs(the_repository))) { + } else if (find_oid_pack(&oid, packfile_store_get_packs(packs))) { e->type = type; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! */ @@ -1092,6 +1093,7 @@ static void truncate_pack(struct hashfile_checkpoint *checkpoint) static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) { + struct packfile_store *packs = the_repository->objects->packfiles; size_t in_sz = 64 * 1024, out_sz = 64 * 1024; unsigned char *in_buf = xmalloc(in_sz); unsigned char *out_buf = xmalloc(out_sz); @@ -1175,7 +1177,7 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) duplicate_count_by_type[OBJ_BLOB]++; truncate_pack(&checkpoint); - } else if (find_oid_pack(&oid, get_all_packs(the_repository))) { + } else if (find_oid_pack(&oid, packfile_store_get_packs(packs))) { e->type = OBJ_BLOB; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! */ diff --git a/builtin/fsck.c b/builtin/fsck.c index d2eb9d4fbe922bc6ada11573ef61c1731b97e162..b1a650c6731d3268c65c2204af1975fe23e6d226 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -868,18 +868,19 @@ static int mark_packed_for_connectivity(const struct object_id *oid, static int check_pack_rev_indexes(struct repository *r, int show_progress) { struct progress *progress = NULL; + struct packed_git *p; uint32_t pack_count = 0; int res = 0; if (show_progress) { - for (struct packed_git *p = get_all_packs(r); p; p = p->next) + repo_for_each_pack(r, p) pack_count++; progress = start_delayed_progress(the_repository, "Verifying reverse pack-indexes", pack_count); pack_count = 0; } - for (struct packed_git *p = get_all_packs(r); p; p = p->next) { + repo_for_each_pack(r, p) { int load_error = load_pack_revindex_from_disk(p); if (load_error < 0) { @@ -1009,8 +1010,7 @@ int cmd_fsck(int argc, struct progress *progress = NULL; if (show_progress) { - for (p = get_all_packs(the_repository); p; - p = p->next) { + repo_for_each_pack(the_repository, p) { if (open_pack_index(p)) continue; total += p->num_objects; @@ -1019,8 +1019,8 @@ int cmd_fsck(int argc, progress = start_progress(the_repository, _("Checking objects"), total); } - for (p = get_all_packs(the_repository); p; - p = p->next) { + + repo_for_each_pack(the_repository, p) { /* verify gives error messages itself */ if (verify_pack(the_repository, p, fsck_obj_buffer, diff --git a/builtin/gc.c b/builtin/gc.c index 03ae4926b209820a9cd83520d18b067bc714500c..541d7471f190728c5ca3100ae129572d02497e50 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -489,7 +489,7 @@ static struct packed_git *find_base_packs(struct string_list *packs, { struct packed_git *p, *base = NULL; - for (p = get_all_packs(the_repository); p; p = p->next) { + repo_for_each_pack(the_repository, p) { if (!p->pack_local || p->is_cruft) continue; if (limit) { @@ -509,12 +509,12 @@ static struct packed_git *find_base_packs(struct string_list *packs, static int too_many_packs(struct gc_config *cfg) { struct packed_git *p; - int cnt; + int cnt = 0; if (cfg->gc_auto_pack_limit <= 0) return 0; - for (cnt = 0, p = get_all_packs(the_repository); p; p = p->next) { + repo_for_each_pack(the_repository, p) { if (!p->pack_local) continue; if (p->pack_keep) @@ -1042,7 +1042,7 @@ int cmd_gc(int argc, die(FAILED_RUN, "rerere"); report_garbage = report_pack_garbage; - reprepare_packed_git(the_repository); + odb_reprepare(the_repository->objects); if (pack_garbage.nr > 0) { close_object_store(the_repository->objects); clean_pack_garbage(); @@ -1423,9 +1423,9 @@ static int incremental_repack_auto_condition(struct gc_config *cfg UNUSED) if (incremental_repack_auto_limit < 0) return 1; - for (p = get_packed_git(the_repository); - count < incremental_repack_auto_limit && p; - p = p->next) { + repo_for_each_pack(the_repository, p) { + if (count >= incremental_repack_auto_limit) + break; if (!p->multi_pack_index) count++; } @@ -1491,8 +1491,8 @@ static off_t get_auto_pack_size(void) struct packed_git *p; struct repository *r = the_repository; - reprepare_packed_git(r); - for (p = get_all_packs(r); p; p = p->next) { + odb_reprepare(r->objects); + repo_for_each_pack(r, p) { if (p->pack_size > max_size) { second_largest_size = max_size; max_size = p->pack_size; diff --git a/builtin/grep.c b/builtin/grep.c index 1d97eb2a2ab2cbc7792fb37d15238daf11580a4e..53cccf2d25068c664c27643873b8c6d7fe99d848 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1214,7 +1214,7 @@ int cmd_grep(int argc, if (recurse_submodules) repo_read_gitmodules(the_repository, 1); if (startup_info->have_repository) - (void)get_packed_git(the_repository); + packfile_store_prepare(the_repository->objects->packfiles); start_threads(&opt); } else { diff --git a/builtin/index-pack.c b/builtin/index-pack.c index f91c301bba9fbd9171aba65d708d27f9e58c8012..2b78ba7fe4d14a8bcb12165395ff02deeb8c047f 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1640,13 +1640,9 @@ static void final(const char *final_pack_name, const char *curr_pack_name, rename_tmp_packfile(&final_index_name, curr_index_name, &index_name, hash, "idx", 1); - if (do_fsck_object) { - struct packed_git *p; - p = add_packed_git(the_repository, final_index_name, - strlen(final_index_name), 0); - if (p) - install_packed_git(the_repository, p); - } + if (do_fsck_object) + packfile_store_load_pack(the_repository->objects->packfiles, + final_index_name, 0); if (!from_stdin) { printf("%s\n", hash_to_hex(hash)); diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 5856b5f6bfb6235f366e3f0d7558757b21e51173..3a19bddd574ef219ebbabb1cfcd0cced77267fd1 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1748,12 +1748,12 @@ static int want_object_in_pack_mtime(const struct object_id *oid, } } - list_for_each(pos, get_packed_git_mru(the_repository)) { + list_for_each(pos, packfile_store_get_packs_mru(the_repository->objects->packfiles)) { struct packed_git *p = list_entry(pos, struct packed_git, mru); want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime); if (!exclude && want > 0) list_move(&p->mru, - get_packed_git_mru(the_repository)); + packfile_store_get_packs_mru(the_repository->objects->packfiles)); if (want != -1) return want; } @@ -3835,7 +3835,6 @@ static void read_packs_list_from_stdin(struct rev_info *revs) struct string_list include_packs = STRING_LIST_INIT_DUP; struct string_list exclude_packs = STRING_LIST_INIT_DUP; struct string_list_item *item = NULL; - struct packed_git *p; while (strbuf_getline(&buf, stdin) != EOF) { @@ -3855,7 +3854,7 @@ static void read_packs_list_from_stdin(struct rev_info *revs) string_list_sort(&exclude_packs); string_list_remove_duplicates(&exclude_packs, 0); - for (p = get_all_packs(the_repository); p; p = p->next) { + repo_for_each_pack(the_repository, p) { const char *pack_name = pack_basename(p); if ((item = string_list_lookup(&include_packs, pack_name))) @@ -4105,7 +4104,7 @@ static void enumerate_and_traverse_cruft_objects(struct string_list *fresh_packs * Re-mark only the fresh packs as kept so that objects in * unknown packs do not halt the reachability traversal early. */ - for (p = get_all_packs(the_repository); p; p = p->next) + repo_for_each_pack(the_repository, p) p->pack_keep_in_core = 0; mark_pack_kept_in_core(fresh_packs, 1); @@ -4142,7 +4141,7 @@ static void read_cruft_objects(void) string_list_sort(&discard_packs); string_list_sort(&fresh_packs); - for (p = get_all_packs(the_repository); p; p = p->next) { + repo_for_each_pack(the_repository, p) { const char *pack_name = pack_basename(p); struct string_list_item *item; @@ -4390,11 +4389,12 @@ static void add_unreachable_loose_objects(struct rev_info *revs) static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid) { + struct packfile_store *packs = the_repository->objects->packfiles; static struct packed_git *last_found = (void *)1; struct packed_git *p; p = (last_found != (void *)1) ? last_found : - get_all_packs(the_repository); + packfile_store_get_packs(packs); while (p) { if ((!p->pack_local || p->pack_keep || @@ -4404,7 +4404,7 @@ static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid) return 1; } if (p == last_found) - p = get_all_packs(the_repository); + p = packfile_store_get_packs(packs); else p = p->next; if (p == last_found) @@ -4441,7 +4441,7 @@ static void loosen_unused_packed_objects(void) uint32_t loosened_objects_nr = 0; struct object_id oid; - for (p = get_all_packs(the_repository); p; p = p->next) { + repo_for_each_pack(the_repository, p) { if (!p->pack_local || p->pack_keep || p->pack_keep_in_core) continue; @@ -4747,7 +4747,7 @@ static void add_extra_kept_packs(const struct string_list *names) if (!names->nr) return; - for (p = get_all_packs(the_repository); p; p = p->next) { + repo_for_each_pack(the_repository, p) { const char *name = basename(p->pack_name); int i; @@ -5186,7 +5186,8 @@ int cmd_pack_objects(int argc, add_extra_kept_packs(&keep_pack_list); if (ignore_packed_keep_on_disk) { struct packed_git *p; - for (p = get_all_packs(the_repository); p; p = p->next) + + repo_for_each_pack(the_repository, p) if (p->pack_local && p->pack_keep) break; if (!p) /* no keep-able packs found */ @@ -5199,7 +5200,8 @@ int cmd_pack_objects(int argc, * it also covers non-local objects */ struct packed_git *p; - for (p = get_all_packs(the_repository); p; p = p->next) { + + repo_for_each_pack(the_repository, p) { if (!p->pack_local) { have_non_local_packs = 1; break; diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c index fe81c293e3af6f6b1db03c8e27b5283af8c7d3fe..fca7f195d6d4e0295166763580e78d9ffcc78fc3 100644 --- a/builtin/pack-redundant.c +++ b/builtin/pack-redundant.c @@ -566,27 +566,23 @@ static struct pack_list * add_pack(struct packed_git *p) static struct pack_list * add_pack_file(const char *filename) { - struct packed_git *p = get_all_packs(the_repository); + struct packed_git *p; if (strlen(filename) < 40) die("Bad pack filename: %s", filename); - while (p) { + repo_for_each_pack(the_repository, p) if (strstr(p->pack_name, filename)) return add_pack(p); - p = p->next; - } die("Filename %s not found in packed_git", filename); } static void load_all(void) { - struct packed_git *p = get_all_packs(the_repository); + struct packed_git *p; - while (p) { + repo_for_each_pack(the_repository, p) add_pack(p); - p = p->next; - } } int cmd_pack_redundant(int argc, const char **argv, const char *prefix UNUSED, struct repository *repo UNUSED) { diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index 1113137a6f0b3f961fd02822154a41bcdedb6657..c9288a9c7e382bd19193d8b94268e0d0bec2c7f8 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -2389,7 +2389,7 @@ static const char *unpack(int err_fd, struct shallow_info *si) status = finish_command(&child); if (status) return "index-pack abnormal exit"; - reprepare_packed_git(the_repository); + odb_reprepare(the_repository->objects); } return NULL; } diff --git a/builtin/repack.c b/builtin/repack.c index c490a51e9192da493dffcbd9b3051bb1fcd90b7b..ad60c4290d44c284a6ec5ea3a8c60715d658cb23 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -3,27 +3,18 @@ #include "builtin.h" #include "config.h" -#include "dir.h" #include "environment.h" -#include "gettext.h" -#include "hex.h" #include "parse-options.h" #include "path.h" #include "run-command.h" #include "server-info.h" -#include "strbuf.h" #include "string-list.h" -#include "strvec.h" #include "midx.h" #include "packfile.h" #include "prune-packed.h" -#include "odb.h" #include "promisor-remote.h" +#include "repack.h" #include "shallow.h" -#include "pack.h" -#include "pack-bitmap.h" -#include "refs.h" -#include "list-objects-filter-options.h" #define ALL_INTO_ONE 1 #define LOOSEN_UNREACHABLE 2 @@ -33,8 +24,6 @@ #define RETAIN_PACK 2 static int pack_everything; -static int delta_base_offset = 1; -static int pack_kept_objects = -1; static int write_bitmaps = -1; static int use_delta_islands; static int run_update_server_info = 1; @@ -53,31 +42,23 @@ static const char incremental_bitmap_conflict_error[] = N_( "--no-write-bitmap-index or disable the pack.writeBitmaps configuration." ); -struct pack_objects_args { - char *window; - char *window_memory; - char *depth; - char *threads; - unsigned long max_pack_size; - int no_reuse_delta; - int no_reuse_object; - int quiet; - int local; - int name_hash_version; - int path_walk; - struct list_objects_filter_options filter_options; +struct repack_config_ctx { + struct pack_objects_args *po_args; + struct pack_objects_args *cruft_po_args; }; static int repack_config(const char *var, const char *value, const struct config_context *ctx, void *cb) { - struct pack_objects_args *cruft_po_args = cb; + struct repack_config_ctx *repack_ctx = cb; + struct pack_objects_args *po_args = repack_ctx->po_args; + struct pack_objects_args *cruft_po_args = repack_ctx->cruft_po_args; if (!strcmp(var, "repack.usedeltabaseoffset")) { - delta_base_offset = git_config_bool(var, value); + po_args->delta_base_offset = git_config_bool(var, value); return 0; } if (!strcmp(var, "repack.packkeptobjects")) { - pack_kept_objects = git_config_bool(var, value); + po_args->pack_kept_objects = git_config_bool(var, value); return 0; } if (!strcmp(var, "repack.writebitmaps") || @@ -116,1135 +97,10 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -static void pack_objects_args_release(struct pack_objects_args *args) -{ - free(args->window); - free(args->window_memory); - free(args->depth); - free(args->threads); - list_objects_filter_release(&args->filter_options); -} - -struct existing_packs { - struct string_list kept_packs; - struct string_list non_kept_packs; - struct string_list cruft_packs; -}; - -#define EXISTING_PACKS_INIT { \ - .kept_packs = STRING_LIST_INIT_DUP, \ - .non_kept_packs = STRING_LIST_INIT_DUP, \ - .cruft_packs = STRING_LIST_INIT_DUP, \ -} - -static int has_existing_non_kept_packs(const struct existing_packs *existing) -{ - return existing->non_kept_packs.nr || existing->cruft_packs.nr; -} - -static void pack_mark_for_deletion(struct string_list_item *item) -{ - item->util = (void*)((uintptr_t)item->util | DELETE_PACK); -} - -static void pack_unmark_for_deletion(struct string_list_item *item) -{ - item->util = (void*)((uintptr_t)item->util & ~DELETE_PACK); -} - -static int pack_is_marked_for_deletion(struct string_list_item *item) -{ - return (uintptr_t)item->util & DELETE_PACK; -} - -static void pack_mark_retained(struct string_list_item *item) -{ - item->util = (void*)((uintptr_t)item->util | RETAIN_PACK); -} - -static int pack_is_retained(struct string_list_item *item) -{ - return (uintptr_t)item->util & RETAIN_PACK; -} - -static void mark_packs_for_deletion_1(struct string_list *names, - struct string_list *list) -{ - struct string_list_item *item; - const int hexsz = the_hash_algo->hexsz; - - for_each_string_list_item(item, list) { - char *sha1; - size_t len = strlen(item->string); - if (len < hexsz) - continue; - sha1 = item->string + len - hexsz; - - if (pack_is_retained(item)) { - pack_unmark_for_deletion(item); - } else if (!string_list_has_string(names, sha1)) { - /* - * Mark this pack for deletion, which ensures - * that this pack won't be included in a MIDX - * (if `--write-midx` was given) and that we - * will actually delete this pack (if `-d` was - * given). - */ - pack_mark_for_deletion(item); - } - } -} - -static void retain_cruft_pack(struct existing_packs *existing, - struct packed_git *cruft) -{ - struct strbuf buf = STRBUF_INIT; - struct string_list_item *item; - - strbuf_addstr(&buf, pack_basename(cruft)); - strbuf_strip_suffix(&buf, ".pack"); - - item = string_list_lookup(&existing->cruft_packs, buf.buf); - if (!item) - BUG("could not find cruft pack '%s'", pack_basename(cruft)); - - pack_mark_retained(item); - strbuf_release(&buf); -} - -static void mark_packs_for_deletion(struct existing_packs *existing, - struct string_list *names) - -{ - mark_packs_for_deletion_1(names, &existing->non_kept_packs); - mark_packs_for_deletion_1(names, &existing->cruft_packs); -} - -static void remove_redundant_pack(const char *dir_name, const char *base_name) -{ - struct strbuf buf = STRBUF_INIT; - struct odb_source *source = the_repository->objects->sources; - struct multi_pack_index *m = get_multi_pack_index(source); - strbuf_addf(&buf, "%s.pack", base_name); - if (m && source->local && midx_contains_pack(m, buf.buf)) - clear_midx_file(the_repository); - strbuf_insertf(&buf, 0, "%s/", dir_name); - unlink_pack_path(buf.buf, 1); - strbuf_release(&buf); -} - -static void remove_redundant_packs_1(struct string_list *packs) -{ - struct string_list_item *item; - for_each_string_list_item(item, packs) { - if (!pack_is_marked_for_deletion(item)) - continue; - remove_redundant_pack(packdir, item->string); - } -} - -static void remove_redundant_existing_packs(struct existing_packs *existing) -{ - remove_redundant_packs_1(&existing->non_kept_packs); - remove_redundant_packs_1(&existing->cruft_packs); -} - -static void existing_packs_release(struct existing_packs *existing) -{ - string_list_clear(&existing->kept_packs, 0); - string_list_clear(&existing->non_kept_packs, 0); - string_list_clear(&existing->cruft_packs, 0); -} - -/* - * Adds all packs hex strings (pack-$HASH) to either packs->non_kept - * or packs->kept based on whether each pack has a corresponding - * .keep file or not. Packs without a .keep file are not to be kept - * if we are going to pack everything into one file. - */ -static void collect_pack_filenames(struct existing_packs *existing, - const struct string_list *extra_keep) -{ - struct packed_git *p; - struct strbuf buf = STRBUF_INIT; - - for (p = get_all_packs(the_repository); p; p = p->next) { - int i; - const char *base; - - if (!p->pack_local) - continue; - - base = pack_basename(p); - - for (i = 0; i < extra_keep->nr; i++) - if (!fspathcmp(base, extra_keep->items[i].string)) - break; - - strbuf_reset(&buf); - strbuf_addstr(&buf, base); - strbuf_strip_suffix(&buf, ".pack"); - - if ((extra_keep->nr > 0 && i < extra_keep->nr) || p->pack_keep) - string_list_append(&existing->kept_packs, buf.buf); - else if (p->is_cruft) - string_list_append(&existing->cruft_packs, buf.buf); - else - string_list_append(&existing->non_kept_packs, buf.buf); - } - - string_list_sort(&existing->kept_packs); - string_list_sort(&existing->non_kept_packs); - string_list_sort(&existing->cruft_packs); - strbuf_release(&buf); -} - -static void prepare_pack_objects(struct child_process *cmd, - const struct pack_objects_args *args, - const char *out) -{ - strvec_push(&cmd->args, "pack-objects"); - if (args->window) - strvec_pushf(&cmd->args, "--window=%s", args->window); - if (args->window_memory) - strvec_pushf(&cmd->args, "--window-memory=%s", args->window_memory); - if (args->depth) - strvec_pushf(&cmd->args, "--depth=%s", args->depth); - if (args->threads) - strvec_pushf(&cmd->args, "--threads=%s", args->threads); - if (args->max_pack_size) - strvec_pushf(&cmd->args, "--max-pack-size=%lu", args->max_pack_size); - if (args->no_reuse_delta) - strvec_pushf(&cmd->args, "--no-reuse-delta"); - if (args->no_reuse_object) - strvec_pushf(&cmd->args, "--no-reuse-object"); - if (args->name_hash_version) - strvec_pushf(&cmd->args, "--name-hash-version=%d", args->name_hash_version); - if (args->path_walk) - strvec_pushf(&cmd->args, "--path-walk"); - if (args->local) - strvec_push(&cmd->args, "--local"); - if (args->quiet) - strvec_push(&cmd->args, "--quiet"); - if (delta_base_offset) - strvec_push(&cmd->args, "--delta-base-offset"); - strvec_push(&cmd->args, out); - cmd->git_cmd = 1; - cmd->out = -1; -} - -/* - * Write oid to the given struct child_process's stdin, starting it first if - * necessary. - */ -static int write_oid(const struct object_id *oid, - struct packed_git *pack UNUSED, - uint32_t pos UNUSED, void *data) -{ - struct child_process *cmd = data; - - if (cmd->in == -1) { - if (start_command(cmd)) - die(_("could not start pack-objects to repack promisor objects")); - } - - if (write_in_full(cmd->in, oid_to_hex(oid), the_hash_algo->hexsz) < 0 || - write_in_full(cmd->in, "\n", 1) < 0) - die(_("failed to feed promisor objects to pack-objects")); - return 0; -} - -static struct { - const char *name; - unsigned optional:1; -} exts[] = { - {".pack"}, - {".rev", 1}, - {".mtimes", 1}, - {".bitmap", 1}, - {".promisor", 1}, - {".idx"}, -}; - -struct generated_pack_data { - struct tempfile *tempfiles[ARRAY_SIZE(exts)]; -}; - -static struct generated_pack_data *populate_pack_exts(const char *name) -{ - struct stat statbuf; - struct strbuf path = STRBUF_INIT; - struct generated_pack_data *data = xcalloc(1, sizeof(*data)); - int i; - - for (i = 0; i < ARRAY_SIZE(exts); i++) { - strbuf_reset(&path); - strbuf_addf(&path, "%s-%s%s", packtmp, name, exts[i].name); - - if (stat(path.buf, &statbuf)) - continue; - - data->tempfiles[i] = register_tempfile(path.buf); - } - - strbuf_release(&path); - return data; -} - -static int has_pack_ext(const struct generated_pack_data *data, - const char *ext) -{ - int i; - for (i = 0; i < ARRAY_SIZE(exts); i++) { - if (strcmp(exts[i].name, ext)) - continue; - return !!data->tempfiles[i]; - } - BUG("unknown pack extension: '%s'", ext); -} - -static void repack_promisor_objects(const struct pack_objects_args *args, - struct string_list *names) -{ - struct child_process cmd = CHILD_PROCESS_INIT; - FILE *out; - struct strbuf line = STRBUF_INIT; - - prepare_pack_objects(&cmd, args, packtmp); - cmd.in = -1; - - /* - * NEEDSWORK: Giving pack-objects only the OIDs without any ordering - * hints may result in suboptimal deltas in the resulting pack. See if - * the OIDs can be sent with fake paths such that pack-objects can use a - * {type -> existing pack order} ordering when computing deltas instead - * of a {type -> size} ordering, which may produce better deltas. - */ - for_each_packed_object(the_repository, write_oid, &cmd, - FOR_EACH_OBJECT_PROMISOR_ONLY); - - if (cmd.in == -1) { - /* No packed objects; cmd was never started */ - child_process_clear(&cmd); - return; - } - - close(cmd.in); - - out = xfdopen(cmd.out, "r"); - while (strbuf_getline_lf(&line, out) != EOF) { - struct string_list_item *item; - char *promisor_name; - - if (line.len != the_hash_algo->hexsz) - die(_("repack: Expecting full hex object ID lines only from pack-objects.")); - item = string_list_append(names, line.buf); - - /* - * pack-objects creates the .pack and .idx files, but not the - * .promisor file. Create the .promisor file, which is empty. - * - * NEEDSWORK: fetch-pack sometimes generates non-empty - * .promisor files containing the ref names and associated - * hashes at the point of generation of the corresponding - * packfile, but this would not preserve their contents. Maybe - * concatenate the contents of all .promisor files instead of - * just creating a new empty file. - */ - promisor_name = mkpathdup("%s-%s.promisor", packtmp, - line.buf); - write_promisor_file(promisor_name, NULL, 0); - - item->util = populate_pack_exts(item->string); - - free(promisor_name); - } - - fclose(out); - if (finish_command(&cmd)) - die(_("could not finish pack-objects to repack promisor objects")); - strbuf_release(&line); -} - -struct pack_geometry { - struct packed_git **pack; - uint32_t pack_nr, pack_alloc; - uint32_t split; - - int split_factor; -}; - -static uint32_t geometry_pack_weight(struct packed_git *p) -{ - if (open_pack_index(p)) - die(_("cannot open index for %s"), p->pack_name); - return p->num_objects; -} - -static int geometry_cmp(const void *va, const void *vb) -{ - uint32_t aw = geometry_pack_weight(*(struct packed_git **)va), - bw = geometry_pack_weight(*(struct packed_git **)vb); - - if (aw < bw) - return -1; - if (aw > bw) - return 1; - return 0; -} - -static void init_pack_geometry(struct pack_geometry *geometry, - struct existing_packs *existing, - const struct pack_objects_args *args) -{ - struct packed_git *p; - struct strbuf buf = STRBUF_INIT; - - for (p = get_all_packs(the_repository); p; p = p->next) { - if (args->local && !p->pack_local) - /* - * When asked to only repack local packfiles we skip - * over any packfiles that are borrowed from alternate - * object directories. - */ - continue; - - if (!pack_kept_objects) { - /* - * Any pack that has its pack_keep bit set will - * appear in existing->kept_packs below, but - * this saves us from doing a more expensive - * check. - */ - if (p->pack_keep) - continue; - - /* - * The pack may be kept via the --keep-pack - * option; check 'existing->kept_packs' to - * determine whether to ignore it. - */ - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(p)); - strbuf_strip_suffix(&buf, ".pack"); - - if (string_list_has_string(&existing->kept_packs, buf.buf)) - continue; - } - if (p->is_cruft) - continue; - - ALLOC_GROW(geometry->pack, - geometry->pack_nr + 1, - geometry->pack_alloc); - - geometry->pack[geometry->pack_nr] = p; - geometry->pack_nr++; - } - - QSORT(geometry->pack, geometry->pack_nr, geometry_cmp); - strbuf_release(&buf); -} - -static void split_pack_geometry(struct pack_geometry *geometry) -{ - uint32_t i; - uint32_t split; - off_t total_size = 0; - - if (!geometry->pack_nr) { - geometry->split = geometry->pack_nr; - return; - } - - /* - * First, count the number of packs (in descending order of size) which - * already form a geometric progression. - */ - for (i = geometry->pack_nr - 1; i > 0; i--) { - struct packed_git *ours = geometry->pack[i]; - struct packed_git *prev = geometry->pack[i - 1]; - - if (unsigned_mult_overflows(geometry->split_factor, - geometry_pack_weight(prev))) - die(_("pack %s too large to consider in geometric " - "progression"), - prev->pack_name); - - if (geometry_pack_weight(ours) < - geometry->split_factor * geometry_pack_weight(prev)) - break; - } - - split = i; - - if (split) { - /* - * Move the split one to the right, since the top element in the - * last-compared pair can't be in the progression. Only do this - * when we split in the middle of the array (otherwise if we got - * to the end, then the split is in the right place). - */ - split++; - } - - /* - * Then, anything to the left of 'split' must be in a new pack. But, - * creating that new pack may cause packs in the heavy half to no longer - * form a geometric progression. - * - * Compute an expected size of the new pack, and then determine how many - * packs in the heavy half need to be joined into it (if any) to restore - * the geometric progression. - */ - for (i = 0; i < split; i++) { - struct packed_git *p = geometry->pack[i]; - - if (unsigned_add_overflows(total_size, geometry_pack_weight(p))) - die(_("pack %s too large to roll up"), p->pack_name); - total_size += geometry_pack_weight(p); - } - for (i = split; i < geometry->pack_nr; i++) { - struct packed_git *ours = geometry->pack[i]; - - if (unsigned_mult_overflows(geometry->split_factor, - total_size)) - die(_("pack %s too large to roll up"), ours->pack_name); - - if (geometry_pack_weight(ours) < - geometry->split_factor * total_size) { - if (unsigned_add_overflows(total_size, - geometry_pack_weight(ours))) - die(_("pack %s too large to roll up"), - ours->pack_name); - - split++; - total_size += geometry_pack_weight(ours); - } else - break; - } - - geometry->split = split; -} - -static struct packed_git *get_preferred_pack(struct pack_geometry *geometry) -{ - uint32_t i; - - if (!geometry) { - /* - * No geometry means either an all-into-one repack (in which - * case there is only one pack left and it is the largest) or an - * incremental one. - * - * If repacking incrementally, then we could check the size of - * all packs to determine which should be preferred, but leave - * this for later. - */ - return NULL; - } - if (geometry->split == geometry->pack_nr) - return NULL; - - /* - * The preferred pack is the largest pack above the split line. In - * other words, it is the largest pack that does not get rolled up in - * the geometric repack. - */ - for (i = geometry->pack_nr; i > geometry->split; i--) - /* - * A pack that is not local would never be included in a - * multi-pack index. We thus skip over any non-local packs. - */ - if (geometry->pack[i - 1]->pack_local) - return geometry->pack[i - 1]; - - return NULL; -} - -static void geometry_remove_redundant_packs(struct pack_geometry *geometry, - struct string_list *names, - struct existing_packs *existing) -{ - struct strbuf buf = STRBUF_INIT; - uint32_t i; - - for (i = 0; i < geometry->split; i++) { - struct packed_git *p = geometry->pack[i]; - if (string_list_has_string(names, hash_to_hex(p->hash))) - continue; - - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(p)); - strbuf_strip_suffix(&buf, ".pack"); - - if ((p->pack_keep) || - (string_list_has_string(&existing->kept_packs, buf.buf))) - continue; - - remove_redundant_pack(packdir, buf.buf); - } - - strbuf_release(&buf); -} - -static void free_pack_geometry(struct pack_geometry *geometry) -{ - if (!geometry) - return; - - free(geometry->pack); -} - -static int midx_has_unknown_packs(char **midx_pack_names, - size_t midx_pack_names_nr, - struct string_list *include, - struct pack_geometry *geometry, - struct existing_packs *existing) -{ - size_t i; - - string_list_sort(include); - - for (i = 0; i < midx_pack_names_nr; i++) { - const char *pack_name = midx_pack_names[i]; - - /* - * Determine whether or not each MIDX'd pack from the existing - * MIDX (if any) is represented in the new MIDX. For each pack - * in the MIDX, it must either be: - * - * - In the "include" list of packs to be included in the new - * MIDX. Note this function is called before the include - * list is populated with any cruft pack(s). - * - * - Below the geometric split line (if using pack geometry), - * indicating that the pack won't be included in the new - * MIDX, but its contents were rolled up as part of the - * geometric repack. - * - * - In the existing non-kept packs list (if not using pack - * geometry), and marked as non-deleted. - */ - if (string_list_has_string(include, pack_name)) { - continue; - } else if (geometry) { - struct strbuf buf = STRBUF_INIT; - uint32_t j; - - for (j = 0; j < geometry->split; j++) { - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(geometry->pack[j])); - strbuf_strip_suffix(&buf, ".pack"); - strbuf_addstr(&buf, ".idx"); - - if (!strcmp(pack_name, buf.buf)) { - strbuf_release(&buf); - break; - } - } - - strbuf_release(&buf); - - if (j < geometry->split) - continue; - } else { - struct string_list_item *item; - - item = string_list_lookup(&existing->non_kept_packs, - pack_name); - if (item && !pack_is_marked_for_deletion(item)) - continue; - } - - /* - * If we got to this point, the MIDX includes some pack that we - * don't know about. - */ - return 1; - } - - return 0; -} - -struct midx_snapshot_ref_data { - struct tempfile *f; - struct oidset seen; - int preferred; -}; - -static int midx_snapshot_ref_one(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flag UNUSED, void *_data) -{ - struct midx_snapshot_ref_data *data = _data; - struct object_id peeled; - - if (!peel_iterated_oid(the_repository, oid, &peeled)) - oid = &peeled; - - if (oidset_insert(&data->seen, oid)) - return 0; /* already seen */ - - if (odb_read_object_info(the_repository->objects, oid, NULL) != OBJ_COMMIT) - return 0; - - fprintf(data->f->fp, "%s%s\n", data->preferred ? "+" : "", - oid_to_hex(oid)); - - return 0; -} - -static void midx_snapshot_refs(struct tempfile *f) -{ - struct midx_snapshot_ref_data data; - const struct string_list *preferred = bitmap_preferred_tips(the_repository); - - data.f = f; - data.preferred = 0; - oidset_init(&data.seen, 0); - - if (!fdopen_tempfile(f, "w")) - die(_("could not open tempfile %s for writing"), - get_tempfile_path(f)); - - if (preferred) { - struct string_list_item *item; - - data.preferred = 1; - for_each_string_list_item(item, preferred) - refs_for_each_ref_in(get_main_ref_store(the_repository), - item->string, - midx_snapshot_ref_one, &data); - data.preferred = 0; - } - - refs_for_each_ref(get_main_ref_store(the_repository), - midx_snapshot_ref_one, &data); - - if (close_tempfile_gently(f)) { - int save_errno = errno; - delete_tempfile(&f); - errno = save_errno; - die_errno(_("could not close refs snapshot tempfile")); - } - - oidset_clear(&data.seen); -} - -static void midx_included_packs(struct string_list *include, - struct existing_packs *existing, - char **midx_pack_names, - size_t midx_pack_names_nr, - struct string_list *names, - struct pack_geometry *geometry) -{ - struct string_list_item *item; - struct strbuf buf = STRBUF_INIT; - - for_each_string_list_item(item, &existing->kept_packs) { - strbuf_reset(&buf); - strbuf_addf(&buf, "%s.idx", item->string); - string_list_insert(include, buf.buf); - } - - for_each_string_list_item(item, names) { - strbuf_reset(&buf); - strbuf_addf(&buf, "pack-%s.idx", item->string); - string_list_insert(include, buf.buf); - } - - if (geometry->split_factor) { - uint32_t i; - - for (i = geometry->split; i < geometry->pack_nr; i++) { - struct packed_git *p = geometry->pack[i]; - - /* - * The multi-pack index never refers to packfiles part - * of an alternate object database, so we skip these. - * While git-multi-pack-index(1) would silently ignore - * them anyway, this allows us to skip executing the - * command completely when we have only non-local - * packfiles. - */ - if (!p->pack_local) - continue; - - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(p)); - strbuf_strip_suffix(&buf, ".pack"); - strbuf_addstr(&buf, ".idx"); - - string_list_insert(include, buf.buf); - } - } else { - for_each_string_list_item(item, &existing->non_kept_packs) { - if (pack_is_marked_for_deletion(item)) - continue; - - strbuf_reset(&buf); - strbuf_addf(&buf, "%s.idx", item->string); - string_list_insert(include, buf.buf); - } - } - - if (midx_must_contain_cruft || - midx_has_unknown_packs(midx_pack_names, midx_pack_names_nr, - include, geometry, existing)) { - /* - * If there are one or more unknown pack(s) present (see - * midx_has_unknown_packs() for what makes a pack - * "unknown") in the MIDX before the repack, keep them - * as they may be required to form a reachability - * closure if the MIDX is bitmapped. - * - * For example, a cruft pack can be required to form a - * reachability closure if the MIDX is bitmapped and one - * or more of the bitmap's selected commits reaches a - * once-cruft object that was later made reachable. - */ - for_each_string_list_item(item, &existing->cruft_packs) { - /* - * When doing a --geometric repack, there is no - * need to check for deleted packs, since we're - * by definition not doing an ALL_INTO_ONE - * repack (hence no packs will be deleted). - * Otherwise we must check for and exclude any - * packs which are enqueued for deletion. - * - * So we could omit the conditional below in the - * --geometric case, but doing so is unnecessary - * since no packs are marked as pending - * deletion (since we only call - * `mark_packs_for_deletion()` when doing an - * all-into-one repack). - */ - if (pack_is_marked_for_deletion(item)) - continue; - - strbuf_reset(&buf); - strbuf_addf(&buf, "%s.idx", item->string); - string_list_insert(include, buf.buf); - } - } else { - /* - * Modern versions of Git (with the appropriate - * configuration setting) will write new copies of - * once-cruft objects when doing a --geometric repack. - * - * If the MIDX has no cruft pack, new packs written - * during a --geometric repack will not rely on the - * cruft pack to form a reachability closure, so we can - * avoid including them in the MIDX in that case. - */ - ; - } - - strbuf_release(&buf); -} - -static int write_midx_included_packs(struct string_list *include, - struct pack_geometry *geometry, - struct string_list *names, - const char *refs_snapshot, - int show_progress, int write_bitmaps) -{ - struct child_process cmd = CHILD_PROCESS_INIT; - struct string_list_item *item; - struct packed_git *preferred = get_preferred_pack(geometry); - FILE *in; - int ret; - - if (!include->nr) - return 0; - - cmd.in = -1; - cmd.git_cmd = 1; - - strvec_push(&cmd.args, "multi-pack-index"); - strvec_pushl(&cmd.args, "write", "--stdin-packs", NULL); - - if (show_progress) - strvec_push(&cmd.args, "--progress"); - else - strvec_push(&cmd.args, "--no-progress"); - - if (write_bitmaps) - strvec_push(&cmd.args, "--bitmap"); - - if (preferred) - strvec_pushf(&cmd.args, "--preferred-pack=%s", - pack_basename(preferred)); - else if (names->nr) { - /* The largest pack was repacked, meaning that either - * one or two packs exist depending on whether the - * repository has a cruft pack or not. - * - * Select the non-cruft one as preferred to encourage - * pack-reuse among packs containing reachable objects - * over unreachable ones. - * - * (Note we could write multiple packs here if - * `--max-pack-size` was given, but any one of them - * will suffice, so pick the first one.) - */ - for_each_string_list_item(item, names) { - struct generated_pack_data *data = item->util; - if (has_pack_ext(data, ".mtimes")) - continue; - - strvec_pushf(&cmd.args, "--preferred-pack=pack-%s.pack", - item->string); - break; - } - } else { - /* - * No packs were kept, and no packs were written. The - * only thing remaining are .keep packs (unless - * --pack-kept-objects was given). - * - * Set the `--preferred-pack` arbitrarily here. - */ - ; - } - - if (refs_snapshot) - strvec_pushf(&cmd.args, "--refs-snapshot=%s", refs_snapshot); - - ret = start_command(&cmd); - if (ret) - return ret; - - in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, include) - fprintf(in, "%s\n", item->string); - fclose(in); - - return finish_command(&cmd); -} - -static void remove_redundant_bitmaps(struct string_list *include, - const char *packdir) -{ - struct strbuf path = STRBUF_INIT; - struct string_list_item *item; - size_t packdir_len; - - strbuf_addstr(&path, packdir); - strbuf_addch(&path, '/'); - packdir_len = path.len; - - /* - * Remove any pack bitmaps corresponding to packs which are now - * included in the MIDX. - */ - for_each_string_list_item(item, include) { - strbuf_addstr(&path, item->string); - strbuf_strip_suffix(&path, ".idx"); - strbuf_addstr(&path, ".bitmap"); - - if (unlink(path.buf) && errno != ENOENT) - warning_errno(_("could not remove stale bitmap: %s"), - path.buf); - - strbuf_setlen(&path, packdir_len); - } - strbuf_release(&path); -} - -static int finish_pack_objects_cmd(struct child_process *cmd, - struct string_list *names, - int local) -{ - FILE *out; - struct strbuf line = STRBUF_INIT; - - out = xfdopen(cmd->out, "r"); - while (strbuf_getline_lf(&line, out) != EOF) { - struct string_list_item *item; - - if (line.len != the_hash_algo->hexsz) - die(_("repack: Expecting full hex object ID lines only " - "from pack-objects.")); - /* - * Avoid putting packs written outside of the repository in the - * list of names. - */ - if (local) { - item = string_list_append(names, line.buf); - item->util = populate_pack_exts(line.buf); - } - } - fclose(out); - - strbuf_release(&line); - - return finish_command(cmd); -} - -static int write_filtered_pack(const struct pack_objects_args *args, - const char *destination, - const char *pack_prefix, - struct existing_packs *existing, - struct string_list *names) -{ - struct child_process cmd = CHILD_PROCESS_INIT; - struct string_list_item *item; - FILE *in; - int ret; - const char *caret; - const char *scratch; - int local = skip_prefix(destination, packdir, &scratch); - - prepare_pack_objects(&cmd, args, destination); - - strvec_push(&cmd.args, "--stdin-packs"); - - if (!pack_kept_objects) - strvec_push(&cmd.args, "--honor-pack-keep"); - for_each_string_list_item(item, &existing->kept_packs) - strvec_pushf(&cmd.args, "--keep-pack=%s", item->string); - - cmd.in = -1; - - ret = start_command(&cmd); - if (ret) - return ret; - - /* - * Here 'names' contains only the pack(s) that were just - * written, which is exactly the packs we want to keep. Also - * 'existing_kept_packs' already contains the packs in - * 'keep_pack_list'. - */ - in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, names) - fprintf(in, "^%s-%s.pack\n", pack_prefix, item->string); - for_each_string_list_item(item, &existing->non_kept_packs) - fprintf(in, "%s.pack\n", item->string); - for_each_string_list_item(item, &existing->cruft_packs) - fprintf(in, "%s.pack\n", item->string); - caret = pack_kept_objects ? "" : "^"; - for_each_string_list_item(item, &existing->kept_packs) - fprintf(in, "%s%s.pack\n", caret, item->string); - fclose(in); - - return finish_pack_objects_cmd(&cmd, names, local); -} - -static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, - struct existing_packs *existing) -{ - struct packed_git *p; - struct strbuf buf = STRBUF_INIT; - size_t i; - - for (p = get_all_packs(the_repository); p; p = p->next) { - if (!(p->is_cruft && p->pack_local)) - continue; - - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(p)); - strbuf_strip_suffix(&buf, ".pack"); - - if (!string_list_has_string(&existing->cruft_packs, buf.buf)) - continue; - - if (p->pack_size < combine_cruft_below_size) { - fprintf(in, "-%s\n", pack_basename(p)); - } else { - retain_cruft_pack(existing, p); - fprintf(in, "%s\n", pack_basename(p)); - } - } - - for (i = 0; i < existing->non_kept_packs.nr; i++) - fprintf(in, "-%s.pack\n", - existing->non_kept_packs.items[i].string); - - strbuf_release(&buf); -} - -static int write_cruft_pack(const struct pack_objects_args *args, - const char *destination, - const char *pack_prefix, - const char *cruft_expiration, - unsigned long combine_cruft_below_size, - struct string_list *names, - struct existing_packs *existing) -{ - struct child_process cmd = CHILD_PROCESS_INIT; - struct string_list_item *item; - FILE *in; - int ret; - const char *scratch; - int local = skip_prefix(destination, packdir, &scratch); - - prepare_pack_objects(&cmd, args, destination); - - strvec_push(&cmd.args, "--cruft"); - if (cruft_expiration) - strvec_pushf(&cmd.args, "--cruft-expiration=%s", - cruft_expiration); - - strvec_push(&cmd.args, "--honor-pack-keep"); - strvec_push(&cmd.args, "--non-empty"); - - cmd.in = -1; - - ret = start_command(&cmd); - if (ret) - return ret; - - /* - * names has a confusing double use: it both provides the list - * of just-written new packs, and accepts the name of the cruft - * pack we are writing. - * - * By the time it is read here, it contains only the pack(s) - * that were just written, which is exactly the set of packs we - * want to consider kept. - * - * If `--expire-to` is given, the double-use served by `names` - * ensures that the pack written to `--expire-to` excludes any - * objects contained in the cruft pack. - */ - in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, names) - fprintf(in, "%s-%s.pack\n", pack_prefix, item->string); - if (combine_cruft_below_size && !cruft_expiration) { - combine_small_cruft_packs(in, combine_cruft_below_size, - existing); - } else { - for_each_string_list_item(item, &existing->non_kept_packs) - fprintf(in, "-%s.pack\n", item->string); - for_each_string_list_item(item, &existing->cruft_packs) - fprintf(in, "-%s.pack\n", item->string); - } - for_each_string_list_item(item, &existing->kept_packs) - fprintf(in, "%s.pack\n", item->string); - fclose(in); - - return finish_pack_objects_cmd(&cmd, names, local); -} - -static const char *find_pack_prefix(const char *packdir, const char *packtmp) -{ - const char *pack_prefix; - if (!skip_prefix(packtmp, packdir, &pack_prefix)) - die(_("pack prefix %s does not begin with objdir %s"), - packtmp, packdir); - if (*pack_prefix == '/') - pack_prefix++; - return pack_prefix; -} - int cmd_repack(int argc, const char **argv, const char *prefix, - struct repository *repo UNUSED) + struct repository *repo) { struct child_process cmd = CHILD_PROCESS_INIT; struct string_list_item *item; @@ -1252,18 +108,18 @@ int cmd_repack(int argc, struct existing_packs existing = EXISTING_PACKS_INIT; struct pack_geometry geometry = { 0 }; struct tempfile *refs_snapshot = NULL; - int i, ext, ret; + struct write_pack_opts opts = { 0 }; + int i, ret; int show_progress; - char **midx_pack_names = NULL; - size_t midx_pack_names_nr = 0; /* variables to be filled by option parsing */ + struct repack_config_ctx config_ctx; int delete_redundant = 0; const char *unpack_unreachable = NULL; int keep_unreachable = 0; struct string_list keep_pack_list = STRING_LIST_INIT_NODUP; - struct pack_objects_args po_args = { 0 }; - struct pack_objects_args cruft_po_args = { 0 }; + struct pack_objects_args po_args = PACK_OBJECTS_ARGS_INIT; + struct pack_objects_args cruft_po_args = PACK_OBJECTS_ARGS_INIT; int write_midx = 0; const char *cruft_expiration = NULL; const char *expire_to = NULL; @@ -1324,7 +180,7 @@ int cmd_repack(int argc, OPT_UNSIGNED(0, "max-pack-size", &po_args.max_pack_size, N_("maximum size of each packfile")), OPT_PARSE_LIST_OBJECTS_FILTER(&po_args.filter_options), - OPT_BOOL(0, "pack-kept-objects", &pack_kept_objects, + OPT_BOOL(0, "pack-kept-objects", &po_args.pack_kept_objects, N_("repack objects in packs marked with .keep")), OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"), N_("do not repack this pack")), @@ -1341,7 +197,11 @@ int cmd_repack(int argc, list_objects_filter_init(&po_args.filter_options); - repo_config(the_repository, repack_config, &cruft_po_args); + memset(&config_ctx, 0, sizeof(config_ctx)); + config_ctx.po_args = &po_args; + config_ctx.cruft_po_args = &cruft_po_args; + + repo_config(repo, repack_config, &config_ctx); argc = parse_options(argc, argv, prefix, builtin_repack_options, git_repack_usage, 0); @@ -1351,7 +211,7 @@ int cmd_repack(int argc, po_args.depth = xstrdup_or_null(opt_depth); po_args.threads = xstrdup_or_null(opt_threads); - if (delete_redundant && the_repository->repository_format_precious_objects) + if (delete_redundant && repo->repository_format_precious_objects) die(_("cannot delete packs in a precious-objects repo")); die_for_incompatible_opt3(unpack_unreachable || (pack_everything & LOOSEN_UNREACHABLE), "-A", @@ -1366,14 +226,14 @@ int cmd_repack(int argc, (!(pack_everything & ALL_INTO_ONE) || !is_bare_repository())) write_bitmaps = 0; } - if (pack_kept_objects < 0) - pack_kept_objects = write_bitmaps > 0 && !write_midx; + if (po_args.pack_kept_objects < 0) + po_args.pack_kept_objects = write_bitmaps > 0 && !write_midx; if (write_bitmaps && !(pack_everything & ALL_INTO_ONE) && !write_midx) die(_(incremental_bitmap_conflict_error)); if (write_bitmaps && po_args.local && - odb_has_alternates(the_repository->objects)) { + odb_has_alternates(repo->objects)) { /* * When asked to do a local repack, but we have * packfiles that are inherited from an alternate, then @@ -1388,26 +248,28 @@ int cmd_repack(int argc, if (write_midx && write_bitmaps) { struct strbuf path = STRBUF_INIT; - strbuf_addf(&path, "%s/%s_XXXXXX", repo_get_object_directory(the_repository), + strbuf_addf(&path, "%s/%s_XXXXXX", + repo_get_object_directory(repo), "bitmap-ref-tips"); refs_snapshot = xmks_tempfile(path.buf); - midx_snapshot_refs(refs_snapshot); + midx_snapshot_refs(repo, refs_snapshot); strbuf_release(&path); } - packdir = mkpathdup("%s/pack", repo_get_object_directory(the_repository)); + packdir = mkpathdup("%s/pack", repo_get_object_directory(repo)); packtmp_name = xstrfmt(".tmp-%d-pack", (int)getpid()); packtmp = mkpathdup("%s/%s", packdir, packtmp_name); - collect_pack_filenames(&existing, &keep_pack_list); + existing.repo = repo; + existing_packs_collect(&existing, &keep_pack_list); if (geometry.split_factor) { if (pack_everything) die(_("options '%s' and '%s' cannot be used together"), "--geometric", "-A/-a"); - init_pack_geometry(&geometry, &existing, &po_args); - split_pack_geometry(&geometry); + pack_geometry_init(&geometry, &existing, &po_args); + pack_geometry_split(&geometry); } prepare_pack_objects(&cmd, &po_args, packtmp); @@ -1415,8 +277,6 @@ int cmd_repack(int argc, show_progress = !po_args.quiet && isatty(2); strvec_push(&cmd.args, "--keep-true-parents"); - if (!pack_kept_objects) - strvec_push(&cmd.args, "--honor-pack-keep"); for (i = 0; i < keep_pack_list.nr; i++) strvec_pushf(&cmd.args, "--keep-pack=%s", keep_pack_list.items[i].string); @@ -1436,7 +296,7 @@ int cmd_repack(int argc, strvec_push(&cmd.args, "--reflog"); strvec_push(&cmd.args, "--indexed-objects"); } - if (repo_has_promisor_remote(the_repository)) + if (repo_has_promisor_remote(repo)) strvec_push(&cmd.args, "--exclude-promisor-objects"); if (!write_midx) { if (write_bitmaps > 0) @@ -1448,9 +308,9 @@ int cmd_repack(int argc, strvec_push(&cmd.args, "--delta-islands"); if (pack_everything & ALL_INTO_ONE) { - repack_promisor_objects(&po_args, &names); + repack_promisor_objects(repo, &po_args, &names, packtmp); - if (has_existing_non_kept_packs(&existing) && + if (existing_packs_has_non_kept(&existing) && delete_redundant && !(pack_everything & PACK_CRUFT)) { for_each_string_list_item(item, &names) { @@ -1512,7 +372,10 @@ int cmd_repack(int argc, fclose(in); } - ret = finish_pack_objects_cmd(&cmd, &names, 1); + opts.packdir = packdir; + opts.destination = packdir; + opts.packtmp = packtmp; + ret = finish_pack_objects_cmd(repo->hash_algo, &opts, &cmd, &names); if (ret) goto cleanup; @@ -1532,12 +395,17 @@ int cmd_repack(int argc, * midx_has_unknown_packs() will make the decision for * us. */ - if (!get_multi_pack_index(the_repository->objects->sources)) + if (!get_multi_pack_index(repo->objects->sources)) midx_must_contain_cruft = 1; } if (pack_everything & PACK_CRUFT) { - const char *pack_prefix = find_pack_prefix(packdir, packtmp); + struct write_pack_opts opts = { + .po_args = &cruft_po_args, + .destination = packtmp, + .packtmp = packtmp, + .packdir = packdir, + }; if (!cruft_po_args.window) cruft_po_args.window = xstrdup_or_null(po_args.window); @@ -1552,9 +420,10 @@ int cmd_repack(int argc, cruft_po_args.local = po_args.local; cruft_po_args.quiet = po_args.quiet; + cruft_po_args.delta_base_offset = po_args.delta_base_offset; + cruft_po_args.pack_kept_objects = 0; - ret = write_cruft_pack(&cruft_po_args, packtmp, pack_prefix, - cruft_expiration, + ret = write_cruft_pack(&opts, cruft_expiration, combine_cruft_below_size, &names, &existing); if (ret) @@ -1589,11 +458,8 @@ int cmd_repack(int argc, * pack, but rather removing all cruft packs from the * main repository regardless of size. */ - ret = write_cruft_pack(&cruft_po_args, expire_to, - pack_prefix, - NULL, - 0ul, - &names, + opts.destination = expire_to; + ret = write_cruft_pack(&opts, NULL, 0ul, &names, &existing); if (ret) goto cleanup; @@ -1601,99 +467,63 @@ int cmd_repack(int argc, } if (po_args.filter_options.choice) { - if (!filter_to) - filter_to = packtmp; - - ret = write_filtered_pack(&po_args, - filter_to, - find_pack_prefix(packdir, packtmp), - &existing, - &names); + struct write_pack_opts opts = { + .po_args = &po_args, + .destination = filter_to, + .packdir = packdir, + .packtmp = packtmp, + }; + + if (!opts.destination) + opts.destination = packtmp; + + ret = write_filtered_pack(&opts, &existing, &names); if (ret) goto cleanup; } string_list_sort(&names); - if (get_multi_pack_index(the_repository->objects->sources)) { - struct multi_pack_index *m = - get_multi_pack_index(the_repository->objects->sources); - - ALLOC_ARRAY(midx_pack_names, - m->num_packs + m->num_packs_in_base); - - for (; m; m = m->base_midx) - for (uint32_t i = 0; i < m->num_packs; i++) - midx_pack_names[midx_pack_names_nr++] = - xstrdup(m->pack_names[i]); - } - - close_object_store(the_repository->objects); + close_object_store(repo->objects); /* * Ok we have prepared all new packfiles. */ - for_each_string_list_item(item, &names) { - struct generated_pack_data *data = item->util; - - for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { - char *fname; - - fname = mkpathdup("%s/pack-%s%s", - packdir, item->string, exts[ext].name); - - if (data->tempfiles[ext]) { - const char *fname_old = get_tempfile_path(data->tempfiles[ext]); - struct stat statbuffer; - - if (!stat(fname_old, &statbuffer)) { - statbuffer.st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); - chmod(fname_old, statbuffer.st_mode); - } - - if (rename_tempfile(&data->tempfiles[ext], fname)) - die_errno(_("renaming pack to '%s' failed"), fname); - } else if (!exts[ext].optional) - die(_("pack-objects did not write a '%s' file for pack %s-%s"), - exts[ext].name, packtmp, item->string); - else if (unlink(fname) < 0 && errno != ENOENT) - die_errno(_("could not unlink: %s"), fname); - - free(fname); - } - } + for_each_string_list_item(item, &names) + generated_pack_install((struct generated_pack *)item->util, + item->string, packdir, packtmp); /* End of pack replacement. */ if (delete_redundant && pack_everything & ALL_INTO_ONE) - mark_packs_for_deletion(&existing, &names); + existing_packs_mark_for_deletion(&existing, &names); if (write_midx) { - struct string_list include = STRING_LIST_INIT_DUP; - midx_included_packs(&include, &existing, midx_pack_names, - midx_pack_names_nr, &names, &geometry); - - ret = write_midx_included_packs(&include, &geometry, &names, - refs_snapshot ? get_tempfile_path(refs_snapshot) : NULL, - show_progress, write_bitmaps > 0); - - if (!ret && write_bitmaps) - remove_redundant_bitmaps(&include, packdir); - - string_list_clear(&include, 0); + struct repack_write_midx_opts opts = { + .existing = &existing, + .geometry = &geometry, + .names = &names, + .refs_snapshot = refs_snapshot ? get_tempfile_path(refs_snapshot) : NULL, + .packdir = packdir, + .show_progress = show_progress, + .write_bitmaps = write_bitmaps > 0, + .midx_must_contain_cruft = midx_must_contain_cruft + }; + + ret = write_midx_included_packs(&opts); if (ret) goto cleanup; } - reprepare_packed_git(the_repository); + odb_reprepare(repo->objects); if (delete_redundant) { int opts = 0; - remove_redundant_existing_packs(&existing); + existing_packs_remove_redundant(&existing, packdir); if (geometry.split_factor) - geometry_remove_redundant_packs(&geometry, &names, - &existing); + pack_geometry_remove_redundant(&geometry, &names, + &existing, packdir); if (show_progress) opts |= PRUNE_PACKED_VERBOSE; prune_packed_objects(opts); @@ -1701,18 +531,18 @@ int cmd_repack(int argc, if (!keep_unreachable && (!(pack_everything & LOOSEN_UNREACHABLE) || unpack_unreachable) && - is_repository_shallow(the_repository)) + is_repository_shallow(repo)) prune_shallow(PRUNE_QUICK); } if (run_update_server_info) - update_server_info(the_repository, 0); + update_server_info(repo, 0); if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX, 0)) { unsigned flags = 0; if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX_WRITE_INCREMENTAL, 0)) flags |= MIDX_WRITE_INCREMENTAL; - write_midx_file(the_repository->objects->sources, + write_midx_file(repo->objects->sources, NULL, NULL, flags); } @@ -1720,10 +550,7 @@ int cmd_repack(int argc, string_list_clear(&keep_pack_list, 0); string_list_clear(&names, 1); existing_packs_release(&existing); - free_pack_geometry(&geometry); - for (size_t i = 0; i < midx_pack_names_nr; i++) - free(midx_pack_names[i]); - free(midx_pack_names); + pack_geometry_release(&geometry); pack_objects_args_release(&po_args); pack_objects_args_release(&cruft_po_args); diff --git a/bulk-checkin.c b/bulk-checkin.c index 124c49306769a5bc0fa568dc606ce5eb7370bf7e..2713a0099966c3b1762b76ecb2e768207b93a980 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -95,7 +95,7 @@ static void flush_bulk_checkin_packfile(struct odb_transaction *transaction) strbuf_release(&packname); /* Make objects we just wrote available to ourselves */ - reprepare_packed_git(repo); + odb_reprepare(repo->objects); } /* diff --git a/connected.c b/connected.c index 18c13245d8e40c66a22fa5d7c2d139b85e5113c2..79403108dd8f574af7fcdb478975b17ceba76d25 100644 --- a/connected.c +++ b/connected.c @@ -72,11 +72,11 @@ int check_connected(oid_iterate_fn fn, void *cb_data, * Before checking for promisor packs, be sure we have the * latest pack-files loaded into memory. */ - reprepare_packed_git(the_repository); + odb_reprepare(the_repository->objects); do { struct packed_git *p; - for (p = get_all_packs(the_repository); p; p = p->next) { + repo_for_each_pack(the_repository, p) { if (!p->pack_promisor) continue; if (find_pack_entry_one(oid, p)) diff --git a/fetch-pack.c b/fetch-pack.c index 6ed566295189dcfa57291e55fc452b1a28d39e4e..fe7a84bf2f97fa89882bbfda079d346c7de21ddd 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1983,7 +1983,7 @@ static void update_shallow(struct fetch_pack_args *args, * remote is shallow, but this is a clone, there are * no objects in repo to worry about. Accept any * shallow points that exist in the pack (iow in repo - * after get_pack() and reprepare_packed_git()) + * after get_pack() and odb_reprepare()) */ struct oid_array extra = OID_ARRAY_INIT; struct object_id *oid = si->shallow->oid; @@ -2108,7 +2108,7 @@ struct ref *fetch_pack(struct fetch_pack_args *args, ref_cpy = do_fetch_pack(args, fd, ref, sought, nr_sought, &si, pack_lockfiles); } - reprepare_packed_git(the_repository); + odb_reprepare(the_repository->objects); if (!args->cloning && args->deepen) { struct check_connected_options opt = CHECK_CONNECTED_INIT; diff --git a/http-backend.c b/http-backend.c index d5dfe762bb51783cf6f81de8cfa79075d26075c3..52f0483dd309d73a1866a1b63161df0fe63d389f 100644 --- a/http-backend.c +++ b/http-backend.c @@ -608,13 +608,13 @@ static void get_info_packs(struct strbuf *hdr, char *arg UNUSED) size_t cnt = 0; select_getanyfile(hdr); - for (p = get_all_packs(the_repository); p; p = p->next) { + repo_for_each_pack(the_repository, p) { if (p->pack_local) cnt++; } strbuf_grow(&buf, cnt * 53 + 2); - for (p = get_all_packs(the_repository); p; p = p->next) { + repo_for_each_pack(the_repository, p) { if (p->pack_local) strbuf_addf(&buf, "P %s\n", p->pack_name + objdirlen + 6); } diff --git a/http.c b/http.c index a7d55dcbbab769859c26da9e1e13eac594e9379e..17130823f006f2cc351a186cba0030d4c08769bc 100644 --- a/http.c +++ b/http.c @@ -2424,7 +2424,7 @@ static int fetch_and_setup_pack_index(struct packed_git **packs_head, * If we already have the pack locally, no need to fetch its index or * even add it to list; we already have all of its objects. */ - for (p = get_all_packs(the_repository); p; p = p->next) { + repo_for_each_pack(the_repository, p) { if (hasheq(p->hash, sha1, the_repository->hash_algo)) return 0; } @@ -2549,7 +2549,7 @@ void http_install_packfile(struct packed_git *p, lst = &((*lst)->next); *lst = (*lst)->next; - install_packed_git(the_repository, p); + packfile_store_add_pack(the_repository->objects->packfiles, p); } struct http_pack_request *new_http_pack_request( diff --git a/http.h b/http.h index 36202139f451ffa1e5b2cbaeae49d0875a4f76a7..e5a5380c6c384e56966d12d0c4d5a9d2e12654c0 100644 --- a/http.h +++ b/http.h @@ -210,7 +210,7 @@ int finish_http_pack_request(struct http_pack_request *preq); void release_http_pack_request(struct http_pack_request *preq); /* - * Remove p from the given list, and invoke install_packed_git() on it. + * Remove p from the given list, and invoke packfile_store_add_pack() on it. * * This is a convenience function for users that have obtained a list of packs * from http_get_info_packs() and have chosen a specific pack to fetch. diff --git a/meson.build b/meson.build index b3dfcc04972601cbc3d05222f72070ddf0c2356f..39152b37ba10a646a7980f81de496aba716c61a2 100644 --- a/meson.build +++ b/meson.build @@ -462,6 +462,12 @@ libgit_sources = [ 'reftable/tree.c', 'reftable/writer.c', 'remote.c', + 'repack.c', + 'repack-cruft.c', + 'repack-filtered.c', + 'repack-geometry.c', + 'repack-midx.c', + 'repack-promisor.c', 'replace-object.c', 'repo-settings.c', 'repository.c', diff --git a/midx.c b/midx.c index 7726c13d7e7bc04b9805589b37dd4b3dfa942910..1d6269f957e7819fb62963cde191360422b29229 100644 --- a/midx.c +++ b/midx.c @@ -93,6 +93,12 @@ static int midx_read_object_offsets(const unsigned char *chunk_start, return 0; } +struct multi_pack_index *get_multi_pack_index(struct odb_source *source) +{ + packfile_store_prepare(source->odb->packfiles); + return source->midx; +} + static struct multi_pack_index *load_multi_pack_index_one(struct odb_source *source, const char *midx_name) { @@ -443,7 +449,6 @@ int prepare_midx_pack(struct multi_pack_index *m, { struct repository *r = m->source->odb->repo; struct strbuf pack_name = STRBUF_INIT; - struct strbuf key = STRBUF_INIT; struct packed_git *p; pack_int_id = midx_for_pack(&m, pack_int_id); @@ -455,25 +460,11 @@ int prepare_midx_pack(struct multi_pack_index *m, strbuf_addf(&pack_name, "%s/pack/%s", m->source->path, m->pack_names[pack_int_id]); - - /* pack_map holds the ".pack" name, but we have the .idx */ - strbuf_addbuf(&key, &pack_name); - strbuf_strip_suffix(&key, ".idx"); - strbuf_addstr(&key, ".pack"); - p = hashmap_get_entry_from_hash(&r->objects->pack_map, - strhash(key.buf), key.buf, - struct packed_git, packmap_ent); - if (!p) { - p = add_packed_git(r, pack_name.buf, pack_name.len, - m->source->local); - if (p) { - install_packed_git(r, p); - list_add_tail(&p->mru, &r->objects->packed_git_mru); - } - } - + p = packfile_store_load_pack(r->objects->packfiles, + pack_name.buf, m->source->local); + if (p) + list_add_tail(&p->mru, &r->objects->packfiles->mru); strbuf_release(&pack_name); - strbuf_release(&key); if (!p) { m->packs[pack_int_id] = MIDX_PACK_ERROR; diff --git a/midx.h b/midx.h index e241d2d6900bc3a2946e973a3d37df74aa3cbc4f..6e54d73503d56088923ceff1c69e1f8d6013ac12 100644 --- a/midx.h +++ b/midx.h @@ -94,6 +94,7 @@ void get_midx_chain_filename(struct odb_source *source, struct strbuf *out); void get_split_midx_filename_ext(struct odb_source *source, struct strbuf *buf, const unsigned char *hash, const char *ext); +struct multi_pack_index *get_multi_pack_index(struct odb_source *source); struct multi_pack_index *load_multi_pack_index(struct odb_source *source); int prepare_midx_pack(struct multi_pack_index *m, uint32_t pack_int_id); struct packed_git *nth_midxed_pack(struct multi_pack_index *m, diff --git a/object-name.c b/object-name.c index 7774991d2813e86d08cc0bd20f370148a36ffe59..766c757042a38950f7b498dc7fa92b72befea264 100644 --- a/object-name.c +++ b/object-name.c @@ -213,9 +213,11 @@ static void find_short_packed_object(struct disambiguate_state *ds) unique_in_midx(m, ds); } - for (p = get_packed_git(ds->repo); p && !ds->ambiguous; - p = p->next) + repo_for_each_pack(ds->repo, p) { + if (ds->ambiguous) + break; unique_in_pack(p, ds); + } } static int finish_object_disambiguation(struct disambiguate_state *ds, @@ -596,7 +598,7 @@ static enum get_oid_result get_short_oid(struct repository *r, * or migrated from loose to packed. */ if (status == MISSING_OBJECT) { - reprepare_packed_git(r); + odb_reprepare(r->objects); find_short_object_filename(&ds); find_short_packed_object(&ds); status = finish_object_disambiguation(&ds, oid); @@ -805,7 +807,7 @@ static void find_abbrev_len_packed(struct min_abbrev_data *mad) find_abbrev_len_for_midx(m, mad); } - for (p = get_packed_git(mad->repo); p; p = p->next) + repo_for_each_pack(mad->repo, p) find_abbrev_len_for_pack(p, mad); } diff --git a/odb.c b/odb.c index 75c443fe665be5ba3c857e420d61fa80b83421b9..65a6cc67b61ccf99e7b7293906e6772f2927da9b 100644 --- a/odb.c +++ b/odb.c @@ -694,7 +694,7 @@ static int do_oid_object_info_extended(struct object_database *odb, /* Not a loose object; someone else may have just packed it. */ if (!(flags & OBJECT_INFO_QUICK)) { - reprepare_packed_git(odb->repo); + odb_reprepare(odb->repo->objects); if (find_pack_entry(odb->repo, real, &e)) break; } @@ -996,8 +996,7 @@ struct object_database *odb_new(struct repository *repo) memset(o, 0, sizeof(*o)); o->repo = repo; - INIT_LIST_HEAD(&o->packed_git_mru); - hashmap_init(&o->pack_map, pack_map_entry_cmp, NULL, 0); + o->packfiles = packfile_store_new(o); pthread_mutex_init(&o->replace_mutex, NULL); string_list_init_dup(&o->submodule_source_paths); return o; @@ -1035,19 +1034,34 @@ void odb_clear(struct object_database *o) free((char *) o->cached_objects[i].value.buf); FREE_AND_NULL(o->cached_objects); - INIT_LIST_HEAD(&o->packed_git_mru); close_object_store(o); + packfile_store_free(o->packfiles); + o->packfiles = NULL; + + string_list_clear(&o->submodule_source_paths, 0); +} + +void odb_reprepare(struct object_database *o) +{ + struct odb_source *source; + + obj_read_lock(); /* - * `close_object_store()` only closes the packfiles, but doesn't free - * them. We thus have to do this manually. + * Reprepare alt odbs, in case the alternates file was modified + * during the course of this process. This only _adds_ odbs to + * the linked list, so existing odbs will continue to exist for + * the lifetime of the process. */ - for (struct packed_git *p = o->packed_git, *next; p; p = next) { - next = p->next; - free(p); - } - o->packed_git = NULL; + o->loaded_alternates = 0; + odb_prepare_alternates(o); - hashmap_clear(&o->pack_map); - string_list_clear(&o->submodule_source_paths, 0); + for (source = o->sources; source; source = source->next) + odb_clear_loose_cache(source); + + o->approximate_object_count_valid = 0; + + packfile_store_reprepare(o->packfiles); + + obj_read_unlock(); } diff --git a/odb.h b/odb.h index bd7374f92f492b6e0b75178e04f8b65b336acf00..cf96a5e834e096c3e545615c833a064d6e8c96f0 100644 --- a/odb.h +++ b/odb.h @@ -3,7 +3,6 @@ #include "hashmap.h" #include "object.h" -#include "list.h" #include "oidset.h" #include "oidmap.h" #include "string-list.h" @@ -91,6 +90,7 @@ struct odb_source { }; struct packed_git; +struct packfile_store; struct cached_object_entry; struct odb_transaction; @@ -139,20 +139,8 @@ struct object_database { struct commit_graph *commit_graph; unsigned commit_graph_attempted : 1; /* if loading has been attempted */ - /* - * private data - * - * should only be accessed directly by packfile.c - */ - - struct packed_git *packed_git; - /* A most-recently-used ordered version of the packed_git list. */ - struct list_head packed_git_mru; - - struct { - struct packed_git **packs; - unsigned flags; - } kept_pack_cache; + /* Should only be accessed directly by packfile.c and midx.c. */ + struct packfile_store *packfiles; /* * This is meant to hold a *small* number of objects that you would @@ -163,12 +151,6 @@ struct object_database { struct cached_object_entry *cached_objects; size_t cached_object_nr, cached_object_alloc; - /* - * A map of packfiles to packed_git structs for tracking which - * packs have been loaded already. - */ - struct hashmap pack_map; - /* * A fast, rough count of the number of objects in the repository. * These two fields are not meant for direct access. Use @@ -177,12 +159,6 @@ struct object_database { unsigned long approximate_object_count; unsigned approximate_object_count_valid : 1; - /* - * Whether packed_git has already been populated with this repository's - * packs. - */ - unsigned packed_git_initialized : 1; - /* * Submodule source paths that will be added as additional sources to * allow lookup of submodule objects via the main object database. @@ -193,6 +169,12 @@ struct object_database { struct object_database *odb_new(struct repository *repo); void odb_clear(struct object_database *o); +/* + * Clear caches, reload alternates and then reload object sources so that new + * objects may become accessible. + */ +void odb_reprepare(struct object_database *o); + /* * Find source by its object directory path. Returns a `NULL` pointer in case * the source could not be found. diff --git a/pack-bitmap.c b/pack-bitmap.c index 058bdb5d7ded0b46337015bd05f3b9ca7ebf2ab5..291e1a9cf471583aeed5b928288ca6c2f5d1ceaa 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -664,7 +664,7 @@ static int open_pack_bitmap(struct repository *r, struct packed_git *p; int ret = -1; - for (p = get_all_packs(r); p; p = p->next) { + repo_for_each_pack(r, p) { if (open_pack_bitmap_1(bitmap_git, p) == 0) { ret = 0; /* @@ -3347,6 +3347,7 @@ static int verify_bitmap_file(const struct git_hash_algo *algop, int verify_bitmap_files(struct repository *r) { struct odb_source *source; + struct packed_git *p; int res = 0; odb_prepare_alternates(r->objects); @@ -3362,8 +3363,7 @@ int verify_bitmap_files(struct repository *r) free(midx_bitmap_name); } - for (struct packed_git *p = get_all_packs(r); - p; p = p->next) { + repo_for_each_pack(r, p) { char *pack_bitmap_name = pack_bitmap_filename(p); res |= verify_bitmap_file(r->hash_algo, pack_bitmap_name); free(pack_bitmap_name); diff --git a/pack-objects.c b/pack-objects.c index a9d9855063aea85f4b1b8f70f301c644c5d5e225..d6adf0759ccd77baa97c8b99e77fd7ac75921939 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -95,13 +95,13 @@ static void prepare_in_pack_by_idx(struct packing_data *pdata) * (i.e. in_pack_idx also zero) should return NULL. */ mapping[cnt++] = NULL; - for (p = get_all_packs(pdata->repo); p; p = p->next, cnt++) { + repo_for_each_pack(pdata->repo, p) { if (cnt == nr) { free(mapping); return; } p->index = cnt; - mapping[cnt] = p; + mapping[cnt++] = p; } pdata->in_pack_by_idx = mapping; } diff --git a/packfile.c b/packfile.c index acb680966dacf913b172873c6f9dfbd1a8f4157e..1ae2b2fe1eda77a57f007f78726724bd26f6a743 100644 --- a/packfile.c +++ b/packfile.c @@ -278,7 +278,7 @@ static int unuse_one_window(struct packed_git *current) if (current) scan_windows(current, &lru_p, &lru_w, &lru_l); - for (p = current->repo->objects->packed_git; p; p = p->next) + for (p = current->repo->objects->packfiles->packs; p; p = p->next) scan_windows(p, &lru_p, &lru_w, &lru_l); if (lru_p) { munmap(lru_w->base, lru_w->len); @@ -362,13 +362,8 @@ void close_pack(struct packed_git *p) void close_object_store(struct object_database *o) { struct odb_source *source; - struct packed_git *p; - for (p = o->packed_git; p; p = p->next) - if (p->do_not_close) - BUG("want to close pack marked 'do-not-close'"); - else - close_pack(p); + packfile_store_close(o->packfiles); for (source = o->sources; source; source = source->next) { if (source->midx) @@ -468,7 +463,7 @@ static int close_one_pack(struct repository *r) struct pack_window *mru_w = NULL; int accept_windows_inuse = 1; - for (p = r->objects->packed_git; p; p = p->next) { + for (p = r->objects->packfiles->packs; p; p = p->next) { if (p->pack_fd == -1) continue; find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse); @@ -784,16 +779,44 @@ struct packed_git *add_packed_git(struct repository *r, const char *path, return p; } -void install_packed_git(struct repository *r, struct packed_git *pack) +void packfile_store_add_pack(struct packfile_store *store, + struct packed_git *pack) { if (pack->pack_fd != -1) pack_open_fds++; - pack->next = r->objects->packed_git; - r->objects->packed_git = pack; + pack->next = store->packs; + store->packs = pack; hashmap_entry_init(&pack->packmap_ent, strhash(pack->pack_name)); - hashmap_add(&r->objects->pack_map, &pack->packmap_ent); + hashmap_add(&store->map, &pack->packmap_ent); +} + +struct packed_git *packfile_store_load_pack(struct packfile_store *store, + const char *idx_path, int local) +{ + struct strbuf key = STRBUF_INIT; + struct packed_git *p; + + /* + * We're being called with the path to the index file, but `pack_map` + * holds the path to the packfile itself. + */ + strbuf_addstr(&key, idx_path); + strbuf_strip_suffix(&key, ".idx"); + strbuf_addstr(&key, ".pack"); + + p = hashmap_get_entry_from_hash(&store->map, strhash(key.buf), key.buf, + struct packed_git, packmap_ent); + if (!p) { + p = add_packed_git(store->odb->repo, idx_path, + strlen(idx_path), local); + if (p) + packfile_store_add_pack(store, p); + } + + strbuf_release(&key); + return p; } void (*report_garbage)(unsigned seen_bits, const char *path); @@ -895,23 +918,14 @@ static void prepare_pack(const char *full_name, size_t full_name_len, const char *file_name, void *_data) { struct prepare_pack_data *data = (struct prepare_pack_data *)_data; - struct packed_git *p; size_t base_len = full_name_len; if (strip_suffix_mem(full_name, &base_len, ".idx") && !(data->m && midx_contains_pack(data->m, file_name))) { - struct hashmap_entry hent; - char *pack_name = xstrfmt("%.*s.pack", (int)base_len, full_name); - unsigned int hash = strhash(pack_name); - hashmap_entry_init(&hent, hash); - - /* Don't reopen a pack we already have. */ - if (!hashmap_get(&data->r->objects->pack_map, &hent, pack_name)) { - p = add_packed_git(data->r, full_name, full_name_len, data->local); - if (p) - install_packed_git(data->r, p); - } - free(pack_name); + char *trimmed_path = xstrndup(full_name, full_name_len); + packfile_store_load_pack(data->r->objects->packfiles, + trimmed_path, data->local); + free(trimmed_path); } if (!report_garbage) @@ -951,40 +965,6 @@ static void prepare_packed_git_one(struct odb_source *source) string_list_clear(data.garbage, 0); } -static void prepare_packed_git(struct repository *r); -/* - * Give a fast, rough count of the number of objects in the repository. This - * ignores loose objects completely. If you have a lot of them, then either - * you should repack because your performance will be awful, or they are - * all unreachable objects about to be pruned, in which case they're not really - * interesting as a measure of repo size in the first place. - */ -unsigned long repo_approximate_object_count(struct repository *r) -{ - if (!r->objects->approximate_object_count_valid) { - struct odb_source *source; - unsigned long count = 0; - struct packed_git *p; - - prepare_packed_git(r); - - for (source = r->objects->sources; source; source = source->next) { - struct multi_pack_index *m = get_multi_pack_index(source); - if (m) - count += m->num_objects; - } - - for (p = r->objects->packed_git; p; p = p->next) { - if (open_pack_index(p)) - continue; - count += p->num_objects; - } - r->objects->approximate_object_count = count; - r->objects->approximate_object_count_valid = 1; - } - return r->objects->approximate_object_count; -} - DEFINE_LIST_SORT(static, sort_packs, struct packed_git, next); static int sort_pack(const struct packed_git *a, const struct packed_git *b) @@ -1013,80 +993,45 @@ static int sort_pack(const struct packed_git *a, const struct packed_git *b) return -1; } -static void rearrange_packed_git(struct repository *r) -{ - sort_packs(&r->objects->packed_git, sort_pack); -} - -static void prepare_packed_git_mru(struct repository *r) +static void packfile_store_prepare_mru(struct packfile_store *store) { struct packed_git *p; - INIT_LIST_HEAD(&r->objects->packed_git_mru); + INIT_LIST_HEAD(&store->mru); - for (p = r->objects->packed_git; p; p = p->next) - list_add_tail(&p->mru, &r->objects->packed_git_mru); + for (p = store->packs; p; p = p->next) + list_add_tail(&p->mru, &store->mru); } -static void prepare_packed_git(struct repository *r) +void packfile_store_prepare(struct packfile_store *store) { struct odb_source *source; - if (r->objects->packed_git_initialized) + if (store->initialized) return; - odb_prepare_alternates(r->objects); - for (source = r->objects->sources; source; source = source->next) { + odb_prepare_alternates(store->odb); + for (source = store->odb->sources; source; source = source->next) { prepare_multi_pack_index_one(source); prepare_packed_git_one(source); } - rearrange_packed_git(r); + sort_packs(&store->packs, sort_pack); - prepare_packed_git_mru(r); - r->objects->packed_git_initialized = 1; + packfile_store_prepare_mru(store); + store->initialized = true; } -void reprepare_packed_git(struct repository *r) +void packfile_store_reprepare(struct packfile_store *store) { - struct odb_source *source; - - obj_read_lock(); - - /* - * Reprepare alt odbs, in case the alternates file was modified - * during the course of this process. This only _adds_ odbs to - * the linked list, so existing odbs will continue to exist for - * the lifetime of the process. - */ - r->objects->loaded_alternates = 0; - odb_prepare_alternates(r->objects); - - for (source = r->objects->sources; source; source = source->next) - odb_clear_loose_cache(source); - - r->objects->approximate_object_count_valid = 0; - r->objects->packed_git_initialized = 0; - prepare_packed_git(r); - obj_read_unlock(); + store->initialized = false; + packfile_store_prepare(store); } -struct packed_git *get_packed_git(struct repository *r) +struct packed_git *packfile_store_get_packs(struct packfile_store *store) { - prepare_packed_git(r); - return r->objects->packed_git; -} - -struct multi_pack_index *get_multi_pack_index(struct odb_source *source) -{ - prepare_packed_git(source->odb->repo); - return source->midx; -} + packfile_store_prepare(store); -struct packed_git *get_all_packs(struct repository *r) -{ - prepare_packed_git(r); - - for (struct odb_source *source = r->objects->sources; source; source = source->next) { + for (struct odb_source *source = store->odb->sources; source; source = source->next) { struct multi_pack_index *m = source->midx; if (!m) continue; @@ -1094,13 +1039,46 @@ struct packed_git *get_all_packs(struct repository *r) prepare_midx_pack(m, i); } - return r->objects->packed_git; + return store->packs; +} + +struct list_head *packfile_store_get_packs_mru(struct packfile_store *store) +{ + packfile_store_prepare(store); + return &store->mru; } -struct list_head *get_packed_git_mru(struct repository *r) +/* + * Give a fast, rough count of the number of objects in the repository. This + * ignores loose objects completely. If you have a lot of them, then either + * you should repack because your performance will be awful, or they are + * all unreachable objects about to be pruned, in which case they're not really + * interesting as a measure of repo size in the first place. + */ +unsigned long repo_approximate_object_count(struct repository *r) { - prepare_packed_git(r); - return &r->objects->packed_git_mru; + if (!r->objects->approximate_object_count_valid) { + struct odb_source *source; + unsigned long count = 0; + struct packed_git *p; + + packfile_store_prepare(r->objects->packfiles); + + for (source = r->objects->sources; source; source = source->next) { + struct multi_pack_index *m = get_multi_pack_index(source); + if (m) + count += m->num_objects; + } + + for (p = r->objects->packfiles->packs; p; p = p->next) { + if (open_pack_index(p)) + continue; + count += p->num_objects; + } + r->objects->approximate_object_count = count; + r->objects->approximate_object_count_valid = 1; + } + return r->objects->approximate_object_count; } unsigned long unpack_object_header_buffer(const unsigned char *buf, @@ -1155,7 +1133,7 @@ unsigned long get_size_from_delta(struct packed_git *p, * * Other worrying sections could be the call to close_pack_fd(), * which can close packs even with in-use windows, and to - * reprepare_packed_git(). Regarding the former, mmap doc says: + * odb_reprepare(). Regarding the former, mmap doc says: * "closing the file descriptor does not unmap the region". And * for the latter, it won't re-open already available packs. */ @@ -1219,7 +1197,7 @@ const struct packed_git *has_packed_and_bad(struct repository *r, { struct packed_git *p; - for (p = r->objects->packed_git; p; p = p->next) + for (p = r->objects->packfiles->packs; p; p = p->next) if (oidset_contains(&p->bad_objects, oid)) return p; return NULL; @@ -2074,19 +2052,19 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa { struct list_head *pos; - prepare_packed_git(r); + packfile_store_prepare(r->objects->packfiles); for (struct odb_source *source = r->objects->sources; source; source = source->next) if (source->midx && fill_midx_entry(source->midx, oid, e)) return 1; - if (!r->objects->packed_git) + if (!r->objects->packfiles->packs) return 0; - list_for_each(pos, &r->objects->packed_git_mru) { + list_for_each(pos, &r->objects->packfiles->mru) { struct packed_git *p = list_entry(pos, struct packed_git, mru); if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) { - list_move(&p->mru, &r->objects->packed_git_mru); + list_move(&p->mru, &r->objects->packfiles->mru); return 1; } } @@ -2096,19 +2074,19 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa static void maybe_invalidate_kept_pack_cache(struct repository *r, unsigned flags) { - if (!r->objects->kept_pack_cache.packs) + if (!r->objects->packfiles->kept_cache.packs) return; - if (r->objects->kept_pack_cache.flags == flags) + if (r->objects->packfiles->kept_cache.flags == flags) return; - FREE_AND_NULL(r->objects->kept_pack_cache.packs); - r->objects->kept_pack_cache.flags = 0; + FREE_AND_NULL(r->objects->packfiles->kept_cache.packs); + r->objects->packfiles->kept_cache.flags = 0; } struct packed_git **kept_pack_cache(struct repository *r, unsigned flags) { maybe_invalidate_kept_pack_cache(r, flags); - if (!r->objects->kept_pack_cache.packs) { + if (!r->objects->packfiles->kept_cache.packs) { struct packed_git **packs = NULL; size_t nr = 0, alloc = 0; struct packed_git *p; @@ -2121,7 +2099,7 @@ struct packed_git **kept_pack_cache(struct repository *r, unsigned flags) * covers, one kept and one not kept, but the midx returns only * the non-kept version. */ - for (p = get_all_packs(r); p; p = p->next) { + repo_for_each_pack(r, p) { if ((p->pack_keep && (flags & ON_DISK_KEEP_PACKS)) || (p->pack_keep_in_core && (flags & IN_CORE_KEEP_PACKS))) { ALLOC_GROW(packs, nr + 1, alloc); @@ -2131,11 +2109,11 @@ struct packed_git **kept_pack_cache(struct repository *r, unsigned flags) ALLOC_GROW(packs, nr + 1, alloc); packs[nr] = NULL; - r->objects->kept_pack_cache.packs = packs; - r->objects->kept_pack_cache.flags = flags; + r->objects->packfiles->kept_cache.packs = packs; + r->objects->packfiles->kept_cache.flags = flags; } - return r->objects->kept_pack_cache.packs; + return r->objects->packfiles->kept_cache.packs; } int find_kept_pack_entry(struct repository *r, @@ -2218,7 +2196,7 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, int r = 0; int pack_errors = 0; - for (p = get_all_packs(repo); p; p = p->next) { + repo_for_each_pack(repo, p) { if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) continue; if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) && @@ -2332,3 +2310,46 @@ int parse_pack_header_option(const char *in, unsigned char *out, unsigned int *l *len = hdr - out; return 0; } + +static int pack_map_entry_cmp(const void *cmp_data UNUSED, + const struct hashmap_entry *entry, + const struct hashmap_entry *entry2, + const void *keydata) +{ + const char *key = keydata; + const struct packed_git *pg1, *pg2; + + pg1 = container_of(entry, const struct packed_git, packmap_ent); + pg2 = container_of(entry2, const struct packed_git, packmap_ent); + + return strcmp(pg1->pack_name, key ? key : pg2->pack_name); +} + +struct packfile_store *packfile_store_new(struct object_database *odb) +{ + struct packfile_store *store; + CALLOC_ARRAY(store, 1); + store->odb = odb; + INIT_LIST_HEAD(&store->mru); + hashmap_init(&store->map, pack_map_entry_cmp, NULL, 0); + return store; +} + +void packfile_store_free(struct packfile_store *store) +{ + for (struct packed_git *p = store->packs, *next; p; p = next) { + next = p->next; + free(p); + } + hashmap_clear(&store->map); + free(store); +} + +void packfile_store_close(struct packfile_store *store) +{ + for (struct packed_git *p = store->packs; p; p = p->next) { + if (p->do_not_close) + BUG("want to close pack marked 'do-not-close'"); + close_pack(p); + } +} diff --git a/packfile.h b/packfile.h index f16753f2a9bb4c18e832041a9145a3a5c79f492c..c9d0b93446b5f569aadb12f48fce8ffc6aace59d 100644 --- a/packfile.h +++ b/packfile.h @@ -52,19 +52,116 @@ struct packed_git { char pack_name[FLEX_ARRAY]; /* more */ }; -static inline int pack_map_entry_cmp(const void *cmp_data UNUSED, - const struct hashmap_entry *entry, - const struct hashmap_entry *entry2, - const void *keydata) -{ - const char *key = keydata; - const struct packed_git *pg1, *pg2; +/* + * A store that manages packfiles for a given object database. + */ +struct packfile_store { + struct object_database *odb; - pg1 = container_of(entry, const struct packed_git, packmap_ent); - pg2 = container_of(entry2, const struct packed_git, packmap_ent); + /* + * The list of packfiles in the order in which they are being added to + * the store. + */ + struct packed_git *packs; - return strcmp(pg1->pack_name, key ? key : pg2->pack_name); -} + /* + * Cache of packfiles which are marked as "kept", either because there + * is an on-disk ".keep" file or because they are marked as "kept" in + * memory. + * + * Should not be accessed directly, but via `kept_pack_cache()`. The + * list of packs gets invalidated when the stored flags and the flags + * passed to `kept_pack_cache()` mismatch. + */ + struct { + struct packed_git **packs; + unsigned flags; + } kept_cache; + + /* A most-recently-used ordered version of the packs list. */ + struct list_head mru; + + /* + * A map of packfile names to packed_git structs for tracking which + * packs have been loaded already. + */ + struct hashmap map; + + /* + * Whether packfiles have already been populated with this store's + * packs. + */ + bool initialized; +}; + +/* + * Allocate and initialize a new empty packfile store for the given object + * database. + */ +struct packfile_store *packfile_store_new(struct object_database *odb); + +/* + * Free the packfile store and all its associated state. All packfiles + * tracked by the store will be closed. + */ +void packfile_store_free(struct packfile_store *store); + +/* + * Close all packfiles associated with this store. The packfiles won't be + * free'd, so they can be re-opened at a later point in time. + */ +void packfile_store_close(struct packfile_store *store); + +/* + * Prepare the packfile store by loading packfiles and multi-pack indices for + * all alternates. This becomes a no-op if the store is already prepared. + * + * It shouldn't typically be necessary to call this function directly, as + * functions that access the store know to prepare it. + */ +void packfile_store_prepare(struct packfile_store *store); + +/* + * Clear the packfile caches and try to look up any new packfiles that have + * appeared since last preparing the packfiles store. + * + * This function must be called under the `odb_read_lock()`. + */ +void packfile_store_reprepare(struct packfile_store *store); + +/* + * Add the pack to the store so that contained objects become accessible via + * the store. This moves ownership into the store. + */ +void packfile_store_add_pack(struct packfile_store *store, + struct packed_git *pack); + +/* + * Load and iterate through all packs of the given repository. This helper + * function will yield packfiles from all object sources connected to the + * repository. + */ +#define repo_for_each_pack(repo, p) \ + for (p = packfile_store_get_packs(repo->objects->packfiles); p; p = p->next) + +/* + * Get all packs managed by the given store, including packfiles that are + * referenced by multi-pack indices. + */ +struct packed_git *packfile_store_get_packs(struct packfile_store *store); + +/* + * Get all packs in most-recently-used order. + */ +struct list_head *packfile_store_get_packs_mru(struct packfile_store *store); + +/* + * Open the packfile and add it to the store if it isn't yet known. Returns + * either the newly opened packfile or the preexisting packfile. Returns a + * `NULL` pointer in case the packfile could not be opened. + */ +struct packed_git *packfile_store_load_pack(struct packfile_store *store, + const char *idx_path, int local); struct pack_window { struct pack_window *next; @@ -142,14 +239,6 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, #define PACKDIR_FILE_GARBAGE 4 extern void (*report_garbage)(unsigned seen_bits, const char *path); -void reprepare_packed_git(struct repository *r); -void install_packed_git(struct repository *r, struct packed_git *pack); - -struct packed_git *get_packed_git(struct repository *r); -struct list_head *get_packed_git_mru(struct repository *r); -struct multi_pack_index *get_multi_pack_index(struct odb_source *source); -struct packed_git *get_all_packs(struct repository *r); - /* * Give a rough count of objects in the repository. This sacrifices accuracy * for speed. diff --git a/repack-cruft.c b/repack-cruft.c new file mode 100644 index 0000000000000000000000000000000000000000..544a6f2df3a9e91b80618fda6d089a85736af555 --- /dev/null +++ b/repack-cruft.c @@ -0,0 +1,98 @@ +#include "git-compat-util.h" +#include "repack.h" +#include "packfile.h" +#include "repository.h" +#include "run-command.h" + +static void combine_small_cruft_packs(FILE *in, off_t combine_cruft_below_size, + struct existing_packs *existing) +{ + struct packed_git *p; + struct strbuf buf = STRBUF_INIT; + size_t i; + + repo_for_each_pack(existing->repo, p) { + if (!(p->is_cruft && p->pack_local)) + continue; + + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + + if (!string_list_has_string(&existing->cruft_packs, buf.buf)) + continue; + + if (p->pack_size < combine_cruft_below_size) { + fprintf(in, "-%s\n", pack_basename(p)); + } else { + existing_packs_retain_cruft(existing, p); + fprintf(in, "%s\n", pack_basename(p)); + } + } + + for (i = 0; i < existing->non_kept_packs.nr; i++) + fprintf(in, "-%s.pack\n", + existing->non_kept_packs.items[i].string); + + strbuf_release(&buf); +} + +int write_cruft_pack(struct write_pack_opts *opts, + const char *cruft_expiration, + unsigned long combine_cruft_below_size, + struct string_list *names, + struct existing_packs *existing) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + struct string_list_item *item; + FILE *in; + int ret; + const char *pack_prefix = write_pack_opts_pack_prefix(opts); + + prepare_pack_objects(&cmd, opts->po_args, opts->destination); + + strvec_push(&cmd.args, "--cruft"); + if (cruft_expiration) + strvec_pushf(&cmd.args, "--cruft-expiration=%s", + cruft_expiration); + + strvec_push(&cmd.args, "--non-empty"); + + cmd.in = -1; + + ret = start_command(&cmd); + if (ret) + return ret; + + /* + * names has a confusing double use: it both provides the list + * of just-written new packs, and accepts the name of the cruft + * pack we are writing. + * + * By the time it is read here, it contains only the pack(s) + * that were just written, which is exactly the set of packs we + * want to consider kept. + * + * If `--expire-to` is given, the double-use served by `names` + * ensures that the pack written to `--expire-to` excludes any + * objects contained in the cruft pack. + */ + in = xfdopen(cmd.in, "w"); + for_each_string_list_item(item, names) + fprintf(in, "%s-%s.pack\n", pack_prefix, item->string); + if (combine_cruft_below_size && !cruft_expiration) { + combine_small_cruft_packs(in, combine_cruft_below_size, + existing); + } else { + for_each_string_list_item(item, &existing->non_kept_packs) + fprintf(in, "-%s.pack\n", item->string); + for_each_string_list_item(item, &existing->cruft_packs) + fprintf(in, "-%s.pack\n", item->string); + } + for_each_string_list_item(item, &existing->kept_packs) + fprintf(in, "%s.pack\n", item->string); + fclose(in); + + return finish_pack_objects_cmd(existing->repo->hash_algo, opts, &cmd, + names); +} diff --git a/repack-filtered.c b/repack-filtered.c new file mode 100644 index 0000000000000000000000000000000000000000..96c7b02bb6c6997868b2ee751e22a5bef6ebc3df --- /dev/null +++ b/repack-filtered.c @@ -0,0 +1,51 @@ +#include "git-compat-util.h" +#include "repack.h" +#include "repository.h" +#include "run-command.h" +#include "string-list.h" + +int write_filtered_pack(struct write_pack_opts *opts, + struct existing_packs *existing, + struct string_list *names) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + struct string_list_item *item; + FILE *in; + int ret; + const char *caret; + const char *pack_prefix = write_pack_opts_pack_prefix(opts); + + prepare_pack_objects(&cmd, opts->po_args, opts->destination); + + strvec_push(&cmd.args, "--stdin-packs"); + + for_each_string_list_item(item, &existing->kept_packs) + strvec_pushf(&cmd.args, "--keep-pack=%s", item->string); + + cmd.in = -1; + + ret = start_command(&cmd); + if (ret) + return ret; + + /* + * Here 'names' contains only the pack(s) that were just + * written, which is exactly the packs we want to keep. Also + * 'existing_kept_packs' already contains the packs in + * 'keep_pack_list'. + */ + in = xfdopen(cmd.in, "w"); + for_each_string_list_item(item, names) + fprintf(in, "^%s-%s.pack\n", pack_prefix, item->string); + for_each_string_list_item(item, &existing->non_kept_packs) + fprintf(in, "%s.pack\n", item->string); + for_each_string_list_item(item, &existing->cruft_packs) + fprintf(in, "%s.pack\n", item->string); + caret = opts->po_args->pack_kept_objects ? "" : "^"; + for_each_string_list_item(item, &existing->kept_packs) + fprintf(in, "%s%s.pack\n", caret, item->string); + fclose(in); + + return finish_pack_objects_cmd(existing->repo->hash_algo, opts, &cmd, + names); +} diff --git a/repack-geometry.c b/repack-geometry.c new file mode 100644 index 0000000000000000000000000000000000000000..4e51003ecccf067b5c5ea4748e13ac0ff3a77d16 --- /dev/null +++ b/repack-geometry.c @@ -0,0 +1,232 @@ +#define DISABLE_SIGN_COMPARE_WARNINGS + +#include "git-compat-util.h" +#include +#include "repack.h" +#include "hex.h" +#include "packfile.h" + +static uint32_t pack_geometry_weight(struct packed_git *p) +{ + if (open_pack_index(p)) + die(_("cannot open index for %s"), p->pack_name); + return p->num_objects; +} + +static int pack_geometry_cmp(const void *va, const void *vb) +{ + uint32_t aw = pack_geometry_weight(*(struct packed_git **)va), + bw = pack_geometry_weight(*(struct packed_git **)vb); + + if (aw < bw) + return -1; + if (aw > bw) + return 1; + return 0; +} + +void pack_geometry_init(struct pack_geometry *geometry, + struct existing_packs *existing, + const struct pack_objects_args *args) +{ + struct packed_git *p; + struct strbuf buf = STRBUF_INIT; + + repo_for_each_pack(existing->repo, p) { + if (args->local && !p->pack_local) + /* + * When asked to only repack local packfiles we skip + * over any packfiles that are borrowed from alternate + * object directories. + */ + continue; + + if (!args->pack_kept_objects) { + /* + * Any pack that has its pack_keep bit set will + * appear in existing->kept_packs below, but + * this saves us from doing a more expensive + * check. + */ + if (p->pack_keep) + continue; + + /* + * The pack may be kept via the --keep-pack + * option; check 'existing->kept_packs' to + * determine whether to ignore it. + */ + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + + if (string_list_has_string(&existing->kept_packs, buf.buf)) + continue; + } + if (p->is_cruft) + continue; + + ALLOC_GROW(geometry->pack, + geometry->pack_nr + 1, + geometry->pack_alloc); + + geometry->pack[geometry->pack_nr] = p; + geometry->pack_nr++; + } + + QSORT(geometry->pack, geometry->pack_nr, pack_geometry_cmp); + strbuf_release(&buf); +} + +void pack_geometry_split(struct pack_geometry *geometry) +{ + uint32_t i; + uint32_t split; + off_t total_size = 0; + + if (!geometry->pack_nr) { + geometry->split = geometry->pack_nr; + return; + } + + /* + * First, count the number of packs (in descending order of size) which + * already form a geometric progression. + */ + for (i = geometry->pack_nr - 1; i > 0; i--) { + struct packed_git *ours = geometry->pack[i]; + struct packed_git *prev = geometry->pack[i - 1]; + + if (unsigned_mult_overflows(geometry->split_factor, + pack_geometry_weight(prev))) + die(_("pack %s too large to consider in geometric " + "progression"), + prev->pack_name); + + if (pack_geometry_weight(ours) < + geometry->split_factor * pack_geometry_weight(prev)) + break; + } + + split = i; + + if (split) { + /* + * Move the split one to the right, since the top element in the + * last-compared pair can't be in the progression. Only do this + * when we split in the middle of the array (otherwise if we got + * to the end, then the split is in the right place). + */ + split++; + } + + /* + * Then, anything to the left of 'split' must be in a new pack. But, + * creating that new pack may cause packs in the heavy half to no longer + * form a geometric progression. + * + * Compute an expected size of the new pack, and then determine how many + * packs in the heavy half need to be joined into it (if any) to restore + * the geometric progression. + */ + for (i = 0; i < split; i++) { + struct packed_git *p = geometry->pack[i]; + + if (unsigned_add_overflows(total_size, pack_geometry_weight(p))) + die(_("pack %s too large to roll up"), p->pack_name); + total_size += pack_geometry_weight(p); + } + for (i = split; i < geometry->pack_nr; i++) { + struct packed_git *ours = geometry->pack[i]; + + if (unsigned_mult_overflows(geometry->split_factor, + total_size)) + die(_("pack %s too large to roll up"), ours->pack_name); + + if (pack_geometry_weight(ours) < + geometry->split_factor * total_size) { + if (unsigned_add_overflows(total_size, + pack_geometry_weight(ours))) + die(_("pack %s too large to roll up"), + ours->pack_name); + + split++; + total_size += pack_geometry_weight(ours); + } else + break; + } + + geometry->split = split; +} + +struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry) +{ + uint32_t i; + + if (!geometry) { + /* + * No geometry means either an all-into-one repack (in which + * case there is only one pack left and it is the largest) or an + * incremental one. + * + * If repacking incrementally, then we could check the size of + * all packs to determine which should be preferred, but leave + * this for later. + */ + return NULL; + } + if (geometry->split == geometry->pack_nr) + return NULL; + + /* + * The preferred pack is the largest pack above the split line. In + * other words, it is the largest pack that does not get rolled up in + * the geometric repack. + */ + for (i = geometry->pack_nr; i > geometry->split; i--) + /* + * A pack that is not local would never be included in a + * multi-pack index. We thus skip over any non-local packs. + */ + if (geometry->pack[i - 1]->pack_local) + return geometry->pack[i - 1]; + + return NULL; +} + +void pack_geometry_remove_redundant(struct pack_geometry *geometry, + struct string_list *names, + struct existing_packs *existing, + const char *packdir) +{ + const struct git_hash_algo *algop = existing->repo->hash_algo; + struct strbuf buf = STRBUF_INIT; + uint32_t i; + + for (i = 0; i < geometry->split; i++) { + struct packed_git *p = geometry->pack[i]; + if (string_list_has_string(names, hash_to_hex_algop(p->hash, + algop))) + continue; + + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + + if ((p->pack_keep) || + (string_list_has_string(&existing->kept_packs, buf.buf))) + continue; + + repack_remove_redundant_pack(existing->repo, packdir, buf.buf); + } + + strbuf_release(&buf); +} + +void pack_geometry_release(struct pack_geometry *geometry) +{ + if (!geometry) + return; + + free(geometry->pack); +} diff --git a/repack-midx.c b/repack-midx.c new file mode 100644 index 0000000000000000000000000000000000000000..6f6202c5bccd89a9087bd86a80b7f56d0678f7fb --- /dev/null +++ b/repack-midx.c @@ -0,0 +1,372 @@ +#include "git-compat-util.h" +#include "repack.h" +#include "hash.h" +#include "hex.h" +#include "odb.h" +#include "oidset.h" +#include "pack-bitmap.h" +#include "refs.h" +#include "run-command.h" +#include "tempfile.h" + +struct midx_snapshot_ref_data { + struct repository *repo; + struct tempfile *f; + struct oidset seen; + int preferred; +}; + +static int midx_snapshot_ref_one(const char *refname UNUSED, + const char *referent UNUSED, + const struct object_id *oid, + int flag UNUSED, void *_data) +{ + struct midx_snapshot_ref_data *data = _data; + struct object_id peeled; + + if (!peel_iterated_oid(data->repo, oid, &peeled)) + oid = &peeled; + + if (oidset_insert(&data->seen, oid)) + return 0; /* already seen */ + + if (odb_read_object_info(data->repo->objects, oid, NULL) != OBJ_COMMIT) + return 0; + + fprintf(data->f->fp, "%s%s\n", data->preferred ? "+" : "", + oid_to_hex(oid)); + + return 0; +} + +void midx_snapshot_refs(struct repository *repo, struct tempfile *f) +{ + struct midx_snapshot_ref_data data; + const struct string_list *preferred = bitmap_preferred_tips(repo); + + data.repo = repo; + data.f = f; + data.preferred = 0; + oidset_init(&data.seen, 0); + + if (!fdopen_tempfile(f, "w")) + die(_("could not open tempfile %s for writing"), + get_tempfile_path(f)); + + if (preferred) { + struct string_list_item *item; + + data.preferred = 1; + for_each_string_list_item(item, preferred) + refs_for_each_ref_in(get_main_ref_store(repo), + item->string, + midx_snapshot_ref_one, &data); + data.preferred = 0; + } + + refs_for_each_ref(get_main_ref_store(repo), + midx_snapshot_ref_one, &data); + + if (close_tempfile_gently(f)) { + int save_errno = errno; + delete_tempfile(&f); + errno = save_errno; + die_errno(_("could not close refs snapshot tempfile")); + } + + oidset_clear(&data.seen); +} + +static int midx_has_unknown_packs(struct string_list *include, + struct pack_geometry *geometry, + struct existing_packs *existing) +{ + struct string_list_item *item; + + string_list_sort(include); + + for_each_string_list_item(item, &existing->midx_packs) { + const char *pack_name = item->string; + + /* + * Determine whether or not each MIDX'd pack from the existing + * MIDX (if any) is represented in the new MIDX. For each pack + * in the MIDX, it must either be: + * + * - In the "include" list of packs to be included in the new + * MIDX. Note this function is called before the include + * list is populated with any cruft pack(s). + * + * - Below the geometric split line (if using pack geometry), + * indicating that the pack won't be included in the new + * MIDX, but its contents were rolled up as part of the + * geometric repack. + * + * - In the existing non-kept packs list (if not using pack + * geometry), and marked as non-deleted. + */ + if (string_list_has_string(include, pack_name)) { + continue; + } else if (geometry) { + struct strbuf buf = STRBUF_INIT; + uint32_t j; + + for (j = 0; j < geometry->split; j++) { + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(geometry->pack[j])); + strbuf_strip_suffix(&buf, ".pack"); + strbuf_addstr(&buf, ".idx"); + + if (!strcmp(pack_name, buf.buf)) { + strbuf_release(&buf); + break; + } + } + + strbuf_release(&buf); + + if (j < geometry->split) + continue; + } else { + struct string_list_item *item; + + item = string_list_lookup(&existing->non_kept_packs, + pack_name); + if (item && !existing_pack_is_marked_for_deletion(item)) + continue; + } + + /* + * If we got to this point, the MIDX includes some pack that we + * don't know about. + */ + return 1; + } + + return 0; +} + +static void midx_included_packs(struct string_list *include, + struct repack_write_midx_opts *opts) +{ + struct existing_packs *existing = opts->existing; + struct pack_geometry *geometry = opts->geometry; + struct string_list *names = opts->names; + struct string_list_item *item; + struct strbuf buf = STRBUF_INIT; + + for_each_string_list_item(item, &existing->kept_packs) { + strbuf_reset(&buf); + strbuf_addf(&buf, "%s.idx", item->string); + string_list_insert(include, buf.buf); + } + + for_each_string_list_item(item, names) { + strbuf_reset(&buf); + strbuf_addf(&buf, "pack-%s.idx", item->string); + string_list_insert(include, buf.buf); + } + + if (geometry->split_factor) { + uint32_t i; + + for (i = geometry->split; i < geometry->pack_nr; i++) { + struct packed_git *p = geometry->pack[i]; + + /* + * The multi-pack index never refers to packfiles part + * of an alternate object database, so we skip these. + * While git-multi-pack-index(1) would silently ignore + * them anyway, this allows us to skip executing the + * command completely when we have only non-local + * packfiles. + */ + if (!p->pack_local) + continue; + + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + strbuf_addstr(&buf, ".idx"); + + string_list_insert(include, buf.buf); + } + } else { + for_each_string_list_item(item, &existing->non_kept_packs) { + if (existing_pack_is_marked_for_deletion(item)) + continue; + + strbuf_reset(&buf); + strbuf_addf(&buf, "%s.idx", item->string); + string_list_insert(include, buf.buf); + } + } + + if (opts->midx_must_contain_cruft || + midx_has_unknown_packs(include, geometry, existing)) { + /* + * If there are one or more unknown pack(s) present (see + * midx_has_unknown_packs() for what makes a pack + * "unknown") in the MIDX before the repack, keep them + * as they may be required to form a reachability + * closure if the MIDX is bitmapped. + * + * For example, a cruft pack can be required to form a + * reachability closure if the MIDX is bitmapped and one + * or more of the bitmap's selected commits reaches a + * once-cruft object that was later made reachable. + */ + for_each_string_list_item(item, &existing->cruft_packs) { + /* + * When doing a --geometric repack, there is no + * need to check for deleted packs, since we're + * by definition not doing an ALL_INTO_ONE + * repack (hence no packs will be deleted). + * Otherwise we must check for and exclude any + * packs which are enqueued for deletion. + * + * So we could omit the conditional below in the + * --geometric case, but doing so is unnecessary + * since no packs are marked as pending + * deletion (since we only call + * `existing_packs_mark_for_deletion()` when + * doing an all-into-one repack). + */ + if (existing_pack_is_marked_for_deletion(item)) + continue; + + strbuf_reset(&buf); + strbuf_addf(&buf, "%s.idx", item->string); + string_list_insert(include, buf.buf); + } + } else { + /* + * Modern versions of Git (with the appropriate + * configuration setting) will write new copies of + * once-cruft objects when doing a --geometric repack. + * + * If the MIDX has no cruft pack, new packs written + * during a --geometric repack will not rely on the + * cruft pack to form a reachability closure, so we can + * avoid including them in the MIDX in that case. + */ + ; + } + + strbuf_release(&buf); +} + +static void remove_redundant_bitmaps(struct string_list *include, + const char *packdir) +{ + struct strbuf path = STRBUF_INIT; + struct string_list_item *item; + size_t packdir_len; + + strbuf_addstr(&path, packdir); + strbuf_addch(&path, '/'); + packdir_len = path.len; + + /* + * Remove any pack bitmaps corresponding to packs which are now + * included in the MIDX. + */ + for_each_string_list_item(item, include) { + strbuf_addstr(&path, item->string); + strbuf_strip_suffix(&path, ".idx"); + strbuf_addstr(&path, ".bitmap"); + + if (unlink(path.buf) && errno != ENOENT) + warning_errno(_("could not remove stale bitmap: %s"), + path.buf); + + strbuf_setlen(&path, packdir_len); + } + strbuf_release(&path); +} + +int write_midx_included_packs(struct repack_write_midx_opts *opts) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + struct string_list include = STRING_LIST_INIT_DUP; + struct string_list_item *item; + struct packed_git *preferred = pack_geometry_preferred_pack(opts->geometry); + FILE *in; + int ret = 0; + + midx_included_packs(&include, opts); + if (!include.nr) + goto done; + + cmd.in = -1; + cmd.git_cmd = 1; + + strvec_push(&cmd.args, "multi-pack-index"); + strvec_pushl(&cmd.args, "write", "--stdin-packs", NULL); + + if (opts->show_progress) + strvec_push(&cmd.args, "--progress"); + else + strvec_push(&cmd.args, "--no-progress"); + + if (opts->write_bitmaps) + strvec_push(&cmd.args, "--bitmap"); + + if (preferred) + strvec_pushf(&cmd.args, "--preferred-pack=%s", + pack_basename(preferred)); + else if (opts->names->nr) { + /* The largest pack was repacked, meaning that either + * one or two packs exist depending on whether the + * repository has a cruft pack or not. + * + * Select the non-cruft one as preferred to encourage + * pack-reuse among packs containing reachable objects + * over unreachable ones. + * + * (Note we could write multiple packs here if + * `--max-pack-size` was given, but any one of them + * will suffice, so pick the first one.) + */ + for_each_string_list_item(item, opts->names) { + struct generated_pack *pack = item->util; + if (generated_pack_has_ext(pack, ".mtimes")) + continue; + + strvec_pushf(&cmd.args, "--preferred-pack=pack-%s.pack", + item->string); + break; + } + } else { + /* + * No packs were kept, and no packs were written. The + * only thing remaining are .keep packs (unless + * --pack-kept-objects was given). + * + * Set the `--preferred-pack` arbitrarily here. + */ + ; + } + + if (opts->refs_snapshot) + strvec_pushf(&cmd.args, "--refs-snapshot=%s", + opts->refs_snapshot); + + ret = start_command(&cmd); + if (ret) + goto done; + + in = xfdopen(cmd.in, "w"); + for_each_string_list_item(item, &include) + fprintf(in, "%s\n", item->string); + fclose(in); + + ret = finish_command(&cmd); +done: + if (!ret && opts->write_bitmaps) + remove_redundant_bitmaps(&include, opts->packdir); + + string_list_clear(&include, 0); + + return ret; +} diff --git a/repack-promisor.c b/repack-promisor.c new file mode 100644 index 0000000000000000000000000000000000000000..8bf42fc7157d2882e648c54569a19d3ea2324c9b --- /dev/null +++ b/repack-promisor.c @@ -0,0 +1,102 @@ +#include "git-compat-util.h" +#include "repack.h" +#include "run-command.h" +#include "hex.h" +#include "repository.h" +#include "packfile.h" +#include "path.h" +#include "pack.h" + +struct write_oid_context { + struct child_process *cmd; + const struct git_hash_algo *algop; +}; + +/* + * Write oid to the given struct child_process's stdin, starting it first if + * necessary. + */ +static int write_oid(const struct object_id *oid, + struct packed_git *pack UNUSED, + uint32_t pos UNUSED, void *data) +{ + struct write_oid_context *ctx = data; + struct child_process *cmd = ctx->cmd; + + if (cmd->in == -1) { + if (start_command(cmd)) + die(_("could not start pack-objects to repack promisor objects")); + } + + if (write_in_full(cmd->in, oid_to_hex(oid), ctx->algop->hexsz) < 0 || + write_in_full(cmd->in, "\n", 1) < 0) + die(_("failed to feed promisor objects to pack-objects")); + return 0; +} + +void repack_promisor_objects(struct repository *repo, + const struct pack_objects_args *args, + struct string_list *names, const char *packtmp) +{ + struct write_oid_context ctx; + struct child_process cmd = CHILD_PROCESS_INIT; + FILE *out; + struct strbuf line = STRBUF_INIT; + + prepare_pack_objects(&cmd, args, packtmp); + cmd.in = -1; + + /* + * NEEDSWORK: Giving pack-objects only the OIDs without any ordering + * hints may result in suboptimal deltas in the resulting pack. See if + * the OIDs can be sent with fake paths such that pack-objects can use a + * {type -> existing pack order} ordering when computing deltas instead + * of a {type -> size} ordering, which may produce better deltas. + */ + ctx.cmd = &cmd; + ctx.algop = repo->hash_algo; + for_each_packed_object(repo, write_oid, &ctx, + FOR_EACH_OBJECT_PROMISOR_ONLY); + + if (cmd.in == -1) { + /* No packed objects; cmd was never started */ + child_process_clear(&cmd); + return; + } + + close(cmd.in); + + out = xfdopen(cmd.out, "r"); + while (strbuf_getline_lf(&line, out) != EOF) { + struct string_list_item *item; + char *promisor_name; + + if (line.len != repo->hash_algo->hexsz) + die(_("repack: Expecting full hex object ID lines only from pack-objects.")); + item = string_list_append(names, line.buf); + + /* + * pack-objects creates the .pack and .idx files, but not the + * .promisor file. Create the .promisor file, which is empty. + * + * NEEDSWORK: fetch-pack sometimes generates non-empty + * .promisor files containing the ref names and associated + * hashes at the point of generation of the corresponding + * packfile, but this would not preserve their contents. Maybe + * concatenate the contents of all .promisor files instead of + * just creating a new empty file. + */ + promisor_name = mkpathdup("%s-%s.promisor", packtmp, + line.buf); + write_promisor_file(promisor_name, NULL, 0); + + item->util = generated_pack_populate(item->string, packtmp); + + free(promisor_name); + } + + fclose(out); + if (finish_command(&cmd)) + die(_("could not finish pack-objects to repack promisor objects")); + strbuf_release(&line); +} diff --git a/repack.c b/repack.c new file mode 100644 index 0000000000000000000000000000000000000000..d690e6871844b0cc63e902a91f4bb63b022ff09b --- /dev/null +++ b/repack.c @@ -0,0 +1,360 @@ +#include "git-compat-util.h" +#include "dir.h" +#include "midx.h" +#include "odb.h" +#include "packfile.h" +#include "path.h" +#include "repack.h" +#include "repository.h" +#include "run-command.h" +#include "tempfile.h" + +void prepare_pack_objects(struct child_process *cmd, + const struct pack_objects_args *args, + const char *out) +{ + strvec_push(&cmd->args, "pack-objects"); + if (args->window) + strvec_pushf(&cmd->args, "--window=%s", args->window); + if (args->window_memory) + strvec_pushf(&cmd->args, "--window-memory=%s", args->window_memory); + if (args->depth) + strvec_pushf(&cmd->args, "--depth=%s", args->depth); + if (args->threads) + strvec_pushf(&cmd->args, "--threads=%s", args->threads); + if (args->max_pack_size) + strvec_pushf(&cmd->args, "--max-pack-size=%lu", args->max_pack_size); + if (args->no_reuse_delta) + strvec_pushf(&cmd->args, "--no-reuse-delta"); + if (args->no_reuse_object) + strvec_pushf(&cmd->args, "--no-reuse-object"); + if (args->name_hash_version) + strvec_pushf(&cmd->args, "--name-hash-version=%d", args->name_hash_version); + if (args->path_walk) + strvec_pushf(&cmd->args, "--path-walk"); + if (args->local) + strvec_push(&cmd->args, "--local"); + if (args->quiet) + strvec_push(&cmd->args, "--quiet"); + if (args->delta_base_offset) + strvec_push(&cmd->args, "--delta-base-offset"); + if (!args->pack_kept_objects) + strvec_push(&cmd->args, "--honor-pack-keep"); + strvec_push(&cmd->args, out); + cmd->git_cmd = 1; + cmd->out = -1; +} + +void pack_objects_args_release(struct pack_objects_args *args) +{ + free(args->window); + free(args->window_memory); + free(args->depth); + free(args->threads); + list_objects_filter_release(&args->filter_options); +} + +void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, + const char *base_name) +{ + struct strbuf buf = STRBUF_INIT; + struct odb_source *source = repo->objects->sources; + struct multi_pack_index *m = get_multi_pack_index(source); + strbuf_addf(&buf, "%s.pack", base_name); + if (m && source->local && midx_contains_pack(m, buf.buf)) + clear_midx_file(repo); + strbuf_insertf(&buf, 0, "%s/", dir_name); + unlink_pack_path(buf.buf, 1); + strbuf_release(&buf); +} + +const char *write_pack_opts_pack_prefix(struct write_pack_opts *opts) +{ + const char *pack_prefix; + if (!skip_prefix(opts->packtmp, opts->packdir, &pack_prefix)) + die(_("pack prefix %s does not begin with objdir %s"), + opts->packtmp, opts->packdir); + if (*pack_prefix == '/') + pack_prefix++; + return pack_prefix; +} + +int write_pack_opts_is_local(struct write_pack_opts *opts) +{ + const char *scratch; + return skip_prefix(opts->destination, opts->packdir, &scratch); +} + +int finish_pack_objects_cmd(const struct git_hash_algo *algop, + struct write_pack_opts *opts, + struct child_process *cmd, + struct string_list *names) +{ + FILE *out; + int local = write_pack_opts_is_local(opts); + struct strbuf line = STRBUF_INIT; + + out = xfdopen(cmd->out, "r"); + while (strbuf_getline_lf(&line, out) != EOF) { + struct string_list_item *item; + + if (line.len != algop->hexsz) + die(_("repack: Expecting full hex object ID lines only " + "from pack-objects.")); + /* + * Avoid putting packs written outside of the repository in the + * list of names. + */ + if (local) { + item = string_list_append(names, line.buf); + item->util = generated_pack_populate(line.buf, + opts->packtmp); + } + } + fclose(out); + + strbuf_release(&line); + + return finish_command(cmd); +} + +#define DELETE_PACK 1 +#define RETAIN_PACK 2 + +void existing_packs_collect(struct existing_packs *existing, + const struct string_list *extra_keep) +{ + struct packed_git *p; + struct strbuf buf = STRBUF_INIT; + + repo_for_each_pack(existing->repo, p) { + size_t i; + const char *base; + + if (p->multi_pack_index) + string_list_append(&existing->midx_packs, + pack_basename(p)); + if (!p->pack_local) + continue; + + base = pack_basename(p); + + for (i = 0; i < extra_keep->nr; i++) + if (!fspathcmp(base, extra_keep->items[i].string)) + break; + + strbuf_reset(&buf); + strbuf_addstr(&buf, base); + strbuf_strip_suffix(&buf, ".pack"); + + if ((extra_keep->nr > 0 && i < extra_keep->nr) || p->pack_keep) + string_list_append(&existing->kept_packs, buf.buf); + else if (p->is_cruft) + string_list_append(&existing->cruft_packs, buf.buf); + else + string_list_append(&existing->non_kept_packs, buf.buf); + } + + string_list_sort(&existing->kept_packs); + string_list_sort(&existing->non_kept_packs); + string_list_sort(&existing->cruft_packs); + string_list_sort(&existing->midx_packs); + strbuf_release(&buf); +} + +int existing_packs_has_non_kept(const struct existing_packs *existing) +{ + return existing->non_kept_packs.nr || existing->cruft_packs.nr; +} + +static void existing_pack_mark_for_deletion(struct string_list_item *item) +{ + item->util = (void*)((uintptr_t)item->util | DELETE_PACK); +} + +static void existing_pack_unmark_for_deletion(struct string_list_item *item) +{ + item->util = (void*)((uintptr_t)item->util & ~DELETE_PACK); +} + +int existing_pack_is_marked_for_deletion(struct string_list_item *item) +{ + return (uintptr_t)item->util & DELETE_PACK; +} + +static void existing_packs_mark_retained(struct string_list_item *item) +{ + item->util = (void*)((uintptr_t)item->util | RETAIN_PACK); +} + +static int existing_pack_is_retained(struct string_list_item *item) +{ + return (uintptr_t)item->util & RETAIN_PACK; +} + +static void existing_packs_mark_for_deletion_1(const struct git_hash_algo *algop, + struct string_list *names, + struct string_list *list) +{ + struct string_list_item *item; + const size_t hexsz = algop->hexsz; + + for_each_string_list_item(item, list) { + char *sha1; + size_t len = strlen(item->string); + if (len < hexsz) + continue; + sha1 = item->string + len - hexsz; + + if (existing_pack_is_retained(item)) { + existing_pack_unmark_for_deletion(item); + } else if (!string_list_has_string(names, sha1)) { + /* + * Mark this pack for deletion, which ensures + * that this pack won't be included in a MIDX + * (if `--write-midx` was given) and that we + * will actually delete this pack (if `-d` was + * given). + */ + existing_pack_mark_for_deletion(item); + } + } +} + +void existing_packs_retain_cruft(struct existing_packs *existing, + struct packed_git *cruft) +{ + struct strbuf buf = STRBUF_INIT; + struct string_list_item *item; + + strbuf_addstr(&buf, pack_basename(cruft)); + strbuf_strip_suffix(&buf, ".pack"); + + item = string_list_lookup(&existing->cruft_packs, buf.buf); + if (!item) + BUG("could not find cruft pack '%s'", pack_basename(cruft)); + + existing_packs_mark_retained(item); + strbuf_release(&buf); +} + +void existing_packs_mark_for_deletion(struct existing_packs *existing, + struct string_list *names) + +{ + const struct git_hash_algo *algop = existing->repo->hash_algo; + existing_packs_mark_for_deletion_1(algop, names, + &existing->non_kept_packs); + existing_packs_mark_for_deletion_1(algop, names, + &existing->cruft_packs); +} + +static void remove_redundant_packs_1(struct repository *repo, + struct string_list *packs, + const char *packdir) +{ + struct string_list_item *item; + for_each_string_list_item(item, packs) { + if (!existing_pack_is_marked_for_deletion(item)) + continue; + repack_remove_redundant_pack(repo, packdir, item->string); + } +} + +void existing_packs_remove_redundant(struct existing_packs *existing, + const char *packdir) +{ + remove_redundant_packs_1(existing->repo, &existing->non_kept_packs, + packdir); + remove_redundant_packs_1(existing->repo, &existing->cruft_packs, + packdir); +} + +void existing_packs_release(struct existing_packs *existing) +{ + string_list_clear(&existing->kept_packs, 0); + string_list_clear(&existing->non_kept_packs, 0); + string_list_clear(&existing->cruft_packs, 0); + string_list_clear(&existing->midx_packs, 0); +} + +static struct { + const char *name; + unsigned optional:1; +} exts[] = { + {".pack"}, + {".rev", 1}, + {".mtimes", 1}, + {".bitmap", 1}, + {".promisor", 1}, + {".idx"}, +}; + +struct generated_pack { + struct tempfile *tempfiles[ARRAY_SIZE(exts)]; +}; + +struct generated_pack *generated_pack_populate(const char *name, + const char *packtmp) +{ + struct stat statbuf; + struct strbuf path = STRBUF_INIT; + struct generated_pack *pack = xcalloc(1, sizeof(*pack)); + size_t i; + + for (i = 0; i < ARRAY_SIZE(exts); i++) { + strbuf_reset(&path); + strbuf_addf(&path, "%s-%s%s", packtmp, name, exts[i].name); + + if (stat(path.buf, &statbuf)) + continue; + + pack->tempfiles[i] = register_tempfile(path.buf); + } + + strbuf_release(&path); + return pack; +} + +int generated_pack_has_ext(const struct generated_pack *pack, const char *ext) +{ + size_t i; + for (i = 0; i < ARRAY_SIZE(exts); i++) { + if (strcmp(exts[i].name, ext)) + continue; + return !!pack->tempfiles[i]; + } + BUG("unknown pack extension: '%s'", ext); +} + +void generated_pack_install(struct generated_pack *pack, const char *name, + const char *packdir, const char *packtmp) +{ + size_t ext; + for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { + char *fname; + + fname = mkpathdup("%s/pack-%s%s", packdir, name, + exts[ext].name); + + if (pack->tempfiles[ext]) { + const char *fname_old = get_tempfile_path(pack->tempfiles[ext]); + struct stat statbuffer; + + if (!stat(fname_old, &statbuffer)) { + statbuffer.st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); + chmod(fname_old, statbuffer.st_mode); + } + + if (rename_tempfile(&pack->tempfiles[ext], fname)) + die_errno(_("renaming pack to '%s' failed"), + fname); + } else if (!exts[ext].optional) + die(_("pack-objects did not write a '%s' file for pack %s-%s"), + exts[ext].name, packtmp, name); + else if (unlink(fname) < 0 && errno != ENOENT) + die_errno(_("could not unlink: %s"), fname); + + free(fname); + } +} diff --git a/repack.h b/repack.h new file mode 100644 index 0000000000000000000000000000000000000000..15886c348862d52a36552806e9c69f00e28d5885 --- /dev/null +++ b/repack.h @@ -0,0 +1,146 @@ +#ifndef REPACK_H +#define REPACK_H + +#include "list-objects-filter-options.h" +#include "string-list.h" + +struct pack_objects_args { + char *window; + char *window_memory; + char *depth; + char *threads; + unsigned long max_pack_size; + int no_reuse_delta; + int no_reuse_object; + int quiet; + int local; + int name_hash_version; + int path_walk; + int delta_base_offset; + int pack_kept_objects; + struct list_objects_filter_options filter_options; +}; + +#define PACK_OBJECTS_ARGS_INIT { \ + .delta_base_offset = 1, \ + .pack_kept_objects = -1, \ +} + +struct child_process; + +void prepare_pack_objects(struct child_process *cmd, + const struct pack_objects_args *args, + const char *out); +void pack_objects_args_release(struct pack_objects_args *args); + +void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, + const char *base_name); + +struct write_pack_opts { + struct pack_objects_args *po_args; + const char *destination; + const char *packdir; + const char *packtmp; +}; + +const char *write_pack_opts_pack_prefix(struct write_pack_opts *opts); +int write_pack_opts_is_local(struct write_pack_opts *opts); + +int finish_pack_objects_cmd(const struct git_hash_algo *algop, + struct write_pack_opts *opts, + struct child_process *cmd, + struct string_list *names); + +struct repository; +struct packed_git; + +struct existing_packs { + struct repository *repo; + struct string_list kept_packs; + struct string_list non_kept_packs; + struct string_list cruft_packs; + struct string_list midx_packs; +}; + +#define EXISTING_PACKS_INIT { \ + .kept_packs = STRING_LIST_INIT_DUP, \ + .non_kept_packs = STRING_LIST_INIT_DUP, \ + .cruft_packs = STRING_LIST_INIT_DUP, \ +} + +/* + * Adds all packs hex strings (pack-$HASH) to either packs->non_kept + * or packs->kept based on whether each pack has a corresponding + * .keep file or not. Packs without a .keep file are not to be kept + * if we are going to pack everything into one file. + */ +void existing_packs_collect(struct existing_packs *existing, + const struct string_list *extra_keep); +int existing_packs_has_non_kept(const struct existing_packs *existing); +int existing_pack_is_marked_for_deletion(struct string_list_item *item); +void existing_packs_retain_cruft(struct existing_packs *existing, + struct packed_git *cruft); +void existing_packs_mark_for_deletion(struct existing_packs *existing, + struct string_list *names); +void existing_packs_remove_redundant(struct existing_packs *existing, + const char *packdir); +void existing_packs_release(struct existing_packs *existing); + +struct generated_pack; + +struct generated_pack *generated_pack_populate(const char *name, + const char *packtmp); +int generated_pack_has_ext(const struct generated_pack *pack, const char *ext); +void generated_pack_install(struct generated_pack *pack, const char *name, + const char *packdir, const char *packtmp); + +void repack_promisor_objects(struct repository *repo, + const struct pack_objects_args *args, + struct string_list *names, const char *packtmp); + +struct pack_geometry { + struct packed_git **pack; + uint32_t pack_nr, pack_alloc; + uint32_t split; + + int split_factor; +}; + +void pack_geometry_init(struct pack_geometry *geometry, + struct existing_packs *existing, + const struct pack_objects_args *args); +void pack_geometry_split(struct pack_geometry *geometry); +struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry); +void pack_geometry_remove_redundant(struct pack_geometry *geometry, + struct string_list *names, + struct existing_packs *existing, + const char *packdir); +void pack_geometry_release(struct pack_geometry *geometry); + +struct tempfile; + +struct repack_write_midx_opts { + struct existing_packs *existing; + struct pack_geometry *geometry; + struct string_list *names; + const char *refs_snapshot; + const char *packdir; + int show_progress; + int write_bitmaps; + int midx_must_contain_cruft; +}; + +void midx_snapshot_refs(struct repository *repo, struct tempfile *f); +int write_midx_included_packs(struct repack_write_midx_opts *opts); + +int write_filtered_pack(struct write_pack_opts *opts, + struct existing_packs *existing, + struct string_list *names); + +int write_cruft_pack(struct write_pack_opts *opts, + const char *cruft_expiration, + unsigned long combine_cruft_below_size, + struct string_list *names, + struct existing_packs *existing); + +#endif /* REPACK_H */ diff --git a/server-info.c b/server-info.c index 9bb30d9ab71d2231905a031a7e8f173e31585a67..b9a710544ab2854879593961dc9b2fc3350435ea 100644 --- a/server-info.c +++ b/server-info.c @@ -292,7 +292,7 @@ static void init_pack_info(struct repository *r, const char *infofile, int force int i; size_t alloc = 0; - for (p = get_all_packs(r); p; p = p->next) { + repo_for_each_pack(r, p) { /* we ignore things on alternate path since they are * not available to the pullers in general. */ diff --git a/t/helper/test-find-pack.c b/t/helper/test-find-pack.c index 611a13a32610d2f291d89ef1aca12aab1383c335..fc4b8a77b3007a1f28d89fce94f579b4a6f45f9c 100644 --- a/t/helper/test-find-pack.c +++ b/t/helper/test-find-pack.c @@ -39,11 +39,12 @@ int cmd__find_pack(int argc, const char **argv) if (repo_get_oid(the_repository, argv[0], &oid)) die("cannot parse %s as an object name", argv[0]); - for (p = get_all_packs(the_repository); p; p = p->next) + repo_for_each_pack(the_repository, p) { if (find_pack_entry_one(&oid, p)) { printf("%s\n", p->pack_name); actual_count++; } + } if (count > -1 && count != actual_count) die("bad packfile count %d instead of %d", actual_count, count); diff --git a/t/helper/test-pack-mtimes.c b/t/helper/test-pack-mtimes.c index d51aaa3dc40d1287c60d2058b94404df777e7ee4..7a8ee1de24ba83c1593960868a1b4dfdc57c24fa 100644 --- a/t/helper/test-pack-mtimes.c +++ b/t/helper/test-pack-mtimes.c @@ -37,7 +37,7 @@ int cmd__pack_mtimes(int argc, const char **argv) if (argc != 2) usage(pack_mtimes_usage); - for (p = get_all_packs(the_repository); p; p = p->next) { + repo_for_each_pack(the_repository, p) { strbuf_addstr(&buf, basename(p->pack_name)); strbuf_strip_suffix(&buf, ".pack"); strbuf_addstr(&buf, ".mtimes"); diff --git a/transport-helper.c b/transport-helper.c index 0789e5bca53282d29f30eea9a2eca129e115f40f..4d95d84f9e4d05db5117016bcdacadf3a4fe46b2 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -450,7 +450,7 @@ static int fetch_with_fetch(struct transport *transport, } strbuf_release(&buf); - reprepare_packed_git(the_repository); + odb_reprepare(the_repository->objects); return 0; }