diff --git a/Makefile b/Makefile index 7ea149598d8ed870e385e6a67d31338ed20a6f53..fb965e6acbbbf3df8715ce776f9b36120f9e5b1e 100644 --- a/Makefile +++ b/Makefile @@ -1249,6 +1249,12 @@ LIB_OBJS += refs/packed-backend.o LIB_OBJS += refs/ref-cache.o LIB_OBJS += refspec.o LIB_OBJS += remote.o +LIB_OBJS += repack.o +LIB_OBJS += repack-cruft.o +LIB_OBJS += repack-filtered.o +LIB_OBJS += repack-geometry.o +LIB_OBJS += repack-midx.o +LIB_OBJS += repack-promisor.o LIB_OBJS += replace-object.o LIB_OBJS += repo-settings.o LIB_OBJS += repository.o diff --git a/builtin/cat-file.c b/builtin/cat-file.c index ee6715fa523ce6094dd65a83b2f8b413f3854a87..0ab076aeb30ad98e2e58dc7b444f1ea722202a89 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -852,10 +852,9 @@ static void batch_each_object(struct batch_options *opt, if (bitmap && !for_each_bitmapped_object(bitmap, &opt->objects_filter, batch_one_object_bitmapped, &payload)) { - struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *pack; - for (pack = packfile_store_get_all_packs(packs); pack; pack = pack->next) { + repo_for_each_pack(the_repository, pack) { if (bitmap_index_contains_pack(bitmap, pack) || open_pack_index(pack)) continue; diff --git a/builtin/count-objects.c b/builtin/count-objects.c index f2f407c2a7818334d9fce189228a76aac0c245f7..18f6e33b6f913f9a5a4e4656bd9bdbf2c040a42a 100644 --- a/builtin/count-objects.c +++ b/builtin/count-objects.c @@ -122,7 +122,6 @@ int cmd_count_objects(int argc, count_loose, count_cruft, NULL, NULL); if (verbose) { - struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; unsigned long num_pack = 0; off_t size_pack = 0; @@ -130,7 +129,7 @@ int cmd_count_objects(int argc, struct strbuf pack_buf = STRBUF_INIT; struct strbuf garbage_buf = STRBUF_INIT; - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + repo_for_each_pack(the_repository, p) { if (!p->pack_local) continue; if (open_pack_index(p)) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 606c6aea82380e7c744839be3bdc1019169bb99a..6fe6e9bc61d81b83119de46f4024d7050edfa5e2 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -978,7 +978,7 @@ static int store_object( if (e->idx.offset) { duplicate_count_by_type[type]++; return 1; - } else if (find_oid_pack(&oid, packfile_store_get_all_packs(packs))) { + } else if (packfile_list_find_oid(packfile_store_get_packs(packs), &oid)) { e->type = type; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! */ @@ -1179,7 +1179,7 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) duplicate_count_by_type[OBJ_BLOB]++; truncate_pack(&checkpoint); - } else if (find_oid_pack(&oid, packfile_store_get_all_packs(packs))) { + } else if (packfile_list_find_oid(packfile_store_get_packs(packs), &oid)) { e->type = OBJ_BLOB; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! 
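Most conversions in this patch follow the pattern visible in the cat-file and count-objects hunks above: the open-coded walk over packfile_store_get_all_packs() and ->next becomes a repo_for_each_pack() loop, so callers no longer reach into the packfile store themselves. The macro's definition is not visible in this part of the diff; a rough sketch of the shape these call sites assume (the real definition may differ) would be:

    #define repo_for_each_pack(repo, p) \
            for (p = packfile_store_get_all_packs((repo)->objects->packfiles); \
                 p; p = p->next)

Each caller still declares its own struct packed_git *p; only the boilerplate of fetching the store and walking the chain goes away.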
*/ diff --git a/builtin/fsck.c b/builtin/fsck.c index 8ee95e0d67cf374b9814c92558ce969796300a7b..b1a650c6731d3268c65c2204af1975fe23e6d226 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -867,20 +867,20 @@ static int mark_packed_for_connectivity(const struct object_id *oid, static int check_pack_rev_indexes(struct repository *r, int show_progress) { - struct packfile_store *packs = r->objects->packfiles; struct progress *progress = NULL; + struct packed_git *p; uint32_t pack_count = 0; int res = 0; if (show_progress) { - for (struct packed_git *p = packfile_store_get_all_packs(packs); p; p = p->next) + repo_for_each_pack(r, p) pack_count++; progress = start_delayed_progress(the_repository, "Verifying reverse pack-indexes", pack_count); pack_count = 0; } - for (struct packed_git *p = packfile_store_get_all_packs(packs); p; p = p->next) { + repo_for_each_pack(r, p) { int load_error = load_pack_revindex_from_disk(p); if (load_error < 0) { @@ -1000,8 +1000,6 @@ int cmd_fsck(int argc, for_each_packed_object(the_repository, mark_packed_for_connectivity, NULL, 0); } else { - struct packfile_store *packs = the_repository->objects->packfiles; - odb_prepare_alternates(the_repository->objects); for (source = the_repository->objects->sources; source; source = source->next) fsck_source(source); @@ -1012,8 +1010,7 @@ int cmd_fsck(int argc, struct progress *progress = NULL; if (show_progress) { - for (p = packfile_store_get_all_packs(packs); p; - p = p->next) { + repo_for_each_pack(the_repository, p) { if (open_pack_index(p)) continue; total += p->num_objects; @@ -1022,8 +1019,8 @@ int cmd_fsck(int argc, progress = start_progress(the_repository, _("Checking objects"), total); } - for (p = packfile_store_get_all_packs(packs); p; - p = p->next) { + + repo_for_each_pack(the_repository, p) { /* verify gives error messages itself */ if (verify_pack(the_repository, p, fsck_obj_buffer, diff --git a/builtin/gc.c b/builtin/gc.c index e19e13d9788076cf05029561a4fdcfea9f1fc095..541d7471f190728c5ca3100ae129572d02497e50 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -487,10 +487,9 @@ static int too_many_loose_objects(struct gc_config *cfg) static struct packed_git *find_base_packs(struct string_list *packs, unsigned long limit) { - struct packfile_store *packfiles = the_repository->objects->packfiles; struct packed_git *p, *base = NULL; - for (p = packfile_store_get_all_packs(packfiles); p; p = p->next) { + repo_for_each_pack(the_repository, p) { if (!p->pack_local || p->is_cruft) continue; if (limit) { @@ -509,14 +508,13 @@ static struct packed_git *find_base_packs(struct string_list *packs, static int too_many_packs(struct gc_config *cfg) { - struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; - int cnt; + int cnt = 0; if (cfg->gc_auto_pack_limit <= 0) return 0; - for (cnt = 0, p = packfile_store_get_all_packs(packs); p; p = p->next) { + repo_for_each_pack(the_repository, p) { if (!p->pack_local) continue; if (p->pack_keep) @@ -1425,9 +1423,9 @@ static int incremental_repack_auto_condition(struct gc_config *cfg UNUSED) if (incremental_repack_auto_limit < 0) return 1; - for (p = packfile_store_get_packs(the_repository->objects->packfiles); - count < incremental_repack_auto_limit && p; - p = p->next) { + repo_for_each_pack(the_repository, p) { + if (count >= incremental_repack_auto_limit) + break; if (!p->multi_pack_index) count++; } @@ -1494,7 +1492,7 @@ static off_t get_auto_pack_size(void) struct repository *r = the_repository; odb_reprepare(r->objects); - for (p = 
packfile_store_get_all_packs(r->objects->packfiles); p; p = p->next) { + repo_for_each_pack(r, p) { if (p->pack_size > max_size) { second_largest_size = max_size; max_size = p->pack_size; diff --git a/builtin/grep.c b/builtin/grep.c index 13841fbf00fed1be07d203a5141271b7c7012b92..53cccf2d25068c664c27643873b8c6d7fe99d848 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1214,7 +1214,7 @@ int cmd_grep(int argc, if (recurse_submodules) repo_read_gitmodules(the_repository, 1); if (startup_info->have_repository) - (void)packfile_store_get_packs(the_repository->objects->packfiles); + packfile_store_prepare(the_repository->objects->packfiles); start_threads(&opt); } else { diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 5bdc44fb2de1fa2b0ee6f7adc49a6f63201c284d..3b73ab9f614b66881475f1b5e494bf02a4ca5228 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1706,8 +1706,8 @@ static int want_object_in_pack_mtime(const struct object_id *oid, uint32_t found_mtime) { int want; + struct packfile_list_entry *e; struct odb_source *source; - struct list_head *pos; if (!exclude && local) { /* @@ -1748,12 +1748,11 @@ static int want_object_in_pack_mtime(const struct object_id *oid, } } - list_for_each(pos, packfile_store_get_packs_mru(the_repository->objects->packfiles)) { - struct packed_git *p = list_entry(pos, struct packed_git, mru); + for (e = the_repository->objects->packfiles->packs.head; e; e = e->next) { + struct packed_git *p = e->pack; want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime); if (!exclude && want > 0) - list_move(&p->mru, - packfile_store_get_packs_mru(the_repository->objects->packfiles)); + packfile_list_prepend(&the_repository->objects->packfiles->packs, p); if (want != -1) return want; } @@ -3831,12 +3830,10 @@ static int pack_mtime_cmp(const void *_a, const void *_b) static void read_packs_list_from_stdin(struct rev_info *revs) { - struct packfile_store *packs = the_repository->objects->packfiles; struct strbuf buf = STRBUF_INIT; struct string_list include_packs = STRING_LIST_INIT_DUP; struct string_list exclude_packs = STRING_LIST_INIT_DUP; struct string_list_item *item = NULL; - struct packed_git *p; while (strbuf_getline(&buf, stdin) != EOF) { @@ -3856,7 +3853,7 @@ static void read_packs_list_from_stdin(struct rev_info *revs) string_list_sort(&exclude_packs); string_list_remove_duplicates(&exclude_packs, 0); - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + repo_for_each_pack(the_repository, p) { const char *pack_name = pack_basename(p); if ((item = string_list_lookup(&include_packs, pack_name))) @@ -4077,7 +4074,6 @@ static void enumerate_cruft_objects(void) static void enumerate_and_traverse_cruft_objects(struct string_list *fresh_packs) { - struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; struct rev_info revs; int ret; @@ -4107,7 +4103,7 @@ static void enumerate_and_traverse_cruft_objects(struct string_list *fresh_packs * Re-mark only the fresh packs as kept so that objects in * unknown packs do not halt the reachability traversal early. 
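The want_object_in_pack_mtime() hunk above is the one spot where the old most-recently-used behaviour has to be preserved by hand: instead of list_for_each() over the MRU list plus list_move() on a hit, the loop now walks the store's packfile_list directly and calls packfile_list_prepend() so the matching pack moves to the front for the next lookup. The list types are not visible in this part of the diff; the call sites here and in http-push.c/http-walker.c further down assume roughly this shape:

    struct packfile_list_entry {
            struct packed_git *pack;
            struct packfile_list_entry *next;
    };

    struct packfile_list {
            struct packfile_list_entry *head;
            /* presumably also a tail pointer and/or a count */
    };

packfile_list_find_oid() then walks the entries from a given head and returns the first pack whose index contains the object, which is what find_oid_pack() used to do on a bare packed_git chain.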
*/ - for (p = packfile_store_get_all_packs(packs); p; p = p->next) + repo_for_each_pack(the_repository, p) p->pack_keep_in_core = 0; mark_pack_kept_in_core(fresh_packs, 1); @@ -4124,7 +4120,6 @@ static void enumerate_and_traverse_cruft_objects(struct string_list *fresh_packs static void read_cruft_objects(void) { - struct packfile_store *packs = the_repository->objects->packfiles; struct strbuf buf = STRBUF_INIT; struct string_list discard_packs = STRING_LIST_INIT_DUP; struct string_list fresh_packs = STRING_LIST_INIT_DUP; @@ -4145,7 +4140,7 @@ static void read_cruft_objects(void) string_list_sort(&discard_packs); string_list_sort(&fresh_packs); - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + repo_for_each_pack(the_repository, p) { const char *pack_name = pack_basename(p); struct string_list_item *item; @@ -4393,27 +4388,27 @@ static void add_unreachable_loose_objects(struct rev_info *revs) static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid) { - struct packfile_store *packs = the_repository->objects->packfiles; static struct packed_git *last_found = (void *)1; struct packed_git *p; - p = (last_found != (void *)1) ? last_found : - packfile_store_get_all_packs(packs); + if (last_found != (void *)1 && find_pack_entry_one(oid, last_found)) + return 1; - while (p) { - if ((!p->pack_local || p->pack_keep || - p->pack_keep_in_core) && - find_pack_entry_one(oid, p)) { + repo_for_each_pack(the_repository, p) { + if ((!p->pack_local || p->pack_keep || p->pack_keep_in_core) && + find_pack_entry_one(oid, p)) { last_found = p; return 1; } - if (p == last_found) - p = packfile_store_get_all_packs(packs); - else - p = p->next; - if (p == last_found) - p = p->next; + + /* + * We have already checked `last_found`, so there is no need to + * re-check here. 
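The rewritten has_sha1_pack_kept_or_nonlocal() keeps the long-standing one-slot cache: last_found remembers the pack that answered the previous query ((void *)1 serves as the "nothing cached yet" sentinel), so a run of objects living in the same pack avoids rescanning every pack. The same pattern, reduced to a self-contained toy (struct pack and the integer object ids are stand-ins, not Git types):

    #include <stddef.h>

    struct pack {
            const int *ids;
            size_t nr;
            struct pack *next;
    };

    static struct pack *packs;   /* every pack, as a singly-linked list */
    static struct pack *cached;  /* pack that satisfied the previous lookup */

    static int pack_contains(const struct pack *p, int id)
    {
            for (size_t i = 0; i < p->nr; i++)
                    if (p->ids[i] == id)
                            return 1;
            return 0;
    }

    static int in_some_pack(int id)
    {
            if (cached && pack_contains(cached, id))
                    return 1;        /* fast path: same pack as last time */

            for (struct pack *p = packs; p; p = p->next) {
                    if (pack_contains(p, id)) {
                            cached = p;      /* remember for the next call */
                            return 1;
                    }
            }
            return 0;
    }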
+ */ + if (p == last_found && last_found != (void *)1) + continue; } + return 0; } @@ -4440,13 +4435,12 @@ static int loosened_object_can_be_discarded(const struct object_id *oid, static void loosen_unused_packed_objects(void) { - struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; uint32_t i; uint32_t loosened_objects_nr = 0; struct object_id oid; - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + repo_for_each_pack(the_repository, p) { if (!p->pack_local || p->pack_keep || p->pack_keep_in_core) continue; @@ -4747,13 +4741,12 @@ static void get_object_list(struct rev_info *revs, struct strvec *argv) static void add_extra_kept_packs(const struct string_list *names) { - struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; if (!names->nr) return; - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + repo_for_each_pack(the_repository, p) { const char *name = basename(p->pack_name); int i; @@ -5191,10 +5184,9 @@ int cmd_pack_objects(int argc, add_extra_kept_packs(&keep_pack_list); if (ignore_packed_keep_on_disk) { - struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; - for (p = packfile_store_get_all_packs(packs); p; p = p->next) + repo_for_each_pack(the_repository, p) if (p->pack_local && p->pack_keep) break; if (!p) /* no keep-able packs found */ @@ -5206,10 +5198,9 @@ int cmd_pack_objects(int argc, * want to unset "local" based on looking at packs, as * it also covers non-local objects */ - struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + repo_for_each_pack(the_repository, p) { if (!p->pack_local) { have_non_local_packs = 1; break; diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c index 80743d8806c7cf7a818dde91135ed98c45249207..e4ecf774ca640559950bc9f55a91db962fe2596b 100644 --- a/builtin/pack-redundant.c +++ b/builtin/pack-redundant.c @@ -566,29 +566,23 @@ static struct pack_list * add_pack(struct packed_git *p) static struct pack_list * add_pack_file(const char *filename) { - struct packfile_store *packs = the_repository->objects->packfiles; - struct packed_git *p = packfile_store_get_all_packs(packs); + struct packed_git *p; if (strlen(filename) < 40) die("Bad pack filename: %s", filename); - while (p) { + repo_for_each_pack(the_repository, p) if (strstr(p->pack_name, filename)) return add_pack(p); - p = p->next; - } die("Filename %s not found in packed_git", filename); } static void load_all(void) { - struct packfile_store *packs = the_repository->objects->packfiles; - struct packed_git *p = packfile_store_get_all_packs(packs); + struct packed_git *p; - while (p) { + repo_for_each_pack(the_repository, p) add_pack(p); - p = p->next; - } } int cmd_pack_redundant(int argc, const char **argv, const char *prefix UNUSED, struct repository *repo UNUSED) { diff --git a/builtin/repack.c b/builtin/repack.c index e8730808c535a96732610d8250a31e5b0ced68a7..ad60c4290d44c284a6ec5ea3a8c60715d658cb23 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -3,27 +3,18 @@ #include "builtin.h" #include "config.h" -#include "dir.h" #include "environment.h" -#include "gettext.h" -#include "hex.h" #include "parse-options.h" #include "path.h" #include "run-command.h" #include "server-info.h" -#include "strbuf.h" #include "string-list.h" -#include "strvec.h" #include "midx.h" #include "packfile.h" #include "prune-packed.h" -#include "odb.h" #include 
"promisor-remote.h" +#include "repack.h" #include "shallow.h" -#include "pack.h" -#include "pack-bitmap.h" -#include "refs.h" -#include "list-objects-filter-options.h" #define ALL_INTO_ONE 1 #define LOOSEN_UNREACHABLE 2 @@ -33,8 +24,6 @@ #define RETAIN_PACK 2 static int pack_everything; -static int delta_base_offset = 1; -static int pack_kept_objects = -1; static int write_bitmaps = -1; static int use_delta_islands; static int run_update_server_info = 1; @@ -53,31 +42,23 @@ static const char incremental_bitmap_conflict_error[] = N_( "--no-write-bitmap-index or disable the pack.writeBitmaps configuration." ); -struct pack_objects_args { - char *window; - char *window_memory; - char *depth; - char *threads; - unsigned long max_pack_size; - int no_reuse_delta; - int no_reuse_object; - int quiet; - int local; - int name_hash_version; - int path_walk; - struct list_objects_filter_options filter_options; +struct repack_config_ctx { + struct pack_objects_args *po_args; + struct pack_objects_args *cruft_po_args; }; static int repack_config(const char *var, const char *value, const struct config_context *ctx, void *cb) { - struct pack_objects_args *cruft_po_args = cb; + struct repack_config_ctx *repack_ctx = cb; + struct pack_objects_args *po_args = repack_ctx->po_args; + struct pack_objects_args *cruft_po_args = repack_ctx->cruft_po_args; if (!strcmp(var, "repack.usedeltabaseoffset")) { - delta_base_offset = git_config_bool(var, value); + po_args->delta_base_offset = git_config_bool(var, value); return 0; } if (!strcmp(var, "repack.packkeptobjects")) { - pack_kept_objects = git_config_bool(var, value); + po_args->pack_kept_objects = git_config_bool(var, value); return 0; } if (!strcmp(var, "repack.writebitmaps") || @@ -116,1138 +97,10 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -static void pack_objects_args_release(struct pack_objects_args *args) -{ - free(args->window); - free(args->window_memory); - free(args->depth); - free(args->threads); - list_objects_filter_release(&args->filter_options); -} - -struct existing_packs { - struct string_list kept_packs; - struct string_list non_kept_packs; - struct string_list cruft_packs; -}; - -#define EXISTING_PACKS_INIT { \ - .kept_packs = STRING_LIST_INIT_DUP, \ - .non_kept_packs = STRING_LIST_INIT_DUP, \ - .cruft_packs = STRING_LIST_INIT_DUP, \ -} - -static int has_existing_non_kept_packs(const struct existing_packs *existing) -{ - return existing->non_kept_packs.nr || existing->cruft_packs.nr; -} - -static void pack_mark_for_deletion(struct string_list_item *item) -{ - item->util = (void*)((uintptr_t)item->util | DELETE_PACK); -} - -static void pack_unmark_for_deletion(struct string_list_item *item) -{ - item->util = (void*)((uintptr_t)item->util & ~DELETE_PACK); -} - -static int pack_is_marked_for_deletion(struct string_list_item *item) -{ - return (uintptr_t)item->util & DELETE_PACK; -} - -static void pack_mark_retained(struct string_list_item *item) -{ - item->util = (void*)((uintptr_t)item->util | RETAIN_PACK); -} - -static int pack_is_retained(struct string_list_item *item) -{ - return (uintptr_t)item->util & RETAIN_PACK; -} - -static void mark_packs_for_deletion_1(struct string_list *names, - struct string_list *list) -{ - struct string_list_item *item; - const int hexsz = the_hash_algo->hexsz; - - for_each_string_list_item(item, list) { - char *sha1; - size_t len = strlen(item->string); - if (len < hexsz) - continue; - sha1 = item->string + len - hexsz; - - if 
(pack_is_retained(item)) { - pack_unmark_for_deletion(item); - } else if (!string_list_has_string(names, sha1)) { - /* - * Mark this pack for deletion, which ensures - * that this pack won't be included in a MIDX - * (if `--write-midx` was given) and that we - * will actually delete this pack (if `-d` was - * given). - */ - pack_mark_for_deletion(item); - } - } -} - -static void retain_cruft_pack(struct existing_packs *existing, - struct packed_git *cruft) -{ - struct strbuf buf = STRBUF_INIT; - struct string_list_item *item; - - strbuf_addstr(&buf, pack_basename(cruft)); - strbuf_strip_suffix(&buf, ".pack"); - - item = string_list_lookup(&existing->cruft_packs, buf.buf); - if (!item) - BUG("could not find cruft pack '%s'", pack_basename(cruft)); - - pack_mark_retained(item); - strbuf_release(&buf); -} - -static void mark_packs_for_deletion(struct existing_packs *existing, - struct string_list *names) - -{ - mark_packs_for_deletion_1(names, &existing->non_kept_packs); - mark_packs_for_deletion_1(names, &existing->cruft_packs); -} - -static void remove_redundant_pack(const char *dir_name, const char *base_name) -{ - struct strbuf buf = STRBUF_INIT; - struct odb_source *source = the_repository->objects->sources; - struct multi_pack_index *m = get_multi_pack_index(source); - strbuf_addf(&buf, "%s.pack", base_name); - if (m && source->local && midx_contains_pack(m, buf.buf)) - clear_midx_file(the_repository); - strbuf_insertf(&buf, 0, "%s/", dir_name); - unlink_pack_path(buf.buf, 1); - strbuf_release(&buf); -} - -static void remove_redundant_packs_1(struct string_list *packs) -{ - struct string_list_item *item; - for_each_string_list_item(item, packs) { - if (!pack_is_marked_for_deletion(item)) - continue; - remove_redundant_pack(packdir, item->string); - } -} - -static void remove_redundant_existing_packs(struct existing_packs *existing) -{ - remove_redundant_packs_1(&existing->non_kept_packs); - remove_redundant_packs_1(&existing->cruft_packs); -} - -static void existing_packs_release(struct existing_packs *existing) -{ - string_list_clear(&existing->kept_packs, 0); - string_list_clear(&existing->non_kept_packs, 0); - string_list_clear(&existing->cruft_packs, 0); -} - -/* - * Adds all packs hex strings (pack-$HASH) to either packs->non_kept - * or packs->kept based on whether each pack has a corresponding - * .keep file or not. Packs without a .keep file are not to be kept - * if we are going to pack everything into one file. 
- */ -static void collect_pack_filenames(struct existing_packs *existing, - const struct string_list *extra_keep) -{ - struct packfile_store *packs = the_repository->objects->packfiles; - struct packed_git *p; - struct strbuf buf = STRBUF_INIT; - - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { - int i; - const char *base; - - if (!p->pack_local) - continue; - - base = pack_basename(p); - - for (i = 0; i < extra_keep->nr; i++) - if (!fspathcmp(base, extra_keep->items[i].string)) - break; - - strbuf_reset(&buf); - strbuf_addstr(&buf, base); - strbuf_strip_suffix(&buf, ".pack"); - - if ((extra_keep->nr > 0 && i < extra_keep->nr) || p->pack_keep) - string_list_append(&existing->kept_packs, buf.buf); - else if (p->is_cruft) - string_list_append(&existing->cruft_packs, buf.buf); - else - string_list_append(&existing->non_kept_packs, buf.buf); - } - - string_list_sort(&existing->kept_packs); - string_list_sort(&existing->non_kept_packs); - string_list_sort(&existing->cruft_packs); - strbuf_release(&buf); -} - -static void prepare_pack_objects(struct child_process *cmd, - const struct pack_objects_args *args, - const char *out) -{ - strvec_push(&cmd->args, "pack-objects"); - if (args->window) - strvec_pushf(&cmd->args, "--window=%s", args->window); - if (args->window_memory) - strvec_pushf(&cmd->args, "--window-memory=%s", args->window_memory); - if (args->depth) - strvec_pushf(&cmd->args, "--depth=%s", args->depth); - if (args->threads) - strvec_pushf(&cmd->args, "--threads=%s", args->threads); - if (args->max_pack_size) - strvec_pushf(&cmd->args, "--max-pack-size=%lu", args->max_pack_size); - if (args->no_reuse_delta) - strvec_pushf(&cmd->args, "--no-reuse-delta"); - if (args->no_reuse_object) - strvec_pushf(&cmd->args, "--no-reuse-object"); - if (args->name_hash_version) - strvec_pushf(&cmd->args, "--name-hash-version=%d", args->name_hash_version); - if (args->path_walk) - strvec_pushf(&cmd->args, "--path-walk"); - if (args->local) - strvec_push(&cmd->args, "--local"); - if (args->quiet) - strvec_push(&cmd->args, "--quiet"); - if (delta_base_offset) - strvec_push(&cmd->args, "--delta-base-offset"); - strvec_push(&cmd->args, out); - cmd->git_cmd = 1; - cmd->out = -1; -} - -/* - * Write oid to the given struct child_process's stdin, starting it first if - * necessary. 
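prepare_pack_objects() above is a plain translation from struct pack_objects_args to child-process arguments; after this patch the former file-scope delta_base_offset and pack_kept_objects flags live in that struct as well. A usage sketch (the field values are made up for illustration):

    struct pack_objects_args args = PACK_OBJECTS_ARGS_INIT;
    struct child_process cmd = CHILD_PROCESS_INIT;

    args.window = xstrdup("250");
    args.threads = xstrdup("8");
    args.local = 1;
    args.delta_base_offset = 1;  /* was the static delta_base_offset */

    prepare_pack_objects(&cmd, &args, packtmp);
    /*
     * cmd.args now holds roughly:
     *   pack-objects --window=250 --threads=8 --local
     *                --delta-base-offset <packtmp>
     * and cmd.out is marked so that start_command() creates a pipe for
     * the pack names the child prints.
     */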
- */ -static int write_oid(const struct object_id *oid, - struct packed_git *pack UNUSED, - uint32_t pos UNUSED, void *data) -{ - struct child_process *cmd = data; - - if (cmd->in == -1) { - if (start_command(cmd)) - die(_("could not start pack-objects to repack promisor objects")); - } - - if (write_in_full(cmd->in, oid_to_hex(oid), the_hash_algo->hexsz) < 0 || - write_in_full(cmd->in, "\n", 1) < 0) - die(_("failed to feed promisor objects to pack-objects")); - return 0; -} - -static struct { - const char *name; - unsigned optional:1; -} exts[] = { - {".pack"}, - {".rev", 1}, - {".mtimes", 1}, - {".bitmap", 1}, - {".promisor", 1}, - {".idx"}, -}; - -struct generated_pack_data { - struct tempfile *tempfiles[ARRAY_SIZE(exts)]; -}; - -static struct generated_pack_data *populate_pack_exts(const char *name) -{ - struct stat statbuf; - struct strbuf path = STRBUF_INIT; - struct generated_pack_data *data = xcalloc(1, sizeof(*data)); - int i; - - for (i = 0; i < ARRAY_SIZE(exts); i++) { - strbuf_reset(&path); - strbuf_addf(&path, "%s-%s%s", packtmp, name, exts[i].name); - - if (stat(path.buf, &statbuf)) - continue; - - data->tempfiles[i] = register_tempfile(path.buf); - } - - strbuf_release(&path); - return data; -} - -static int has_pack_ext(const struct generated_pack_data *data, - const char *ext) -{ - int i; - for (i = 0; i < ARRAY_SIZE(exts); i++) { - if (strcmp(exts[i].name, ext)) - continue; - return !!data->tempfiles[i]; - } - BUG("unknown pack extension: '%s'", ext); -} - -static void repack_promisor_objects(const struct pack_objects_args *args, - struct string_list *names) -{ - struct child_process cmd = CHILD_PROCESS_INIT; - FILE *out; - struct strbuf line = STRBUF_INIT; - - prepare_pack_objects(&cmd, args, packtmp); - cmd.in = -1; - - /* - * NEEDSWORK: Giving pack-objects only the OIDs without any ordering - * hints may result in suboptimal deltas in the resulting pack. See if - * the OIDs can be sent with fake paths such that pack-objects can use a - * {type -> existing pack order} ordering when computing deltas instead - * of a {type -> size} ordering, which may produce better deltas. - */ - for_each_packed_object(the_repository, write_oid, &cmd, - FOR_EACH_OBJECT_PROMISOR_ONLY); - - if (cmd.in == -1) { - /* No packed objects; cmd was never started */ - child_process_clear(&cmd); - return; - } - - close(cmd.in); - - out = xfdopen(cmd.out, "r"); - while (strbuf_getline_lf(&line, out) != EOF) { - struct string_list_item *item; - char *promisor_name; - - if (line.len != the_hash_algo->hexsz) - die(_("repack: Expecting full hex object ID lines only from pack-objects.")); - item = string_list_append(names, line.buf); - - /* - * pack-objects creates the .pack and .idx files, but not the - * .promisor file. Create the .promisor file, which is empty. - * - * NEEDSWORK: fetch-pack sometimes generates non-empty - * .promisor files containing the ref names and associated - * hashes at the point of generation of the corresponding - * packfile, but this would not preserve their contents. Maybe - * concatenate the contents of all .promisor files instead of - * just creating a new empty file. 
- */ - promisor_name = mkpathdup("%s-%s.promisor", packtmp, - line.buf); - write_promisor_file(promisor_name, NULL, 0); - - item->util = populate_pack_exts(item->string); - - free(promisor_name); - } - - fclose(out); - if (finish_command(&cmd)) - die(_("could not finish pack-objects to repack promisor objects")); - strbuf_release(&line); -} - -struct pack_geometry { - struct packed_git **pack; - uint32_t pack_nr, pack_alloc; - uint32_t split; - - int split_factor; -}; - -static uint32_t geometry_pack_weight(struct packed_git *p) -{ - if (open_pack_index(p)) - die(_("cannot open index for %s"), p->pack_name); - return p->num_objects; -} - -static int geometry_cmp(const void *va, const void *vb) -{ - uint32_t aw = geometry_pack_weight(*(struct packed_git **)va), - bw = geometry_pack_weight(*(struct packed_git **)vb); - - if (aw < bw) - return -1; - if (aw > bw) - return 1; - return 0; -} - -static void init_pack_geometry(struct pack_geometry *geometry, - struct existing_packs *existing, - const struct pack_objects_args *args) -{ - struct packfile_store *packs = the_repository->objects->packfiles; - struct packed_git *p; - struct strbuf buf = STRBUF_INIT; - - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { - if (args->local && !p->pack_local) - /* - * When asked to only repack local packfiles we skip - * over any packfiles that are borrowed from alternate - * object directories. - */ - continue; - - if (!pack_kept_objects) { - /* - * Any pack that has its pack_keep bit set will - * appear in existing->kept_packs below, but - * this saves us from doing a more expensive - * check. - */ - if (p->pack_keep) - continue; - - /* - * The pack may be kept via the --keep-pack - * option; check 'existing->kept_packs' to - * determine whether to ignore it. - */ - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(p)); - strbuf_strip_suffix(&buf, ".pack"); - - if (string_list_has_string(&existing->kept_packs, buf.buf)) - continue; - } - if (p->is_cruft) - continue; - - ALLOC_GROW(geometry->pack, - geometry->pack_nr + 1, - geometry->pack_alloc); - - geometry->pack[geometry->pack_nr] = p; - geometry->pack_nr++; - } - - QSORT(geometry->pack, geometry->pack_nr, geometry_cmp); - strbuf_release(&buf); -} - -static void split_pack_geometry(struct pack_geometry *geometry) -{ - uint32_t i; - uint32_t split; - off_t total_size = 0; - - if (!geometry->pack_nr) { - geometry->split = geometry->pack_nr; - return; - } - - /* - * First, count the number of packs (in descending order of size) which - * already form a geometric progression. - */ - for (i = geometry->pack_nr - 1; i > 0; i--) { - struct packed_git *ours = geometry->pack[i]; - struct packed_git *prev = geometry->pack[i - 1]; - - if (unsigned_mult_overflows(geometry->split_factor, - geometry_pack_weight(prev))) - die(_("pack %s too large to consider in geometric " - "progression"), - prev->pack_name); - - if (geometry_pack_weight(ours) < - geometry->split_factor * geometry_pack_weight(prev)) - break; - } - - split = i; - - if (split) { - /* - * Move the split one to the right, since the top element in the - * last-compared pair can't be in the progression. Only do this - * when we split in the middle of the array (otherwise if we got - * to the end, then the split is in the right place). - */ - split++; - } - - /* - * Then, anything to the left of 'split' must be in a new pack. But, - * creating that new pack may cause packs in the heavy half to no longer - * form a geometric progression. 
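Concretely, with a split factor of 2 and packs weighing 1, 1, 3, 14, 32 objects (sorted ascending), the two passes of split_pack_geometry() behave as follows:

    factor = 2, weights = 1, 1, 3, 14, 32

    pass 1 (largest downwards): 32 >= 2*14, 14 >= 2*3, 3 >= 2*1, but 1 < 2*1
                                -> initial split after the two 1-object packs
    pass 2 (roll-up check):     the new pack would weigh 1 + 1 = 2; since
                                3 < 2*2 the 3-object pack is rolled up too
                                (total 5); 14 >= 2*5, so it stays put
    result:                     split = 3; packs 1, 1, 3 are rewritten into
                                one pack of weight 5, and 5, 14, 32 is again
                                a factor-2 progression

The numbers are illustrative only; geometry_pack_weight() uses each pack's object count from its index as the weight.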
- * - * Compute an expected size of the new pack, and then determine how many - * packs in the heavy half need to be joined into it (if any) to restore - * the geometric progression. - */ - for (i = 0; i < split; i++) { - struct packed_git *p = geometry->pack[i]; - - if (unsigned_add_overflows(total_size, geometry_pack_weight(p))) - die(_("pack %s too large to roll up"), p->pack_name); - total_size += geometry_pack_weight(p); - } - for (i = split; i < geometry->pack_nr; i++) { - struct packed_git *ours = geometry->pack[i]; - - if (unsigned_mult_overflows(geometry->split_factor, - total_size)) - die(_("pack %s too large to roll up"), ours->pack_name); - - if (geometry_pack_weight(ours) < - geometry->split_factor * total_size) { - if (unsigned_add_overflows(total_size, - geometry_pack_weight(ours))) - die(_("pack %s too large to roll up"), - ours->pack_name); - - split++; - total_size += geometry_pack_weight(ours); - } else - break; - } - - geometry->split = split; -} - -static struct packed_git *get_preferred_pack(struct pack_geometry *geometry) -{ - uint32_t i; - - if (!geometry) { - /* - * No geometry means either an all-into-one repack (in which - * case there is only one pack left and it is the largest) or an - * incremental one. - * - * If repacking incrementally, then we could check the size of - * all packs to determine which should be preferred, but leave - * this for later. - */ - return NULL; - } - if (geometry->split == geometry->pack_nr) - return NULL; - - /* - * The preferred pack is the largest pack above the split line. In - * other words, it is the largest pack that does not get rolled up in - * the geometric repack. - */ - for (i = geometry->pack_nr; i > geometry->split; i--) - /* - * A pack that is not local would never be included in a - * multi-pack index. We thus skip over any non-local packs. - */ - if (geometry->pack[i - 1]->pack_local) - return geometry->pack[i - 1]; - - return NULL; -} - -static void geometry_remove_redundant_packs(struct pack_geometry *geometry, - struct string_list *names, - struct existing_packs *existing) -{ - struct strbuf buf = STRBUF_INIT; - uint32_t i; - - for (i = 0; i < geometry->split; i++) { - struct packed_git *p = geometry->pack[i]; - if (string_list_has_string(names, hash_to_hex(p->hash))) - continue; - - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(p)); - strbuf_strip_suffix(&buf, ".pack"); - - if ((p->pack_keep) || - (string_list_has_string(&existing->kept_packs, buf.buf))) - continue; - - remove_redundant_pack(packdir, buf.buf); - } - - strbuf_release(&buf); -} - -static void free_pack_geometry(struct pack_geometry *geometry) -{ - if (!geometry) - return; - - free(geometry->pack); -} - -static int midx_has_unknown_packs(char **midx_pack_names, - size_t midx_pack_names_nr, - struct string_list *include, - struct pack_geometry *geometry, - struct existing_packs *existing) -{ - size_t i; - - string_list_sort(include); - - for (i = 0; i < midx_pack_names_nr; i++) { - const char *pack_name = midx_pack_names[i]; - - /* - * Determine whether or not each MIDX'd pack from the existing - * MIDX (if any) is represented in the new MIDX. For each pack - * in the MIDX, it must either be: - * - * - In the "include" list of packs to be included in the new - * MIDX. Note this function is called before the include - * list is populated with any cruft pack(s). 
- * - * - Below the geometric split line (if using pack geometry), - * indicating that the pack won't be included in the new - * MIDX, but its contents were rolled up as part of the - * geometric repack. - * - * - In the existing non-kept packs list (if not using pack - * geometry), and marked as non-deleted. - */ - if (string_list_has_string(include, pack_name)) { - continue; - } else if (geometry) { - struct strbuf buf = STRBUF_INIT; - uint32_t j; - - for (j = 0; j < geometry->split; j++) { - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(geometry->pack[j])); - strbuf_strip_suffix(&buf, ".pack"); - strbuf_addstr(&buf, ".idx"); - - if (!strcmp(pack_name, buf.buf)) { - strbuf_release(&buf); - break; - } - } - - strbuf_release(&buf); - - if (j < geometry->split) - continue; - } else { - struct string_list_item *item; - - item = string_list_lookup(&existing->non_kept_packs, - pack_name); - if (item && !pack_is_marked_for_deletion(item)) - continue; - } - - /* - * If we got to this point, the MIDX includes some pack that we - * don't know about. - */ - return 1; - } - - return 0; -} - -struct midx_snapshot_ref_data { - struct tempfile *f; - struct oidset seen; - int preferred; -}; - -static int midx_snapshot_ref_one(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flag UNUSED, void *_data) -{ - struct midx_snapshot_ref_data *data = _data; - struct object_id peeled; - - if (!peel_iterated_oid(the_repository, oid, &peeled)) - oid = &peeled; - - if (oidset_insert(&data->seen, oid)) - return 0; /* already seen */ - - if (odb_read_object_info(the_repository->objects, oid, NULL) != OBJ_COMMIT) - return 0; - - fprintf(data->f->fp, "%s%s\n", data->preferred ? "+" : "", - oid_to_hex(oid)); - - return 0; -} - -static void midx_snapshot_refs(struct tempfile *f) -{ - struct midx_snapshot_ref_data data; - const struct string_list *preferred = bitmap_preferred_tips(the_repository); - - data.f = f; - data.preferred = 0; - oidset_init(&data.seen, 0); - - if (!fdopen_tempfile(f, "w")) - die(_("could not open tempfile %s for writing"), - get_tempfile_path(f)); - - if (preferred) { - struct string_list_item *item; - - data.preferred = 1; - for_each_string_list_item(item, preferred) - refs_for_each_ref_in(get_main_ref_store(the_repository), - item->string, - midx_snapshot_ref_one, &data); - data.preferred = 0; - } - - refs_for_each_ref(get_main_ref_store(the_repository), - midx_snapshot_ref_one, &data); - - if (close_tempfile_gently(f)) { - int save_errno = errno; - delete_tempfile(&f); - errno = save_errno; - die_errno(_("could not close refs snapshot tempfile")); - } - - oidset_clear(&data.seen); -} - -static void midx_included_packs(struct string_list *include, - struct existing_packs *existing, - char **midx_pack_names, - size_t midx_pack_names_nr, - struct string_list *names, - struct pack_geometry *geometry) -{ - struct string_list_item *item; - struct strbuf buf = STRBUF_INIT; - - for_each_string_list_item(item, &existing->kept_packs) { - strbuf_reset(&buf); - strbuf_addf(&buf, "%s.idx", item->string); - string_list_insert(include, buf.buf); - } - - for_each_string_list_item(item, names) { - strbuf_reset(&buf); - strbuf_addf(&buf, "pack-%s.idx", item->string); - string_list_insert(include, buf.buf); - } - - if (geometry->split_factor) { - uint32_t i; - - for (i = geometry->split; i < geometry->pack_nr; i++) { - struct packed_git *p = geometry->pack[i]; - - /* - * The multi-pack index never refers to packfiles part - * of an alternate object database, 
so we skip these. - * While git-multi-pack-index(1) would silently ignore - * them anyway, this allows us to skip executing the - * command completely when we have only non-local - * packfiles. - */ - if (!p->pack_local) - continue; - - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(p)); - strbuf_strip_suffix(&buf, ".pack"); - strbuf_addstr(&buf, ".idx"); - - string_list_insert(include, buf.buf); - } - } else { - for_each_string_list_item(item, &existing->non_kept_packs) { - if (pack_is_marked_for_deletion(item)) - continue; - - strbuf_reset(&buf); - strbuf_addf(&buf, "%s.idx", item->string); - string_list_insert(include, buf.buf); - } - } - - if (midx_must_contain_cruft || - midx_has_unknown_packs(midx_pack_names, midx_pack_names_nr, - include, geometry, existing)) { - /* - * If there are one or more unknown pack(s) present (see - * midx_has_unknown_packs() for what makes a pack - * "unknown") in the MIDX before the repack, keep them - * as they may be required to form a reachability - * closure if the MIDX is bitmapped. - * - * For example, a cruft pack can be required to form a - * reachability closure if the MIDX is bitmapped and one - * or more of the bitmap's selected commits reaches a - * once-cruft object that was later made reachable. - */ - for_each_string_list_item(item, &existing->cruft_packs) { - /* - * When doing a --geometric repack, there is no - * need to check for deleted packs, since we're - * by definition not doing an ALL_INTO_ONE - * repack (hence no packs will be deleted). - * Otherwise we must check for and exclude any - * packs which are enqueued for deletion. - * - * So we could omit the conditional below in the - * --geometric case, but doing so is unnecessary - * since no packs are marked as pending - * deletion (since we only call - * `mark_packs_for_deletion()` when doing an - * all-into-one repack). - */ - if (pack_is_marked_for_deletion(item)) - continue; - - strbuf_reset(&buf); - strbuf_addf(&buf, "%s.idx", item->string); - string_list_insert(include, buf.buf); - } - } else { - /* - * Modern versions of Git (with the appropriate - * configuration setting) will write new copies of - * once-cruft objects when doing a --geometric repack. - * - * If the MIDX has no cruft pack, new packs written - * during a --geometric repack will not rely on the - * cruft pack to form a reachability closure, so we can - * avoid including them in the MIDX in that case. - */ - ; - } - - strbuf_release(&buf); -} - -static int write_midx_included_packs(struct string_list *include, - struct pack_geometry *geometry, - struct string_list *names, - const char *refs_snapshot, - int show_progress, int write_bitmaps) -{ - struct child_process cmd = CHILD_PROCESS_INIT; - struct string_list_item *item; - struct packed_git *preferred = get_preferred_pack(geometry); - FILE *in; - int ret; - - if (!include->nr) - return 0; - - cmd.in = -1; - cmd.git_cmd = 1; - - strvec_push(&cmd.args, "multi-pack-index"); - strvec_pushl(&cmd.args, "write", "--stdin-packs", NULL); - - if (show_progress) - strvec_push(&cmd.args, "--progress"); - else - strvec_push(&cmd.args, "--no-progress"); - - if (write_bitmaps) - strvec_push(&cmd.args, "--bitmap"); - - if (preferred) - strvec_pushf(&cmd.args, "--preferred-pack=%s", - pack_basename(preferred)); - else if (names->nr) { - /* The largest pack was repacked, meaning that either - * one or two packs exist depending on whether the - * repository has a cruft pack or not. 
- * - * Select the non-cruft one as preferred to encourage - * pack-reuse among packs containing reachable objects - * over unreachable ones. - * - * (Note we could write multiple packs here if - * `--max-pack-size` was given, but any one of them - * will suffice, so pick the first one.) - */ - for_each_string_list_item(item, names) { - struct generated_pack_data *data = item->util; - if (has_pack_ext(data, ".mtimes")) - continue; - - strvec_pushf(&cmd.args, "--preferred-pack=pack-%s.pack", - item->string); - break; - } - } else { - /* - * No packs were kept, and no packs were written. The - * only thing remaining are .keep packs (unless - * --pack-kept-objects was given). - * - * Set the `--preferred-pack` arbitrarily here. - */ - ; - } - - if (refs_snapshot) - strvec_pushf(&cmd.args, "--refs-snapshot=%s", refs_snapshot); - - ret = start_command(&cmd); - if (ret) - return ret; - - in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, include) - fprintf(in, "%s\n", item->string); - fclose(in); - - return finish_command(&cmd); -} - -static void remove_redundant_bitmaps(struct string_list *include, - const char *packdir) -{ - struct strbuf path = STRBUF_INIT; - struct string_list_item *item; - size_t packdir_len; - - strbuf_addstr(&path, packdir); - strbuf_addch(&path, '/'); - packdir_len = path.len; - - /* - * Remove any pack bitmaps corresponding to packs which are now - * included in the MIDX. - */ - for_each_string_list_item(item, include) { - strbuf_addstr(&path, item->string); - strbuf_strip_suffix(&path, ".idx"); - strbuf_addstr(&path, ".bitmap"); - - if (unlink(path.buf) && errno != ENOENT) - warning_errno(_("could not remove stale bitmap: %s"), - path.buf); - - strbuf_setlen(&path, packdir_len); - } - strbuf_release(&path); -} - -static int finish_pack_objects_cmd(struct child_process *cmd, - struct string_list *names, - int local) -{ - FILE *out; - struct strbuf line = STRBUF_INIT; - - out = xfdopen(cmd->out, "r"); - while (strbuf_getline_lf(&line, out) != EOF) { - struct string_list_item *item; - - if (line.len != the_hash_algo->hexsz) - die(_("repack: Expecting full hex object ID lines only " - "from pack-objects.")); - /* - * Avoid putting packs written outside of the repository in the - * list of names. - */ - if (local) { - item = string_list_append(names, line.buf); - item->util = populate_pack_exts(line.buf); - } - } - fclose(out); - - strbuf_release(&line); - - return finish_command(cmd); -} - -static int write_filtered_pack(const struct pack_objects_args *args, - const char *destination, - const char *pack_prefix, - struct existing_packs *existing, - struct string_list *names) -{ - struct child_process cmd = CHILD_PROCESS_INIT; - struct string_list_item *item; - FILE *in; - int ret; - const char *caret; - const char *scratch; - int local = skip_prefix(destination, packdir, &scratch); - - prepare_pack_objects(&cmd, args, destination); - - strvec_push(&cmd.args, "--stdin-packs"); - - if (!pack_kept_objects) - strvec_push(&cmd.args, "--honor-pack-keep"); - for_each_string_list_item(item, &existing->kept_packs) - strvec_pushf(&cmd.args, "--keep-pack=%s", item->string); - - cmd.in = -1; - - ret = start_command(&cmd); - if (ret) - return ret; - - /* - * Here 'names' contains only the pack(s) that were just - * written, which is exactly the packs we want to keep. Also - * 'existing_kept_packs' already contains the packs in - * 'keep_pack_list'. 
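The lines written to the child just below follow pack-objects' --stdin-packs convention: each line names a pack, and a leading '^' marks a pack whose objects must be excluded from the result rather than packed. For the filtered pack the input therefore looks roughly like this (placeholder names stand in for real pack basenames):

    ^<packtmp>-<hash of just-written pack>.pack    exclude: already repacked
    <existing non-kept pack>.pack                  include
    <existing cruft pack>.pack                     include
    ^<kept pack>.pack                              kept packs are excluded
                                                   unless --pack-kept-objects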
- */ - in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, names) - fprintf(in, "^%s-%s.pack\n", pack_prefix, item->string); - for_each_string_list_item(item, &existing->non_kept_packs) - fprintf(in, "%s.pack\n", item->string); - for_each_string_list_item(item, &existing->cruft_packs) - fprintf(in, "%s.pack\n", item->string); - caret = pack_kept_objects ? "" : "^"; - for_each_string_list_item(item, &existing->kept_packs) - fprintf(in, "%s%s.pack\n", caret, item->string); - fclose(in); - - return finish_pack_objects_cmd(&cmd, names, local); -} - -static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, - struct existing_packs *existing) -{ - struct packfile_store *packs = the_repository->objects->packfiles; - struct packed_git *p; - struct strbuf buf = STRBUF_INIT; - size_t i; - - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { - if (!(p->is_cruft && p->pack_local)) - continue; - - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(p)); - strbuf_strip_suffix(&buf, ".pack"); - - if (!string_list_has_string(&existing->cruft_packs, buf.buf)) - continue; - - if (p->pack_size < combine_cruft_below_size) { - fprintf(in, "-%s\n", pack_basename(p)); - } else { - retain_cruft_pack(existing, p); - fprintf(in, "%s\n", pack_basename(p)); - } - } - - for (i = 0; i < existing->non_kept_packs.nr; i++) - fprintf(in, "-%s.pack\n", - existing->non_kept_packs.items[i].string); - - strbuf_release(&buf); -} - -static int write_cruft_pack(const struct pack_objects_args *args, - const char *destination, - const char *pack_prefix, - const char *cruft_expiration, - unsigned long combine_cruft_below_size, - struct string_list *names, - struct existing_packs *existing) -{ - struct child_process cmd = CHILD_PROCESS_INIT; - struct string_list_item *item; - FILE *in; - int ret; - const char *scratch; - int local = skip_prefix(destination, packdir, &scratch); - - prepare_pack_objects(&cmd, args, destination); - - strvec_push(&cmd.args, "--cruft"); - if (cruft_expiration) - strvec_pushf(&cmd.args, "--cruft-expiration=%s", - cruft_expiration); - - strvec_push(&cmd.args, "--honor-pack-keep"); - strvec_push(&cmd.args, "--non-empty"); - - cmd.in = -1; - - ret = start_command(&cmd); - if (ret) - return ret; - - /* - * names has a confusing double use: it both provides the list - * of just-written new packs, and accepts the name of the cruft - * pack we are writing. - * - * By the time it is read here, it contains only the pack(s) - * that were just written, which is exactly the set of packs we - * want to consider kept. - * - * If `--expire-to` is given, the double-use served by `names` - * ensures that the pack written to `--expire-to` excludes any - * objects contained in the cruft pack. 
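In --cruft mode pack-objects reads a similar list of pack names from stdin, but with a different convention: unprefixed lines name packs whose objects are treated as kept (the freshly written packs and any .keep packs), while a leading '-' names a pack that is going away, whose otherwise-unreachable objects are the candidates for the cruft pack (subject to --cruft-expiration). In essence the input written below amounts to:

    <packtmp>-<hash of just-written pack>.pack    kept: freshly repacked
    -<existing non-kept pack>.pack                going away; unreachable
                                                  objects may become cruft
    -<existing cruft pack>.pack                   old cruft being rewritten
    <kept pack>.pack                              kept

When --combine-cruft-below-size is in effect, combine_small_cruft_packs() instead feeds cruft packs below the threshold with a '-' prefix and retains the larger ones unprefixed (marking them retained so they survive the later deletion pass).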
- */ - in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, names) - fprintf(in, "%s-%s.pack\n", pack_prefix, item->string); - if (combine_cruft_below_size && !cruft_expiration) { - combine_small_cruft_packs(in, combine_cruft_below_size, - existing); - } else { - for_each_string_list_item(item, &existing->non_kept_packs) - fprintf(in, "-%s.pack\n", item->string); - for_each_string_list_item(item, &existing->cruft_packs) - fprintf(in, "-%s.pack\n", item->string); - } - for_each_string_list_item(item, &existing->kept_packs) - fprintf(in, "%s.pack\n", item->string); - fclose(in); - - return finish_pack_objects_cmd(&cmd, names, local); -} - -static const char *find_pack_prefix(const char *packdir, const char *packtmp) -{ - const char *pack_prefix; - if (!skip_prefix(packtmp, packdir, &pack_prefix)) - die(_("pack prefix %s does not begin with objdir %s"), - packtmp, packdir); - if (*pack_prefix == '/') - pack_prefix++; - return pack_prefix; -} - int cmd_repack(int argc, const char **argv, const char *prefix, - struct repository *repo UNUSED) + struct repository *repo) { struct child_process cmd = CHILD_PROCESS_INIT; struct string_list_item *item; @@ -1255,18 +108,18 @@ int cmd_repack(int argc, struct existing_packs existing = EXISTING_PACKS_INIT; struct pack_geometry geometry = { 0 }; struct tempfile *refs_snapshot = NULL; - int i, ext, ret; + struct write_pack_opts opts = { 0 }; + int i, ret; int show_progress; - char **midx_pack_names = NULL; - size_t midx_pack_names_nr = 0; /* variables to be filled by option parsing */ + struct repack_config_ctx config_ctx; int delete_redundant = 0; const char *unpack_unreachable = NULL; int keep_unreachable = 0; struct string_list keep_pack_list = STRING_LIST_INIT_NODUP; - struct pack_objects_args po_args = { 0 }; - struct pack_objects_args cruft_po_args = { 0 }; + struct pack_objects_args po_args = PACK_OBJECTS_ARGS_INIT; + struct pack_objects_args cruft_po_args = PACK_OBJECTS_ARGS_INIT; int write_midx = 0; const char *cruft_expiration = NULL; const char *expire_to = NULL; @@ -1327,7 +180,7 @@ int cmd_repack(int argc, OPT_UNSIGNED(0, "max-pack-size", &po_args.max_pack_size, N_("maximum size of each packfile")), OPT_PARSE_LIST_OBJECTS_FILTER(&po_args.filter_options), - OPT_BOOL(0, "pack-kept-objects", &pack_kept_objects, + OPT_BOOL(0, "pack-kept-objects", &po_args.pack_kept_objects, N_("repack objects in packs marked with .keep")), OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"), N_("do not repack this pack")), @@ -1344,7 +197,11 @@ int cmd_repack(int argc, list_objects_filter_init(&po_args.filter_options); - repo_config(the_repository, repack_config, &cruft_po_args); + memset(&config_ctx, 0, sizeof(config_ctx)); + config_ctx.po_args = &po_args; + config_ctx.cruft_po_args = &cruft_po_args; + + repo_config(repo, repack_config, &config_ctx); argc = parse_options(argc, argv, prefix, builtin_repack_options, git_repack_usage, 0); @@ -1354,7 +211,7 @@ int cmd_repack(int argc, po_args.depth = xstrdup_or_null(opt_depth); po_args.threads = xstrdup_or_null(opt_threads); - if (delete_redundant && the_repository->repository_format_precious_objects) + if (delete_redundant && repo->repository_format_precious_objects) die(_("cannot delete packs in a precious-objects repo")); die_for_incompatible_opt3(unpack_unreachable || (pack_everything & LOOSEN_UNREACHABLE), "-A", @@ -1369,14 +226,14 @@ int cmd_repack(int argc, (!(pack_everything & ALL_INTO_ONE) || !is_bare_repository())) write_bitmaps = 0; } - if (pack_kept_objects < 0) - pack_kept_objects = 
write_bitmaps > 0 && !write_midx; + if (po_args.pack_kept_objects < 0) + po_args.pack_kept_objects = write_bitmaps > 0 && !write_midx; if (write_bitmaps && !(pack_everything & ALL_INTO_ONE) && !write_midx) die(_(incremental_bitmap_conflict_error)); if (write_bitmaps && po_args.local && - odb_has_alternates(the_repository->objects)) { + odb_has_alternates(repo->objects)) { /* * When asked to do a local repack, but we have * packfiles that are inherited from an alternate, then @@ -1391,26 +248,28 @@ int cmd_repack(int argc, if (write_midx && write_bitmaps) { struct strbuf path = STRBUF_INIT; - strbuf_addf(&path, "%s/%s_XXXXXX", repo_get_object_directory(the_repository), + strbuf_addf(&path, "%s/%s_XXXXXX", + repo_get_object_directory(repo), "bitmap-ref-tips"); refs_snapshot = xmks_tempfile(path.buf); - midx_snapshot_refs(refs_snapshot); + midx_snapshot_refs(repo, refs_snapshot); strbuf_release(&path); } - packdir = mkpathdup("%s/pack", repo_get_object_directory(the_repository)); + packdir = mkpathdup("%s/pack", repo_get_object_directory(repo)); packtmp_name = xstrfmt(".tmp-%d-pack", (int)getpid()); packtmp = mkpathdup("%s/%s", packdir, packtmp_name); - collect_pack_filenames(&existing, &keep_pack_list); + existing.repo = repo; + existing_packs_collect(&existing, &keep_pack_list); if (geometry.split_factor) { if (pack_everything) die(_("options '%s' and '%s' cannot be used together"), "--geometric", "-A/-a"); - init_pack_geometry(&geometry, &existing, &po_args); - split_pack_geometry(&geometry); + pack_geometry_init(&geometry, &existing, &po_args); + pack_geometry_split(&geometry); } prepare_pack_objects(&cmd, &po_args, packtmp); @@ -1418,8 +277,6 @@ int cmd_repack(int argc, show_progress = !po_args.quiet && isatty(2); strvec_push(&cmd.args, "--keep-true-parents"); - if (!pack_kept_objects) - strvec_push(&cmd.args, "--honor-pack-keep"); for (i = 0; i < keep_pack_list.nr; i++) strvec_pushf(&cmd.args, "--keep-pack=%s", keep_pack_list.items[i].string); @@ -1439,7 +296,7 @@ int cmd_repack(int argc, strvec_push(&cmd.args, "--reflog"); strvec_push(&cmd.args, "--indexed-objects"); } - if (repo_has_promisor_remote(the_repository)) + if (repo_has_promisor_remote(repo)) strvec_push(&cmd.args, "--exclude-promisor-objects"); if (!write_midx) { if (write_bitmaps > 0) @@ -1451,9 +308,9 @@ int cmd_repack(int argc, strvec_push(&cmd.args, "--delta-islands"); if (pack_everything & ALL_INTO_ONE) { - repack_promisor_objects(&po_args, &names); + repack_promisor_objects(repo, &po_args, &names, packtmp); - if (has_existing_non_kept_packs(&existing) && + if (existing_packs_has_non_kept(&existing) && delete_redundant && !(pack_everything & PACK_CRUFT)) { for_each_string_list_item(item, &names) { @@ -1515,7 +372,10 @@ int cmd_repack(int argc, fclose(in); } - ret = finish_pack_objects_cmd(&cmd, &names, 1); + opts.packdir = packdir; + opts.destination = packdir; + opts.packtmp = packtmp; + ret = finish_pack_objects_cmd(repo->hash_algo, &opts, &cmd, &names); if (ret) goto cleanup; @@ -1535,12 +395,17 @@ int cmd_repack(int argc, * midx_has_unknown_packs() will make the decision for * us. 
*/ - if (!get_multi_pack_index(the_repository->objects->sources)) + if (!get_multi_pack_index(repo->objects->sources)) midx_must_contain_cruft = 1; } if (pack_everything & PACK_CRUFT) { - const char *pack_prefix = find_pack_prefix(packdir, packtmp); + struct write_pack_opts opts = { + .po_args = &cruft_po_args, + .destination = packtmp, + .packtmp = packtmp, + .packdir = packdir, + }; if (!cruft_po_args.window) cruft_po_args.window = xstrdup_or_null(po_args.window); @@ -1555,9 +420,10 @@ int cmd_repack(int argc, cruft_po_args.local = po_args.local; cruft_po_args.quiet = po_args.quiet; + cruft_po_args.delta_base_offset = po_args.delta_base_offset; + cruft_po_args.pack_kept_objects = 0; - ret = write_cruft_pack(&cruft_po_args, packtmp, pack_prefix, - cruft_expiration, + ret = write_cruft_pack(&opts, cruft_expiration, combine_cruft_below_size, &names, &existing); if (ret) @@ -1592,11 +458,8 @@ int cmd_repack(int argc, * pack, but rather removing all cruft packs from the * main repository regardless of size. */ - ret = write_cruft_pack(&cruft_po_args, expire_to, - pack_prefix, - NULL, - 0ul, - &names, + opts.destination = expire_to; + ret = write_cruft_pack(&opts, NULL, 0ul, &names, &existing); if (ret) goto cleanup; @@ -1604,99 +467,63 @@ int cmd_repack(int argc, } if (po_args.filter_options.choice) { - if (!filter_to) - filter_to = packtmp; - - ret = write_filtered_pack(&po_args, - filter_to, - find_pack_prefix(packdir, packtmp), - &existing, - &names); + struct write_pack_opts opts = { + .po_args = &po_args, + .destination = filter_to, + .packdir = packdir, + .packtmp = packtmp, + }; + + if (!opts.destination) + opts.destination = packtmp; + + ret = write_filtered_pack(&opts, &existing, &names); if (ret) goto cleanup; } string_list_sort(&names); - if (get_multi_pack_index(the_repository->objects->sources)) { - struct multi_pack_index *m = - get_multi_pack_index(the_repository->objects->sources); - - ALLOC_ARRAY(midx_pack_names, - m->num_packs + m->num_packs_in_base); - - for (; m; m = m->base_midx) - for (uint32_t i = 0; i < m->num_packs; i++) - midx_pack_names[midx_pack_names_nr++] = - xstrdup(m->pack_names[i]); - } - - close_object_store(the_repository->objects); + close_object_store(repo->objects); /* * Ok we have prepared all new packfiles. */ - for_each_string_list_item(item, &names) { - struct generated_pack_data *data = item->util; - - for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { - char *fname; - - fname = mkpathdup("%s/pack-%s%s", - packdir, item->string, exts[ext].name); - - if (data->tempfiles[ext]) { - const char *fname_old = get_tempfile_path(data->tempfiles[ext]); - struct stat statbuffer; - - if (!stat(fname_old, &statbuffer)) { - statbuffer.st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); - chmod(fname_old, statbuffer.st_mode); - } - - if (rename_tempfile(&data->tempfiles[ext], fname)) - die_errno(_("renaming pack to '%s' failed"), fname); - } else if (!exts[ext].optional) - die(_("pack-objects did not write a '%s' file for pack %s-%s"), - exts[ext].name, packtmp, item->string); - else if (unlink(fname) < 0 && errno != ENOENT) - die_errno(_("could not unlink: %s"), fname); - - free(fname); - } - } + for_each_string_list_item(item, &names) + generated_pack_install((struct generated_pack *)item->util, + item->string, packdir, packtmp); /* End of pack replacement. 
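generated_pack_install() replaces the rename loop deleted above: judging from the call site and the old code, it moves each registered tempfile from '<packtmp>-<name>.<ext>' into place as '<packdir>/pack-<name>.<ext>' with the write bits cleared, dies if a required extension (.pack, .idx) was not produced, and unlinks stale leftovers of the optional ones. The helper is presumably declared in the new repack.h; the call above implies a shape along the lines of:

    /* assumed from the call site; see repack.h for the real declaration */
    void generated_pack_install(struct generated_pack *pack, const char *name,
                                const char *packdir, const char *packtmp);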
*/ if (delete_redundant && pack_everything & ALL_INTO_ONE) - mark_packs_for_deletion(&existing, &names); + existing_packs_mark_for_deletion(&existing, &names); if (write_midx) { - struct string_list include = STRING_LIST_INIT_DUP; - midx_included_packs(&include, &existing, midx_pack_names, - midx_pack_names_nr, &names, &geometry); - - ret = write_midx_included_packs(&include, &geometry, &names, - refs_snapshot ? get_tempfile_path(refs_snapshot) : NULL, - show_progress, write_bitmaps > 0); - - if (!ret && write_bitmaps) - remove_redundant_bitmaps(&include, packdir); - - string_list_clear(&include, 0); + struct repack_write_midx_opts opts = { + .existing = &existing, + .geometry = &geometry, + .names = &names, + .refs_snapshot = refs_snapshot ? get_tempfile_path(refs_snapshot) : NULL, + .packdir = packdir, + .show_progress = show_progress, + .write_bitmaps = write_bitmaps > 0, + .midx_must_contain_cruft = midx_must_contain_cruft + }; + + ret = write_midx_included_packs(&opts); if (ret) goto cleanup; } - odb_reprepare(the_repository->objects); + odb_reprepare(repo->objects); if (delete_redundant) { int opts = 0; - remove_redundant_existing_packs(&existing); + existing_packs_remove_redundant(&existing, packdir); if (geometry.split_factor) - geometry_remove_redundant_packs(&geometry, &names, - &existing); + pack_geometry_remove_redundant(&geometry, &names, + &existing, packdir); if (show_progress) opts |= PRUNE_PACKED_VERBOSE; prune_packed_objects(opts); @@ -1704,18 +531,18 @@ int cmd_repack(int argc, if (!keep_unreachable && (!(pack_everything & LOOSEN_UNREACHABLE) || unpack_unreachable) && - is_repository_shallow(the_repository)) + is_repository_shallow(repo)) prune_shallow(PRUNE_QUICK); } if (run_update_server_info) - update_server_info(the_repository, 0); + update_server_info(repo, 0); if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX, 0)) { unsigned flags = 0; if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX_WRITE_INCREMENTAL, 0)) flags |= MIDX_WRITE_INCREMENTAL; - write_midx_file(the_repository->objects->sources, + write_midx_file(repo->objects->sources, NULL, NULL, flags); } @@ -1723,10 +550,7 @@ int cmd_repack(int argc, string_list_clear(&keep_pack_list, 0); string_list_clear(&names, 1); existing_packs_release(&existing); - free_pack_geometry(&geometry); - for (size_t i = 0; i < midx_pack_names_nr; i++) - free(midx_pack_names[i]); - free(midx_pack_names); + pack_geometry_release(&geometry); pack_objects_args_release(&po_args); pack_objects_args_release(&cruft_po_args); diff --git a/connected.c b/connected.c index b288a18b17c33fcead8b14947dfce38276c3d093..79403108dd8f574af7fcdb478975b17ceba76d25 100644 --- a/connected.c +++ b/connected.c @@ -74,10 +74,9 @@ int check_connected(oid_iterate_fn fn, void *cb_data, */ odb_reprepare(the_repository->objects); do { - struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + repo_for_each_pack(the_repository, p) { if (!p->pack_promisor) continue; if (find_pack_entry_one(oid, p)) diff --git a/http-backend.c b/http-backend.c index 9084058f1e9f138bda8525ce6db52df24b2fa95e..52f0483dd309d73a1866a1b63161df0fe63d389f 100644 --- a/http-backend.c +++ b/http-backend.c @@ -603,19 +603,18 @@ static void get_head(struct strbuf *hdr, char *arg UNUSED) static void get_info_packs(struct strbuf *hdr, char *arg UNUSED) { size_t objdirlen = strlen(repo_get_object_directory(the_repository)); - struct packfile_store *packs = the_repository->objects->packfiles; struct strbuf buf = 
STRBUF_INIT; struct packed_git *p; size_t cnt = 0; select_getanyfile(hdr); - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + repo_for_each_pack(the_repository, p) { if (p->pack_local) cnt++; } strbuf_grow(&buf, cnt * 53 + 2); - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + repo_for_each_pack(the_repository, p) { if (p->pack_local) strbuf_addf(&buf, "P %s\n", p->pack_name + objdirlen + 6); } diff --git a/http-push.c b/http-push.c index a1c01e3b9b93a3e2be1311521fc69c2568748cd6..d86ce7711982066c704662f1d55ad8b0a411cbea 100644 --- a/http-push.c +++ b/http-push.c @@ -104,7 +104,7 @@ struct repo { int has_info_refs; int can_update_info_refs; int has_info_packs; - struct packed_git *packs; + struct packfile_list packs; struct remote_lock *locks; }; @@ -311,7 +311,7 @@ static void start_fetch_packed(struct transfer_request *request) struct transfer_request *check_request = request_queue_head; struct http_pack_request *preq; - target = find_oid_pack(&request->obj->oid, repo->packs); + target = packfile_list_find_oid(repo->packs.head, &request->obj->oid); if (!target) { fprintf(stderr, "Unable to fetch %s, will not be able to update server info refs\n", oid_to_hex(&request->obj->oid)); repo->can_update_info_refs = 0; @@ -683,7 +683,7 @@ static int add_send_request(struct object *obj, struct remote_lock *lock) get_remote_object_list(obj->oid.hash[0]); if (obj->flags & (REMOTE | PUSHING)) return 0; - target = find_oid_pack(&obj->oid, repo->packs); + target = packfile_list_find_oid(repo->packs.head, &obj->oid); if (target) { obj->flags |= REMOTE; return 0; diff --git a/http-walker.c b/http-walker.c index 0f7ae46d7f12c0a53ea7d0ff7139c3bb56313fb6..e886e6486646d17d5b74f4db89b2f27c35d938c5 100644 --- a/http-walker.c +++ b/http-walker.c @@ -15,7 +15,7 @@ struct alt_base { char *base; int got_indices; - struct packed_git *packs; + struct packfile_list packs; struct alt_base *next; }; @@ -324,11 +324,8 @@ static void process_alternates_response(void *callback_data) } else if (is_alternate_allowed(target.buf)) { warning("adding alternate object store: %s", target.buf); - newalt = xmalloc(sizeof(*newalt)); - newalt->next = NULL; + CALLOC_ARRAY(newalt, 1); newalt->base = strbuf_detach(&target, NULL); - newalt->got_indices = 0; - newalt->packs = NULL; while (tail->next != NULL) tail = tail->next; @@ -435,7 +432,7 @@ static int http_fetch_pack(struct walker *walker, struct alt_base *repo, if (fetch_indices(walker, repo)) return -1; - target = find_oid_pack(oid, repo->packs); + target = packfile_list_find_oid(repo->packs.head, oid); if (!target) return -1; close_pack_index(target); @@ -584,17 +581,15 @@ static void cleanup(struct walker *walker) if (data) { alt = data->alt; while (alt) { - struct packed_git *pack; + struct packfile_list_entry *e; alt_next = alt->next; - pack = alt->packs; - while (pack) { - struct packed_git *pack_next = pack->next; - close_pack(pack); - free(pack); - pack = pack_next; + for (e = alt->packs.head; e; e = e->next) { + close_pack(e->pack); + free(e->pack); } + packfile_list_clear(&alt->packs); free(alt->base); free(alt); @@ -612,14 +607,11 @@ struct walker *get_http_walker(const char *url) struct walker_data *data = xmalloc(sizeof(struct walker_data)); struct walker *walker = xmalloc(sizeof(struct walker)); - data->alt = xmalloc(sizeof(*data->alt)); + CALLOC_ARRAY(data->alt, 1); data->alt->base = xstrdup(url); for (s = data->alt->base + strlen(data->alt->base) - 1; *s == '/'; --s) *s = 0; - data->alt->got_indices = 0; - data->alt->packs = NULL; - 
data->alt->next = NULL; data->got_alternates = -1; walker->corrupt_object_found = 0; diff --git a/http.c b/http.c index 7e3af1e72f740124462a6de7901f9346b607b0e2..41f850db16d19f653e0ac7af2878ed1e34fee22f 100644 --- a/http.c +++ b/http.c @@ -2413,10 +2413,10 @@ static char *fetch_pack_index(unsigned char *hash, const char *base_url) return tmp; } -static int fetch_and_setup_pack_index(struct packed_git **packs_head, - unsigned char *sha1, const char *base_url) +static int fetch_and_setup_pack_index(struct packfile_list *packs, + unsigned char *sha1, + const char *base_url) { - struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *new_pack, *p; char *tmp_idx = NULL; int ret; @@ -2425,7 +2425,7 @@ static int fetch_and_setup_pack_index(struct packed_git **packs_head, * If we already have the pack locally, no need to fetch its index or * even add it to list; we already have all of its objects. */ - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + repo_for_each_pack(the_repository, p) { if (hasheq(p->hash, sha1, the_repository->hash_algo)) return 0; } @@ -2449,12 +2449,11 @@ static int fetch_and_setup_pack_index(struct packed_git **packs_head, if (ret) return -1; - new_pack->next = *packs_head; - *packs_head = new_pack; + packfile_list_prepend(packs, new_pack); return 0; } -int http_get_info_packs(const char *base_url, struct packed_git **packs_head) +int http_get_info_packs(const char *base_url, struct packfile_list *packs) { struct http_get_options options = {0}; int ret = 0; @@ -2478,7 +2477,7 @@ int http_get_info_packs(const char *base_url, struct packed_git **packs_head) !parse_oid_hex(data, &oid, &data) && skip_prefix(data, ".pack", &data) && (*data == '\n' || *data == '\0')) { - fetch_and_setup_pack_index(packs_head, oid.hash, base_url); + fetch_and_setup_pack_index(packs, oid.hash, base_url); } else { data = strchrnul(data, '\n'); } @@ -2542,14 +2541,9 @@ int finish_http_pack_request(struct http_pack_request *preq) } void http_install_packfile(struct packed_git *p, - struct packed_git **list_to_remove_from) + struct packfile_list *list_to_remove_from) { - struct packed_git **lst = list_to_remove_from; - - while (*lst != p) - lst = &((*lst)->next); - *lst = (*lst)->next; - + packfile_list_remove(list_to_remove_from, p); packfile_store_add_pack(the_repository->objects->packfiles, p); } diff --git a/http.h b/http.h index 553e16205ce2014a96a2e005a0036b789c2a5b03..f9d459340476e470e05adf31527ae8e01f590b7a 100644 --- a/http.h +++ b/http.h @@ -2,6 +2,7 @@ #define HTTP_H struct packed_git; +struct packfile_list; #include "git-zlib.h" @@ -190,7 +191,7 @@ struct curl_slist *http_append_auth_header(const struct credential *c, /* Helpers for fetching packs */ int http_get_info_packs(const char *base_url, - struct packed_git **packs_head); + struct packfile_list *packs); /* Helper for getting Accept-Language header */ const char *http_get_accept_language_header(void); @@ -226,7 +227,7 @@ void release_http_pack_request(struct http_pack_request *preq); * from http_get_info_packs() and have chosen a specific pack to fetch. 
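[Editor's illustration, not part of the patch.] http-push.c and http-walker.c now hold a struct packfile_list value (a head/tail pair of entries wrapping each pack) instead of threading a next pointer through packed_git itself, which is why lookups now pass list.head to packfile_list_find_oid(). The sketch below is a self-contained model of that non-intrusive list; the toy_* names and the string-id match are stand-ins, not Git APIs.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct toy_pack { char id[8]; };

struct toy_list_entry {
    struct toy_list_entry *next;
    struct toy_pack *pack;
};

struct toy_list { struct toy_list_entry *head, *tail; };

/* Append keeps a tail pointer so insertion stays O(1). */
static void toy_list_append(struct toy_list *list, struct toy_pack *pack)
{
    struct toy_list_entry *e = malloc(sizeof(*e));
    e->pack = pack;
    e->next = NULL;
    if (list->tail)
        list->tail->next = e;
    else
        list->head = e;
    list->tail = e;
}

/* Walk the entries and return the first pack whose id matches. */
static struct toy_pack *toy_list_find(struct toy_list_entry *e, const char *id)
{
    for (; e; e = e->next)
        if (!strcmp(e->pack->id, id))
            return e->pack;
    return NULL;
}

int main(void)
{
    struct toy_list list = { NULL, NULL };
    struct toy_pack a = { "aaaa" }, b = { "bbbb" };

    toy_list_append(&list, &a);
    toy_list_append(&list, &b);
    printf("found: %s\n", toy_list_find(list.head, "bbbb")->id);
    return 0;
}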
*/ void http_install_packfile(struct packed_git *p, - struct packed_git **list_to_remove_from); + struct packfile_list *list_to_remove_from); /* Helpers for fetching object */ struct http_object_request { diff --git a/meson.build b/meson.build index ec55d6a5fdfae299cb5b3bd33b98a3a067307da4..81a3d5b96ae0ede5a0ca8a25564407216befba20 100644 --- a/meson.build +++ b/meson.build @@ -462,6 +462,12 @@ libgit_sources = [ 'reftable/tree.c', 'reftable/writer.c', 'remote.c', + 'repack.c', + 'repack-cruft.c', + 'repack-filtered.c', + 'repack-geometry.c', + 'repack-midx.c', + 'repack-promisor.c', 'replace-object.c', 'repo-settings.c', 'repository.c', diff --git a/midx.c b/midx.c index 1d6269f957e7819fb62963cde191360422b29229..24e1e721754d0cbffed53d7545f12bca77d157bd 100644 --- a/midx.c +++ b/midx.c @@ -462,8 +462,6 @@ int prepare_midx_pack(struct multi_pack_index *m, m->pack_names[pack_int_id]); p = packfile_store_load_pack(r->objects->packfiles, pack_name.buf, m->source->local); - if (p) - list_add_tail(&p->mru, &r->objects->packfiles->mru); strbuf_release(&pack_name); if (!p) { diff --git a/object-name.c b/object-name.c index f6902e140dd43eb7bd878306be4cfb26b050fbfe..766c757042a38950f7b498dc7fa92b72befea264 100644 --- a/object-name.c +++ b/object-name.c @@ -213,9 +213,11 @@ static void find_short_packed_object(struct disambiguate_state *ds) unique_in_midx(m, ds); } - for (p = packfile_store_get_packs(ds->repo->objects->packfiles); p && !ds->ambiguous; - p = p->next) + repo_for_each_pack(ds->repo, p) { + if (ds->ambiguous) + break; unique_in_pack(p, ds); + } } static int finish_object_disambiguation(struct disambiguate_state *ds, @@ -805,7 +807,7 @@ static void find_abbrev_len_packed(struct min_abbrev_data *mad) find_abbrev_len_for_midx(m, mad); } - for (p = packfile_store_get_packs(mad->repo->objects->packfiles); p; p = p->next) + repo_for_each_pack(mad->repo, p) find_abbrev_len_for_pack(p, mad); } diff --git a/pack-bitmap.c b/pack-bitmap.c index ac71035d7715f7f9a86468fd2754bb5283a80e41..291e1a9cf471583aeed5b928288ca6c2f5d1ceaa 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -664,7 +664,7 @@ static int open_pack_bitmap(struct repository *r, struct packed_git *p; int ret = -1; - for (p = packfile_store_get_all_packs(r->objects->packfiles); p; p = p->next) { + repo_for_each_pack(r, p) { if (open_pack_bitmap_1(bitmap_git, p) == 0) { ret = 0; /* @@ -3347,6 +3347,7 @@ static int verify_bitmap_file(const struct git_hash_algo *algop, int verify_bitmap_files(struct repository *r) { struct odb_source *source; + struct packed_git *p; int res = 0; odb_prepare_alternates(r->objects); @@ -3362,8 +3363,7 @@ int verify_bitmap_files(struct repository *r) free(midx_bitmap_name); } - for (struct packed_git *p = packfile_store_get_all_packs(r->objects->packfiles); - p; p = p->next) { + repo_for_each_pack(r, p) { char *pack_bitmap_name = pack_bitmap_filename(p); res |= verify_bitmap_file(r->hash_algo, pack_bitmap_name); free(pack_bitmap_name); diff --git a/pack-objects.c b/pack-objects.c index 9d6ee72569ab3b8b8d7b4170728c46070e4fdd5c..48510dd343c1df317fd8b3089384bf7e47fad21a 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -87,7 +87,6 @@ struct object_entry *packlist_find(struct packing_data *pdata, static void prepare_in_pack_by_idx(struct packing_data *pdata) { - struct packfile_store *packs = pdata->repo->objects->packfiles; struct packed_git **mapping, *p; int cnt = 0, nr = 1U << OE_IN_PACK_BITS; @@ -97,13 +96,13 @@ static void prepare_in_pack_by_idx(struct packing_data *pdata) * (i.e. 
in_pack_idx also zero) should return NULL. */ mapping[cnt++] = NULL; - for (p = packfile_store_get_all_packs(packs); p; p = p->next, cnt++) { + repo_for_each_pack(pdata->repo, p) { if (cnt == nr) { free(mapping); return; } p->index = cnt; - mapping[cnt] = p; + mapping[cnt++] = p; } pdata->in_pack_by_idx = mapping; } diff --git a/packfile.c b/packfile.c index 5a7caec29259771d2f15e9ac4979c0eec964251a..8785e3971041a1ad6b81808033d008f0614814f1 100644 --- a/packfile.c +++ b/packfile.c @@ -47,6 +47,89 @@ static size_t pack_mapped; #define SZ_FMT PRIuMAX static inline uintmax_t sz_fmt(size_t s) { return s; } +void packfile_list_clear(struct packfile_list *list) +{ + struct packfile_list_entry *e, *next; + + for (e = list->head; e; e = next) { + next = e->next; + free(e); + } + + list->head = list->tail = NULL; +} + +static struct packfile_list_entry *packfile_list_remove_internal(struct packfile_list *list, + struct packed_git *pack) +{ + struct packfile_list_entry *e, *prev; + + for (e = list->head, prev = NULL; e; prev = e, e = e->next) { + if (e->pack != pack) + continue; + + if (prev) + prev->next = e->next; + if (list->head == e) + list->head = e->next; + if (list->tail == e) + list->tail = prev; + + return e; + } + + return NULL; +} + +void packfile_list_remove(struct packfile_list *list, struct packed_git *pack) +{ + free(packfile_list_remove_internal(list, pack)); +} + +void packfile_list_prepend(struct packfile_list *list, struct packed_git *pack) +{ + struct packfile_list_entry *entry; + + entry = packfile_list_remove_internal(list, pack); + if (!entry) { + entry = xmalloc(sizeof(*entry)); + entry->pack = pack; + } + entry->next = list->head; + + list->head = entry; + if (!list->tail) + list->tail = entry; +} + +void packfile_list_append(struct packfile_list *list, struct packed_git *pack) +{ + struct packfile_list_entry *entry; + + entry = packfile_list_remove_internal(list, pack); + if (!entry) { + entry = xmalloc(sizeof(*entry)); + entry->pack = pack; + } + entry->next = NULL; + + if (list->tail) { + list->tail->next = entry; + list->tail = entry; + } else { + list->head = list->tail = entry; + } +} + +struct packed_git *packfile_list_find_oid(struct packfile_list_entry *packs, + const struct object_id *oid) +{ + for (; packs; packs = packs->next) + if (find_pack_entry_one(oid, packs->pack)) + return packs->pack; + return NULL; +} + void pack_report(struct repository *repo) { fprintf(stderr, @@ -273,13 +356,14 @@ static void scan_windows(struct packed_git *p, static int unuse_one_window(struct packed_git *current) { - struct packed_git *p, *lru_p = NULL; + struct packfile_list_entry *e; + struct packed_git *lru_p = NULL; struct pack_window *lru_w = NULL, *lru_l = NULL; if (current) scan_windows(current, &lru_p, &lru_w, &lru_l); - for (p = current->repo->objects->packfiles->packs; p; p = p->next) - scan_windows(p, &lru_p, &lru_w, &lru_l); + for (e = current->repo->objects->packfiles->packs.head; e; e = e->next) + scan_windows(e->pack, &lru_p, &lru_w, &lru_l); if (lru_p) { munmap(lru_w->base, lru_w->len); pack_mapped -= lru_w->len; @@ -459,14 +543,15 @@ static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struc static int close_one_pack(struct repository *r) { - struct packed_git *p, *lru_p = NULL; + struct packfile_list_entry *e; + struct packed_git *lru_p = NULL; struct pack_window *mru_w = NULL; int accept_windows_inuse = 1; - for (p = r->objects->packfiles->packs; p; p = p->next) { - if (p->pack_fd == -1) + for (e = r->objects->packfiles->packs.head; e; e = 
e->next) { + if (e->pack->pack_fd == -1) continue; - find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse); + find_lru_pack(e->pack, &lru_p, &mru_w, &accept_windows_inuse); } if (lru_p) @@ -785,11 +870,8 @@ void packfile_store_add_pack(struct packfile_store *store, if (pack->pack_fd != -1) pack_open_fds++; - pack->next = store->packs; - store->packs = pack; - - hashmap_entry_init(&pack->packmap_ent, strhash(pack->pack_name)); - hashmap_add(&store->map, &pack->packmap_ent); + packfile_list_append(&store->packs, pack); + strmap_put(&store->packs_by_path, pack->pack_name, pack); } struct packed_git *packfile_store_load_pack(struct packfile_store *store, @@ -806,8 +888,7 @@ struct packed_git *packfile_store_load_pack(struct packfile_store *store, strbuf_strip_suffix(&key, ".idx"); strbuf_addstr(&key, ".pack"); - p = hashmap_get_entry_from_hash(&store->map, strhash(key.buf), key.buf, - struct packed_git, packmap_ent); + p = strmap_get(&store->packs_by_path, key.buf); if (!p) { p = add_packed_git(store->odb->repo, idx_path, strlen(idx_path), local); @@ -965,9 +1046,10 @@ static void prepare_packed_git_one(struct odb_source *source) string_list_clear(data.garbage, 0); } -DEFINE_LIST_SORT(static, sort_packs, struct packed_git, next); +DEFINE_LIST_SORT(static, sort_packs, struct packfile_list_entry, next); -static int sort_pack(const struct packed_git *a, const struct packed_git *b) +static int sort_pack(const struct packfile_list_entry *a, + const struct packfile_list_entry *b) { int st; @@ -977,7 +1059,7 @@ static int sort_pack(const struct packed_git *a, const struct packed_git *b) * remote ones could be on a network mounted filesystem. * Favor local ones for these reasons. */ - st = a->pack_local - b->pack_local; + st = a->pack->pack_local - b->pack->pack_local; if (st) return -st; @@ -986,23 +1068,13 @@ static int sort_pack(const struct packed_git *a, const struct packed_git *b) * and more recent objects tend to get accessed more * often. 
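[Editor's illustration, not part of the patch.] sort_pack() keeps the pre-existing ordering policy while now comparing through the list entries: local packs sort ahead of packs borrowed from alternates, and among those, packs with newer mtimes come first. A minimal standalone demonstration of that ordering using plain qsort() over stand-in structs (not the DEFINE_LIST_SORT machinery used in the hunk):

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

struct toy_pack { const char *name; int local; time_t mtime; };

/* Local packs first; then newer (larger mtime) before older. */
static int toy_sort_pack(const void *va, const void *vb)
{
    const struct toy_pack *a = va, *b = vb;
    int st = a->local - b->local;

    if (st)
        return -st;
    if (a->mtime < b->mtime)
        return 1;
    else if (a->mtime == b->mtime)
        return 0;
    return -1;
}

int main(void)
{
    struct toy_pack packs[] = {
        { "alternate-old", 0, 100 },
        { "local-old", 1, 100 },
        { "local-new", 1, 200 },
    };

    qsort(packs, 3, sizeof(packs[0]), toy_sort_pack);
    for (size_t i = 0; i < 3; i++)
        printf("%s\n", packs[i].name); /* local-new, local-old, alternate-old */
    return 0;
}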
*/ - if (a->mtime < b->mtime) + if (a->pack->mtime < b->pack->mtime) return 1; - else if (a->mtime == b->mtime) + else if (a->pack->mtime == b->pack->mtime) return 0; return -1; } -static void packfile_store_prepare_mru(struct packfile_store *store) -{ - struct packed_git *p; - - INIT_LIST_HEAD(&store->mru); - - for (p = store->packs; p; p = p->next) - list_add_tail(&p->mru, &store->mru); -} - void packfile_store_prepare(struct packfile_store *store) { struct odb_source *source; @@ -1015,9 +1087,12 @@ void packfile_store_prepare(struct packfile_store *store) prepare_multi_pack_index_one(source); prepare_packed_git_one(source); } - sort_packs(&store->packs, sort_pack); - packfile_store_prepare_mru(store); + sort_packs(&store->packs.head, sort_pack); + for (struct packfile_list_entry *e = store->packs.head; e; e = e->next) + if (!e->next) + store->packs.tail = e; + store->initialized = true; } @@ -1027,13 +1102,7 @@ void packfile_store_reprepare(struct packfile_store *store) packfile_store_prepare(store); } -struct packed_git *packfile_store_get_packs(struct packfile_store *store) -{ - packfile_store_prepare(store); - return store->packs; -} - -struct packed_git *packfile_store_get_all_packs(struct packfile_store *store) +struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *store) { packfile_store_prepare(store); @@ -1045,13 +1114,7 @@ struct packed_git *packfile_store_get_all_packs(struct packfile_store *store) prepare_midx_pack(m, i); } - return store->packs; -} - -struct list_head *packfile_store_get_packs_mru(struct packfile_store *store) -{ - packfile_store_prepare(store); - return &store->mru; + return store->packs.head; } /* @@ -1068,16 +1131,16 @@ unsigned long repo_approximate_object_count(struct repository *r) unsigned long count = 0; struct packed_git *p; - packfile_store_prepare(r->objects->packfiles); + odb_prepare_alternates(r->objects); for (source = r->objects->sources; source; source = source->next) { struct multi_pack_index *m = get_multi_pack_index(source); if (m) - count += m->num_objects; + count += m->num_objects + m->num_objects_in_base; } - for (p = r->objects->packfiles->packs; p; p = p->next) { - if (open_pack_index(p)) + repo_for_each_pack(r, p) { + if (open_pack_index(p) || p->multi_pack_index) continue; count += p->num_objects; } @@ -1201,11 +1264,11 @@ void mark_bad_packed_object(struct packed_git *p, const struct object_id *oid) const struct packed_git *has_packed_and_bad(struct repository *r, const struct object_id *oid) { - struct packed_git *p; + struct packfile_list_entry *e; - for (p = r->objects->packfiles->packs; p; p = p->next) - if (oidset_contains(&p->bad_objects, oid)) - return p; + for (e = r->objects->packfiles->packs.head; e; e = e->next) + if (oidset_contains(&e->pack->bad_objects, oid)) + return e->pack; return NULL; } @@ -2013,19 +2076,6 @@ int is_pack_valid(struct packed_git *p) return !open_packed_git(p); } -struct packed_git *find_oid_pack(const struct object_id *oid, - struct packed_git *packs) -{ - struct packed_git *p; - - for (p = packs; p; p = p->next) { - if (find_pack_entry_one(oid, p)) - return p; - } - return NULL; - -} - static int fill_pack_entry(const struct object_id *oid, struct pack_entry *e, struct packed_git *p) @@ -2056,7 +2106,7 @@ static int fill_pack_entry(const struct object_id *oid, int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e) { - struct list_head *pos; + struct packfile_list_entry *l; packfile_store_prepare(r->objects->packfiles); @@ -2064,13 
+2114,15 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa if (source->midx && fill_midx_entry(source->midx, oid, e)) return 1; - if (!r->objects->packfiles->packs) + if (!r->objects->packfiles->packs.head) return 0; - list_for_each(pos, &r->objects->packfiles->mru) { - struct packed_git *p = list_entry(pos, struct packed_git, mru); + for (l = r->objects->packfiles->packs.head; l; l = l->next) { + struct packed_git *p = l->pack; + if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) { - list_move(&p->mru, &r->objects->packfiles->mru); + if (!r->objects->packfiles->skip_mru_updates) + packfile_list_prepend(&r->objects->packfiles->packs, p); return 1; } } @@ -2105,7 +2157,7 @@ struct packed_git **kept_pack_cache(struct repository *r, unsigned flags) * covers, one kept and one not kept, but the midx returns only * the non-kept version. */ - for (p = packfile_store_get_all_packs(r->objects->packfiles); p; p = p->next) { + repo_for_each_pack(r, p) { if ((p->pack_keep && (flags & ON_DISK_KEEP_PACKS)) || (p->pack_keep_in_core && (flags & IN_CORE_KEEP_PACKS))) { ALLOC_GROW(packs, nr + 1, alloc); @@ -2202,7 +2254,8 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, int r = 0; int pack_errors = 0; - for (p = packfile_store_get_all_packs(repo->objects->packfiles); p; p = p->next) { + repo->objects->packfiles->skip_mru_updates = true; + repo_for_each_pack(repo, p) { if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) continue; if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) && @@ -2222,6 +2275,8 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, if (r) break; } + repo->objects->packfiles->skip_mru_updates = false; + return r ? r : pack_errors; } @@ -2317,45 +2372,30 @@ int parse_pack_header_option(const char *in, unsigned char *out, unsigned int *l return 0; } -static int pack_map_entry_cmp(const void *cmp_data UNUSED, - const struct hashmap_entry *entry, - const struct hashmap_entry *entry2, - const void *keydata) -{ - const char *key = keydata; - const struct packed_git *pg1, *pg2; - - pg1 = container_of(entry, const struct packed_git, packmap_ent); - pg2 = container_of(entry2, const struct packed_git, packmap_ent); - - return strcmp(pg1->pack_name, key ? 
key : pg2->pack_name); -} - struct packfile_store *packfile_store_new(struct object_database *odb) { struct packfile_store *store; CALLOC_ARRAY(store, 1); store->odb = odb; - INIT_LIST_HEAD(&store->mru); - hashmap_init(&store->map, pack_map_entry_cmp, NULL, 0); + strmap_init(&store->packs_by_path); return store; } void packfile_store_free(struct packfile_store *store) { - for (struct packed_git *p = store->packs, *next; p; p = next) { - next = p->next; - free(p); - } - hashmap_clear(&store->map); + for (struct packfile_list_entry *e = store->packs.head; e; e = e->next) + free(e->pack); + packfile_list_clear(&store->packs); + + strmap_clear(&store->packs_by_path, 0); free(store); } void packfile_store_close(struct packfile_store *store) { - for (struct packed_git *p = store->packs; p; p = p->next) { - if (p->do_not_close) + for (struct packfile_list_entry *e = store->packs.head; e; e = e->next) { + if (e->pack->do_not_close) BUG("want to close pack marked 'do-not-close'"); - close_pack(p); + close_pack(e->pack); } } diff --git a/packfile.h b/packfile.h index e7a5792b6cf691453e116ce541abf6e3280066f6..27ba607e7c5eae20fe86ab4fd672705605f879c4 100644 --- a/packfile.h +++ b/packfile.h @@ -5,14 +5,12 @@ #include "object.h" #include "odb.h" #include "oidset.h" +#include "strmap.h" /* in odb.h */ struct object_info; struct packed_git { - struct hashmap_entry packmap_ent; - struct packed_git *next; - struct list_head mru; struct pack_window *windows; off_t pack_size; const void *index_data; @@ -52,6 +50,28 @@ struct packed_git { char pack_name[FLEX_ARRAY]; /* more */ }; +struct packfile_list { + struct packfile_list_entry *head, *tail; +}; + +struct packfile_list_entry { + struct packfile_list_entry *next; + struct packed_git *pack; +}; + +void packfile_list_clear(struct packfile_list *list); +void packfile_list_remove(struct packfile_list *list, struct packed_git *pack); +void packfile_list_prepend(struct packfile_list *list, struct packed_git *pack); +void packfile_list_append(struct packfile_list *list, struct packed_git *pack); + +/* + * Find the pack within the "packs" list whose index contains the object + * "oid". For general object lookups, you probably don't want this; use + * find_pack_entry() instead. + */ +struct packed_git *packfile_list_find_oid(struct packfile_list_entry *packs, + const struct object_id *oid); + /* * A store that manages packfiles for a given object database. */ @@ -59,10 +79,10 @@ struct packfile_store { struct object_database *odb; /* - * The list of packfiles in the order in which they are being added to - * the store. + * The list of packfiles in the order in which they have been most + * recently used. */ - struct packed_git *packs; + struct packfile_list packs; /* * Cache of packfiles which are marked as "kept", either because there @@ -78,20 +98,32 @@ struct packfile_store { unsigned flags; } kept_cache; - /* A most-recently-used ordered version of the packs list. */ - struct list_head mru; - /* * A map of packfile names to packed_git structs for tracking which * packs have been loaded already. */ - struct hashmap map; + struct strmap packs_by_path; /* * Whether packfiles have already been populated with this store's * packs. */ bool initialized; + + /* + * Usually, packfiles will be reordered to the front of the `packs` + * list whenever an object is looked up via them. This has the effect + * that packs that contain a lot of accessed objects will be located + * towards the front. + * + * This is usually desirable, but there are exceptions. 
One exception + * is when looking up multiple objects in a loop for each packfile. + * In that case, we may easily end up with an infinite loop as the + * packfiles get reordered to the front repeatedly. + * + * Setting this field to `true` thus disables these reorderings. + */ + bool skip_mru_updates; }; /* @@ -137,21 +169,19 @@ void packfile_store_add_pack(struct packfile_store *store, struct packed_git *pack); /* - * Get packs managed by the given store. Does not load the MIDX or any packs - * referenced by it. + * Load and iterate through all packs of the given repository. This helper + * function will yield packfiles from all object sources connected to the + * repository. */ -struct packed_git *packfile_store_get_packs(struct packfile_store *store); +#define repo_for_each_pack(repo, p) \ + for (struct packfile_list_entry *e = packfile_store_get_packs(repo->objects->packfiles); \ + ((p) = (e ? e->pack : NULL)); e = e->next) /* * Get all packs managed by the given store, including packfiles that are * referenced by multi-pack indices. */ -struct packed_git *packfile_store_get_all_packs(struct packfile_store *store); - -/* - * Get all packs in most-recently-used order. - */ -struct list_head *packfile_store_get_packs_mru(struct packfile_store *store); +struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *store); /* * Open the packfile and add it to the store if it isn't yet known. Returns @@ -243,14 +273,6 @@ extern void (*report_garbage)(unsigned seen_bits, const char *path); */ unsigned long repo_approximate_object_count(struct repository *r); -/* - * Find the pack within the "packs" list whose index contains the object "oid". - * For general object lookups, you probably don't want this; use - * find_pack_entry() instead. 
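[Editor's illustration, not part of the patch.] find_pack_entry() now implements the MRU behaviour directly on the list: a successful lookup calls packfile_list_prepend() to move the hit to the front, and for_each_packed_object() sets skip_mru_updates so a full scan does not keep reshuffling the list under its own cursor. Below is a self-contained sketch of that pattern with stand-in types, not the actual store:

#include <stdbool.h>
#include <stdio.h>

struct toy_entry { struct toy_entry *next; int key; };
struct toy_store { struct toy_entry *head; bool skip_mru_updates; };

/* Unlink 'e' (whose predecessor is known) and push it to the front. */
static void move_to_front(struct toy_store *s, struct toy_entry *prev,
                          struct toy_entry *e)
{
    if (!prev)
        return; /* already at the front */
    prev->next = e->next;
    e->next = s->head;
    s->head = e;
}

static struct toy_entry *lookup(struct toy_store *s, int key)
{
    struct toy_entry *prev = NULL, *e;

    for (e = s->head; e; prev = e, e = e->next) {
        if (e->key != key)
            continue;
        /* Promote hits unless a caller is iterating the whole store. */
        if (!s->skip_mru_updates)
            move_to_front(s, prev, e);
        return e;
    }
    return NULL;
}

int main(void)
{
    struct toy_entry c = { NULL, 3 }, b = { &c, 2 }, a = { &b, 1 };
    struct toy_store store = { &a, false };

    lookup(&store, 3);
    for (struct toy_entry *e = store.head; e; e = e->next)
        printf("%d ", e->key); /* prints "3 1 2" */
    printf("\n");
    return 0;
}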
- */ -struct packed_git *find_oid_pack(const struct object_id *oid, - struct packed_git *packs); - void pack_report(struct repository *repo); /* diff --git a/repack-cruft.c b/repack-cruft.c new file mode 100644 index 0000000000000000000000000000000000000000..544a6f2df3a9e91b80618fda6d089a85736af555 --- /dev/null +++ b/repack-cruft.c @@ -0,0 +1,98 @@ +#include "git-compat-util.h" +#include "repack.h" +#include "packfile.h" +#include "repository.h" +#include "run-command.h" + +static void combine_small_cruft_packs(FILE *in, off_t combine_cruft_below_size, + struct existing_packs *existing) +{ + struct packed_git *p; + struct strbuf buf = STRBUF_INIT; + size_t i; + + repo_for_each_pack(existing->repo, p) { + if (!(p->is_cruft && p->pack_local)) + continue; + + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + + if (!string_list_has_string(&existing->cruft_packs, buf.buf)) + continue; + + if (p->pack_size < combine_cruft_below_size) { + fprintf(in, "-%s\n", pack_basename(p)); + } else { + existing_packs_retain_cruft(existing, p); + fprintf(in, "%s\n", pack_basename(p)); + } + } + + for (i = 0; i < existing->non_kept_packs.nr; i++) + fprintf(in, "-%s.pack\n", + existing->non_kept_packs.items[i].string); + + strbuf_release(&buf); +} + +int write_cruft_pack(struct write_pack_opts *opts, + const char *cruft_expiration, + unsigned long combine_cruft_below_size, + struct string_list *names, + struct existing_packs *existing) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + struct string_list_item *item; + FILE *in; + int ret; + const char *pack_prefix = write_pack_opts_pack_prefix(opts); + + prepare_pack_objects(&cmd, opts->po_args, opts->destination); + + strvec_push(&cmd.args, "--cruft"); + if (cruft_expiration) + strvec_pushf(&cmd.args, "--cruft-expiration=%s", + cruft_expiration); + + strvec_push(&cmd.args, "--non-empty"); + + cmd.in = -1; + + ret = start_command(&cmd); + if (ret) + return ret; + + /* + * names has a confusing double use: it both provides the list + * of just-written new packs, and accepts the name of the cruft + * pack we are writing. + * + * By the time it is read here, it contains only the pack(s) + * that were just written, which is exactly the set of packs we + * want to consider kept. + * + * If `--expire-to` is given, the double-use served by `names` + * ensures that the pack written to `--expire-to` excludes any + * objects contained in the cruft pack. 
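[Editor's illustration, not part of the patch.] write_cruft_pack() and write_filtered_pack() share one shape: start git pack-objects with cmd.in = -1, wrap the pipe with xfdopen(), stream one pack name per line (with a leading '-' or '^' where the caller wants the pack treated differently), then close the stream and collect the exit status. The snippet below is a deliberately simplified, portable sketch of that stdin-streaming pattern using popen(); it is not Git's run-command API, and 'sort' merely stands in for the real pack-objects child.

#include <stdio.h>

int main(void)
{
    /* 'sort' stands in for "git pack-objects --cruft ..." here. */
    FILE *in = popen("sort", "w");
    const char *packs[] = { "pack-b.pack", "-pack-a.pack", "pack-c.pack" };

    if (!in)
        return 1;

    /*
     * One entry per line; a leading '-' marks packs that the child
     * should treat differently, as in the hunk above.
     */
    for (size_t i = 0; i < 3; i++)
        fprintf(in, "%s\n", packs[i]);

    /* pclose() waits for the child and reports its exit status. */
    return pclose(in) != 0;
}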
+ */ + in = xfdopen(cmd.in, "w"); + for_each_string_list_item(item, names) + fprintf(in, "%s-%s.pack\n", pack_prefix, item->string); + if (combine_cruft_below_size && !cruft_expiration) { + combine_small_cruft_packs(in, combine_cruft_below_size, + existing); + } else { + for_each_string_list_item(item, &existing->non_kept_packs) + fprintf(in, "-%s.pack\n", item->string); + for_each_string_list_item(item, &existing->cruft_packs) + fprintf(in, "-%s.pack\n", item->string); + } + for_each_string_list_item(item, &existing->kept_packs) + fprintf(in, "%s.pack\n", item->string); + fclose(in); + + return finish_pack_objects_cmd(existing->repo->hash_algo, opts, &cmd, + names); +} diff --git a/repack-filtered.c b/repack-filtered.c new file mode 100644 index 0000000000000000000000000000000000000000..96c7b02bb6c6997868b2ee751e22a5bef6ebc3df --- /dev/null +++ b/repack-filtered.c @@ -0,0 +1,51 @@ +#include "git-compat-util.h" +#include "repack.h" +#include "repository.h" +#include "run-command.h" +#include "string-list.h" + +int write_filtered_pack(struct write_pack_opts *opts, + struct existing_packs *existing, + struct string_list *names) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + struct string_list_item *item; + FILE *in; + int ret; + const char *caret; + const char *pack_prefix = write_pack_opts_pack_prefix(opts); + + prepare_pack_objects(&cmd, opts->po_args, opts->destination); + + strvec_push(&cmd.args, "--stdin-packs"); + + for_each_string_list_item(item, &existing->kept_packs) + strvec_pushf(&cmd.args, "--keep-pack=%s", item->string); + + cmd.in = -1; + + ret = start_command(&cmd); + if (ret) + return ret; + + /* + * Here 'names' contains only the pack(s) that were just + * written, which is exactly the packs we want to keep. Also + * 'existing_kept_packs' already contains the packs in + * 'keep_pack_list'. + */ + in = xfdopen(cmd.in, "w"); + for_each_string_list_item(item, names) + fprintf(in, "^%s-%s.pack\n", pack_prefix, item->string); + for_each_string_list_item(item, &existing->non_kept_packs) + fprintf(in, "%s.pack\n", item->string); + for_each_string_list_item(item, &existing->cruft_packs) + fprintf(in, "%s.pack\n", item->string); + caret = opts->po_args->pack_kept_objects ? 
"" : "^"; + for_each_string_list_item(item, &existing->kept_packs) + fprintf(in, "%s%s.pack\n", caret, item->string); + fclose(in); + + return finish_pack_objects_cmd(existing->repo->hash_algo, opts, &cmd, + names); +} diff --git a/repack-geometry.c b/repack-geometry.c new file mode 100644 index 0000000000000000000000000000000000000000..4e51003ecccf067b5c5ea4748e13ac0ff3a77d16 --- /dev/null +++ b/repack-geometry.c @@ -0,0 +1,232 @@ +#define DISABLE_SIGN_COMPARE_WARNINGS + +#include "git-compat-util.h" +#include +#include "repack.h" +#include "hex.h" +#include "packfile.h" + +static uint32_t pack_geometry_weight(struct packed_git *p) +{ + if (open_pack_index(p)) + die(_("cannot open index for %s"), p->pack_name); + return p->num_objects; +} + +static int pack_geometry_cmp(const void *va, const void *vb) +{ + uint32_t aw = pack_geometry_weight(*(struct packed_git **)va), + bw = pack_geometry_weight(*(struct packed_git **)vb); + + if (aw < bw) + return -1; + if (aw > bw) + return 1; + return 0; +} + +void pack_geometry_init(struct pack_geometry *geometry, + struct existing_packs *existing, + const struct pack_objects_args *args) +{ + struct packed_git *p; + struct strbuf buf = STRBUF_INIT; + + repo_for_each_pack(existing->repo, p) { + if (args->local && !p->pack_local) + /* + * When asked to only repack local packfiles we skip + * over any packfiles that are borrowed from alternate + * object directories. + */ + continue; + + if (!args->pack_kept_objects) { + /* + * Any pack that has its pack_keep bit set will + * appear in existing->kept_packs below, but + * this saves us from doing a more expensive + * check. + */ + if (p->pack_keep) + continue; + + /* + * The pack may be kept via the --keep-pack + * option; check 'existing->kept_packs' to + * determine whether to ignore it. + */ + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + + if (string_list_has_string(&existing->kept_packs, buf.buf)) + continue; + } + if (p->is_cruft) + continue; + + ALLOC_GROW(geometry->pack, + geometry->pack_nr + 1, + geometry->pack_alloc); + + geometry->pack[geometry->pack_nr] = p; + geometry->pack_nr++; + } + + QSORT(geometry->pack, geometry->pack_nr, pack_geometry_cmp); + strbuf_release(&buf); +} + +void pack_geometry_split(struct pack_geometry *geometry) +{ + uint32_t i; + uint32_t split; + off_t total_size = 0; + + if (!geometry->pack_nr) { + geometry->split = geometry->pack_nr; + return; + } + + /* + * First, count the number of packs (in descending order of size) which + * already form a geometric progression. + */ + for (i = geometry->pack_nr - 1; i > 0; i--) { + struct packed_git *ours = geometry->pack[i]; + struct packed_git *prev = geometry->pack[i - 1]; + + if (unsigned_mult_overflows(geometry->split_factor, + pack_geometry_weight(prev))) + die(_("pack %s too large to consider in geometric " + "progression"), + prev->pack_name); + + if (pack_geometry_weight(ours) < + geometry->split_factor * pack_geometry_weight(prev)) + break; + } + + split = i; + + if (split) { + /* + * Move the split one to the right, since the top element in the + * last-compared pair can't be in the progression. Only do this + * when we split in the middle of the array (otherwise if we got + * to the end, then the split is in the right place). + */ + split++; + } + + /* + * Then, anything to the left of 'split' must be in a new pack. But, + * creating that new pack may cause packs in the heavy half to no longer + * form a geometric progression. 
+ * + * Compute an expected size of the new pack, and then determine how many + * packs in the heavy half need to be joined into it (if any) to restore + * the geometric progression. + */ + for (i = 0; i < split; i++) { + struct packed_git *p = geometry->pack[i]; + + if (unsigned_add_overflows(total_size, pack_geometry_weight(p))) + die(_("pack %s too large to roll up"), p->pack_name); + total_size += pack_geometry_weight(p); + } + for (i = split; i < geometry->pack_nr; i++) { + struct packed_git *ours = geometry->pack[i]; + + if (unsigned_mult_overflows(geometry->split_factor, + total_size)) + die(_("pack %s too large to roll up"), ours->pack_name); + + if (pack_geometry_weight(ours) < + geometry->split_factor * total_size) { + if (unsigned_add_overflows(total_size, + pack_geometry_weight(ours))) + die(_("pack %s too large to roll up"), + ours->pack_name); + + split++; + total_size += pack_geometry_weight(ours); + } else + break; + } + + geometry->split = split; +} + +struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry) +{ + uint32_t i; + + if (!geometry) { + /* + * No geometry means either an all-into-one repack (in which + * case there is only one pack left and it is the largest) or an + * incremental one. + * + * If repacking incrementally, then we could check the size of + * all packs to determine which should be preferred, but leave + * this for later. + */ + return NULL; + } + if (geometry->split == geometry->pack_nr) + return NULL; + + /* + * The preferred pack is the largest pack above the split line. In + * other words, it is the largest pack that does not get rolled up in + * the geometric repack. + */ + for (i = geometry->pack_nr; i > geometry->split; i--) + /* + * A pack that is not local would never be included in a + * multi-pack index. We thus skip over any non-local packs. 
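[Editor's illustration, not part of the patch.] pack_geometry_preferred_pack() picks the largest pack above the split line as the MIDX's preferred pack, skipping non-local packs since those never end up in the multi-pack index. The loop below is a standalone model of that downward scan over the size-sorted array, using stand-in types:

#include <stdio.h>

struct toy_pack { const char *name; int local; };

/*
 * 'packs' is sorted by ascending size and the first 'split' entries get
 * rolled up; walk from the largest pack down to the split line and
 * return the first local one.
 */
static const struct toy_pack *toy_preferred_pack(const struct toy_pack *packs,
                                                 size_t nr, size_t split)
{
    if (split == nr)
        return NULL; /* every pack gets rolled up */
    for (size_t i = nr; i > split; i--)
        if (packs[i - 1].local)
            return &packs[i - 1];
    return NULL;
}

int main(void)
{
    /* Sorted by size; the largest pack lives in an alternate. */
    const struct toy_pack packs[] = {
        { "small-local", 1 },
        { "medium-local", 1 },
        { "large-alternate", 0 },
    };
    const struct toy_pack *p = toy_preferred_pack(packs, 3, 1);

    printf("%s\n", p ? p->name : "(none)"); /* prints "medium-local" */
    return 0;
}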
+ */ + if (geometry->pack[i - 1]->pack_local) + return geometry->pack[i - 1]; + + return NULL; +} + +void pack_geometry_remove_redundant(struct pack_geometry *geometry, + struct string_list *names, + struct existing_packs *existing, + const char *packdir) +{ + const struct git_hash_algo *algop = existing->repo->hash_algo; + struct strbuf buf = STRBUF_INIT; + uint32_t i; + + for (i = 0; i < geometry->split; i++) { + struct packed_git *p = geometry->pack[i]; + if (string_list_has_string(names, hash_to_hex_algop(p->hash, + algop))) + continue; + + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + + if ((p->pack_keep) || + (string_list_has_string(&existing->kept_packs, buf.buf))) + continue; + + repack_remove_redundant_pack(existing->repo, packdir, buf.buf); + } + + strbuf_release(&buf); +} + +void pack_geometry_release(struct pack_geometry *geometry) +{ + if (!geometry) + return; + + free(geometry->pack); +} diff --git a/repack-midx.c b/repack-midx.c new file mode 100644 index 0000000000000000000000000000000000000000..6f6202c5bccd89a9087bd86a80b7f56d0678f7fb --- /dev/null +++ b/repack-midx.c @@ -0,0 +1,372 @@ +#include "git-compat-util.h" +#include "repack.h" +#include "hash.h" +#include "hex.h" +#include "odb.h" +#include "oidset.h" +#include "pack-bitmap.h" +#include "refs.h" +#include "run-command.h" +#include "tempfile.h" + +struct midx_snapshot_ref_data { + struct repository *repo; + struct tempfile *f; + struct oidset seen; + int preferred; +}; + +static int midx_snapshot_ref_one(const char *refname UNUSED, + const char *referent UNUSED, + const struct object_id *oid, + int flag UNUSED, void *_data) +{ + struct midx_snapshot_ref_data *data = _data; + struct object_id peeled; + + if (!peel_iterated_oid(data->repo, oid, &peeled)) + oid = &peeled; + + if (oidset_insert(&data->seen, oid)) + return 0; /* already seen */ + + if (odb_read_object_info(data->repo->objects, oid, NULL) != OBJ_COMMIT) + return 0; + + fprintf(data->f->fp, "%s%s\n", data->preferred ? "+" : "", + oid_to_hex(oid)); + + return 0; +} + +void midx_snapshot_refs(struct repository *repo, struct tempfile *f) +{ + struct midx_snapshot_ref_data data; + const struct string_list *preferred = bitmap_preferred_tips(repo); + + data.repo = repo; + data.f = f; + data.preferred = 0; + oidset_init(&data.seen, 0); + + if (!fdopen_tempfile(f, "w")) + die(_("could not open tempfile %s for writing"), + get_tempfile_path(f)); + + if (preferred) { + struct string_list_item *item; + + data.preferred = 1; + for_each_string_list_item(item, preferred) + refs_for_each_ref_in(get_main_ref_store(repo), + item->string, + midx_snapshot_ref_one, &data); + data.preferred = 0; + } + + refs_for_each_ref(get_main_ref_store(repo), + midx_snapshot_ref_one, &data); + + if (close_tempfile_gently(f)) { + int save_errno = errno; + delete_tempfile(&f); + errno = save_errno; + die_errno(_("could not close refs snapshot tempfile")); + } + + oidset_clear(&data.seen); +} + +static int midx_has_unknown_packs(struct string_list *include, + struct pack_geometry *geometry, + struct existing_packs *existing) +{ + struct string_list_item *item; + + string_list_sort(include); + + for_each_string_list_item(item, &existing->midx_packs) { + const char *pack_name = item->string; + + /* + * Determine whether or not each MIDX'd pack from the existing + * MIDX (if any) is represented in the new MIDX. For each pack + * in the MIDX, it must either be: + * + * - In the "include" list of packs to be included in the new + * MIDX. 
Note this function is called before the include + * list is populated with any cruft pack(s). + * + * - Below the geometric split line (if using pack geometry), + * indicating that the pack won't be included in the new + * MIDX, but its contents were rolled up as part of the + * geometric repack. + * + * - In the existing non-kept packs list (if not using pack + * geometry), and marked as non-deleted. + */ + if (string_list_has_string(include, pack_name)) { + continue; + } else if (geometry) { + struct strbuf buf = STRBUF_INIT; + uint32_t j; + + for (j = 0; j < geometry->split; j++) { + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(geometry->pack[j])); + strbuf_strip_suffix(&buf, ".pack"); + strbuf_addstr(&buf, ".idx"); + + if (!strcmp(pack_name, buf.buf)) { + strbuf_release(&buf); + break; + } + } + + strbuf_release(&buf); + + if (j < geometry->split) + continue; + } else { + struct string_list_item *item; + + item = string_list_lookup(&existing->non_kept_packs, + pack_name); + if (item && !existing_pack_is_marked_for_deletion(item)) + continue; + } + + /* + * If we got to this point, the MIDX includes some pack that we + * don't know about. + */ + return 1; + } + + return 0; +} + +static void midx_included_packs(struct string_list *include, + struct repack_write_midx_opts *opts) +{ + struct existing_packs *existing = opts->existing; + struct pack_geometry *geometry = opts->geometry; + struct string_list *names = opts->names; + struct string_list_item *item; + struct strbuf buf = STRBUF_INIT; + + for_each_string_list_item(item, &existing->kept_packs) { + strbuf_reset(&buf); + strbuf_addf(&buf, "%s.idx", item->string); + string_list_insert(include, buf.buf); + } + + for_each_string_list_item(item, names) { + strbuf_reset(&buf); + strbuf_addf(&buf, "pack-%s.idx", item->string); + string_list_insert(include, buf.buf); + } + + if (geometry->split_factor) { + uint32_t i; + + for (i = geometry->split; i < geometry->pack_nr; i++) { + struct packed_git *p = geometry->pack[i]; + + /* + * The multi-pack index never refers to packfiles part + * of an alternate object database, so we skip these. + * While git-multi-pack-index(1) would silently ignore + * them anyway, this allows us to skip executing the + * command completely when we have only non-local + * packfiles. + */ + if (!p->pack_local) + continue; + + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + strbuf_addstr(&buf, ".idx"); + + string_list_insert(include, buf.buf); + } + } else { + for_each_string_list_item(item, &existing->non_kept_packs) { + if (existing_pack_is_marked_for_deletion(item)) + continue; + + strbuf_reset(&buf); + strbuf_addf(&buf, "%s.idx", item->string); + string_list_insert(include, buf.buf); + } + } + + if (opts->midx_must_contain_cruft || + midx_has_unknown_packs(include, geometry, existing)) { + /* + * If there are one or more unknown pack(s) present (see + * midx_has_unknown_packs() for what makes a pack + * "unknown") in the MIDX before the repack, keep them + * as they may be required to form a reachability + * closure if the MIDX is bitmapped. + * + * For example, a cruft pack can be required to form a + * reachability closure if the MIDX is bitmapped and one + * or more of the bitmap's selected commits reaches a + * once-cruft object that was later made reachable. 
+ */ + for_each_string_list_item(item, &existing->cruft_packs) { + /* + * When doing a --geometric repack, there is no + * need to check for deleted packs, since we're + * by definition not doing an ALL_INTO_ONE + * repack (hence no packs will be deleted). + * Otherwise we must check for and exclude any + * packs which are enqueued for deletion. + * + * So we could omit the conditional below in the + * --geometric case, but doing so is unnecessary + * since no packs are marked as pending + * deletion (since we only call + * `existing_packs_mark_for_deletion()` when + * doing an all-into-one repack). + */ + if (existing_pack_is_marked_for_deletion(item)) + continue; + + strbuf_reset(&buf); + strbuf_addf(&buf, "%s.idx", item->string); + string_list_insert(include, buf.buf); + } + } else { + /* + * Modern versions of Git (with the appropriate + * configuration setting) will write new copies of + * once-cruft objects when doing a --geometric repack. + * + * If the MIDX has no cruft pack, new packs written + * during a --geometric repack will not rely on the + * cruft pack to form a reachability closure, so we can + * avoid including them in the MIDX in that case. + */ + ; + } + + strbuf_release(&buf); +} + +static void remove_redundant_bitmaps(struct string_list *include, + const char *packdir) +{ + struct strbuf path = STRBUF_INIT; + struct string_list_item *item; + size_t packdir_len; + + strbuf_addstr(&path, packdir); + strbuf_addch(&path, '/'); + packdir_len = path.len; + + /* + * Remove any pack bitmaps corresponding to packs which are now + * included in the MIDX. + */ + for_each_string_list_item(item, include) { + strbuf_addstr(&path, item->string); + strbuf_strip_suffix(&path, ".idx"); + strbuf_addstr(&path, ".bitmap"); + + if (unlink(path.buf) && errno != ENOENT) + warning_errno(_("could not remove stale bitmap: %s"), + path.buf); + + strbuf_setlen(&path, packdir_len); + } + strbuf_release(&path); +} + +int write_midx_included_packs(struct repack_write_midx_opts *opts) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + struct string_list include = STRING_LIST_INIT_DUP; + struct string_list_item *item; + struct packed_git *preferred = pack_geometry_preferred_pack(opts->geometry); + FILE *in; + int ret = 0; + + midx_included_packs(&include, opts); + if (!include.nr) + goto done; + + cmd.in = -1; + cmd.git_cmd = 1; + + strvec_push(&cmd.args, "multi-pack-index"); + strvec_pushl(&cmd.args, "write", "--stdin-packs", NULL); + + if (opts->show_progress) + strvec_push(&cmd.args, "--progress"); + else + strvec_push(&cmd.args, "--no-progress"); + + if (opts->write_bitmaps) + strvec_push(&cmd.args, "--bitmap"); + + if (preferred) + strvec_pushf(&cmd.args, "--preferred-pack=%s", + pack_basename(preferred)); + else if (opts->names->nr) { + /* The largest pack was repacked, meaning that either + * one or two packs exist depending on whether the + * repository has a cruft pack or not. + * + * Select the non-cruft one as preferred to encourage + * pack-reuse among packs containing reachable objects + * over unreachable ones. + * + * (Note we could write multiple packs here if + * `--max-pack-size` was given, but any one of them + * will suffice, so pick the first one.) + */ + for_each_string_list_item(item, opts->names) { + struct generated_pack *pack = item->util; + if (generated_pack_has_ext(pack, ".mtimes")) + continue; + + strvec_pushf(&cmd.args, "--preferred-pack=pack-%s.pack", + item->string); + break; + } + } else { + /* + * No packs were kept, and no packs were written. 
The + * only thing remaining are .keep packs (unless + * --pack-kept-objects was given). + * + * Set the `--preferred-pack` arbitrarily here. + */ + ; + } + + if (opts->refs_snapshot) + strvec_pushf(&cmd.args, "--refs-snapshot=%s", + opts->refs_snapshot); + + ret = start_command(&cmd); + if (ret) + goto done; + + in = xfdopen(cmd.in, "w"); + for_each_string_list_item(item, &include) + fprintf(in, "%s\n", item->string); + fclose(in); + + ret = finish_command(&cmd); +done: + if (!ret && opts->write_bitmaps) + remove_redundant_bitmaps(&include, opts->packdir); + + string_list_clear(&include, 0); + + return ret; +} diff --git a/repack-promisor.c b/repack-promisor.c new file mode 100644 index 0000000000000000000000000000000000000000..8bf42fc7157d2882e648c54569a19d3ea2324c9b --- /dev/null +++ b/repack-promisor.c @@ -0,0 +1,102 @@ +#include "git-compat-util.h" +#include "repack.h" +#include "run-command.h" +#include "hex.h" +#include "repository.h" +#include "packfile.h" +#include "path.h" +#include "pack.h" + +struct write_oid_context { + struct child_process *cmd; + const struct git_hash_algo *algop; +}; + +/* + * Write oid to the given struct child_process's stdin, starting it first if + * necessary. + */ +static int write_oid(const struct object_id *oid, + struct packed_git *pack UNUSED, + uint32_t pos UNUSED, void *data) +{ + struct write_oid_context *ctx = data; + struct child_process *cmd = ctx->cmd; + + if (cmd->in == -1) { + if (start_command(cmd)) + die(_("could not start pack-objects to repack promisor objects")); + } + + if (write_in_full(cmd->in, oid_to_hex(oid), ctx->algop->hexsz) < 0 || + write_in_full(cmd->in, "\n", 1) < 0) + die(_("failed to feed promisor objects to pack-objects")); + return 0; +} + +void repack_promisor_objects(struct repository *repo, + const struct pack_objects_args *args, + struct string_list *names, const char *packtmp) +{ + struct write_oid_context ctx; + struct child_process cmd = CHILD_PROCESS_INIT; + FILE *out; + struct strbuf line = STRBUF_INIT; + + prepare_pack_objects(&cmd, args, packtmp); + cmd.in = -1; + + /* + * NEEDSWORK: Giving pack-objects only the OIDs without any ordering + * hints may result in suboptimal deltas in the resulting pack. See if + * the OIDs can be sent with fake paths such that pack-objects can use a + * {type -> existing pack order} ordering when computing deltas instead + * of a {type -> size} ordering, which may produce better deltas. + */ + ctx.cmd = &cmd; + ctx.algop = repo->hash_algo; + for_each_packed_object(repo, write_oid, &ctx, + FOR_EACH_OBJECT_PROMISOR_ONLY); + + if (cmd.in == -1) { + /* No packed objects; cmd was never started */ + child_process_clear(&cmd); + return; + } + + close(cmd.in); + + out = xfdopen(cmd.out, "r"); + while (strbuf_getline_lf(&line, out) != EOF) { + struct string_list_item *item; + char *promisor_name; + + if (line.len != repo->hash_algo->hexsz) + die(_("repack: Expecting full hex object ID lines only from pack-objects.")); + item = string_list_append(names, line.buf); + + /* + * pack-objects creates the .pack and .idx files, but not the + * .promisor file. Create the .promisor file, which is empty. + * + * NEEDSWORK: fetch-pack sometimes generates non-empty + * .promisor files containing the ref names and associated + * hashes at the point of generation of the corresponding + * packfile, but this would not preserve their contents. Maybe + * concatenate the contents of all .promisor files instead of + * just creating a new empty file. 
+ */ + promisor_name = mkpathdup("%s-%s.promisor", packtmp, + line.buf); + write_promisor_file(promisor_name, NULL, 0); + + item->util = generated_pack_populate(item->string, packtmp); + + free(promisor_name); + } + + fclose(out); + if (finish_command(&cmd)) + die(_("could not finish pack-objects to repack promisor objects")); + strbuf_release(&line); +} diff --git a/repack.c b/repack.c new file mode 100644 index 0000000000000000000000000000000000000000..d690e6871844b0cc63e902a91f4bb63b022ff09b --- /dev/null +++ b/repack.c @@ -0,0 +1,360 @@ +#include "git-compat-util.h" +#include "dir.h" +#include "midx.h" +#include "odb.h" +#include "packfile.h" +#include "path.h" +#include "repack.h" +#include "repository.h" +#include "run-command.h" +#include "tempfile.h" + +void prepare_pack_objects(struct child_process *cmd, + const struct pack_objects_args *args, + const char *out) +{ + strvec_push(&cmd->args, "pack-objects"); + if (args->window) + strvec_pushf(&cmd->args, "--window=%s", args->window); + if (args->window_memory) + strvec_pushf(&cmd->args, "--window-memory=%s", args->window_memory); + if (args->depth) + strvec_pushf(&cmd->args, "--depth=%s", args->depth); + if (args->threads) + strvec_pushf(&cmd->args, "--threads=%s", args->threads); + if (args->max_pack_size) + strvec_pushf(&cmd->args, "--max-pack-size=%lu", args->max_pack_size); + if (args->no_reuse_delta) + strvec_pushf(&cmd->args, "--no-reuse-delta"); + if (args->no_reuse_object) + strvec_pushf(&cmd->args, "--no-reuse-object"); + if (args->name_hash_version) + strvec_pushf(&cmd->args, "--name-hash-version=%d", args->name_hash_version); + if (args->path_walk) + strvec_pushf(&cmd->args, "--path-walk"); + if (args->local) + strvec_push(&cmd->args, "--local"); + if (args->quiet) + strvec_push(&cmd->args, "--quiet"); + if (args->delta_base_offset) + strvec_push(&cmd->args, "--delta-base-offset"); + if (!args->pack_kept_objects) + strvec_push(&cmd->args, "--honor-pack-keep"); + strvec_push(&cmd->args, out); + cmd->git_cmd = 1; + cmd->out = -1; +} + +void pack_objects_args_release(struct pack_objects_args *args) +{ + free(args->window); + free(args->window_memory); + free(args->depth); + free(args->threads); + list_objects_filter_release(&args->filter_options); +} + +void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, + const char *base_name) +{ + struct strbuf buf = STRBUF_INIT; + struct odb_source *source = repo->objects->sources; + struct multi_pack_index *m = get_multi_pack_index(source); + strbuf_addf(&buf, "%s.pack", base_name); + if (m && source->local && midx_contains_pack(m, buf.buf)) + clear_midx_file(repo); + strbuf_insertf(&buf, 0, "%s/", dir_name); + unlink_pack_path(buf.buf, 1); + strbuf_release(&buf); +} + +const char *write_pack_opts_pack_prefix(struct write_pack_opts *opts) +{ + const char *pack_prefix; + if (!skip_prefix(opts->packtmp, opts->packdir, &pack_prefix)) + die(_("pack prefix %s does not begin with objdir %s"), + opts->packtmp, opts->packdir); + if (*pack_prefix == '/') + pack_prefix++; + return pack_prefix; +} + +int write_pack_opts_is_local(struct write_pack_opts *opts) +{ + const char *scratch; + return skip_prefix(opts->destination, opts->packdir, &scratch); +} + +int finish_pack_objects_cmd(const struct git_hash_algo *algop, + struct write_pack_opts *opts, + struct child_process *cmd, + struct string_list *names) +{ + FILE *out; + int local = write_pack_opts_is_local(opts); + struct strbuf line = STRBUF_INIT; + + out = xfdopen(cmd->out, "r"); + while (strbuf_getline_lf(&line, 
out) != EOF) { + struct string_list_item *item; + + if (line.len != algop->hexsz) + die(_("repack: Expecting full hex object ID lines only " + "from pack-objects.")); + /* + * Avoid putting packs written outside of the repository in the + * list of names. + */ + if (local) { + item = string_list_append(names, line.buf); + item->util = generated_pack_populate(line.buf, + opts->packtmp); + } + } + fclose(out); + + strbuf_release(&line); + + return finish_command(cmd); +} + +#define DELETE_PACK 1 +#define RETAIN_PACK 2 + +void existing_packs_collect(struct existing_packs *existing, + const struct string_list *extra_keep) +{ + struct packed_git *p; + struct strbuf buf = STRBUF_INIT; + + repo_for_each_pack(existing->repo, p) { + size_t i; + const char *base; + + if (p->multi_pack_index) + string_list_append(&existing->midx_packs, + pack_basename(p)); + if (!p->pack_local) + continue; + + base = pack_basename(p); + + for (i = 0; i < extra_keep->nr; i++) + if (!fspathcmp(base, extra_keep->items[i].string)) + break; + + strbuf_reset(&buf); + strbuf_addstr(&buf, base); + strbuf_strip_suffix(&buf, ".pack"); + + if ((extra_keep->nr > 0 && i < extra_keep->nr) || p->pack_keep) + string_list_append(&existing->kept_packs, buf.buf); + else if (p->is_cruft) + string_list_append(&existing->cruft_packs, buf.buf); + else + string_list_append(&existing->non_kept_packs, buf.buf); + } + + string_list_sort(&existing->kept_packs); + string_list_sort(&existing->non_kept_packs); + string_list_sort(&existing->cruft_packs); + string_list_sort(&existing->midx_packs); + strbuf_release(&buf); +} + +int existing_packs_has_non_kept(const struct existing_packs *existing) +{ + return existing->non_kept_packs.nr || existing->cruft_packs.nr; +} + +static void existing_pack_mark_for_deletion(struct string_list_item *item) +{ + item->util = (void*)((uintptr_t)item->util | DELETE_PACK); +} + +static void existing_pack_unmark_for_deletion(struct string_list_item *item) +{ + item->util = (void*)((uintptr_t)item->util & ~DELETE_PACK); +} + +int existing_pack_is_marked_for_deletion(struct string_list_item *item) +{ + return (uintptr_t)item->util & DELETE_PACK; +} + +static void existing_packs_mark_retained(struct string_list_item *item) +{ + item->util = (void*)((uintptr_t)item->util | RETAIN_PACK); +} + +static int existing_pack_is_retained(struct string_list_item *item) +{ + return (uintptr_t)item->util & RETAIN_PACK; +} + +static void existing_packs_mark_for_deletion_1(const struct git_hash_algo *algop, + struct string_list *names, + struct string_list *list) +{ + struct string_list_item *item; + const size_t hexsz = algop->hexsz; + + for_each_string_list_item(item, list) { + char *sha1; + size_t len = strlen(item->string); + if (len < hexsz) + continue; + sha1 = item->string + len - hexsz; + + if (existing_pack_is_retained(item)) { + existing_pack_unmark_for_deletion(item); + } else if (!string_list_has_string(names, sha1)) { + /* + * Mark this pack for deletion, which ensures + * that this pack won't be included in a MIDX + * (if `--write-midx` was given) and that we + * will actually delete this pack (if `-d` was + * given). 
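[Editor's illustration, not part of the patch.] The existing-packs bookkeeping records its DELETE_PACK / RETAIN_PACK state by or-ing flag bits into each string_list item's util pointer, cast through uintptr_t. A tiny standalone demonstration of that pointer-tagging idiom on a plain void * field (no string_list involved):

#include <stdint.h>
#include <stdio.h>

#define DELETE_PACK 1
#define RETAIN_PACK 2

struct toy_item { const char *name; void *util; };

static void mark(struct toy_item *item, uintptr_t flag)
{
    item->util = (void *)((uintptr_t)item->util | flag);
}

static void unmark(struct toy_item *item, uintptr_t flag)
{
    item->util = (void *)((uintptr_t)item->util & ~flag);
}

static int is_marked(const struct toy_item *item, uintptr_t flag)
{
    return !!((uintptr_t)item->util & flag);
}

int main(void)
{
    struct toy_item item = { "pack-1234", NULL };

    mark(&item, DELETE_PACK);
    mark(&item, RETAIN_PACK);
    unmark(&item, DELETE_PACK);
    printf("%s: delete=%d retain=%d\n", item.name,
           is_marked(&item, DELETE_PACK), is_marked(&item, RETAIN_PACK));
    return 0;
}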
+
+void existing_packs_retain_cruft(struct existing_packs *existing,
+				 struct packed_git *cruft)
+{
+	struct strbuf buf = STRBUF_INIT;
+	struct string_list_item *item;
+
+	strbuf_addstr(&buf, pack_basename(cruft));
+	strbuf_strip_suffix(&buf, ".pack");
+
+	item = string_list_lookup(&existing->cruft_packs, buf.buf);
+	if (!item)
+		BUG("could not find cruft pack '%s'", pack_basename(cruft));
+
+	existing_packs_mark_retained(item);
+	strbuf_release(&buf);
+}
+
+void existing_packs_mark_for_deletion(struct existing_packs *existing,
+				      struct string_list *names)
+
+{
+	const struct git_hash_algo *algop = existing->repo->hash_algo;
+	existing_packs_mark_for_deletion_1(algop, names,
+					   &existing->non_kept_packs);
+	existing_packs_mark_for_deletion_1(algop, names,
+					   &existing->cruft_packs);
+}
+
+static void remove_redundant_packs_1(struct repository *repo,
+				     struct string_list *packs,
+				     const char *packdir)
+{
+	struct string_list_item *item;
+	for_each_string_list_item(item, packs) {
+		if (!existing_pack_is_marked_for_deletion(item))
+			continue;
+		repack_remove_redundant_pack(repo, packdir, item->string);
+	}
+}
+
+void existing_packs_remove_redundant(struct existing_packs *existing,
+				     const char *packdir)
+{
+	remove_redundant_packs_1(existing->repo, &existing->non_kept_packs,
+				 packdir);
+	remove_redundant_packs_1(existing->repo, &existing->cruft_packs,
+				 packdir);
+}
+
+void existing_packs_release(struct existing_packs *existing)
+{
+	string_list_clear(&existing->kept_packs, 0);
+	string_list_clear(&existing->non_kept_packs, 0);
+	string_list_clear(&existing->cruft_packs, 0);
+	string_list_clear(&existing->midx_packs, 0);
+}
+
+static struct {
+	const char *name;
+	unsigned optional:1;
+} exts[] = {
+	{".pack"},
+	{".rev", 1},
+	{".mtimes", 1},
+	{".bitmap", 1},
+	{".promisor", 1},
+	{".idx"},
+};
+
+struct generated_pack {
+	struct tempfile *tempfiles[ARRAY_SIZE(exts)];
+};
+
+struct generated_pack *generated_pack_populate(const char *name,
+					       const char *packtmp)
+{
+	struct stat statbuf;
+	struct strbuf path = STRBUF_INIT;
+	struct generated_pack *pack = xcalloc(1, sizeof(*pack));
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(exts); i++) {
+		strbuf_reset(&path);
+		strbuf_addf(&path, "%s-%s%s", packtmp, name, exts[i].name);
+
+		if (stat(path.buf, &statbuf))
+			continue;
+
+		pack->tempfiles[i] = register_tempfile(path.buf);
+	}
+
+	strbuf_release(&path);
+	return pack;
+}
+
+int generated_pack_has_ext(const struct generated_pack *pack, const char *ext)
+{
+	size_t i;
+	for (i = 0; i < ARRAY_SIZE(exts); i++) {
+		if (strcmp(exts[i].name, ext))
+			continue;
+		return !!pack->tempfiles[i];
+	}
+	BUG("unknown pack extension: '%s'", ext);
+}
+
+void generated_pack_install(struct generated_pack *pack, const char *name,
+			    const char *packdir, const char *packtmp)
+{
+	size_t ext;
+	for (ext = 0; ext < ARRAY_SIZE(exts); ext++) {
+		char *fname;
+
+		fname = mkpathdup("%s/pack-%s%s", packdir, name,
+				  exts[ext].name);
+
+		if (pack->tempfiles[ext]) {
+			const char *fname_old = get_tempfile_path(pack->tempfiles[ext]);
+			struct stat statbuffer;
+
+			if (!stat(fname_old, &statbuffer)) {
+				statbuffer.st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH);
+				chmod(fname_old, statbuffer.st_mode);
+			}
+
+			if (rename_tempfile(&pack->tempfiles[ext], fname))
+				die_errno(_("renaming pack to '%s' failed"),
+					  fname);
+		} else if (!exts[ext].optional)
+			die(_("pack-objects did not write a '%s' file for pack %s-%s"),
+			    exts[ext].name, packtmp, name);
+		else if (unlink(fname) < 0 && errno != ENOENT)
+			die_errno(_("could not unlink: %s"), fname);
+
+		free(fname);
+	}
+}
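
As a rough sketch (not part of the patch itself), a caller in the spirit of `git repack -ad` might drive the helpers above like this. The function name, the bare `--all` invocation, the path arguments and the error handling are simplified assumptions for illustration; the real call sites live in builtin/repack.c and the new repack-*.c files.

#include "git-compat-util.h"
#include "repack.h"
#include "repository.h"
#include "run-command.h"
#include "string-list.h"
#include "strvec.h"

/* Illustrative sketch: repack everything into one pack, then prune. */
static int sketch_repack_all(struct repository *repo,
			     const char *packdir, const char *packtmp)
{
	struct pack_objects_args po_args = PACK_OBJECTS_ARGS_INIT;
	struct write_pack_opts opts = {
		.po_args = &po_args,
		.destination = packtmp,
		.packdir = packdir,
		.packtmp = packtmp,
	};
	struct existing_packs existing = EXISTING_PACKS_INIT;
	struct string_list names = STRING_LIST_INIT_DUP;
	struct string_list no_extra_keep = STRING_LIST_INIT_DUP;
	struct child_process cmd = CHILD_PROCESS_INIT;
	struct string_list_item *item;
	int ret;

	existing.repo = repo;
	existing_packs_collect(&existing, &no_extra_keep);

	/* Write a new pack into packtmp and read its hash(es) back. */
	prepare_pack_objects(&cmd, &po_args, packtmp);
	strvec_push(&cmd.args, "--all");
	cmd.no_stdin = 1; /* no extra revs fed on stdin in this sketch */
	if (start_command(&cmd))
		return -1;
	ret = finish_pack_objects_cmd(repo->hash_algo, &opts, &cmd, &names);
	if (ret)
		return ret;

	/* Move the generated files into place under packdir... */
	for_each_string_list_item(item, &names)
		generated_pack_install(item->util, item->string,
				       packdir, packtmp);

	/* ...and delete whatever the new pack made redundant. */
	existing_packs_mark_for_deletion(&existing, &names);
	existing_packs_remove_redundant(&existing, packdir);

	existing_packs_release(&existing);
	string_list_clear(&names, 1);
	pack_objects_args_release(&po_args);
	return 0;
}

Keeping the existing/generated pack bookkeeping behind this small API is what lets the cruft, geometry, filtered and promisor writers built in the other repack-*.o objects share it instead of reaching back into builtin/repack.c.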
diff --git a/repack.h b/repack.h
new file mode 100644
index 0000000000000000000000000000000000000000..15886c348862d52a36552806e9c69f00e28d5885
--- /dev/null
+++ b/repack.h
@@ -0,0 +1,146 @@
+#ifndef REPACK_H
+#define REPACK_H
+
+#include "list-objects-filter-options.h"
+#include "string-list.h"
+
+struct pack_objects_args {
+	char *window;
+	char *window_memory;
+	char *depth;
+	char *threads;
+	unsigned long max_pack_size;
+	int no_reuse_delta;
+	int no_reuse_object;
+	int quiet;
+	int local;
+	int name_hash_version;
+	int path_walk;
+	int delta_base_offset;
+	int pack_kept_objects;
+	struct list_objects_filter_options filter_options;
+};
+
+#define PACK_OBJECTS_ARGS_INIT { \
+	.delta_base_offset = 1, \
+	.pack_kept_objects = -1, \
+}
+
+struct child_process;
+
+void prepare_pack_objects(struct child_process *cmd,
+			  const struct pack_objects_args *args,
+			  const char *out);
+void pack_objects_args_release(struct pack_objects_args *args);
+
+void repack_remove_redundant_pack(struct repository *repo, const char *dir_name,
+				  const char *base_name);
+
+struct write_pack_opts {
+	struct pack_objects_args *po_args;
+	const char *destination;
+	const char *packdir;
+	const char *packtmp;
+};
+
+const char *write_pack_opts_pack_prefix(struct write_pack_opts *opts);
+int write_pack_opts_is_local(struct write_pack_opts *opts);
+
+int finish_pack_objects_cmd(const struct git_hash_algo *algop,
+			    struct write_pack_opts *opts,
+			    struct child_process *cmd,
+			    struct string_list *names);
+
+struct repository;
+struct packed_git;
+
+struct existing_packs {
+	struct repository *repo;
+	struct string_list kept_packs;
+	struct string_list non_kept_packs;
+	struct string_list cruft_packs;
+	struct string_list midx_packs;
+};
+
+#define EXISTING_PACKS_INIT { \
+	.kept_packs = STRING_LIST_INIT_DUP, \
+	.non_kept_packs = STRING_LIST_INIT_DUP, \
+	.cruft_packs = STRING_LIST_INIT_DUP, \
+}
+
+/*
+ * Adds each existing local pack's name (pack-$HASH) to either
+ * existing->kept_packs, existing->cruft_packs or existing->non_kept_packs,
+ * based on whether it has a corresponding .keep file, is a cruft pack,
+ * or neither; packs that are not kept may be deleted once everything
+ * has been repacked into one file.
+ */
+void existing_packs_collect(struct existing_packs *existing,
+			    const struct string_list *extra_keep);
+int existing_packs_has_non_kept(const struct existing_packs *existing);
+int existing_pack_is_marked_for_deletion(struct string_list_item *item);
+void existing_packs_retain_cruft(struct existing_packs *existing,
+				 struct packed_git *cruft);
+void existing_packs_mark_for_deletion(struct existing_packs *existing,
+				      struct string_list *names);
+void existing_packs_remove_redundant(struct existing_packs *existing,
+				     const char *packdir);
+void existing_packs_release(struct existing_packs *existing);
+
+struct generated_pack;
+
+struct generated_pack *generated_pack_populate(const char *name,
+					       const char *packtmp);
+int generated_pack_has_ext(const struct generated_pack *pack, const char *ext);
+void generated_pack_install(struct generated_pack *pack, const char *name,
+			    const char *packdir, const char *packtmp);
+
+void repack_promisor_objects(struct repository *repo,
+			     const struct pack_objects_args *args,
+			     struct string_list *names, const char *packtmp);
+
+struct pack_geometry {
+	struct packed_git **pack;
+	uint32_t pack_nr, pack_alloc;
+	uint32_t split;
+
+	int split_factor;
+};
+
+void pack_geometry_init(struct pack_geometry *geometry,
+			struct existing_packs *existing,
+			const struct pack_objects_args *args);
+void pack_geometry_split(struct pack_geometry *geometry);
+struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry);
+void pack_geometry_remove_redundant(struct pack_geometry *geometry,
+				    struct string_list *names,
+				    struct existing_packs *existing,
+				    const char *packdir);
+void pack_geometry_release(struct pack_geometry *geometry);
+
+struct tempfile;
+
+struct repack_write_midx_opts {
+	struct existing_packs *existing;
+	struct pack_geometry *geometry;
+	struct string_list *names;
+	const char *refs_snapshot;
+	const char *packdir;
+	int show_progress;
+	int write_bitmaps;
+	int midx_must_contain_cruft;
+};
+
+void midx_snapshot_refs(struct repository *repo, struct tempfile *f);
+int write_midx_included_packs(struct repack_write_midx_opts *opts);
+
+int write_filtered_pack(struct write_pack_opts *opts,
+			struct existing_packs *existing,
+			struct string_list *names);
+
+int write_cruft_pack(struct write_pack_opts *opts,
+		     const char *cruft_expiration,
+		     unsigned long combine_cruft_below_size,
+		     struct string_list *names,
+		     struct existing_packs *existing);
+
+#endif /* REPACK_H */
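
For a concrete picture of how `struct pack_objects_args` maps onto a pack-objects invocation, the following sketch (illustrative only, with made-up values) fills in a few fields and notes roughly what prepare_pack_objects() puts into cmd.args:

#include "git-compat-util.h"
#include "repack.h"
#include "run-command.h"

static void sketch_pack_objects_argv(void)
{
	struct pack_objects_args args = PACK_OBJECTS_ARGS_INIT;
	struct child_process cmd = CHILD_PROCESS_INIT;

	args.window = xstrdup("250");	/* e.g. taken from pack.window */
	args.threads = xstrdup("4");
	args.quiet = 1;

	prepare_pack_objects(&cmd, &args, ".git/objects/pack/.tmp-pack");

	/*
	 * cmd.args now holds roughly:
	 *
	 *   pack-objects --window=250 --threads=4 --quiet
	 *     --delta-base-offset .git/objects/pack/.tmp-pack
	 *
	 * --delta-base-offset comes from PACK_OBJECTS_ARGS_INIT; with
	 * pack_kept_objects still -1 (unset), --honor-pack-keep is not
	 * added. cmd.git_cmd and cmd.out are also set up so the caller
	 * can read the generated pack hashes from pack-objects' stdout.
	 */

	child_process_clear(&cmd);
	pack_objects_args_release(&args);
}

Leaving pack_kept_objects at -1 keeps it in an "unset" state, so callers can fill in their own default later rather than unconditionally passing --honor-pack-keep.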
diff --git a/server-info.c b/server-info.c
index 1d33de821e9f5e5b470dd618127b8354fb580009..b9a710544ab2854879593961dc9b2fc3350435ea 100644
--- a/server-info.c
+++ b/server-info.c
@@ -287,13 +287,12 @@ static int compare_info(const void *a_, const void *b_)
 static void init_pack_info(struct repository *r, const char *infofile,
 			   int force)
 {
-	struct packfile_store *packs = r->objects->packfiles;
 	struct packed_git *p;
 	int stale;
 	int i;
 	size_t alloc = 0;
 
-	for (p = packfile_store_get_all_packs(packs); p; p = p->next) {
+	repo_for_each_pack(r, p) {
 		/* we ignore things on alternate path since they are
 		 * not available to the pullers in general.
 		 */
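
The remaining hunks are the same mechanical conversion as the server-info.c one above: an open-coded walk over packfile_store_get_all_packs() becomes the repo_for_each_pack() iterator. Schematically (illustrative code, not a hunk from this patch):

	/* Before: callers spell out the store and its linked list. */
	struct packed_git *p;

	for (p = packfile_store_get_all_packs(the_repository->objects->packfiles);
	     p; p = p->next) {
		if (!p->pack_local)
			continue;
		/* ... per-pack work ... */
	}

	/* After: the iterator hides where the pack list comes from. */
	repo_for_each_pack(the_repository, p) {
		if (!p->pack_local)
			continue;
		/* ... per-pack work ... */
	}

Since repo_for_each_pack() behaves like a for-loop header, conversions whose body was a single statement gain braces, as in the test-find-pack.c hunk below.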
diff --git a/t/helper/test-find-pack.c b/t/helper/test-find-pack.c
index e001dc3066db70ed08da78f1566eb9f764d93619..fc4b8a77b3007a1f28d89fce94f579b4a6f45f9c 100644
--- a/t/helper/test-find-pack.c
+++ b/t/helper/test-find-pack.c
@@ -39,11 +39,12 @@ int cmd__find_pack(int argc, const char **argv)
 	if (repo_get_oid(the_repository, argv[0], &oid))
 		die("cannot parse %s as an object name", argv[0]);
 
-	for (p = packfile_store_get_all_packs(the_repository->objects->packfiles); p; p = p->next)
+	repo_for_each_pack(the_repository, p) {
 		if (find_pack_entry_one(&oid, p)) {
 			printf("%s\n", p->pack_name);
 			actual_count++;
 		}
+	}
 
 	if (count > -1 && count != actual_count)
 		die("bad packfile count %d instead of %d", actual_count, count);
diff --git a/t/helper/test-pack-mtimes.c b/t/helper/test-pack-mtimes.c
index 7c428c16011a23d18855e87865e1d21d99bb27e0..7a8ee1de24ba83c1593960868a1b4dfdc57c24fa 100644
--- a/t/helper/test-pack-mtimes.c
+++ b/t/helper/test-pack-mtimes.c
@@ -37,7 +37,7 @@ int cmd__pack_mtimes(int argc, const char **argv)
 	if (argc != 2)
 		usage(pack_mtimes_usage);
 
-	for (p = packfile_store_get_all_packs(the_repository->objects->packfiles); p; p = p->next) {
+	repo_for_each_pack(the_repository, p) {
 		strbuf_addstr(&buf, basename(p->pack_name));
 		strbuf_strip_suffix(&buf, ".pack");
 		strbuf_addstr(&buf, ".mtimes");