diff --git a/Documentation/config/maintenance.adoc b/Documentation/config/maintenance.adoc index 2f71934218332215b5a0a39180aaf53b17abfecd..d0c38f03fabd6041a9cdca8f6c6a9b89da819163 100644 --- a/Documentation/config/maintenance.adoc +++ b/Documentation/config/maintenance.adoc @@ -16,19 +16,36 @@ detach. maintenance.strategy:: This string config option provides a way to specify one of a few - recommended schedules for background maintenance. This only affects - which tasks are run during `git maintenance run --schedule=X` - commands, provided no `--task=` arguments are provided. - Further, if a `maintenance..schedule` config value is set, - then that value is used instead of the one provided by - `maintenance.strategy`. The possible strategy strings are: + recommended strategies for repository maintenance. This affects + which tasks are run during `git maintenance run`, provided no + `--task=` arguments are provided. This setting impacts manual + maintenance, auto-maintenance as well as scheduled maintenance. The + tasks that run may be different depending on the maintenance type. + -* `none`: This default setting implies no tasks are run at any schedule. +The maintenance strategy can be further tweaked by setting +`maintenance..enabled` and `maintenance..schedule`. If set, these +values are used instead of the defaults provided by `maintenance.strategy`. ++ +The possible strategies are: ++ +* `none`: This strategy implies no tasks are run at all. This is the default + strategy for scheduled maintenance. +* `gc`: This strategy runs the `gc` task. This is the default strategy for + manual maintenance. +* `geometric`: This strategy performs geometric repacking of packfiles and + keeps auxiliary data structures up-to-date. The strategy expires data in the + reflog and removes worktrees that cannot be located anymore. When the + geometric repacking strategy would decide to do an all-into-one repack, then + the strategy generates a cruft pack for all unreachable objects. Objects that + are already part of a cruft pack will be expired. ++ +This repacking strategy is a full replacement for the `gc` strategy and is +recommended for large repositories. * `incremental`: This setting optimizes for performing small maintenance activities that do not delete any data. This does not schedule the `gc` task, but runs the `prefetch` and `commit-graph` tasks hourly, the `loose-objects` and `incremental-repack` tasks daily, and the `pack-refs` - task weekly. + task weekly. Manual repository maintenance uses the `gc` task. maintenance..enabled:: This boolean config option controls whether the maintenance task @@ -75,6 +92,22 @@ maintenance.incremental-repack.auto:: number of pack-files not in the multi-pack-index is at least the value of `maintenance.incremental-repack.auto`. The default value is 10. +maintenance.geometric-repack.auto:: + This integer config option controls how often the `geometric-repack` + task should be run as part of `git maintenance run --auto`. If zero, + then the `geometric-repack` task will not run with the `--auto` + option. A negative value will force the task to run every time. + Otherwise, a positive value implies the command should run either when + there are packfiles that need to be merged together to retain the + geometric progression, or when there are at least this many loose + objects that would be written into a new packfile. The default value is + 100. + +maintenance.geometric-repack.splitFactor:: + This integer config option controls the factor used for the geometric + sequence. See the `--geometric=` option in linkgit:git-repack[1] for + more details. Defaults to `2`. + maintenance.reflog-expire.auto:: This integer config option controls how often the `reflog-expire` task should be run as part of `git maintenance run --auto`. If zero, then diff --git a/Makefile b/Makefile index f79c905bdcbf8f99ca7d5e85658a13fdddad05d6..06fc80ab98ca32df0ac31727cd84651ff86a24df 100644 --- a/Makefile +++ b/Makefile @@ -1249,6 +1249,12 @@ LIB_OBJS += refs/packed-backend.o LIB_OBJS += refs/ref-cache.o LIB_OBJS += refspec.o LIB_OBJS += remote.o +LIB_OBJS += repack.o +LIB_OBJS += repack-cruft.o +LIB_OBJS += repack-filtered.o +LIB_OBJS += repack-geometry.o +LIB_OBJS += repack-midx.o +LIB_OBJS += repack-promisor.o LIB_OBJS += replace-object.o LIB_OBJS += repo-settings.o LIB_OBJS += repository.o diff --git a/builtin/gc.c b/builtin/gc.c index e19e13d9788076cf05029561a4fdcfea9f1fc095..9739bb0ea246d90d7f69e71a03becd3340605405 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -34,6 +34,7 @@ #include "pack-objects.h" #include "path.h" #include "reflog.h" +#include "repack.h" #include "rerere.h" #include "blob.h" #include "tree.h" @@ -55,7 +56,6 @@ static const char * const builtin_gc_usage[] = { }; static timestamp_t gc_log_expire_time; -static struct strvec repack = STRVEC_INIT; static struct tempfile *pidfile; static struct lock_file log_lock; static struct string_list pack_garbage = STRING_LIST_INIT_DUP; @@ -255,6 +255,7 @@ enum maintenance_task_label { TASK_PREFETCH, TASK_LOOSE_OBJECTS, TASK_INCREMENTAL_REPACK, + TASK_GEOMETRIC_REPACK, TASK_GC, TASK_COMMIT_GRAPH, TASK_PACK_REFS, @@ -448,7 +449,7 @@ static int rerere_gc_condition(struct gc_config *cfg UNUSED) return should_gc; } -static int too_many_loose_objects(struct gc_config *cfg) +static int too_many_loose_objects(int limit) { /* * Quickly check if a "gc" is needed, by estimating how @@ -470,7 +471,7 @@ static int too_many_loose_objects(struct gc_config *cfg) if (!dir) return 0; - auto_threshold = DIV_ROUND_UP(cfg->gc_auto_threshold, 256); + auto_threshold = DIV_ROUND_UP(limit, 256); while ((ent = readdir(dir)) != NULL) { if (strspn(ent->d_name, "0123456789abcdef") != hexsz_loose || ent->d_name[hexsz_loose] != '\0') @@ -618,48 +619,50 @@ static uint64_t estimate_repack_memory(struct gc_config *cfg, return os_cache + heap; } -static int keep_one_pack(struct string_list_item *item, void *data UNUSED) +static int keep_one_pack(struct string_list_item *item, void *data) { - strvec_pushf(&repack, "--keep-pack=%s", basename(item->string)); + struct strvec *args = data; + strvec_pushf(args, "--keep-pack=%s", basename(item->string)); return 0; } static void add_repack_all_option(struct gc_config *cfg, - struct string_list *keep_pack) + struct string_list *keep_pack, + struct strvec *args) { if (cfg->prune_expire && !strcmp(cfg->prune_expire, "now") && !(cfg->cruft_packs && cfg->repack_expire_to)) - strvec_push(&repack, "-a"); + strvec_push(args, "-a"); else if (cfg->cruft_packs) { - strvec_push(&repack, "--cruft"); + strvec_push(args, "--cruft"); if (cfg->prune_expire) - strvec_pushf(&repack, "--cruft-expiration=%s", cfg->prune_expire); + strvec_pushf(args, "--cruft-expiration=%s", cfg->prune_expire); if (cfg->max_cruft_size) - strvec_pushf(&repack, "--max-cruft-size=%lu", + strvec_pushf(args, "--max-cruft-size=%lu", cfg->max_cruft_size); if (cfg->repack_expire_to) - strvec_pushf(&repack, "--expire-to=%s", cfg->repack_expire_to); + strvec_pushf(args, "--expire-to=%s", cfg->repack_expire_to); } else { - strvec_push(&repack, "-A"); + strvec_push(args, "-A"); if (cfg->prune_expire) - strvec_pushf(&repack, "--unpack-unreachable=%s", cfg->prune_expire); + strvec_pushf(args, "--unpack-unreachable=%s", cfg->prune_expire); } if (keep_pack) - for_each_string_list(keep_pack, keep_one_pack, NULL); + for_each_string_list(keep_pack, keep_one_pack, args); if (cfg->repack_filter && *cfg->repack_filter) - strvec_pushf(&repack, "--filter=%s", cfg->repack_filter); + strvec_pushf(args, "--filter=%s", cfg->repack_filter); if (cfg->repack_filter_to && *cfg->repack_filter_to) - strvec_pushf(&repack, "--filter-to=%s", cfg->repack_filter_to); + strvec_pushf(args, "--filter-to=%s", cfg->repack_filter_to); } -static void add_repack_incremental_option(void) +static void add_repack_incremental_option(struct strvec *args) { - strvec_push(&repack, "--no-write-bitmap-index"); + strvec_push(args, "--no-write-bitmap-index"); } -static int need_to_gc(struct gc_config *cfg) +static int need_to_gc(struct gc_config *cfg, struct strvec *repack_args) { /* * Setting gc.auto to 0 or negative can disable the @@ -700,10 +703,10 @@ static int need_to_gc(struct gc_config *cfg) string_list_clear(&keep_pack, 0); } - add_repack_all_option(cfg, &keep_pack); + add_repack_all_option(cfg, &keep_pack, repack_args); string_list_clear(&keep_pack, 0); - } else if (too_many_loose_objects(cfg)) - add_repack_incremental_option(); + } else if (too_many_loose_objects(cfg->gc_auto_threshold)) + add_repack_incremental_option(repack_args); else return 0; @@ -852,6 +855,7 @@ int cmd_gc(int argc, int keep_largest_pack = -1; int skip_foreground_tasks = 0; timestamp_t dummy; + struct strvec repack_args = STRVEC_INIT; struct maintenance_run_opts opts = MAINTENANCE_RUN_OPTS_INIT; struct gc_config cfg = GC_CONFIG_INIT; const char *prune_expire_sentinel = "sentinel"; @@ -891,7 +895,7 @@ int cmd_gc(int argc, show_usage_with_options_if_asked(argc, argv, builtin_gc_usage, builtin_gc_options); - strvec_pushl(&repack, "repack", "-d", "-l", NULL); + strvec_pushl(&repack_args, "repack", "-d", "-l", NULL); gc_config(&cfg); @@ -914,14 +918,14 @@ int cmd_gc(int argc, die(_("failed to parse prune expiry value %s"), cfg.prune_expire); if (aggressive) { - strvec_push(&repack, "-f"); + strvec_push(&repack_args, "-f"); if (cfg.aggressive_depth > 0) - strvec_pushf(&repack, "--depth=%d", cfg.aggressive_depth); + strvec_pushf(&repack_args, "--depth=%d", cfg.aggressive_depth); if (cfg.aggressive_window > 0) - strvec_pushf(&repack, "--window=%d", cfg.aggressive_window); + strvec_pushf(&repack_args, "--window=%d", cfg.aggressive_window); } if (opts.quiet) - strvec_push(&repack, "-q"); + strvec_push(&repack_args, "-q"); if (opts.auto_flag) { if (cfg.detach_auto && opts.detach < 0) @@ -930,7 +934,7 @@ int cmd_gc(int argc, /* * Auto-gc should be least intrusive as possible. */ - if (!need_to_gc(&cfg)) { + if (!need_to_gc(&cfg, &repack_args)) { ret = 0; goto out; } @@ -952,7 +956,7 @@ int cmd_gc(int argc, find_base_packs(&keep_pack, cfg.big_pack_threshold); } - add_repack_all_option(&cfg, &keep_pack); + add_repack_all_option(&cfg, &keep_pack, &repack_args); string_list_clear(&keep_pack, 0); } @@ -1014,9 +1018,9 @@ int cmd_gc(int argc, repack_cmd.git_cmd = 1; repack_cmd.close_object_store = 1; - strvec_pushv(&repack_cmd.args, repack.v); + strvec_pushv(&repack_cmd.args, repack_args.v); if (run_command(&repack_cmd)) - die(FAILED_RUN, repack.v[0]); + die(FAILED_RUN, repack_args.v[0]); if (cfg.prune_expire) { struct child_process prune_cmd = CHILD_PROCESS_INIT; @@ -1055,7 +1059,7 @@ int cmd_gc(int argc, !opts.quiet && !daemonized ? COMMIT_GRAPH_WRITE_PROGRESS : 0, NULL); - if (opts.auto_flag && too_many_loose_objects(&cfg)) + if (opts.auto_flag && too_many_loose_objects(cfg.gc_auto_threshold)) warning(_("There are too many unreachable loose objects; " "run 'git prune' to remove them.")); @@ -1067,6 +1071,7 @@ int cmd_gc(int argc, out: maintenance_run_opts_release(&opts); + strvec_clear(&repack_args); gc_config_release(&cfg); return 0; } @@ -1269,6 +1274,19 @@ static int maintenance_task_gc_background(struct maintenance_run_opts *opts, return run_command(&child); } +static int gc_condition(struct gc_config *cfg) +{ + /* + * Note that it's fine to drop the repack arguments here, as we execute + * git-gc(1) as a separate child process anyway. So it knows to compute + * these arguments again. + */ + struct strvec repack_args = STRVEC_INIT; + int ret = need_to_gc(cfg, &repack_args); + strvec_clear(&repack_args); + return ret; +} + static int prune_packed(struct maintenance_run_opts *opts) { struct child_process child = CHILD_PROCESS_INIT; @@ -1550,6 +1568,108 @@ static int maintenance_task_incremental_repack(struct maintenance_run_opts *opts return 0; } +static int maintenance_task_geometric_repack(struct maintenance_run_opts *opts, + struct gc_config *cfg) +{ + struct pack_geometry geometry = { + .split_factor = 2, + }; + struct pack_objects_args po_args = { + .local = 1, + }; + struct existing_packs existing_packs = EXISTING_PACKS_INIT; + struct string_list kept_packs = STRING_LIST_INIT_DUP; + struct child_process child = CHILD_PROCESS_INIT; + int ret; + + repo_config_get_int(the_repository, "maintenance.geometric-repack.splitFactor", + &geometry.split_factor); + + existing_packs.repo = the_repository; + existing_packs_collect(&existing_packs, &kept_packs); + pack_geometry_init(&geometry, &existing_packs, &po_args); + pack_geometry_split(&geometry); + + child.git_cmd = 1; + + strvec_pushl(&child.args, "repack", "-d", "-l", NULL); + if (geometry.split < geometry.pack_nr) + strvec_pushf(&child.args, "--geometric=%d", + geometry.split_factor); + else + add_repack_all_option(cfg, NULL, &child.args); + if (opts->quiet) + strvec_push(&child.args, "--quiet"); + if (the_repository->settings.core_multi_pack_index) + strvec_push(&child.args, "--write-midx"); + + if (run_command(&child)) { + ret = error(_("failed to perform geometric repack")); + goto out; + } + + ret = 0; + +out: + existing_packs_release(&existing_packs); + pack_geometry_release(&geometry); + return ret; +} + +static int geometric_repack_auto_condition(struct gc_config *cfg UNUSED) +{ + struct pack_geometry geometry = { + .split_factor = 2, + }; + struct pack_objects_args po_args = { + .local = 1, + }; + struct existing_packs existing_packs = EXISTING_PACKS_INIT; + struct string_list kept_packs = STRING_LIST_INIT_DUP; + int auto_value = 100; + int ret; + + repo_config_get_int(the_repository, "maintenance.geometric-repack.auto", + &auto_value); + if (!auto_value) + return 0; + if (auto_value < 0) + return 1; + + repo_config_get_int(the_repository, "maintenance.geometric-repack.splitFactor", + &geometry.split_factor); + + existing_packs.repo = the_repository; + existing_packs_collect(&existing_packs, &kept_packs); + pack_geometry_init(&geometry, &existing_packs, &po_args); + pack_geometry_split(&geometry); + + /* + * When we'd merge at least two packs with one another we always + * perform the repack. + */ + if (geometry.split) { + ret = 1; + goto out; + } + + /* + * Otherwise, we estimate the number of loose objects to determine + * whether we want to create a new packfile or not. + */ + if (too_many_loose_objects(auto_value)) { + ret = 1; + goto out; + } + + ret = 0; + +out: + existing_packs_release(&existing_packs); + pack_geometry_release(&geometry); + return ret; +} + typedef int (*maintenance_task_fn)(struct maintenance_run_opts *opts, struct gc_config *cfg); typedef int (*maintenance_auto_fn)(struct gc_config *cfg); @@ -1592,11 +1712,16 @@ static const struct maintenance_task tasks[] = { .background = maintenance_task_incremental_repack, .auto_condition = incremental_repack_auto_condition, }, + [TASK_GEOMETRIC_REPACK] = { + .name = "geometric-repack", + .background = maintenance_task_geometric_repack, + .auto_condition = geometric_repack_auto_condition, + }, [TASK_GC] = { .name = "gc", .foreground = maintenance_task_gc_foreground, .background = maintenance_task_gc_background, - .auto_condition = need_to_gc, + .auto_condition = gc_condition, }, [TASK_COMMIT_GRAPH] = { .name = "commit-graph", @@ -1702,39 +1827,91 @@ static int maintenance_run_tasks(struct maintenance_run_opts *opts, return result; } +enum maintenance_type { + /* As invoked via `git maintenance run --schedule=`. */ + MAINTENANCE_TYPE_SCHEDULED = (1 << 0), + /* As invoked via `git maintenance run` and with `--auto`. */ + MAINTENANCE_TYPE_MANUAL = (1 << 1), +}; + struct maintenance_strategy { struct { - int enabled; + unsigned type; enum schedule_priority schedule; } tasks[TASK__COUNT]; }; static const struct maintenance_strategy none_strategy = { 0 }; -static const struct maintenance_strategy default_strategy = { + +static const struct maintenance_strategy gc_strategy = { .tasks = { - [TASK_GC].enabled = 1, + [TASK_GC].type = MAINTENANCE_TYPE_MANUAL | MAINTENANCE_TYPE_SCHEDULED, + [TASK_GC].schedule = SCHEDULE_DAILY, }, }; + static const struct maintenance_strategy incremental_strategy = { .tasks = { - [TASK_COMMIT_GRAPH].enabled = 1, + [TASK_COMMIT_GRAPH].type = MAINTENANCE_TYPE_SCHEDULED, [TASK_COMMIT_GRAPH].schedule = SCHEDULE_HOURLY, - [TASK_PREFETCH].enabled = 1, + [TASK_PREFETCH].type = MAINTENANCE_TYPE_SCHEDULED, [TASK_PREFETCH].schedule = SCHEDULE_HOURLY, - [TASK_INCREMENTAL_REPACK].enabled = 1, + [TASK_INCREMENTAL_REPACK].type = MAINTENANCE_TYPE_SCHEDULED, [TASK_INCREMENTAL_REPACK].schedule = SCHEDULE_DAILY, - [TASK_LOOSE_OBJECTS].enabled = 1, + [TASK_LOOSE_OBJECTS].type = MAINTENANCE_TYPE_SCHEDULED, [TASK_LOOSE_OBJECTS].schedule = SCHEDULE_DAILY, - [TASK_PACK_REFS].enabled = 1, + [TASK_PACK_REFS].type = MAINTENANCE_TYPE_SCHEDULED, [TASK_PACK_REFS].schedule = SCHEDULE_WEEKLY, + + /* + * Historically, the "incremental" strategy was only available + * in the context of scheduled maintenance when set up via + * "maintenance.strategy". We have later expanded that config + * to also cover manual maintenance. + * + * To retain backwards compatibility with the previous status + * quo we thus run git-gc(1) in case manual maintenance was + * requested. This is the same as the default strategy, which + * would have been in use beforehand. + */ + [TASK_GC].type = MAINTENANCE_TYPE_MANUAL, }, }; +static const struct maintenance_strategy geometric_strategy = { + .tasks = { + [TASK_COMMIT_GRAPH].type = MAINTENANCE_TYPE_SCHEDULED | MAINTENANCE_TYPE_MANUAL, + [TASK_COMMIT_GRAPH].schedule = SCHEDULE_HOURLY, + [TASK_GEOMETRIC_REPACK].type = MAINTENANCE_TYPE_SCHEDULED | MAINTENANCE_TYPE_MANUAL, + [TASK_GEOMETRIC_REPACK].schedule = SCHEDULE_DAILY, + [TASK_PACK_REFS].type = MAINTENANCE_TYPE_SCHEDULED | MAINTENANCE_TYPE_MANUAL, + [TASK_PACK_REFS].schedule = SCHEDULE_DAILY, + [TASK_RERERE_GC].type = MAINTENANCE_TYPE_SCHEDULED | MAINTENANCE_TYPE_MANUAL, + [TASK_RERERE_GC].schedule = SCHEDULE_WEEKLY, + [TASK_REFLOG_EXPIRE].type = MAINTENANCE_TYPE_SCHEDULED | MAINTENANCE_TYPE_MANUAL, + [TASK_REFLOG_EXPIRE].schedule = SCHEDULE_WEEKLY, + [TASK_WORKTREE_PRUNE].type = MAINTENANCE_TYPE_SCHEDULED | MAINTENANCE_TYPE_MANUAL, + [TASK_WORKTREE_PRUNE].schedule = SCHEDULE_WEEKLY, + }, +}; + +static struct maintenance_strategy parse_maintenance_strategy(const char *name) +{ + if (!strcasecmp(name, "incremental")) + return incremental_strategy; + if (!strcasecmp(name, "gc")) + return gc_strategy; + if (!strcasecmp(name, "geometric")) + return geometric_strategy; + die(_("unknown maintenance strategy: '%s'"), name); +} + static void initialize_task_config(struct maintenance_run_opts *opts, const struct string_list *selected_tasks) { struct strbuf config_name = STRBUF_INIT; struct maintenance_strategy strategy; + enum maintenance_type type; const char *config_str; /* @@ -1762,19 +1939,20 @@ static void initialize_task_config(struct maintenance_run_opts *opts, * - Unscheduled maintenance uses our default strategy. * * Both of these are affected by the gitconfig though, which may - * override specific aspects of our strategy. + * override specific aspects of our strategy. Furthermore, both + * strategies can be overridden by setting "maintenance.strategy". */ if (opts->schedule) { strategy = none_strategy; - - if (!repo_config_get_string_tmp(the_repository, "maintenance.strategy", &config_str)) { - if (!strcasecmp(config_str, "incremental")) - strategy = incremental_strategy; - } + type = MAINTENANCE_TYPE_SCHEDULED; } else { - strategy = default_strategy; + strategy = gc_strategy; + type = MAINTENANCE_TYPE_MANUAL; } + if (!repo_config_get_string_tmp(the_repository, "maintenance.strategy", &config_str)) + strategy = parse_maintenance_strategy(config_str); + for (size_t i = 0; i < TASK__COUNT; i++) { int config_value; @@ -1782,8 +1960,8 @@ static void initialize_task_config(struct maintenance_run_opts *opts, strbuf_addf(&config_name, "maintenance.%s.enabled", tasks[i].name); if (!repo_config_get_bool(the_repository, config_name.buf, &config_value)) - strategy.tasks[i].enabled = config_value; - if (!strategy.tasks[i].enabled) + strategy.tasks[i].type = config_value ? type : 0; + if (!(strategy.tasks[i].type & type)) continue; if (opts->schedule) { diff --git a/builtin/repack.c b/builtin/repack.c index e8730808c535a96732610d8250a31e5b0ced68a7..ad60c4290d44c284a6ec5ea3a8c60715d658cb23 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -3,27 +3,18 @@ #include "builtin.h" #include "config.h" -#include "dir.h" #include "environment.h" -#include "gettext.h" -#include "hex.h" #include "parse-options.h" #include "path.h" #include "run-command.h" #include "server-info.h" -#include "strbuf.h" #include "string-list.h" -#include "strvec.h" #include "midx.h" #include "packfile.h" #include "prune-packed.h" -#include "odb.h" #include "promisor-remote.h" +#include "repack.h" #include "shallow.h" -#include "pack.h" -#include "pack-bitmap.h" -#include "refs.h" -#include "list-objects-filter-options.h" #define ALL_INTO_ONE 1 #define LOOSEN_UNREACHABLE 2 @@ -33,8 +24,6 @@ #define RETAIN_PACK 2 static int pack_everything; -static int delta_base_offset = 1; -static int pack_kept_objects = -1; static int write_bitmaps = -1; static int use_delta_islands; static int run_update_server_info = 1; @@ -53,31 +42,23 @@ static const char incremental_bitmap_conflict_error[] = N_( "--no-write-bitmap-index or disable the pack.writeBitmaps configuration." ); -struct pack_objects_args { - char *window; - char *window_memory; - char *depth; - char *threads; - unsigned long max_pack_size; - int no_reuse_delta; - int no_reuse_object; - int quiet; - int local; - int name_hash_version; - int path_walk; - struct list_objects_filter_options filter_options; +struct repack_config_ctx { + struct pack_objects_args *po_args; + struct pack_objects_args *cruft_po_args; }; static int repack_config(const char *var, const char *value, const struct config_context *ctx, void *cb) { - struct pack_objects_args *cruft_po_args = cb; + struct repack_config_ctx *repack_ctx = cb; + struct pack_objects_args *po_args = repack_ctx->po_args; + struct pack_objects_args *cruft_po_args = repack_ctx->cruft_po_args; if (!strcmp(var, "repack.usedeltabaseoffset")) { - delta_base_offset = git_config_bool(var, value); + po_args->delta_base_offset = git_config_bool(var, value); return 0; } if (!strcmp(var, "repack.packkeptobjects")) { - pack_kept_objects = git_config_bool(var, value); + po_args->pack_kept_objects = git_config_bool(var, value); return 0; } if (!strcmp(var, "repack.writebitmaps") || @@ -116,1138 +97,10 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -static void pack_objects_args_release(struct pack_objects_args *args) -{ - free(args->window); - free(args->window_memory); - free(args->depth); - free(args->threads); - list_objects_filter_release(&args->filter_options); -} - -struct existing_packs { - struct string_list kept_packs; - struct string_list non_kept_packs; - struct string_list cruft_packs; -}; - -#define EXISTING_PACKS_INIT { \ - .kept_packs = STRING_LIST_INIT_DUP, \ - .non_kept_packs = STRING_LIST_INIT_DUP, \ - .cruft_packs = STRING_LIST_INIT_DUP, \ -} - -static int has_existing_non_kept_packs(const struct existing_packs *existing) -{ - return existing->non_kept_packs.nr || existing->cruft_packs.nr; -} - -static void pack_mark_for_deletion(struct string_list_item *item) -{ - item->util = (void*)((uintptr_t)item->util | DELETE_PACK); -} - -static void pack_unmark_for_deletion(struct string_list_item *item) -{ - item->util = (void*)((uintptr_t)item->util & ~DELETE_PACK); -} - -static int pack_is_marked_for_deletion(struct string_list_item *item) -{ - return (uintptr_t)item->util & DELETE_PACK; -} - -static void pack_mark_retained(struct string_list_item *item) -{ - item->util = (void*)((uintptr_t)item->util | RETAIN_PACK); -} - -static int pack_is_retained(struct string_list_item *item) -{ - return (uintptr_t)item->util & RETAIN_PACK; -} - -static void mark_packs_for_deletion_1(struct string_list *names, - struct string_list *list) -{ - struct string_list_item *item; - const int hexsz = the_hash_algo->hexsz; - - for_each_string_list_item(item, list) { - char *sha1; - size_t len = strlen(item->string); - if (len < hexsz) - continue; - sha1 = item->string + len - hexsz; - - if (pack_is_retained(item)) { - pack_unmark_for_deletion(item); - } else if (!string_list_has_string(names, sha1)) { - /* - * Mark this pack for deletion, which ensures - * that this pack won't be included in a MIDX - * (if `--write-midx` was given) and that we - * will actually delete this pack (if `-d` was - * given). - */ - pack_mark_for_deletion(item); - } - } -} - -static void retain_cruft_pack(struct existing_packs *existing, - struct packed_git *cruft) -{ - struct strbuf buf = STRBUF_INIT; - struct string_list_item *item; - - strbuf_addstr(&buf, pack_basename(cruft)); - strbuf_strip_suffix(&buf, ".pack"); - - item = string_list_lookup(&existing->cruft_packs, buf.buf); - if (!item) - BUG("could not find cruft pack '%s'", pack_basename(cruft)); - - pack_mark_retained(item); - strbuf_release(&buf); -} - -static void mark_packs_for_deletion(struct existing_packs *existing, - struct string_list *names) - -{ - mark_packs_for_deletion_1(names, &existing->non_kept_packs); - mark_packs_for_deletion_1(names, &existing->cruft_packs); -} - -static void remove_redundant_pack(const char *dir_name, const char *base_name) -{ - struct strbuf buf = STRBUF_INIT; - struct odb_source *source = the_repository->objects->sources; - struct multi_pack_index *m = get_multi_pack_index(source); - strbuf_addf(&buf, "%s.pack", base_name); - if (m && source->local && midx_contains_pack(m, buf.buf)) - clear_midx_file(the_repository); - strbuf_insertf(&buf, 0, "%s/", dir_name); - unlink_pack_path(buf.buf, 1); - strbuf_release(&buf); -} - -static void remove_redundant_packs_1(struct string_list *packs) -{ - struct string_list_item *item; - for_each_string_list_item(item, packs) { - if (!pack_is_marked_for_deletion(item)) - continue; - remove_redundant_pack(packdir, item->string); - } -} - -static void remove_redundant_existing_packs(struct existing_packs *existing) -{ - remove_redundant_packs_1(&existing->non_kept_packs); - remove_redundant_packs_1(&existing->cruft_packs); -} - -static void existing_packs_release(struct existing_packs *existing) -{ - string_list_clear(&existing->kept_packs, 0); - string_list_clear(&existing->non_kept_packs, 0); - string_list_clear(&existing->cruft_packs, 0); -} - -/* - * Adds all packs hex strings (pack-$HASH) to either packs->non_kept - * or packs->kept based on whether each pack has a corresponding - * .keep file or not. Packs without a .keep file are not to be kept - * if we are going to pack everything into one file. - */ -static void collect_pack_filenames(struct existing_packs *existing, - const struct string_list *extra_keep) -{ - struct packfile_store *packs = the_repository->objects->packfiles; - struct packed_git *p; - struct strbuf buf = STRBUF_INIT; - - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { - int i; - const char *base; - - if (!p->pack_local) - continue; - - base = pack_basename(p); - - for (i = 0; i < extra_keep->nr; i++) - if (!fspathcmp(base, extra_keep->items[i].string)) - break; - - strbuf_reset(&buf); - strbuf_addstr(&buf, base); - strbuf_strip_suffix(&buf, ".pack"); - - if ((extra_keep->nr > 0 && i < extra_keep->nr) || p->pack_keep) - string_list_append(&existing->kept_packs, buf.buf); - else if (p->is_cruft) - string_list_append(&existing->cruft_packs, buf.buf); - else - string_list_append(&existing->non_kept_packs, buf.buf); - } - - string_list_sort(&existing->kept_packs); - string_list_sort(&existing->non_kept_packs); - string_list_sort(&existing->cruft_packs); - strbuf_release(&buf); -} - -static void prepare_pack_objects(struct child_process *cmd, - const struct pack_objects_args *args, - const char *out) -{ - strvec_push(&cmd->args, "pack-objects"); - if (args->window) - strvec_pushf(&cmd->args, "--window=%s", args->window); - if (args->window_memory) - strvec_pushf(&cmd->args, "--window-memory=%s", args->window_memory); - if (args->depth) - strvec_pushf(&cmd->args, "--depth=%s", args->depth); - if (args->threads) - strvec_pushf(&cmd->args, "--threads=%s", args->threads); - if (args->max_pack_size) - strvec_pushf(&cmd->args, "--max-pack-size=%lu", args->max_pack_size); - if (args->no_reuse_delta) - strvec_pushf(&cmd->args, "--no-reuse-delta"); - if (args->no_reuse_object) - strvec_pushf(&cmd->args, "--no-reuse-object"); - if (args->name_hash_version) - strvec_pushf(&cmd->args, "--name-hash-version=%d", args->name_hash_version); - if (args->path_walk) - strvec_pushf(&cmd->args, "--path-walk"); - if (args->local) - strvec_push(&cmd->args, "--local"); - if (args->quiet) - strvec_push(&cmd->args, "--quiet"); - if (delta_base_offset) - strvec_push(&cmd->args, "--delta-base-offset"); - strvec_push(&cmd->args, out); - cmd->git_cmd = 1; - cmd->out = -1; -} - -/* - * Write oid to the given struct child_process's stdin, starting it first if - * necessary. - */ -static int write_oid(const struct object_id *oid, - struct packed_git *pack UNUSED, - uint32_t pos UNUSED, void *data) -{ - struct child_process *cmd = data; - - if (cmd->in == -1) { - if (start_command(cmd)) - die(_("could not start pack-objects to repack promisor objects")); - } - - if (write_in_full(cmd->in, oid_to_hex(oid), the_hash_algo->hexsz) < 0 || - write_in_full(cmd->in, "\n", 1) < 0) - die(_("failed to feed promisor objects to pack-objects")); - return 0; -} - -static struct { - const char *name; - unsigned optional:1; -} exts[] = { - {".pack"}, - {".rev", 1}, - {".mtimes", 1}, - {".bitmap", 1}, - {".promisor", 1}, - {".idx"}, -}; - -struct generated_pack_data { - struct tempfile *tempfiles[ARRAY_SIZE(exts)]; -}; - -static struct generated_pack_data *populate_pack_exts(const char *name) -{ - struct stat statbuf; - struct strbuf path = STRBUF_INIT; - struct generated_pack_data *data = xcalloc(1, sizeof(*data)); - int i; - - for (i = 0; i < ARRAY_SIZE(exts); i++) { - strbuf_reset(&path); - strbuf_addf(&path, "%s-%s%s", packtmp, name, exts[i].name); - - if (stat(path.buf, &statbuf)) - continue; - - data->tempfiles[i] = register_tempfile(path.buf); - } - - strbuf_release(&path); - return data; -} - -static int has_pack_ext(const struct generated_pack_data *data, - const char *ext) -{ - int i; - for (i = 0; i < ARRAY_SIZE(exts); i++) { - if (strcmp(exts[i].name, ext)) - continue; - return !!data->tempfiles[i]; - } - BUG("unknown pack extension: '%s'", ext); -} - -static void repack_promisor_objects(const struct pack_objects_args *args, - struct string_list *names) -{ - struct child_process cmd = CHILD_PROCESS_INIT; - FILE *out; - struct strbuf line = STRBUF_INIT; - - prepare_pack_objects(&cmd, args, packtmp); - cmd.in = -1; - - /* - * NEEDSWORK: Giving pack-objects only the OIDs without any ordering - * hints may result in suboptimal deltas in the resulting pack. See if - * the OIDs can be sent with fake paths such that pack-objects can use a - * {type -> existing pack order} ordering when computing deltas instead - * of a {type -> size} ordering, which may produce better deltas. - */ - for_each_packed_object(the_repository, write_oid, &cmd, - FOR_EACH_OBJECT_PROMISOR_ONLY); - - if (cmd.in == -1) { - /* No packed objects; cmd was never started */ - child_process_clear(&cmd); - return; - } - - close(cmd.in); - - out = xfdopen(cmd.out, "r"); - while (strbuf_getline_lf(&line, out) != EOF) { - struct string_list_item *item; - char *promisor_name; - - if (line.len != the_hash_algo->hexsz) - die(_("repack: Expecting full hex object ID lines only from pack-objects.")); - item = string_list_append(names, line.buf); - - /* - * pack-objects creates the .pack and .idx files, but not the - * .promisor file. Create the .promisor file, which is empty. - * - * NEEDSWORK: fetch-pack sometimes generates non-empty - * .promisor files containing the ref names and associated - * hashes at the point of generation of the corresponding - * packfile, but this would not preserve their contents. Maybe - * concatenate the contents of all .promisor files instead of - * just creating a new empty file. - */ - promisor_name = mkpathdup("%s-%s.promisor", packtmp, - line.buf); - write_promisor_file(promisor_name, NULL, 0); - - item->util = populate_pack_exts(item->string); - - free(promisor_name); - } - - fclose(out); - if (finish_command(&cmd)) - die(_("could not finish pack-objects to repack promisor objects")); - strbuf_release(&line); -} - -struct pack_geometry { - struct packed_git **pack; - uint32_t pack_nr, pack_alloc; - uint32_t split; - - int split_factor; -}; - -static uint32_t geometry_pack_weight(struct packed_git *p) -{ - if (open_pack_index(p)) - die(_("cannot open index for %s"), p->pack_name); - return p->num_objects; -} - -static int geometry_cmp(const void *va, const void *vb) -{ - uint32_t aw = geometry_pack_weight(*(struct packed_git **)va), - bw = geometry_pack_weight(*(struct packed_git **)vb); - - if (aw < bw) - return -1; - if (aw > bw) - return 1; - return 0; -} - -static void init_pack_geometry(struct pack_geometry *geometry, - struct existing_packs *existing, - const struct pack_objects_args *args) -{ - struct packfile_store *packs = the_repository->objects->packfiles; - struct packed_git *p; - struct strbuf buf = STRBUF_INIT; - - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { - if (args->local && !p->pack_local) - /* - * When asked to only repack local packfiles we skip - * over any packfiles that are borrowed from alternate - * object directories. - */ - continue; - - if (!pack_kept_objects) { - /* - * Any pack that has its pack_keep bit set will - * appear in existing->kept_packs below, but - * this saves us from doing a more expensive - * check. - */ - if (p->pack_keep) - continue; - - /* - * The pack may be kept via the --keep-pack - * option; check 'existing->kept_packs' to - * determine whether to ignore it. - */ - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(p)); - strbuf_strip_suffix(&buf, ".pack"); - - if (string_list_has_string(&existing->kept_packs, buf.buf)) - continue; - } - if (p->is_cruft) - continue; - - ALLOC_GROW(geometry->pack, - geometry->pack_nr + 1, - geometry->pack_alloc); - - geometry->pack[geometry->pack_nr] = p; - geometry->pack_nr++; - } - - QSORT(geometry->pack, geometry->pack_nr, geometry_cmp); - strbuf_release(&buf); -} - -static void split_pack_geometry(struct pack_geometry *geometry) -{ - uint32_t i; - uint32_t split; - off_t total_size = 0; - - if (!geometry->pack_nr) { - geometry->split = geometry->pack_nr; - return; - } - - /* - * First, count the number of packs (in descending order of size) which - * already form a geometric progression. - */ - for (i = geometry->pack_nr - 1; i > 0; i--) { - struct packed_git *ours = geometry->pack[i]; - struct packed_git *prev = geometry->pack[i - 1]; - - if (unsigned_mult_overflows(geometry->split_factor, - geometry_pack_weight(prev))) - die(_("pack %s too large to consider in geometric " - "progression"), - prev->pack_name); - - if (geometry_pack_weight(ours) < - geometry->split_factor * geometry_pack_weight(prev)) - break; - } - - split = i; - - if (split) { - /* - * Move the split one to the right, since the top element in the - * last-compared pair can't be in the progression. Only do this - * when we split in the middle of the array (otherwise if we got - * to the end, then the split is in the right place). - */ - split++; - } - - /* - * Then, anything to the left of 'split' must be in a new pack. But, - * creating that new pack may cause packs in the heavy half to no longer - * form a geometric progression. - * - * Compute an expected size of the new pack, and then determine how many - * packs in the heavy half need to be joined into it (if any) to restore - * the geometric progression. - */ - for (i = 0; i < split; i++) { - struct packed_git *p = geometry->pack[i]; - - if (unsigned_add_overflows(total_size, geometry_pack_weight(p))) - die(_("pack %s too large to roll up"), p->pack_name); - total_size += geometry_pack_weight(p); - } - for (i = split; i < geometry->pack_nr; i++) { - struct packed_git *ours = geometry->pack[i]; - - if (unsigned_mult_overflows(geometry->split_factor, - total_size)) - die(_("pack %s too large to roll up"), ours->pack_name); - - if (geometry_pack_weight(ours) < - geometry->split_factor * total_size) { - if (unsigned_add_overflows(total_size, - geometry_pack_weight(ours))) - die(_("pack %s too large to roll up"), - ours->pack_name); - - split++; - total_size += geometry_pack_weight(ours); - } else - break; - } - - geometry->split = split; -} - -static struct packed_git *get_preferred_pack(struct pack_geometry *geometry) -{ - uint32_t i; - - if (!geometry) { - /* - * No geometry means either an all-into-one repack (in which - * case there is only one pack left and it is the largest) or an - * incremental one. - * - * If repacking incrementally, then we could check the size of - * all packs to determine which should be preferred, but leave - * this for later. - */ - return NULL; - } - if (geometry->split == geometry->pack_nr) - return NULL; - - /* - * The preferred pack is the largest pack above the split line. In - * other words, it is the largest pack that does not get rolled up in - * the geometric repack. - */ - for (i = geometry->pack_nr; i > geometry->split; i--) - /* - * A pack that is not local would never be included in a - * multi-pack index. We thus skip over any non-local packs. - */ - if (geometry->pack[i - 1]->pack_local) - return geometry->pack[i - 1]; - - return NULL; -} - -static void geometry_remove_redundant_packs(struct pack_geometry *geometry, - struct string_list *names, - struct existing_packs *existing) -{ - struct strbuf buf = STRBUF_INIT; - uint32_t i; - - for (i = 0; i < geometry->split; i++) { - struct packed_git *p = geometry->pack[i]; - if (string_list_has_string(names, hash_to_hex(p->hash))) - continue; - - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(p)); - strbuf_strip_suffix(&buf, ".pack"); - - if ((p->pack_keep) || - (string_list_has_string(&existing->kept_packs, buf.buf))) - continue; - - remove_redundant_pack(packdir, buf.buf); - } - - strbuf_release(&buf); -} - -static void free_pack_geometry(struct pack_geometry *geometry) -{ - if (!geometry) - return; - - free(geometry->pack); -} - -static int midx_has_unknown_packs(char **midx_pack_names, - size_t midx_pack_names_nr, - struct string_list *include, - struct pack_geometry *geometry, - struct existing_packs *existing) -{ - size_t i; - - string_list_sort(include); - - for (i = 0; i < midx_pack_names_nr; i++) { - const char *pack_name = midx_pack_names[i]; - - /* - * Determine whether or not each MIDX'd pack from the existing - * MIDX (if any) is represented in the new MIDX. For each pack - * in the MIDX, it must either be: - * - * - In the "include" list of packs to be included in the new - * MIDX. Note this function is called before the include - * list is populated with any cruft pack(s). - * - * - Below the geometric split line (if using pack geometry), - * indicating that the pack won't be included in the new - * MIDX, but its contents were rolled up as part of the - * geometric repack. - * - * - In the existing non-kept packs list (if not using pack - * geometry), and marked as non-deleted. - */ - if (string_list_has_string(include, pack_name)) { - continue; - } else if (geometry) { - struct strbuf buf = STRBUF_INIT; - uint32_t j; - - for (j = 0; j < geometry->split; j++) { - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(geometry->pack[j])); - strbuf_strip_suffix(&buf, ".pack"); - strbuf_addstr(&buf, ".idx"); - - if (!strcmp(pack_name, buf.buf)) { - strbuf_release(&buf); - break; - } - } - - strbuf_release(&buf); - - if (j < geometry->split) - continue; - } else { - struct string_list_item *item; - - item = string_list_lookup(&existing->non_kept_packs, - pack_name); - if (item && !pack_is_marked_for_deletion(item)) - continue; - } - - /* - * If we got to this point, the MIDX includes some pack that we - * don't know about. - */ - return 1; - } - - return 0; -} - -struct midx_snapshot_ref_data { - struct tempfile *f; - struct oidset seen; - int preferred; -}; - -static int midx_snapshot_ref_one(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flag UNUSED, void *_data) -{ - struct midx_snapshot_ref_data *data = _data; - struct object_id peeled; - - if (!peel_iterated_oid(the_repository, oid, &peeled)) - oid = &peeled; - - if (oidset_insert(&data->seen, oid)) - return 0; /* already seen */ - - if (odb_read_object_info(the_repository->objects, oid, NULL) != OBJ_COMMIT) - return 0; - - fprintf(data->f->fp, "%s%s\n", data->preferred ? "+" : "", - oid_to_hex(oid)); - - return 0; -} - -static void midx_snapshot_refs(struct tempfile *f) -{ - struct midx_snapshot_ref_data data; - const struct string_list *preferred = bitmap_preferred_tips(the_repository); - - data.f = f; - data.preferred = 0; - oidset_init(&data.seen, 0); - - if (!fdopen_tempfile(f, "w")) - die(_("could not open tempfile %s for writing"), - get_tempfile_path(f)); - - if (preferred) { - struct string_list_item *item; - - data.preferred = 1; - for_each_string_list_item(item, preferred) - refs_for_each_ref_in(get_main_ref_store(the_repository), - item->string, - midx_snapshot_ref_one, &data); - data.preferred = 0; - } - - refs_for_each_ref(get_main_ref_store(the_repository), - midx_snapshot_ref_one, &data); - - if (close_tempfile_gently(f)) { - int save_errno = errno; - delete_tempfile(&f); - errno = save_errno; - die_errno(_("could not close refs snapshot tempfile")); - } - - oidset_clear(&data.seen); -} - -static void midx_included_packs(struct string_list *include, - struct existing_packs *existing, - char **midx_pack_names, - size_t midx_pack_names_nr, - struct string_list *names, - struct pack_geometry *geometry) -{ - struct string_list_item *item; - struct strbuf buf = STRBUF_INIT; - - for_each_string_list_item(item, &existing->kept_packs) { - strbuf_reset(&buf); - strbuf_addf(&buf, "%s.idx", item->string); - string_list_insert(include, buf.buf); - } - - for_each_string_list_item(item, names) { - strbuf_reset(&buf); - strbuf_addf(&buf, "pack-%s.idx", item->string); - string_list_insert(include, buf.buf); - } - - if (geometry->split_factor) { - uint32_t i; - - for (i = geometry->split; i < geometry->pack_nr; i++) { - struct packed_git *p = geometry->pack[i]; - - /* - * The multi-pack index never refers to packfiles part - * of an alternate object database, so we skip these. - * While git-multi-pack-index(1) would silently ignore - * them anyway, this allows us to skip executing the - * command completely when we have only non-local - * packfiles. - */ - if (!p->pack_local) - continue; - - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(p)); - strbuf_strip_suffix(&buf, ".pack"); - strbuf_addstr(&buf, ".idx"); - - string_list_insert(include, buf.buf); - } - } else { - for_each_string_list_item(item, &existing->non_kept_packs) { - if (pack_is_marked_for_deletion(item)) - continue; - - strbuf_reset(&buf); - strbuf_addf(&buf, "%s.idx", item->string); - string_list_insert(include, buf.buf); - } - } - - if (midx_must_contain_cruft || - midx_has_unknown_packs(midx_pack_names, midx_pack_names_nr, - include, geometry, existing)) { - /* - * If there are one or more unknown pack(s) present (see - * midx_has_unknown_packs() for what makes a pack - * "unknown") in the MIDX before the repack, keep them - * as they may be required to form a reachability - * closure if the MIDX is bitmapped. - * - * For example, a cruft pack can be required to form a - * reachability closure if the MIDX is bitmapped and one - * or more of the bitmap's selected commits reaches a - * once-cruft object that was later made reachable. - */ - for_each_string_list_item(item, &existing->cruft_packs) { - /* - * When doing a --geometric repack, there is no - * need to check for deleted packs, since we're - * by definition not doing an ALL_INTO_ONE - * repack (hence no packs will be deleted). - * Otherwise we must check for and exclude any - * packs which are enqueued for deletion. - * - * So we could omit the conditional below in the - * --geometric case, but doing so is unnecessary - * since no packs are marked as pending - * deletion (since we only call - * `mark_packs_for_deletion()` when doing an - * all-into-one repack). - */ - if (pack_is_marked_for_deletion(item)) - continue; - - strbuf_reset(&buf); - strbuf_addf(&buf, "%s.idx", item->string); - string_list_insert(include, buf.buf); - } - } else { - /* - * Modern versions of Git (with the appropriate - * configuration setting) will write new copies of - * once-cruft objects when doing a --geometric repack. - * - * If the MIDX has no cruft pack, new packs written - * during a --geometric repack will not rely on the - * cruft pack to form a reachability closure, so we can - * avoid including them in the MIDX in that case. - */ - ; - } - - strbuf_release(&buf); -} - -static int write_midx_included_packs(struct string_list *include, - struct pack_geometry *geometry, - struct string_list *names, - const char *refs_snapshot, - int show_progress, int write_bitmaps) -{ - struct child_process cmd = CHILD_PROCESS_INIT; - struct string_list_item *item; - struct packed_git *preferred = get_preferred_pack(geometry); - FILE *in; - int ret; - - if (!include->nr) - return 0; - - cmd.in = -1; - cmd.git_cmd = 1; - - strvec_push(&cmd.args, "multi-pack-index"); - strvec_pushl(&cmd.args, "write", "--stdin-packs", NULL); - - if (show_progress) - strvec_push(&cmd.args, "--progress"); - else - strvec_push(&cmd.args, "--no-progress"); - - if (write_bitmaps) - strvec_push(&cmd.args, "--bitmap"); - - if (preferred) - strvec_pushf(&cmd.args, "--preferred-pack=%s", - pack_basename(preferred)); - else if (names->nr) { - /* The largest pack was repacked, meaning that either - * one or two packs exist depending on whether the - * repository has a cruft pack or not. - * - * Select the non-cruft one as preferred to encourage - * pack-reuse among packs containing reachable objects - * over unreachable ones. - * - * (Note we could write multiple packs here if - * `--max-pack-size` was given, but any one of them - * will suffice, so pick the first one.) - */ - for_each_string_list_item(item, names) { - struct generated_pack_data *data = item->util; - if (has_pack_ext(data, ".mtimes")) - continue; - - strvec_pushf(&cmd.args, "--preferred-pack=pack-%s.pack", - item->string); - break; - } - } else { - /* - * No packs were kept, and no packs were written. The - * only thing remaining are .keep packs (unless - * --pack-kept-objects was given). - * - * Set the `--preferred-pack` arbitrarily here. - */ - ; - } - - if (refs_snapshot) - strvec_pushf(&cmd.args, "--refs-snapshot=%s", refs_snapshot); - - ret = start_command(&cmd); - if (ret) - return ret; - - in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, include) - fprintf(in, "%s\n", item->string); - fclose(in); - - return finish_command(&cmd); -} - -static void remove_redundant_bitmaps(struct string_list *include, - const char *packdir) -{ - struct strbuf path = STRBUF_INIT; - struct string_list_item *item; - size_t packdir_len; - - strbuf_addstr(&path, packdir); - strbuf_addch(&path, '/'); - packdir_len = path.len; - - /* - * Remove any pack bitmaps corresponding to packs which are now - * included in the MIDX. - */ - for_each_string_list_item(item, include) { - strbuf_addstr(&path, item->string); - strbuf_strip_suffix(&path, ".idx"); - strbuf_addstr(&path, ".bitmap"); - - if (unlink(path.buf) && errno != ENOENT) - warning_errno(_("could not remove stale bitmap: %s"), - path.buf); - - strbuf_setlen(&path, packdir_len); - } - strbuf_release(&path); -} - -static int finish_pack_objects_cmd(struct child_process *cmd, - struct string_list *names, - int local) -{ - FILE *out; - struct strbuf line = STRBUF_INIT; - - out = xfdopen(cmd->out, "r"); - while (strbuf_getline_lf(&line, out) != EOF) { - struct string_list_item *item; - - if (line.len != the_hash_algo->hexsz) - die(_("repack: Expecting full hex object ID lines only " - "from pack-objects.")); - /* - * Avoid putting packs written outside of the repository in the - * list of names. - */ - if (local) { - item = string_list_append(names, line.buf); - item->util = populate_pack_exts(line.buf); - } - } - fclose(out); - - strbuf_release(&line); - - return finish_command(cmd); -} - -static int write_filtered_pack(const struct pack_objects_args *args, - const char *destination, - const char *pack_prefix, - struct existing_packs *existing, - struct string_list *names) -{ - struct child_process cmd = CHILD_PROCESS_INIT; - struct string_list_item *item; - FILE *in; - int ret; - const char *caret; - const char *scratch; - int local = skip_prefix(destination, packdir, &scratch); - - prepare_pack_objects(&cmd, args, destination); - - strvec_push(&cmd.args, "--stdin-packs"); - - if (!pack_kept_objects) - strvec_push(&cmd.args, "--honor-pack-keep"); - for_each_string_list_item(item, &existing->kept_packs) - strvec_pushf(&cmd.args, "--keep-pack=%s", item->string); - - cmd.in = -1; - - ret = start_command(&cmd); - if (ret) - return ret; - - /* - * Here 'names' contains only the pack(s) that were just - * written, which is exactly the packs we want to keep. Also - * 'existing_kept_packs' already contains the packs in - * 'keep_pack_list'. - */ - in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, names) - fprintf(in, "^%s-%s.pack\n", pack_prefix, item->string); - for_each_string_list_item(item, &existing->non_kept_packs) - fprintf(in, "%s.pack\n", item->string); - for_each_string_list_item(item, &existing->cruft_packs) - fprintf(in, "%s.pack\n", item->string); - caret = pack_kept_objects ? "" : "^"; - for_each_string_list_item(item, &existing->kept_packs) - fprintf(in, "%s%s.pack\n", caret, item->string); - fclose(in); - - return finish_pack_objects_cmd(&cmd, names, local); -} - -static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, - struct existing_packs *existing) -{ - struct packfile_store *packs = the_repository->objects->packfiles; - struct packed_git *p; - struct strbuf buf = STRBUF_INIT; - size_t i; - - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { - if (!(p->is_cruft && p->pack_local)) - continue; - - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(p)); - strbuf_strip_suffix(&buf, ".pack"); - - if (!string_list_has_string(&existing->cruft_packs, buf.buf)) - continue; - - if (p->pack_size < combine_cruft_below_size) { - fprintf(in, "-%s\n", pack_basename(p)); - } else { - retain_cruft_pack(existing, p); - fprintf(in, "%s\n", pack_basename(p)); - } - } - - for (i = 0; i < existing->non_kept_packs.nr; i++) - fprintf(in, "-%s.pack\n", - existing->non_kept_packs.items[i].string); - - strbuf_release(&buf); -} - -static int write_cruft_pack(const struct pack_objects_args *args, - const char *destination, - const char *pack_prefix, - const char *cruft_expiration, - unsigned long combine_cruft_below_size, - struct string_list *names, - struct existing_packs *existing) -{ - struct child_process cmd = CHILD_PROCESS_INIT; - struct string_list_item *item; - FILE *in; - int ret; - const char *scratch; - int local = skip_prefix(destination, packdir, &scratch); - - prepare_pack_objects(&cmd, args, destination); - - strvec_push(&cmd.args, "--cruft"); - if (cruft_expiration) - strvec_pushf(&cmd.args, "--cruft-expiration=%s", - cruft_expiration); - - strvec_push(&cmd.args, "--honor-pack-keep"); - strvec_push(&cmd.args, "--non-empty"); - - cmd.in = -1; - - ret = start_command(&cmd); - if (ret) - return ret; - - /* - * names has a confusing double use: it both provides the list - * of just-written new packs, and accepts the name of the cruft - * pack we are writing. - * - * By the time it is read here, it contains only the pack(s) - * that were just written, which is exactly the set of packs we - * want to consider kept. - * - * If `--expire-to` is given, the double-use served by `names` - * ensures that the pack written to `--expire-to` excludes any - * objects contained in the cruft pack. - */ - in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, names) - fprintf(in, "%s-%s.pack\n", pack_prefix, item->string); - if (combine_cruft_below_size && !cruft_expiration) { - combine_small_cruft_packs(in, combine_cruft_below_size, - existing); - } else { - for_each_string_list_item(item, &existing->non_kept_packs) - fprintf(in, "-%s.pack\n", item->string); - for_each_string_list_item(item, &existing->cruft_packs) - fprintf(in, "-%s.pack\n", item->string); - } - for_each_string_list_item(item, &existing->kept_packs) - fprintf(in, "%s.pack\n", item->string); - fclose(in); - - return finish_pack_objects_cmd(&cmd, names, local); -} - -static const char *find_pack_prefix(const char *packdir, const char *packtmp) -{ - const char *pack_prefix; - if (!skip_prefix(packtmp, packdir, &pack_prefix)) - die(_("pack prefix %s does not begin with objdir %s"), - packtmp, packdir); - if (*pack_prefix == '/') - pack_prefix++; - return pack_prefix; -} - int cmd_repack(int argc, const char **argv, const char *prefix, - struct repository *repo UNUSED) + struct repository *repo) { struct child_process cmd = CHILD_PROCESS_INIT; struct string_list_item *item; @@ -1255,18 +108,18 @@ int cmd_repack(int argc, struct existing_packs existing = EXISTING_PACKS_INIT; struct pack_geometry geometry = { 0 }; struct tempfile *refs_snapshot = NULL; - int i, ext, ret; + struct write_pack_opts opts = { 0 }; + int i, ret; int show_progress; - char **midx_pack_names = NULL; - size_t midx_pack_names_nr = 0; /* variables to be filled by option parsing */ + struct repack_config_ctx config_ctx; int delete_redundant = 0; const char *unpack_unreachable = NULL; int keep_unreachable = 0; struct string_list keep_pack_list = STRING_LIST_INIT_NODUP; - struct pack_objects_args po_args = { 0 }; - struct pack_objects_args cruft_po_args = { 0 }; + struct pack_objects_args po_args = PACK_OBJECTS_ARGS_INIT; + struct pack_objects_args cruft_po_args = PACK_OBJECTS_ARGS_INIT; int write_midx = 0; const char *cruft_expiration = NULL; const char *expire_to = NULL; @@ -1327,7 +180,7 @@ int cmd_repack(int argc, OPT_UNSIGNED(0, "max-pack-size", &po_args.max_pack_size, N_("maximum size of each packfile")), OPT_PARSE_LIST_OBJECTS_FILTER(&po_args.filter_options), - OPT_BOOL(0, "pack-kept-objects", &pack_kept_objects, + OPT_BOOL(0, "pack-kept-objects", &po_args.pack_kept_objects, N_("repack objects in packs marked with .keep")), OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"), N_("do not repack this pack")), @@ -1344,7 +197,11 @@ int cmd_repack(int argc, list_objects_filter_init(&po_args.filter_options); - repo_config(the_repository, repack_config, &cruft_po_args); + memset(&config_ctx, 0, sizeof(config_ctx)); + config_ctx.po_args = &po_args; + config_ctx.cruft_po_args = &cruft_po_args; + + repo_config(repo, repack_config, &config_ctx); argc = parse_options(argc, argv, prefix, builtin_repack_options, git_repack_usage, 0); @@ -1354,7 +211,7 @@ int cmd_repack(int argc, po_args.depth = xstrdup_or_null(opt_depth); po_args.threads = xstrdup_or_null(opt_threads); - if (delete_redundant && the_repository->repository_format_precious_objects) + if (delete_redundant && repo->repository_format_precious_objects) die(_("cannot delete packs in a precious-objects repo")); die_for_incompatible_opt3(unpack_unreachable || (pack_everything & LOOSEN_UNREACHABLE), "-A", @@ -1369,14 +226,14 @@ int cmd_repack(int argc, (!(pack_everything & ALL_INTO_ONE) || !is_bare_repository())) write_bitmaps = 0; } - if (pack_kept_objects < 0) - pack_kept_objects = write_bitmaps > 0 && !write_midx; + if (po_args.pack_kept_objects < 0) + po_args.pack_kept_objects = write_bitmaps > 0 && !write_midx; if (write_bitmaps && !(pack_everything & ALL_INTO_ONE) && !write_midx) die(_(incremental_bitmap_conflict_error)); if (write_bitmaps && po_args.local && - odb_has_alternates(the_repository->objects)) { + odb_has_alternates(repo->objects)) { /* * When asked to do a local repack, but we have * packfiles that are inherited from an alternate, then @@ -1391,26 +248,28 @@ int cmd_repack(int argc, if (write_midx && write_bitmaps) { struct strbuf path = STRBUF_INIT; - strbuf_addf(&path, "%s/%s_XXXXXX", repo_get_object_directory(the_repository), + strbuf_addf(&path, "%s/%s_XXXXXX", + repo_get_object_directory(repo), "bitmap-ref-tips"); refs_snapshot = xmks_tempfile(path.buf); - midx_snapshot_refs(refs_snapshot); + midx_snapshot_refs(repo, refs_snapshot); strbuf_release(&path); } - packdir = mkpathdup("%s/pack", repo_get_object_directory(the_repository)); + packdir = mkpathdup("%s/pack", repo_get_object_directory(repo)); packtmp_name = xstrfmt(".tmp-%d-pack", (int)getpid()); packtmp = mkpathdup("%s/%s", packdir, packtmp_name); - collect_pack_filenames(&existing, &keep_pack_list); + existing.repo = repo; + existing_packs_collect(&existing, &keep_pack_list); if (geometry.split_factor) { if (pack_everything) die(_("options '%s' and '%s' cannot be used together"), "--geometric", "-A/-a"); - init_pack_geometry(&geometry, &existing, &po_args); - split_pack_geometry(&geometry); + pack_geometry_init(&geometry, &existing, &po_args); + pack_geometry_split(&geometry); } prepare_pack_objects(&cmd, &po_args, packtmp); @@ -1418,8 +277,6 @@ int cmd_repack(int argc, show_progress = !po_args.quiet && isatty(2); strvec_push(&cmd.args, "--keep-true-parents"); - if (!pack_kept_objects) - strvec_push(&cmd.args, "--honor-pack-keep"); for (i = 0; i < keep_pack_list.nr; i++) strvec_pushf(&cmd.args, "--keep-pack=%s", keep_pack_list.items[i].string); @@ -1439,7 +296,7 @@ int cmd_repack(int argc, strvec_push(&cmd.args, "--reflog"); strvec_push(&cmd.args, "--indexed-objects"); } - if (repo_has_promisor_remote(the_repository)) + if (repo_has_promisor_remote(repo)) strvec_push(&cmd.args, "--exclude-promisor-objects"); if (!write_midx) { if (write_bitmaps > 0) @@ -1451,9 +308,9 @@ int cmd_repack(int argc, strvec_push(&cmd.args, "--delta-islands"); if (pack_everything & ALL_INTO_ONE) { - repack_promisor_objects(&po_args, &names); + repack_promisor_objects(repo, &po_args, &names, packtmp); - if (has_existing_non_kept_packs(&existing) && + if (existing_packs_has_non_kept(&existing) && delete_redundant && !(pack_everything & PACK_CRUFT)) { for_each_string_list_item(item, &names) { @@ -1515,7 +372,10 @@ int cmd_repack(int argc, fclose(in); } - ret = finish_pack_objects_cmd(&cmd, &names, 1); + opts.packdir = packdir; + opts.destination = packdir; + opts.packtmp = packtmp; + ret = finish_pack_objects_cmd(repo->hash_algo, &opts, &cmd, &names); if (ret) goto cleanup; @@ -1535,12 +395,17 @@ int cmd_repack(int argc, * midx_has_unknown_packs() will make the decision for * us. */ - if (!get_multi_pack_index(the_repository->objects->sources)) + if (!get_multi_pack_index(repo->objects->sources)) midx_must_contain_cruft = 1; } if (pack_everything & PACK_CRUFT) { - const char *pack_prefix = find_pack_prefix(packdir, packtmp); + struct write_pack_opts opts = { + .po_args = &cruft_po_args, + .destination = packtmp, + .packtmp = packtmp, + .packdir = packdir, + }; if (!cruft_po_args.window) cruft_po_args.window = xstrdup_or_null(po_args.window); @@ -1555,9 +420,10 @@ int cmd_repack(int argc, cruft_po_args.local = po_args.local; cruft_po_args.quiet = po_args.quiet; + cruft_po_args.delta_base_offset = po_args.delta_base_offset; + cruft_po_args.pack_kept_objects = 0; - ret = write_cruft_pack(&cruft_po_args, packtmp, pack_prefix, - cruft_expiration, + ret = write_cruft_pack(&opts, cruft_expiration, combine_cruft_below_size, &names, &existing); if (ret) @@ -1592,11 +458,8 @@ int cmd_repack(int argc, * pack, but rather removing all cruft packs from the * main repository regardless of size. */ - ret = write_cruft_pack(&cruft_po_args, expire_to, - pack_prefix, - NULL, - 0ul, - &names, + opts.destination = expire_to; + ret = write_cruft_pack(&opts, NULL, 0ul, &names, &existing); if (ret) goto cleanup; @@ -1604,99 +467,63 @@ int cmd_repack(int argc, } if (po_args.filter_options.choice) { - if (!filter_to) - filter_to = packtmp; - - ret = write_filtered_pack(&po_args, - filter_to, - find_pack_prefix(packdir, packtmp), - &existing, - &names); + struct write_pack_opts opts = { + .po_args = &po_args, + .destination = filter_to, + .packdir = packdir, + .packtmp = packtmp, + }; + + if (!opts.destination) + opts.destination = packtmp; + + ret = write_filtered_pack(&opts, &existing, &names); if (ret) goto cleanup; } string_list_sort(&names); - if (get_multi_pack_index(the_repository->objects->sources)) { - struct multi_pack_index *m = - get_multi_pack_index(the_repository->objects->sources); - - ALLOC_ARRAY(midx_pack_names, - m->num_packs + m->num_packs_in_base); - - for (; m; m = m->base_midx) - for (uint32_t i = 0; i < m->num_packs; i++) - midx_pack_names[midx_pack_names_nr++] = - xstrdup(m->pack_names[i]); - } - - close_object_store(the_repository->objects); + close_object_store(repo->objects); /* * Ok we have prepared all new packfiles. */ - for_each_string_list_item(item, &names) { - struct generated_pack_data *data = item->util; - - for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { - char *fname; - - fname = mkpathdup("%s/pack-%s%s", - packdir, item->string, exts[ext].name); - - if (data->tempfiles[ext]) { - const char *fname_old = get_tempfile_path(data->tempfiles[ext]); - struct stat statbuffer; - - if (!stat(fname_old, &statbuffer)) { - statbuffer.st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); - chmod(fname_old, statbuffer.st_mode); - } - - if (rename_tempfile(&data->tempfiles[ext], fname)) - die_errno(_("renaming pack to '%s' failed"), fname); - } else if (!exts[ext].optional) - die(_("pack-objects did not write a '%s' file for pack %s-%s"), - exts[ext].name, packtmp, item->string); - else if (unlink(fname) < 0 && errno != ENOENT) - die_errno(_("could not unlink: %s"), fname); - - free(fname); - } - } + for_each_string_list_item(item, &names) + generated_pack_install((struct generated_pack *)item->util, + item->string, packdir, packtmp); /* End of pack replacement. */ if (delete_redundant && pack_everything & ALL_INTO_ONE) - mark_packs_for_deletion(&existing, &names); + existing_packs_mark_for_deletion(&existing, &names); if (write_midx) { - struct string_list include = STRING_LIST_INIT_DUP; - midx_included_packs(&include, &existing, midx_pack_names, - midx_pack_names_nr, &names, &geometry); - - ret = write_midx_included_packs(&include, &geometry, &names, - refs_snapshot ? get_tempfile_path(refs_snapshot) : NULL, - show_progress, write_bitmaps > 0); - - if (!ret && write_bitmaps) - remove_redundant_bitmaps(&include, packdir); - - string_list_clear(&include, 0); + struct repack_write_midx_opts opts = { + .existing = &existing, + .geometry = &geometry, + .names = &names, + .refs_snapshot = refs_snapshot ? get_tempfile_path(refs_snapshot) : NULL, + .packdir = packdir, + .show_progress = show_progress, + .write_bitmaps = write_bitmaps > 0, + .midx_must_contain_cruft = midx_must_contain_cruft + }; + + ret = write_midx_included_packs(&opts); if (ret) goto cleanup; } - odb_reprepare(the_repository->objects); + odb_reprepare(repo->objects); if (delete_redundant) { int opts = 0; - remove_redundant_existing_packs(&existing); + existing_packs_remove_redundant(&existing, packdir); if (geometry.split_factor) - geometry_remove_redundant_packs(&geometry, &names, - &existing); + pack_geometry_remove_redundant(&geometry, &names, + &existing, packdir); if (show_progress) opts |= PRUNE_PACKED_VERBOSE; prune_packed_objects(opts); @@ -1704,18 +531,18 @@ int cmd_repack(int argc, if (!keep_unreachable && (!(pack_everything & LOOSEN_UNREACHABLE) || unpack_unreachable) && - is_repository_shallow(the_repository)) + is_repository_shallow(repo)) prune_shallow(PRUNE_QUICK); } if (run_update_server_info) - update_server_info(the_repository, 0); + update_server_info(repo, 0); if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX, 0)) { unsigned flags = 0; if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX_WRITE_INCREMENTAL, 0)) flags |= MIDX_WRITE_INCREMENTAL; - write_midx_file(the_repository->objects->sources, + write_midx_file(repo->objects->sources, NULL, NULL, flags); } @@ -1723,10 +550,7 @@ int cmd_repack(int argc, string_list_clear(&keep_pack_list, 0); string_list_clear(&names, 1); existing_packs_release(&existing); - free_pack_geometry(&geometry); - for (size_t i = 0; i < midx_pack_names_nr; i++) - free(midx_pack_names[i]); - free(midx_pack_names); + pack_geometry_release(&geometry); pack_objects_args_release(&po_args); pack_objects_args_release(&cruft_po_args); diff --git a/meson.build b/meson.build index cee9424475990410fff0477a2985b142f44c59b2..a5b760b2f9fae7bef53eea2ff0a7133e1b75351f 100644 --- a/meson.build +++ b/meson.build @@ -463,6 +463,12 @@ libgit_sources = [ 'reftable/tree.c', 'reftable/writer.c', 'remote.c', + 'repack.c', + 'repack-cruft.c', + 'repack-filtered.c', + 'repack-geometry.c', + 'repack-midx.c', + 'repack-promisor.c', 'replace-object.c', 'repo-settings.c', 'repository.c', diff --git a/repack-cruft.c b/repack-cruft.c new file mode 100644 index 0000000000000000000000000000000000000000..accb98bcdbcb90930b40ae6f8d44f0e6a091b1b3 --- /dev/null +++ b/repack-cruft.c @@ -0,0 +1,99 @@ +#include "git-compat-util.h" +#include "repack.h" +#include "packfile.h" +#include "repository.h" +#include "run-command.h" + +static void combine_small_cruft_packs(FILE *in, off_t combine_cruft_below_size, + struct existing_packs *existing) +{ + struct packfile_store *packs = existing->repo->objects->packfiles; + struct packed_git *p; + struct strbuf buf = STRBUF_INIT; + size_t i; + + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + if (!(p->is_cruft && p->pack_local)) + continue; + + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + + if (!string_list_has_string(&existing->cruft_packs, buf.buf)) + continue; + + if (p->pack_size < combine_cruft_below_size) { + fprintf(in, "-%s\n", pack_basename(p)); + } else { + existing_packs_retain_cruft(existing, p); + fprintf(in, "%s\n", pack_basename(p)); + } + } + + for (i = 0; i < existing->non_kept_packs.nr; i++) + fprintf(in, "-%s.pack\n", + existing->non_kept_packs.items[i].string); + + strbuf_release(&buf); +} + +int write_cruft_pack(struct write_pack_opts *opts, + const char *cruft_expiration, + unsigned long combine_cruft_below_size, + struct string_list *names, + struct existing_packs *existing) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + struct string_list_item *item; + FILE *in; + int ret; + const char *pack_prefix = write_pack_opts_pack_prefix(opts); + + prepare_pack_objects(&cmd, opts->po_args, opts->destination); + + strvec_push(&cmd.args, "--cruft"); + if (cruft_expiration) + strvec_pushf(&cmd.args, "--cruft-expiration=%s", + cruft_expiration); + + strvec_push(&cmd.args, "--non-empty"); + + cmd.in = -1; + + ret = start_command(&cmd); + if (ret) + return ret; + + /* + * names has a confusing double use: it both provides the list + * of just-written new packs, and accepts the name of the cruft + * pack we are writing. + * + * By the time it is read here, it contains only the pack(s) + * that were just written, which is exactly the set of packs we + * want to consider kept. + * + * If `--expire-to` is given, the double-use served by `names` + * ensures that the pack written to `--expire-to` excludes any + * objects contained in the cruft pack. + */ + in = xfdopen(cmd.in, "w"); + for_each_string_list_item(item, names) + fprintf(in, "%s-%s.pack\n", pack_prefix, item->string); + if (combine_cruft_below_size && !cruft_expiration) { + combine_small_cruft_packs(in, combine_cruft_below_size, + existing); + } else { + for_each_string_list_item(item, &existing->non_kept_packs) + fprintf(in, "-%s.pack\n", item->string); + for_each_string_list_item(item, &existing->cruft_packs) + fprintf(in, "-%s.pack\n", item->string); + } + for_each_string_list_item(item, &existing->kept_packs) + fprintf(in, "%s.pack\n", item->string); + fclose(in); + + return finish_pack_objects_cmd(existing->repo->hash_algo, opts, &cmd, + names); +} diff --git a/repack-filtered.c b/repack-filtered.c new file mode 100644 index 0000000000000000000000000000000000000000..96c7b02bb6c6997868b2ee751e22a5bef6ebc3df --- /dev/null +++ b/repack-filtered.c @@ -0,0 +1,51 @@ +#include "git-compat-util.h" +#include "repack.h" +#include "repository.h" +#include "run-command.h" +#include "string-list.h" + +int write_filtered_pack(struct write_pack_opts *opts, + struct existing_packs *existing, + struct string_list *names) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + struct string_list_item *item; + FILE *in; + int ret; + const char *caret; + const char *pack_prefix = write_pack_opts_pack_prefix(opts); + + prepare_pack_objects(&cmd, opts->po_args, opts->destination); + + strvec_push(&cmd.args, "--stdin-packs"); + + for_each_string_list_item(item, &existing->kept_packs) + strvec_pushf(&cmd.args, "--keep-pack=%s", item->string); + + cmd.in = -1; + + ret = start_command(&cmd); + if (ret) + return ret; + + /* + * Here 'names' contains only the pack(s) that were just + * written, which is exactly the packs we want to keep. Also + * 'existing_kept_packs' already contains the packs in + * 'keep_pack_list'. + */ + in = xfdopen(cmd.in, "w"); + for_each_string_list_item(item, names) + fprintf(in, "^%s-%s.pack\n", pack_prefix, item->string); + for_each_string_list_item(item, &existing->non_kept_packs) + fprintf(in, "%s.pack\n", item->string); + for_each_string_list_item(item, &existing->cruft_packs) + fprintf(in, "%s.pack\n", item->string); + caret = opts->po_args->pack_kept_objects ? "" : "^"; + for_each_string_list_item(item, &existing->kept_packs) + fprintf(in, "%s%s.pack\n", caret, item->string); + fclose(in); + + return finish_pack_objects_cmd(existing->repo->hash_algo, opts, &cmd, + names); +} diff --git a/repack-geometry.c b/repack-geometry.c new file mode 100644 index 0000000000000000000000000000000000000000..56d651c9cee81a8e41c796d63d4ad4f09c9c3b39 --- /dev/null +++ b/repack-geometry.c @@ -0,0 +1,233 @@ +#define DISABLE_SIGN_COMPARE_WARNINGS + +#include "git-compat-util.h" +#include +#include "repack.h" +#include "hex.h" +#include "packfile.h" + +static uint32_t pack_geometry_weight(struct packed_git *p) +{ + if (open_pack_index(p)) + die(_("cannot open index for %s"), p->pack_name); + return p->num_objects; +} + +static int pack_geometry_cmp(const void *va, const void *vb) +{ + uint32_t aw = pack_geometry_weight(*(struct packed_git **)va), + bw = pack_geometry_weight(*(struct packed_git **)vb); + + if (aw < bw) + return -1; + if (aw > bw) + return 1; + return 0; +} + +void pack_geometry_init(struct pack_geometry *geometry, + struct existing_packs *existing, + const struct pack_objects_args *args) +{ + struct packfile_store *packs = existing->repo->objects->packfiles; + struct packed_git *p; + struct strbuf buf = STRBUF_INIT; + + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + if (args->local && !p->pack_local) + /* + * When asked to only repack local packfiles we skip + * over any packfiles that are borrowed from alternate + * object directories. + */ + continue; + + if (!args->pack_kept_objects) { + /* + * Any pack that has its pack_keep bit set will + * appear in existing->kept_packs below, but + * this saves us from doing a more expensive + * check. + */ + if (p->pack_keep) + continue; + + /* + * The pack may be kept via the --keep-pack + * option; check 'existing->kept_packs' to + * determine whether to ignore it. + */ + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + + if (string_list_has_string(&existing->kept_packs, buf.buf)) + continue; + } + if (p->is_cruft) + continue; + + ALLOC_GROW(geometry->pack, + geometry->pack_nr + 1, + geometry->pack_alloc); + + geometry->pack[geometry->pack_nr] = p; + geometry->pack_nr++; + } + + QSORT(geometry->pack, geometry->pack_nr, pack_geometry_cmp); + strbuf_release(&buf); +} + +void pack_geometry_split(struct pack_geometry *geometry) +{ + uint32_t i; + uint32_t split; + off_t total_size = 0; + + if (!geometry->pack_nr) { + geometry->split = geometry->pack_nr; + return; + } + + /* + * First, count the number of packs (in descending order of size) which + * already form a geometric progression. + */ + for (i = geometry->pack_nr - 1; i > 0; i--) { + struct packed_git *ours = geometry->pack[i]; + struct packed_git *prev = geometry->pack[i - 1]; + + if (unsigned_mult_overflows(geometry->split_factor, + pack_geometry_weight(prev))) + die(_("pack %s too large to consider in geometric " + "progression"), + prev->pack_name); + + if (pack_geometry_weight(ours) < + geometry->split_factor * pack_geometry_weight(prev)) + break; + } + + split = i; + + if (split) { + /* + * Move the split one to the right, since the top element in the + * last-compared pair can't be in the progression. Only do this + * when we split in the middle of the array (otherwise if we got + * to the end, then the split is in the right place). + */ + split++; + } + + /* + * Then, anything to the left of 'split' must be in a new pack. But, + * creating that new pack may cause packs in the heavy half to no longer + * form a geometric progression. + * + * Compute an expected size of the new pack, and then determine how many + * packs in the heavy half need to be joined into it (if any) to restore + * the geometric progression. + */ + for (i = 0; i < split; i++) { + struct packed_git *p = geometry->pack[i]; + + if (unsigned_add_overflows(total_size, pack_geometry_weight(p))) + die(_("pack %s too large to roll up"), p->pack_name); + total_size += pack_geometry_weight(p); + } + for (i = split; i < geometry->pack_nr; i++) { + struct packed_git *ours = geometry->pack[i]; + + if (unsigned_mult_overflows(geometry->split_factor, + total_size)) + die(_("pack %s too large to roll up"), ours->pack_name); + + if (pack_geometry_weight(ours) < + geometry->split_factor * total_size) { + if (unsigned_add_overflows(total_size, + pack_geometry_weight(ours))) + die(_("pack %s too large to roll up"), + ours->pack_name); + + split++; + total_size += pack_geometry_weight(ours); + } else + break; + } + + geometry->split = split; +} + +struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry) +{ + uint32_t i; + + if (!geometry) { + /* + * No geometry means either an all-into-one repack (in which + * case there is only one pack left and it is the largest) or an + * incremental one. + * + * If repacking incrementally, then we could check the size of + * all packs to determine which should be preferred, but leave + * this for later. + */ + return NULL; + } + if (geometry->split == geometry->pack_nr) + return NULL; + + /* + * The preferred pack is the largest pack above the split line. In + * other words, it is the largest pack that does not get rolled up in + * the geometric repack. + */ + for (i = geometry->pack_nr; i > geometry->split; i--) + /* + * A pack that is not local would never be included in a + * multi-pack index. We thus skip over any non-local packs. + */ + if (geometry->pack[i - 1]->pack_local) + return geometry->pack[i - 1]; + + return NULL; +} + +void pack_geometry_remove_redundant(struct pack_geometry *geometry, + struct string_list *names, + struct existing_packs *existing, + const char *packdir) +{ + const struct git_hash_algo *algop = existing->repo->hash_algo; + struct strbuf buf = STRBUF_INIT; + uint32_t i; + + for (i = 0; i < geometry->split; i++) { + struct packed_git *p = geometry->pack[i]; + if (string_list_has_string(names, hash_to_hex_algop(p->hash, + algop))) + continue; + + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + + if ((p->pack_keep) || + (string_list_has_string(&existing->kept_packs, buf.buf))) + continue; + + repack_remove_redundant_pack(existing->repo, packdir, buf.buf); + } + + strbuf_release(&buf); +} + +void pack_geometry_release(struct pack_geometry *geometry) +{ + if (!geometry) + return; + + free(geometry->pack); +} diff --git a/repack-midx.c b/repack-midx.c new file mode 100644 index 0000000000000000000000000000000000000000..6f6202c5bccd89a9087bd86a80b7f56d0678f7fb --- /dev/null +++ b/repack-midx.c @@ -0,0 +1,372 @@ +#include "git-compat-util.h" +#include "repack.h" +#include "hash.h" +#include "hex.h" +#include "odb.h" +#include "oidset.h" +#include "pack-bitmap.h" +#include "refs.h" +#include "run-command.h" +#include "tempfile.h" + +struct midx_snapshot_ref_data { + struct repository *repo; + struct tempfile *f; + struct oidset seen; + int preferred; +}; + +static int midx_snapshot_ref_one(const char *refname UNUSED, + const char *referent UNUSED, + const struct object_id *oid, + int flag UNUSED, void *_data) +{ + struct midx_snapshot_ref_data *data = _data; + struct object_id peeled; + + if (!peel_iterated_oid(data->repo, oid, &peeled)) + oid = &peeled; + + if (oidset_insert(&data->seen, oid)) + return 0; /* already seen */ + + if (odb_read_object_info(data->repo->objects, oid, NULL) != OBJ_COMMIT) + return 0; + + fprintf(data->f->fp, "%s%s\n", data->preferred ? "+" : "", + oid_to_hex(oid)); + + return 0; +} + +void midx_snapshot_refs(struct repository *repo, struct tempfile *f) +{ + struct midx_snapshot_ref_data data; + const struct string_list *preferred = bitmap_preferred_tips(repo); + + data.repo = repo; + data.f = f; + data.preferred = 0; + oidset_init(&data.seen, 0); + + if (!fdopen_tempfile(f, "w")) + die(_("could not open tempfile %s for writing"), + get_tempfile_path(f)); + + if (preferred) { + struct string_list_item *item; + + data.preferred = 1; + for_each_string_list_item(item, preferred) + refs_for_each_ref_in(get_main_ref_store(repo), + item->string, + midx_snapshot_ref_one, &data); + data.preferred = 0; + } + + refs_for_each_ref(get_main_ref_store(repo), + midx_snapshot_ref_one, &data); + + if (close_tempfile_gently(f)) { + int save_errno = errno; + delete_tempfile(&f); + errno = save_errno; + die_errno(_("could not close refs snapshot tempfile")); + } + + oidset_clear(&data.seen); +} + +static int midx_has_unknown_packs(struct string_list *include, + struct pack_geometry *geometry, + struct existing_packs *existing) +{ + struct string_list_item *item; + + string_list_sort(include); + + for_each_string_list_item(item, &existing->midx_packs) { + const char *pack_name = item->string; + + /* + * Determine whether or not each MIDX'd pack from the existing + * MIDX (if any) is represented in the new MIDX. For each pack + * in the MIDX, it must either be: + * + * - In the "include" list of packs to be included in the new + * MIDX. Note this function is called before the include + * list is populated with any cruft pack(s). + * + * - Below the geometric split line (if using pack geometry), + * indicating that the pack won't be included in the new + * MIDX, but its contents were rolled up as part of the + * geometric repack. + * + * - In the existing non-kept packs list (if not using pack + * geometry), and marked as non-deleted. + */ + if (string_list_has_string(include, pack_name)) { + continue; + } else if (geometry) { + struct strbuf buf = STRBUF_INIT; + uint32_t j; + + for (j = 0; j < geometry->split; j++) { + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(geometry->pack[j])); + strbuf_strip_suffix(&buf, ".pack"); + strbuf_addstr(&buf, ".idx"); + + if (!strcmp(pack_name, buf.buf)) { + strbuf_release(&buf); + break; + } + } + + strbuf_release(&buf); + + if (j < geometry->split) + continue; + } else { + struct string_list_item *item; + + item = string_list_lookup(&existing->non_kept_packs, + pack_name); + if (item && !existing_pack_is_marked_for_deletion(item)) + continue; + } + + /* + * If we got to this point, the MIDX includes some pack that we + * don't know about. + */ + return 1; + } + + return 0; +} + +static void midx_included_packs(struct string_list *include, + struct repack_write_midx_opts *opts) +{ + struct existing_packs *existing = opts->existing; + struct pack_geometry *geometry = opts->geometry; + struct string_list *names = opts->names; + struct string_list_item *item; + struct strbuf buf = STRBUF_INIT; + + for_each_string_list_item(item, &existing->kept_packs) { + strbuf_reset(&buf); + strbuf_addf(&buf, "%s.idx", item->string); + string_list_insert(include, buf.buf); + } + + for_each_string_list_item(item, names) { + strbuf_reset(&buf); + strbuf_addf(&buf, "pack-%s.idx", item->string); + string_list_insert(include, buf.buf); + } + + if (geometry->split_factor) { + uint32_t i; + + for (i = geometry->split; i < geometry->pack_nr; i++) { + struct packed_git *p = geometry->pack[i]; + + /* + * The multi-pack index never refers to packfiles part + * of an alternate object database, so we skip these. + * While git-multi-pack-index(1) would silently ignore + * them anyway, this allows us to skip executing the + * command completely when we have only non-local + * packfiles. + */ + if (!p->pack_local) + continue; + + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + strbuf_addstr(&buf, ".idx"); + + string_list_insert(include, buf.buf); + } + } else { + for_each_string_list_item(item, &existing->non_kept_packs) { + if (existing_pack_is_marked_for_deletion(item)) + continue; + + strbuf_reset(&buf); + strbuf_addf(&buf, "%s.idx", item->string); + string_list_insert(include, buf.buf); + } + } + + if (opts->midx_must_contain_cruft || + midx_has_unknown_packs(include, geometry, existing)) { + /* + * If there are one or more unknown pack(s) present (see + * midx_has_unknown_packs() for what makes a pack + * "unknown") in the MIDX before the repack, keep them + * as they may be required to form a reachability + * closure if the MIDX is bitmapped. + * + * For example, a cruft pack can be required to form a + * reachability closure if the MIDX is bitmapped and one + * or more of the bitmap's selected commits reaches a + * once-cruft object that was later made reachable. + */ + for_each_string_list_item(item, &existing->cruft_packs) { + /* + * When doing a --geometric repack, there is no + * need to check for deleted packs, since we're + * by definition not doing an ALL_INTO_ONE + * repack (hence no packs will be deleted). + * Otherwise we must check for and exclude any + * packs which are enqueued for deletion. + * + * So we could omit the conditional below in the + * --geometric case, but doing so is unnecessary + * since no packs are marked as pending + * deletion (since we only call + * `existing_packs_mark_for_deletion()` when + * doing an all-into-one repack). + */ + if (existing_pack_is_marked_for_deletion(item)) + continue; + + strbuf_reset(&buf); + strbuf_addf(&buf, "%s.idx", item->string); + string_list_insert(include, buf.buf); + } + } else { + /* + * Modern versions of Git (with the appropriate + * configuration setting) will write new copies of + * once-cruft objects when doing a --geometric repack. + * + * If the MIDX has no cruft pack, new packs written + * during a --geometric repack will not rely on the + * cruft pack to form a reachability closure, so we can + * avoid including them in the MIDX in that case. + */ + ; + } + + strbuf_release(&buf); +} + +static void remove_redundant_bitmaps(struct string_list *include, + const char *packdir) +{ + struct strbuf path = STRBUF_INIT; + struct string_list_item *item; + size_t packdir_len; + + strbuf_addstr(&path, packdir); + strbuf_addch(&path, '/'); + packdir_len = path.len; + + /* + * Remove any pack bitmaps corresponding to packs which are now + * included in the MIDX. + */ + for_each_string_list_item(item, include) { + strbuf_addstr(&path, item->string); + strbuf_strip_suffix(&path, ".idx"); + strbuf_addstr(&path, ".bitmap"); + + if (unlink(path.buf) && errno != ENOENT) + warning_errno(_("could not remove stale bitmap: %s"), + path.buf); + + strbuf_setlen(&path, packdir_len); + } + strbuf_release(&path); +} + +int write_midx_included_packs(struct repack_write_midx_opts *opts) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + struct string_list include = STRING_LIST_INIT_DUP; + struct string_list_item *item; + struct packed_git *preferred = pack_geometry_preferred_pack(opts->geometry); + FILE *in; + int ret = 0; + + midx_included_packs(&include, opts); + if (!include.nr) + goto done; + + cmd.in = -1; + cmd.git_cmd = 1; + + strvec_push(&cmd.args, "multi-pack-index"); + strvec_pushl(&cmd.args, "write", "--stdin-packs", NULL); + + if (opts->show_progress) + strvec_push(&cmd.args, "--progress"); + else + strvec_push(&cmd.args, "--no-progress"); + + if (opts->write_bitmaps) + strvec_push(&cmd.args, "--bitmap"); + + if (preferred) + strvec_pushf(&cmd.args, "--preferred-pack=%s", + pack_basename(preferred)); + else if (opts->names->nr) { + /* The largest pack was repacked, meaning that either + * one or two packs exist depending on whether the + * repository has a cruft pack or not. + * + * Select the non-cruft one as preferred to encourage + * pack-reuse among packs containing reachable objects + * over unreachable ones. + * + * (Note we could write multiple packs here if + * `--max-pack-size` was given, but any one of them + * will suffice, so pick the first one.) + */ + for_each_string_list_item(item, opts->names) { + struct generated_pack *pack = item->util; + if (generated_pack_has_ext(pack, ".mtimes")) + continue; + + strvec_pushf(&cmd.args, "--preferred-pack=pack-%s.pack", + item->string); + break; + } + } else { + /* + * No packs were kept, and no packs were written. The + * only thing remaining are .keep packs (unless + * --pack-kept-objects was given). + * + * Set the `--preferred-pack` arbitrarily here. + */ + ; + } + + if (opts->refs_snapshot) + strvec_pushf(&cmd.args, "--refs-snapshot=%s", + opts->refs_snapshot); + + ret = start_command(&cmd); + if (ret) + goto done; + + in = xfdopen(cmd.in, "w"); + for_each_string_list_item(item, &include) + fprintf(in, "%s\n", item->string); + fclose(in); + + ret = finish_command(&cmd); +done: + if (!ret && opts->write_bitmaps) + remove_redundant_bitmaps(&include, opts->packdir); + + string_list_clear(&include, 0); + + return ret; +} diff --git a/repack-promisor.c b/repack-promisor.c new file mode 100644 index 0000000000000000000000000000000000000000..8bf42fc7157d2882e648c54569a19d3ea2324c9b --- /dev/null +++ b/repack-promisor.c @@ -0,0 +1,102 @@ +#include "git-compat-util.h" +#include "repack.h" +#include "run-command.h" +#include "hex.h" +#include "repository.h" +#include "packfile.h" +#include "path.h" +#include "pack.h" + +struct write_oid_context { + struct child_process *cmd; + const struct git_hash_algo *algop; +}; + +/* + * Write oid to the given struct child_process's stdin, starting it first if + * necessary. + */ +static int write_oid(const struct object_id *oid, + struct packed_git *pack UNUSED, + uint32_t pos UNUSED, void *data) +{ + struct write_oid_context *ctx = data; + struct child_process *cmd = ctx->cmd; + + if (cmd->in == -1) { + if (start_command(cmd)) + die(_("could not start pack-objects to repack promisor objects")); + } + + if (write_in_full(cmd->in, oid_to_hex(oid), ctx->algop->hexsz) < 0 || + write_in_full(cmd->in, "\n", 1) < 0) + die(_("failed to feed promisor objects to pack-objects")); + return 0; +} + +void repack_promisor_objects(struct repository *repo, + const struct pack_objects_args *args, + struct string_list *names, const char *packtmp) +{ + struct write_oid_context ctx; + struct child_process cmd = CHILD_PROCESS_INIT; + FILE *out; + struct strbuf line = STRBUF_INIT; + + prepare_pack_objects(&cmd, args, packtmp); + cmd.in = -1; + + /* + * NEEDSWORK: Giving pack-objects only the OIDs without any ordering + * hints may result in suboptimal deltas in the resulting pack. See if + * the OIDs can be sent with fake paths such that pack-objects can use a + * {type -> existing pack order} ordering when computing deltas instead + * of a {type -> size} ordering, which may produce better deltas. + */ + ctx.cmd = &cmd; + ctx.algop = repo->hash_algo; + for_each_packed_object(repo, write_oid, &ctx, + FOR_EACH_OBJECT_PROMISOR_ONLY); + + if (cmd.in == -1) { + /* No packed objects; cmd was never started */ + child_process_clear(&cmd); + return; + } + + close(cmd.in); + + out = xfdopen(cmd.out, "r"); + while (strbuf_getline_lf(&line, out) != EOF) { + struct string_list_item *item; + char *promisor_name; + + if (line.len != repo->hash_algo->hexsz) + die(_("repack: Expecting full hex object ID lines only from pack-objects.")); + item = string_list_append(names, line.buf); + + /* + * pack-objects creates the .pack and .idx files, but not the + * .promisor file. Create the .promisor file, which is empty. + * + * NEEDSWORK: fetch-pack sometimes generates non-empty + * .promisor files containing the ref names and associated + * hashes at the point of generation of the corresponding + * packfile, but this would not preserve their contents. Maybe + * concatenate the contents of all .promisor files instead of + * just creating a new empty file. + */ + promisor_name = mkpathdup("%s-%s.promisor", packtmp, + line.buf); + write_promisor_file(promisor_name, NULL, 0); + + item->util = generated_pack_populate(item->string, packtmp); + + free(promisor_name); + } + + fclose(out); + if (finish_command(&cmd)) + die(_("could not finish pack-objects to repack promisor objects")); + strbuf_release(&line); +} diff --git a/repack.c b/repack.c new file mode 100644 index 0000000000000000000000000000000000000000..1982a48165019a7796e08fa98bb51fb56fa9752a --- /dev/null +++ b/repack.c @@ -0,0 +1,361 @@ +#include "git-compat-util.h" +#include "dir.h" +#include "midx.h" +#include "odb.h" +#include "packfile.h" +#include "path.h" +#include "repack.h" +#include "repository.h" +#include "run-command.h" +#include "tempfile.h" + +void prepare_pack_objects(struct child_process *cmd, + const struct pack_objects_args *args, + const char *out) +{ + strvec_push(&cmd->args, "pack-objects"); + if (args->window) + strvec_pushf(&cmd->args, "--window=%s", args->window); + if (args->window_memory) + strvec_pushf(&cmd->args, "--window-memory=%s", args->window_memory); + if (args->depth) + strvec_pushf(&cmd->args, "--depth=%s", args->depth); + if (args->threads) + strvec_pushf(&cmd->args, "--threads=%s", args->threads); + if (args->max_pack_size) + strvec_pushf(&cmd->args, "--max-pack-size=%lu", args->max_pack_size); + if (args->no_reuse_delta) + strvec_pushf(&cmd->args, "--no-reuse-delta"); + if (args->no_reuse_object) + strvec_pushf(&cmd->args, "--no-reuse-object"); + if (args->name_hash_version) + strvec_pushf(&cmd->args, "--name-hash-version=%d", args->name_hash_version); + if (args->path_walk) + strvec_pushf(&cmd->args, "--path-walk"); + if (args->local) + strvec_push(&cmd->args, "--local"); + if (args->quiet) + strvec_push(&cmd->args, "--quiet"); + if (args->delta_base_offset) + strvec_push(&cmd->args, "--delta-base-offset"); + if (!args->pack_kept_objects) + strvec_push(&cmd->args, "--honor-pack-keep"); + strvec_push(&cmd->args, out); + cmd->git_cmd = 1; + cmd->out = -1; +} + +void pack_objects_args_release(struct pack_objects_args *args) +{ + free(args->window); + free(args->window_memory); + free(args->depth); + free(args->threads); + list_objects_filter_release(&args->filter_options); +} + +void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, + const char *base_name) +{ + struct strbuf buf = STRBUF_INIT; + struct odb_source *source = repo->objects->sources; + struct multi_pack_index *m = get_multi_pack_index(source); + strbuf_addf(&buf, "%s.pack", base_name); + if (m && source->local && midx_contains_pack(m, buf.buf)) + clear_midx_file(repo); + strbuf_insertf(&buf, 0, "%s/", dir_name); + unlink_pack_path(buf.buf, 1); + strbuf_release(&buf); +} + +const char *write_pack_opts_pack_prefix(struct write_pack_opts *opts) +{ + const char *pack_prefix; + if (!skip_prefix(opts->packtmp, opts->packdir, &pack_prefix)) + die(_("pack prefix %s does not begin with objdir %s"), + opts->packtmp, opts->packdir); + if (*pack_prefix == '/') + pack_prefix++; + return pack_prefix; +} + +int write_pack_opts_is_local(struct write_pack_opts *opts) +{ + const char *scratch; + return skip_prefix(opts->destination, opts->packdir, &scratch); +} + +int finish_pack_objects_cmd(const struct git_hash_algo *algop, + struct write_pack_opts *opts, + struct child_process *cmd, + struct string_list *names) +{ + FILE *out; + int local = write_pack_opts_is_local(opts); + struct strbuf line = STRBUF_INIT; + + out = xfdopen(cmd->out, "r"); + while (strbuf_getline_lf(&line, out) != EOF) { + struct string_list_item *item; + + if (line.len != algop->hexsz) + die(_("repack: Expecting full hex object ID lines only " + "from pack-objects.")); + /* + * Avoid putting packs written outside of the repository in the + * list of names. + */ + if (local) { + item = string_list_append(names, line.buf); + item->util = generated_pack_populate(line.buf, + opts->packtmp); + } + } + fclose(out); + + strbuf_release(&line); + + return finish_command(cmd); +} + +#define DELETE_PACK 1 +#define RETAIN_PACK 2 + +void existing_packs_collect(struct existing_packs *existing, + const struct string_list *extra_keep) +{ + struct packfile_store *packs = existing->repo->objects->packfiles; + struct packed_git *p; + struct strbuf buf = STRBUF_INIT; + + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + size_t i; + const char *base; + + if (p->multi_pack_index) + string_list_append(&existing->midx_packs, + pack_basename(p)); + if (!p->pack_local) + continue; + + base = pack_basename(p); + + for (i = 0; i < extra_keep->nr; i++) + if (!fspathcmp(base, extra_keep->items[i].string)) + break; + + strbuf_reset(&buf); + strbuf_addstr(&buf, base); + strbuf_strip_suffix(&buf, ".pack"); + + if ((extra_keep->nr > 0 && i < extra_keep->nr) || p->pack_keep) + string_list_append(&existing->kept_packs, buf.buf); + else if (p->is_cruft) + string_list_append(&existing->cruft_packs, buf.buf); + else + string_list_append(&existing->non_kept_packs, buf.buf); + } + + string_list_sort(&existing->kept_packs); + string_list_sort(&existing->non_kept_packs); + string_list_sort(&existing->cruft_packs); + string_list_sort(&existing->midx_packs); + strbuf_release(&buf); +} + +int existing_packs_has_non_kept(const struct existing_packs *existing) +{ + return existing->non_kept_packs.nr || existing->cruft_packs.nr; +} + +static void existing_pack_mark_for_deletion(struct string_list_item *item) +{ + item->util = (void*)((uintptr_t)item->util | DELETE_PACK); +} + +static void existing_pack_unmark_for_deletion(struct string_list_item *item) +{ + item->util = (void*)((uintptr_t)item->util & ~DELETE_PACK); +} + +int existing_pack_is_marked_for_deletion(struct string_list_item *item) +{ + return (uintptr_t)item->util & DELETE_PACK; +} + +static void existing_packs_mark_retained(struct string_list_item *item) +{ + item->util = (void*)((uintptr_t)item->util | RETAIN_PACK); +} + +static int existing_pack_is_retained(struct string_list_item *item) +{ + return (uintptr_t)item->util & RETAIN_PACK; +} + +static void existing_packs_mark_for_deletion_1(const struct git_hash_algo *algop, + struct string_list *names, + struct string_list *list) +{ + struct string_list_item *item; + const size_t hexsz = algop->hexsz; + + for_each_string_list_item(item, list) { + char *sha1; + size_t len = strlen(item->string); + if (len < hexsz) + continue; + sha1 = item->string + len - hexsz; + + if (existing_pack_is_retained(item)) { + existing_pack_unmark_for_deletion(item); + } else if (!string_list_has_string(names, sha1)) { + /* + * Mark this pack for deletion, which ensures + * that this pack won't be included in a MIDX + * (if `--write-midx` was given) and that we + * will actually delete this pack (if `-d` was + * given). + */ + existing_pack_mark_for_deletion(item); + } + } +} + +void existing_packs_retain_cruft(struct existing_packs *existing, + struct packed_git *cruft) +{ + struct strbuf buf = STRBUF_INIT; + struct string_list_item *item; + + strbuf_addstr(&buf, pack_basename(cruft)); + strbuf_strip_suffix(&buf, ".pack"); + + item = string_list_lookup(&existing->cruft_packs, buf.buf); + if (!item) + BUG("could not find cruft pack '%s'", pack_basename(cruft)); + + existing_packs_mark_retained(item); + strbuf_release(&buf); +} + +void existing_packs_mark_for_deletion(struct existing_packs *existing, + struct string_list *names) + +{ + const struct git_hash_algo *algop = existing->repo->hash_algo; + existing_packs_mark_for_deletion_1(algop, names, + &existing->non_kept_packs); + existing_packs_mark_for_deletion_1(algop, names, + &existing->cruft_packs); +} + +static void remove_redundant_packs_1(struct repository *repo, + struct string_list *packs, + const char *packdir) +{ + struct string_list_item *item; + for_each_string_list_item(item, packs) { + if (!existing_pack_is_marked_for_deletion(item)) + continue; + repack_remove_redundant_pack(repo, packdir, item->string); + } +} + +void existing_packs_remove_redundant(struct existing_packs *existing, + const char *packdir) +{ + remove_redundant_packs_1(existing->repo, &existing->non_kept_packs, + packdir); + remove_redundant_packs_1(existing->repo, &existing->cruft_packs, + packdir); +} + +void existing_packs_release(struct existing_packs *existing) +{ + string_list_clear(&existing->kept_packs, 0); + string_list_clear(&existing->non_kept_packs, 0); + string_list_clear(&existing->cruft_packs, 0); + string_list_clear(&existing->midx_packs, 0); +} + +static struct { + const char *name; + unsigned optional:1; +} exts[] = { + {".pack"}, + {".rev", 1}, + {".mtimes", 1}, + {".bitmap", 1}, + {".promisor", 1}, + {".idx"}, +}; + +struct generated_pack { + struct tempfile *tempfiles[ARRAY_SIZE(exts)]; +}; + +struct generated_pack *generated_pack_populate(const char *name, + const char *packtmp) +{ + struct stat statbuf; + struct strbuf path = STRBUF_INIT; + struct generated_pack *pack = xcalloc(1, sizeof(*pack)); + size_t i; + + for (i = 0; i < ARRAY_SIZE(exts); i++) { + strbuf_reset(&path); + strbuf_addf(&path, "%s-%s%s", packtmp, name, exts[i].name); + + if (stat(path.buf, &statbuf)) + continue; + + pack->tempfiles[i] = register_tempfile(path.buf); + } + + strbuf_release(&path); + return pack; +} + +int generated_pack_has_ext(const struct generated_pack *pack, const char *ext) +{ + size_t i; + for (i = 0; i < ARRAY_SIZE(exts); i++) { + if (strcmp(exts[i].name, ext)) + continue; + return !!pack->tempfiles[i]; + } + BUG("unknown pack extension: '%s'", ext); +} + +void generated_pack_install(struct generated_pack *pack, const char *name, + const char *packdir, const char *packtmp) +{ + size_t ext; + for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { + char *fname; + + fname = mkpathdup("%s/pack-%s%s", packdir, name, + exts[ext].name); + + if (pack->tempfiles[ext]) { + const char *fname_old = get_tempfile_path(pack->tempfiles[ext]); + struct stat statbuffer; + + if (!stat(fname_old, &statbuffer)) { + statbuffer.st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); + chmod(fname_old, statbuffer.st_mode); + } + + if (rename_tempfile(&pack->tempfiles[ext], fname)) + die_errno(_("renaming pack to '%s' failed"), + fname); + } else if (!exts[ext].optional) + die(_("pack-objects did not write a '%s' file for pack %s-%s"), + exts[ext].name, packtmp, name); + else if (unlink(fname) < 0 && errno != ENOENT) + die_errno(_("could not unlink: %s"), fname); + + free(fname); + } +} diff --git a/repack.h b/repack.h new file mode 100644 index 0000000000000000000000000000000000000000..15886c348862d52a36552806e9c69f00e28d5885 --- /dev/null +++ b/repack.h @@ -0,0 +1,146 @@ +#ifndef REPACK_H +#define REPACK_H + +#include "list-objects-filter-options.h" +#include "string-list.h" + +struct pack_objects_args { + char *window; + char *window_memory; + char *depth; + char *threads; + unsigned long max_pack_size; + int no_reuse_delta; + int no_reuse_object; + int quiet; + int local; + int name_hash_version; + int path_walk; + int delta_base_offset; + int pack_kept_objects; + struct list_objects_filter_options filter_options; +}; + +#define PACK_OBJECTS_ARGS_INIT { \ + .delta_base_offset = 1, \ + .pack_kept_objects = -1, \ +} + +struct child_process; + +void prepare_pack_objects(struct child_process *cmd, + const struct pack_objects_args *args, + const char *out); +void pack_objects_args_release(struct pack_objects_args *args); + +void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, + const char *base_name); + +struct write_pack_opts { + struct pack_objects_args *po_args; + const char *destination; + const char *packdir; + const char *packtmp; +}; + +const char *write_pack_opts_pack_prefix(struct write_pack_opts *opts); +int write_pack_opts_is_local(struct write_pack_opts *opts); + +int finish_pack_objects_cmd(const struct git_hash_algo *algop, + struct write_pack_opts *opts, + struct child_process *cmd, + struct string_list *names); + +struct repository; +struct packed_git; + +struct existing_packs { + struct repository *repo; + struct string_list kept_packs; + struct string_list non_kept_packs; + struct string_list cruft_packs; + struct string_list midx_packs; +}; + +#define EXISTING_PACKS_INIT { \ + .kept_packs = STRING_LIST_INIT_DUP, \ + .non_kept_packs = STRING_LIST_INIT_DUP, \ + .cruft_packs = STRING_LIST_INIT_DUP, \ +} + +/* + * Adds all packs hex strings (pack-$HASH) to either packs->non_kept + * or packs->kept based on whether each pack has a corresponding + * .keep file or not. Packs without a .keep file are not to be kept + * if we are going to pack everything into one file. + */ +void existing_packs_collect(struct existing_packs *existing, + const struct string_list *extra_keep); +int existing_packs_has_non_kept(const struct existing_packs *existing); +int existing_pack_is_marked_for_deletion(struct string_list_item *item); +void existing_packs_retain_cruft(struct existing_packs *existing, + struct packed_git *cruft); +void existing_packs_mark_for_deletion(struct existing_packs *existing, + struct string_list *names); +void existing_packs_remove_redundant(struct existing_packs *existing, + const char *packdir); +void existing_packs_release(struct existing_packs *existing); + +struct generated_pack; + +struct generated_pack *generated_pack_populate(const char *name, + const char *packtmp); +int generated_pack_has_ext(const struct generated_pack *pack, const char *ext); +void generated_pack_install(struct generated_pack *pack, const char *name, + const char *packdir, const char *packtmp); + +void repack_promisor_objects(struct repository *repo, + const struct pack_objects_args *args, + struct string_list *names, const char *packtmp); + +struct pack_geometry { + struct packed_git **pack; + uint32_t pack_nr, pack_alloc; + uint32_t split; + + int split_factor; +}; + +void pack_geometry_init(struct pack_geometry *geometry, + struct existing_packs *existing, + const struct pack_objects_args *args); +void pack_geometry_split(struct pack_geometry *geometry); +struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry); +void pack_geometry_remove_redundant(struct pack_geometry *geometry, + struct string_list *names, + struct existing_packs *existing, + const char *packdir); +void pack_geometry_release(struct pack_geometry *geometry); + +struct tempfile; + +struct repack_write_midx_opts { + struct existing_packs *existing; + struct pack_geometry *geometry; + struct string_list *names; + const char *refs_snapshot; + const char *packdir; + int show_progress; + int write_bitmaps; + int midx_must_contain_cruft; +}; + +void midx_snapshot_refs(struct repository *repo, struct tempfile *f); +int write_midx_included_packs(struct repack_write_midx_opts *opts); + +int write_filtered_pack(struct write_pack_opts *opts, + struct existing_packs *existing, + struct string_list *names); + +int write_cruft_pack(struct write_pack_opts *opts, + const char *cruft_expiration, + unsigned long combine_cruft_below_size, + struct string_list *names, + struct existing_packs *existing); + +#endif /* REPACK_H */ diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh index ddd273d8dc24fba42418180f8015d0198736a29d..aedb9e7e8e3cb39fcca8b9ab595e83db2d89cb9f 100755 --- a/t/t7900-maintenance.sh +++ b/t/t7900-maintenance.sh @@ -465,6 +465,177 @@ test_expect_success 'maintenance.incremental-repack.auto (when config is unset)' ) ' +run_and_verify_geometric_pack () { + EXPECTED_PACKS="$1" && + + # Verify that we perform a geometric repack. + rm -f "trace2.txt" && + GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \ + git maintenance run --task=geometric-repack 2>/dev/null && + test_subcommand git repack -d -l --geometric=2 \ + --quiet --write-midx packfiles && + test_line_count = "$EXPECTED_PACKS" packfiles && + + # And verify that there are no loose objects anymore. + cat >expect <<-\EOF && + info + pack + EOF + ls .git/objects >actual && + test_cmp expect actual +} + +test_expect_success 'geometric repacking task' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + git config set maintenance.auto false && + test_commit initial && + + # The initial repack causes an all-into-one repack. + GIT_TRACE2_EVENT="$(pwd)/initial-repack.txt" \ + git maintenance run --task=geometric-repack 2>/dev/null && + test_subcommand git repack -d -l --cruft --cruft-expiration=2.weeks.ago --quiet --write-midx before && + run_and_verify_geometric_pack 1 && + ls -l .git/objects/pack >after && + test_cmp before after && + + # This incremental change creates a new packfile that only + # soaks up loose objects. The packfiles are not getting merged + # at this point. + test_commit loose && + run_and_verify_geometric_pack 2 && + + # Both packfiles have 3 objects, so the next run would cause us + # to merge all packfiles together. This should be turned into + # an all-into-one-repack. + GIT_TRACE2_EVENT="$(pwd)/all-into-one-repack.txt" \ + git maintenance run --task=geometric-repack 2>/dev/null && + test_subcommand git repack -d -l --cruft --cruft-expiration=2.weeks.ago --quiet --write-midx /dev/null && + test_subcommand git repack -d -l --cruft --cruft-expiration=2.weeks.ago --quiet --write-midx packs && + test_line_count = 2 packs && + ls .git/objects/pack/*.mtimes >cruft && + test_line_count = 1 cruft + ) +' + +test_geometric_repack_needed () { + NEEDED="$1" + GEOMETRIC_CONFIG="$2" && + rm -f trace2.txt && + GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \ + git ${GEOMETRIC_CONFIG:+-c maintenance.geometric-repack.$GEOMETRIC_CONFIG} \ + maintenance run --auto --task=geometric-repack 2>/dev/null && + case "$NEEDED" in + true) + test_grep "\[\"git\",\"repack\"," trace2.txt;; + false) + ! test_grep "\[\"git\",\"repack\"," trace2.txt;; + *) + BUG "invalid parameter: $NEEDED";; + esac +} + +test_expect_success 'geometric repacking with --auto' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + + # An empty repository does not need repacking, except when + # explicitly told to do it. + test_geometric_repack_needed false && + test_geometric_repack_needed false auto=0 && + test_geometric_repack_needed false auto=1 && + test_geometric_repack_needed true auto=-1 && + + test_oid_init && + + # Loose objects cause a repack when crossing the limit. Note + # that the number of objects gets extrapolated by having a look + # at the "objects/17/" shard. + test_commit "$(test_oid blob17_1)" && + test_geometric_repack_needed false && + test_commit "$(test_oid blob17_2)" && + test_geometric_repack_needed false auto=257 && + test_geometric_repack_needed true auto=256 && + + # Force another repack. + test_commit first && + test_commit second && + test_geometric_repack_needed true auto=-1 && + + # We now have two packfiles that would be merged together. As + # such, the repack should always happen unless the user has + # disabled the auto task. + test_geometric_repack_needed false auto=0 && + test_geometric_repack_needed true auto=9000 + ) +' + +test_expect_success 'geometric repacking honors configured split factor' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + git config set maintenance.auto false && + + # Create three different packs with 9, 2 and 1 object, respectively. + # This is done so that only a subset of packs would be merged + # together so that we can verify that `git repack` receives the + # correct geometric factor. + for i in $(test_seq 9) + do + echo first-$i | git hash-object -w --stdin -t blob || return 1 + done && + git repack --geometric=2 -d && + + for i in $(test_seq 2) + do + echo second-$i | git hash-object -w --stdin -t blob || return 1 + done && + git repack --geometric=2 -d && + + echo third | git hash-object -w --stdin -t blob && + git repack --geometric=2 -d && + + test_geometric_repack_needed false splitFactor=2 && + test_geometric_repack_needed true splitFactor=3 && + test_subcommand git repack -d -l --geometric=3 --quiet --write-midx expect && + rm -f trace2.txt && + GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \ + git -c maintenance.strategy=$STRATEGY maintenance run --quiet "$@" && + sed -n 's/{"event":"child_start","sid":"[^/"]*",.*,"argv":\["\(.*\)\"]}/\1/p' actual + test_cmp expect actual +} + +test_expect_success 'maintenance.strategy is respected' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit initial && + + test_must_fail git -c maintenance.strategy=unknown maintenance run 2>err && + test_grep "unknown maintenance strategy: .unknown." err && + + test_strategy incremental <<-\EOF && + git pack-refs --all --prune + git reflog expire --all + git gc --quiet --no-detach --skip-foreground-tasks + EOF + + test_strategy incremental --schedule=weekly <<-\EOF && + git pack-refs --all --prune + git prune-packed --quiet + git multi-pack-index write --no-progress + git multi-pack-index expire --no-progress + git multi-pack-index repack --no-progress --batch-size=1 + git commit-graph write --split --reachable --no-progress + EOF + + test_strategy gc <<-\EOF && + git pack-refs --all --prune + git reflog expire --all + git gc --quiet --no-detach --skip-foreground-tasks + EOF + + test_strategy gc --schedule=weekly <<-\EOF && + git pack-refs --all --prune + git reflog expire --all + git gc --quiet --no-detach --skip-foreground-tasks + EOF + + test_strategy geometric <<-\EOF && + git pack-refs --all --prune + git reflog expire --all + git repack -d -l --geometric=2 --quiet --write-midx + git commit-graph write --split --reachable --no-progress + git worktree prune --expire 3.months.ago + git rerere gc + EOF + + test_strategy geometric --schedule=weekly <<-\EOF + git pack-refs --all --prune + git reflog expire --all + git repack -d -l --geometric=2 --quiet --write-midx + git commit-graph write --split --reachable --no-progress + git worktree prune --expire 3.months.ago + git rerere gc + EOF + ) +' + test_expect_success 'register and unregister' ' test_when_finished git config --global --unset-all maintenance.repo && @@ -1093,6 +1334,11 @@ test_expect_success 'fails when running outside of a repository' ' nongit test_must_fail git maintenance unregister ' +test_expect_success 'fails when configured to use an invalid strategy' ' + test_must_fail git -c maintenance.strategy=invalid maintenance run --schedule=hourly 2>err && + test_grep "unknown maintenance strategy: .invalid." err +' + test_expect_success 'register and unregister bare repo' ' test_when_finished "git config --global --unset-all maintenance.repo || :" && test_might_fail git config --global --unset-all maintenance.repo &&