diff --git a/builtin/last-modified.c b/builtin/last-modified.c index ae8b36a2c3515c6a3931b04bad6c95fae36ac592..40e520ba18429a103bd04f9813c80e448762c1d0 100644 --- a/builtin/last-modified.c +++ b/builtin/last-modified.c @@ -2,26 +2,32 @@ #include "bloom.h" #include "builtin.h" #include "commit-graph.h" +#include "commit-slab.h" #include "commit.h" #include "config.h" -#include "environment.h" #include "diff.h" #include "diffcore.h" #include "environment.h" +#include "ewah/ewok.h" #include "hashmap.h" #include "hex.h" -#include "log-tree.h" #include "object-name.h" #include "object.h" #include "parse-options.h" +#include "prio-queue.h" #include "quote.h" #include "repository.h" #include "revision.h" +/* Remember to update object flag allocation in object.h */ +#define PARENT1 (1u<<16) /* used instead of SEEN */ +#define PARENT2 (1u<<17) /* used instead of BOTTOM, BOUNDARY */ + struct last_modified_entry { struct hashmap_entry hashent; struct object_id oid; struct bloom_key key; + size_t diff_idx; const char path[FLEX_ARRAY]; }; @@ -37,13 +43,35 @@ static int last_modified_entry_hashcmp(const void *unused UNUSED, return strcmp(ent1->path, path ? path : ent2->path); } +/* + * Hold a bitmap for each commit we're working with. Each bit represents a path + * in `lm->all_paths`. Active bit means the path still needs to be dealt with. + */ +define_commit_slab(commit_bitmaps, struct bitmap *); + struct last_modified { struct hashmap paths; struct rev_info rev; bool recursive; bool show_trees; + + const char **all_paths; + size_t all_paths_nr; + struct commit_bitmaps commit_bitmaps; + + /* 'scratch' bitmap to avoid allocating every proccess_parent() */ + struct bitmap *scratch; }; +static struct bitmap *get_bitmap(struct last_modified *lm, struct commit *c) +{ + struct bitmap **bitmap = commit_bitmaps_at(&lm->commit_bitmaps, c); + if (!*bitmap) + *bitmap = bitmap_word_alloc(lm->all_paths_nr / BITS_IN_EWORD); + + return *bitmap; +} + static void last_modified_release(struct last_modified *lm) { struct hashmap_iter iter; @@ -54,6 +82,8 @@ static void last_modified_release(struct last_modified *lm) hashmap_clear_and_free(&lm->paths, struct last_modified_entry, hashent); release_revisions(&lm->rev); + + free(lm->all_paths); } struct last_modified_callback_data { @@ -196,7 +226,36 @@ static void last_modified_diff(struct diff_queue_struct *q, } } -static bool maybe_changed_path(struct last_modified *lm, struct commit *origin) +static size_t path_idx(struct last_modified *lm, char *path) +{ + struct last_modified_entry *ent; + ent = hashmap_get_entry_from_hash(&lm->paths, strhash(path), path, + struct last_modified_entry, hashent); + + return ent ? ent->diff_idx : -1; +} + +static void pass_to_parent(struct last_modified *lm, + struct bitmap *c, + struct bitmap *p, + size_t pos) +{ + struct last_modified_entry *ent; + struct hashmap_iter iter; + + bitmap_unset(c, pos); + + hashmap_for_each_entry(&lm->paths, &iter, ent, hashent) { + if (ent->diff_idx == pos) { + bitmap_set(p, pos); + break; + } + } +} + +static bool maybe_changed_path(struct last_modified *lm, + struct commit *origin, + struct bitmap *active) { struct bloom_filter *filter; struct last_modified_entry *ent; @@ -213,6 +272,9 @@ static bool maybe_changed_path(struct last_modified *lm, struct commit *origin) return true; hashmap_for_each_entry(&lm->paths, &iter, ent, hashent) { + if (active && !bitmap_get(active, ent->diff_idx)) + continue; + if (bloom_filter_contains(filter, &ent->key, lm->rev.bloom_filter_settings)) return true; @@ -220,42 +282,197 @@ static bool maybe_changed_path(struct last_modified *lm, struct commit *origin) return false; } +static void process_parent(struct last_modified *lm, + struct prio_queue *queue, + struct commit *c, struct bitmap *active_c, + struct commit *parent, int parent_i) +{ + size_t i; + struct bitmap *active_p; + + repo_parse_commit(lm->rev.repo, parent); + active_p = get_bitmap(lm, parent); + + /* + * The first time entering this function for this commit (i.e. first parent) + * see if Bloom filters will tell us it's worth to do the diff. + */ + if (parent_i || maybe_changed_path(lm, c, active_c)) { + diff_tree_oid(&parent->object.oid, + &c->object.oid, "", &lm->rev.diffopt); + diffcore_std(&lm->rev.diffopt); + } + + /* + * Otherwise, test each path for TREESAME-ness against the parent. If + * a path is TREESAME, pass it on to this parent. + * + * First, collect all paths that are *not* TREESAME in 'scratch'. + * Then, pass paths that *are* TREESAME and active to the parent. + */ + for (i = 0; i < diff_queued_diff.nr; i++) { + struct diff_filepair *fp = diff_queued_diff.queue[i]; + size_t k = path_idx(lm, fp->two->path); + if (0 <= k && bitmap_get(active_c, k)) + bitmap_set(lm->scratch, k); + diff_free_filepair(fp); + } + for (i = 0; i < lm->all_paths_nr; i++) { + if (bitmap_get(active_c, i) && !bitmap_get(lm->scratch, i)) + pass_to_parent(lm, active_c, active_p, i); + } + + /* + * If parent has any active paths, put it on the queue (if not already). + */ + if (!bitmap_is_empty(active_p) && !(parent->object.flags & PARENT1)) { + parent->object.flags |= PARENT1; + prio_queue_put(queue, parent); + } + + memset(lm->scratch->words, 0x0, lm->scratch->word_alloc); + diff_queued_diff.nr = 0; + diff_queue_clear(&diff_queued_diff); +} + static int last_modified_run(struct last_modified *lm) { + int max_count, queue_popped = 0; + struct prio_queue queue = { compare_commits_by_gen_then_commit_date }; + struct prio_queue not_queue = { compare_commits_by_gen_then_commit_date }; + struct commit_list *list; struct last_modified_callback_data data = { .lm = lm }; lm->rev.diffopt.output_format = DIFF_FORMAT_CALLBACK; lm->rev.diffopt.format_callback = last_modified_diff; lm->rev.diffopt.format_callback_data = &data; + lm->rev.no_walk = 1; prepare_revision_walk(&lm->rev); - while (hashmap_get_size(&lm->paths)) { - data.commit = get_revision(&lm->rev); - if (!data.commit) - BUG("paths remaining beyond boundary in last-modified"); + max_count = lm->rev.max_count; + + init_commit_bitmaps(&lm->commit_bitmaps); + lm->scratch = bitmap_word_alloc(lm->all_paths_nr); + + /* + * lm->rev.commits holds the set of boundary commits for our walk. + * + * Loop through each such commit, and place it in the appropriate queue. + */ + for (list = lm->rev.commits; list; list = list->next) { + struct commit *c = list->item; + + if (c->object.flags & BOTTOM) { + prio_queue_put(¬_queue, c); + c->object.flags |= PARENT2; + } else if (!(c->object.flags & PARENT1)) { + /* + * If the commit is a starting point (and hasn't been + * seen yet), then initialize the set of interesting + * paths, too. + */ + struct bitmap *active; + + prio_queue_put(&queue, c); + c->object.flags |= PARENT1; - if (data.commit->object.flags & BOUNDARY) { + active = get_bitmap(lm, c); + for (size_t i = 0; i < lm->all_paths_nr; i++) + bitmap_set(active, i); + } + } + + while (queue.nr) { + int parent_i; + struct commit_list *p; + struct commit *c = prio_queue_get(&queue); + struct bitmap *active_c = get_bitmap(lm, c); + + if ((0 <= max_count && max_count < ++queue_popped) || + (c->object.flags & PARENT2)) { + /* + * Either a boundary commit, or we have already seen too + * many others. Either way, stop here. + */ + c->object.flags |= PARENT2 | BOUNDARY; + data.commit = c; diff_tree_oid(lm->rev.repo->hash_algo->empty_tree, - &data.commit->object.oid, "", - &lm->rev.diffopt); + &c->object.oid, + "", &lm->rev.diffopt); diff_flush(&lm->rev.diffopt); + goto cleanup; + } - break; + /* + * Otherwise, make sure that 'c' isn't reachable from anything + * in the '--not' queue. + */ + repo_parse_commit(lm->rev.repo, c); + + while (not_queue.nr) { + struct commit_list *np; + struct commit *n = prio_queue_get(¬_queue); + + repo_parse_commit(lm->rev.repo, n); + + for (np = n->parents; np; np = np->next) { + if (!(np->item->object.flags & PARENT2)) { + prio_queue_put(¬_queue, np->item); + np->item->object.flags |= PARENT2; + } + } + + if (commit_graph_generation(n) < commit_graph_generation(c)) + break; } - if (!maybe_changed_path(lm, data.commit)) - continue; + /* + * Look at each parent and pass on each path that's TREESAME + * with that parent. Stop early when no active paths remain. + */ + for (p = c->parents, parent_i = 0; p; p = p->next, parent_i++) { + process_parent(lm, &queue, + c, active_c, + p->item, parent_i); + + if (bitmap_is_empty(active_c)) + break; + } - log_tree_commit(&lm->rev, data.commit); + /* + * Paths that remain active, or not TREESAME with any parent, + * were changed by 'c'. + */ + if (!bitmap_is_empty(active_c)) { + data.commit = c; + for (size_t i = 0; i < lm->all_paths_nr; i++) { + if (bitmap_get(active_c, i)) + mark_path(lm->all_paths[i], NULL, &data); + } + } + +cleanup: + bitmap_free(active_c); } + if (hashmap_get_size(&lm->paths)) + BUG("paths remaining beyond boundary in last-modified"); + + clear_prio_queue(¬_queue); + clear_prio_queue(&queue); + clear_commit_bitmaps(&lm->commit_bitmaps); + bitmap_free(lm->scratch); + return 0; } static int last_modified_init(struct last_modified *lm, struct repository *r, const char *prefix, int argc, const char **argv) { + struct hashmap_iter iter; + struct last_modified_entry *ent; + hashmap_init(&lm->paths, last_modified_entry_hashcmp, NULL, 0); repo_init_revisions(r, &lm->rev, prefix); @@ -280,6 +497,13 @@ static int last_modified_init(struct last_modified *lm, struct repository *r, if (populate_paths_from_revs(lm) < 0) return error(_("unable to setup last-modified")); + lm->all_paths = xcalloc(hashmap_get_size(&lm->paths), sizeof(const char *)); + lm->all_paths_nr = 0; + hashmap_for_each_entry(&lm->paths, &iter, ent, hashent) { + ent->diff_idx = lm->all_paths_nr++; + lm->all_paths[ent->diff_idx] = ent->path; + } + return 0; } diff --git a/object.h b/object.h index 8c3c1c46e1bf04e8e51d7b72c88bc352077c54d0..fa504a09c0a48dbab1a3f108cc438267ffc89cd4 100644 --- a/object.h +++ b/object.h @@ -75,6 +75,7 @@ void object_array_init(struct object_array *array); * http-push.c: 11-----14 * commit-graph.c: 15 * commit-reach.c: 16-----19 + * builtin/last-modified.c: 1617 * sha1-name.c: 20 * list-objects-filter.c: 21 * bloom.c: 2122 diff --git a/t/t8020-last-modified.sh b/t/t8020-last-modified.sh index 61f00bc15c3b2d170d981038f0ebb48fd4bf31c2..a4c1114ee28f7fca858e425c0eeb69f3f5c98d0f 100755 --- a/t/t8020-last-modified.sh +++ b/t/t8020-last-modified.sh @@ -57,9 +57,9 @@ test_expect_success 'last-modified recursive' ' test_expect_success 'last-modified recursive with show-trees' ' check_last_modified -r -t <<-\EOF - 3 a 3 a/b 3 a/b/file + 3 a 2 a/file 1 file EOF