From d5a2359633e6d3f71cc5e711ece08d131614d366 Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Mon, 30 Jun 2025 20:49:23 +0200 Subject: [PATCH 1/3] last-modified: new subcommand to show when files were last modified MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similar to git-blame(1), introduce a new subcommand git-last-modified(1). This command shows the most recent modification to paths in a tree. It does so by expanding the tree at a given commit, taking note of the current state of each path, and then walking backwards through history looking for commits where each path changed into its final commit ID. Based-on-patch-by: Jeff King Improved-by: Ævar Arnfjörð Bjarmason Signed-off-by: Toon Claes --- .gitignore | 1 + Documentation/git-last-modified.adoc | 54 +++++ Documentation/meson.build | 1 + Makefile | 1 + builtin.h | 1 + builtin/last-modified.c | 281 +++++++++++++++++++++++++++ command-list.txt | 1 + git.c | 1 + meson.build | 1 + t/meson.build | 1 + t/t8020-last-modified.sh | 210 ++++++++++++++++++++ 11 files changed, 553 insertions(+) create mode 100644 Documentation/git-last-modified.adoc create mode 100644 builtin/last-modified.c create mode 100755 t/t8020-last-modified.sh diff --git a/.gitignore b/.gitignore index 04c444404e4..a36ee944433 100644 --- a/.gitignore +++ b/.gitignore @@ -87,6 +87,7 @@ /git-init-db /git-interpret-trailers /git-instaweb +/git-last-modified /git-log /git-ls-files /git-ls-remote diff --git a/Documentation/git-last-modified.adoc b/Documentation/git-last-modified.adoc new file mode 100644 index 00000000000..35bd4a1dd06 --- /dev/null +++ b/Documentation/git-last-modified.adoc @@ -0,0 +1,54 @@ +git-last-modified(1) +==================== + +NAME +---- +git-last-modified - EXPERIMENTAL: Show when files were last modified + + +SYNOPSIS +-------- +[synopsis] +git last-modified [--recursive] [--show-trees] [] [[--] ...] + +DESCRIPTION +----------- + +Shows which commit last modified each of the relevant files and subdirectories. +A commit renaming a path, or changing it's mode is also taken into account. + +THIS COMMAND IS EXPERIMENTAL. THE BEHAVIOR MAY CHANGE. + +OPTIONS +------- + +-r:: +--recursive:: + Instead of showing tree entries, step into subtrees and show all entries + inside them recursively. + +-t:: +--show-trees:: + Show tree entries even when recursing into them. It has no effect + without `--recursive`. + +:: + Only traverse commits in the specified revision range. When no + `` is specified, it defaults to `HEAD` (i.e. the whole + history leading to the current commit). For a complete list of ways to + spell ``, see the 'Specifying Ranges' section of + linkgit:gitrevisions[7]. + +[--] ...:: + For each __ given, the commit which last modified it is returned. + Without an optional path parameter, all files and subdirectories + in path traversal the are included in the output. + +SEE ALSO +-------- +linkgit:git-blame[1], +linkgit:git-log[1]. + +GIT +--- +Part of the linkgit:git[1] suite diff --git a/Documentation/meson.build b/Documentation/meson.build index 4404c623f00..a8ac5285f0a 100644 --- a/Documentation/meson.build +++ b/Documentation/meson.build @@ -74,6 +74,7 @@ manpages = { 'git-init.adoc' : 1, 'git-instaweb.adoc' : 1, 'git-interpret-trailers.adoc' : 1, + 'git-last-modified.adoc' : 1, 'git-log.adoc' : 1, 'git-ls-files.adoc' : 1, 'git-ls-remote.adoc' : 1, diff --git a/Makefile b/Makefile index e11340c1ae7..b0b3a30daa2 100644 --- a/Makefile +++ b/Makefile @@ -1265,6 +1265,7 @@ BUILTIN_OBJS += builtin/hook.o BUILTIN_OBJS += builtin/index-pack.o BUILTIN_OBJS += builtin/init-db.o BUILTIN_OBJS += builtin/interpret-trailers.o +BUILTIN_OBJS += builtin/last-modified.o BUILTIN_OBJS += builtin/log.o BUILTIN_OBJS += builtin/ls-files.o BUILTIN_OBJS += builtin/ls-remote.o diff --git a/builtin.h b/builtin.h index bff13e3069b..6ed6759ec4e 100644 --- a/builtin.h +++ b/builtin.h @@ -176,6 +176,7 @@ int cmd_hook(int argc, const char **argv, const char *prefix, struct repository int cmd_index_pack(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_init_db(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_interpret_trailers(int argc, const char **argv, const char *prefix, struct repository *repo); +int cmd_last_modified(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_log_reflog(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_log(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_ls_files(int argc, const char **argv, const char *prefix, struct repository *repo); diff --git a/builtin/last-modified.c b/builtin/last-modified.c new file mode 100644 index 00000000000..364493ac694 --- /dev/null +++ b/builtin/last-modified.c @@ -0,0 +1,281 @@ +#include "git-compat-util.h" +#include "builtin.h" +#include "commit.h" +#include "config.h" +#include "diff.h" +#include "diffcore.h" +#include "environment.h" +#include "hashmap.h" +#include "hex.h" +#include "log-tree.h" +#include "object-name.h" +#include "object.h" +#include "parse-options.h" +#include "quote.h" +#include "repository.h" +#include "revision.h" + +struct last_modified_entry { + struct hashmap_entry hashent; + struct object_id oid; + const char path[FLEX_ARRAY]; +}; + +static int last_modified_entry_hashcmp(const void *unused UNUSED, + const struct hashmap_entry *hent1, + const struct hashmap_entry *hent2, + const void *path) +{ + const struct last_modified_entry *ent1 = + container_of(hent1, const struct last_modified_entry, hashent); + const struct last_modified_entry *ent2 = + container_of(hent2, const struct last_modified_entry, hashent); + return strcmp(ent1->path, path ? path : ent2->path); +} + +struct last_modified { + struct hashmap paths; + struct rev_info rev; + bool recursive; + bool show_trees; +}; + +static void last_modified_release(struct last_modified *lm) +{ + hashmap_clear_and_free(&lm->paths, struct last_modified_entry, hashent); + release_revisions(&lm->rev); +} + +struct last_modified_callback_data { + struct last_modified *lm; + struct commit *commit; +}; + +static void add_path_from_diff(struct diff_queue_struct *q, + struct diff_options *opt UNUSED, void *data) +{ + struct last_modified *lm = data; + + for (int i = 0; i < q->nr; i++) { + struct diff_filepair *p = q->queue[i]; + struct last_modified_entry *ent; + const char *path = p->two->path; + + FLEX_ALLOC_STR(ent, path, path); + oidcpy(&ent->oid, &p->two->oid); + hashmap_entry_init(&ent->hashent, strhash(ent->path)); + hashmap_add(&lm->paths, &ent->hashent); + } +} + +static int populate_paths_from_revs(struct last_modified *lm) +{ + int num_interesting = 0; + struct diff_options diffopt; + + /* + * Create a copy of `struct diff_options`. In this copy a callback is + * set that when called adds entries to `paths` in `struct last_modified`. + * This copy is used to diff the tree of the target revision against an + * empty tree. This results in all paths in the target revision being + * listed. After `paths` is populated, we don't need this copy no more. + */ + memcpy(&diffopt, &lm->rev.diffopt, sizeof(diffopt)); + copy_pathspec(&diffopt.pathspec, &lm->rev.diffopt.pathspec); + diffopt.output_format = DIFF_FORMAT_CALLBACK; + diffopt.format_callback = add_path_from_diff; + diffopt.format_callback_data = lm; + + for (size_t i = 0; i < lm->rev.pending.nr; i++) { + struct object_array_entry *obj = lm->rev.pending.objects + i; + + if (obj->item->flags & UNINTERESTING) + continue; + + if (num_interesting++) + return error(_("last-modified can only operate on one tree at a time")); + + diff_tree_oid(lm->rev.repo->hash_algo->empty_tree, + &obj->item->oid, "", &diffopt); + diff_flush(&diffopt); + } + clear_pathspec(&diffopt.pathspec); + + return 0; +} + +static void last_modified_emit(struct last_modified *lm, + const char *path, const struct commit *commit) + +{ + if (commit->object.flags & BOUNDARY) + putchar('^'); + printf("%s\t", oid_to_hex(&commit->object.oid)); + + if (lm->rev.diffopt.line_termination) + write_name_quoted(path, stdout, '\n'); + else + printf("%s%c", path, '\0'); +} + +static void mark_path(const char *path, const struct object_id *oid, + struct last_modified_callback_data *data) +{ + struct last_modified_entry *ent; + + /* Is it even a path that we are interested in? */ + ent = hashmap_get_entry_from_hash(&data->lm->paths, strhash(path), path, + struct last_modified_entry, hashent); + if (!ent) + return; + + /* + * Is it arriving at a version of interest, or is it from a side branch + * which did not contribute to the final state? + */ + if (!oideq(oid, &ent->oid)) + return; + + last_modified_emit(data->lm, path, data->commit); + + hashmap_remove(&data->lm->paths, &ent->hashent, path); + free(ent); +} + +static void last_modified_diff(struct diff_queue_struct *q, + struct diff_options *opt UNUSED, void *cbdata) +{ + struct last_modified_callback_data *data = cbdata; + + for (int i = 0; i < q->nr; i++) { + struct diff_filepair *p = q->queue[i]; + switch (p->status) { + case DIFF_STATUS_DELETED: + /* + * There's no point in feeding a deletion, as it could + * not have resulted in our current state, which + * actually has the file. + */ + break; + + default: + /* + * Otherwise, we care only that we somehow arrived at + * a final oid state. Note that this covers some + * potentially controversial areas, including: + * + * 1. A rename or copy will be found, as it is the + * first time the content has arrived at the given + * path. + * + * 2. Even a non-content modification like a mode or + * type change will trigger it. + * + * We take the inclusive approach for now, and find + * anything which impacts the path. Options to tweak + * the behavior (e.g., to "--follow" the content across + * renames) can come later. + */ + mark_path(p->two->path, &p->two->oid, data); + break; + } + } +} + +static int last_modified_run(struct last_modified *lm) +{ + struct last_modified_callback_data data = { .lm = lm }; + + lm->rev.diffopt.output_format = DIFF_FORMAT_CALLBACK; + lm->rev.diffopt.format_callback = last_modified_diff; + lm->rev.diffopt.format_callback_data = &data; + + prepare_revision_walk(&lm->rev); + + while (hashmap_get_size(&lm->paths)) { + data.commit = get_revision(&lm->rev); + if (!data.commit) + BUG("paths remaining beyond boundary in last-modified"); + + if (data.commit->object.flags & BOUNDARY) { + diff_tree_oid(lm->rev.repo->hash_algo->empty_tree, + &data.commit->object.oid, "", + &lm->rev.diffopt); + diff_flush(&lm->rev.diffopt); + } else { + log_tree_commit(&lm->rev, data.commit); + } + } + + return 0; +} + +static int last_modified_init(struct last_modified *lm, struct repository *r, + const char *prefix, int argc, const char **argv) +{ + hashmap_init(&lm->paths, last_modified_entry_hashcmp, NULL, 0); + + repo_init_revisions(r, &lm->rev, prefix); + lm->rev.def = "HEAD"; + lm->rev.combine_merges = 1; + lm->rev.show_root_diff = 1; + lm->rev.boundary = 1; + lm->rev.no_commit_id = 1; + lm->rev.diff = 1; + lm->rev.diffopt.flags.recursive = lm->recursive; + lm->rev.diffopt.flags.tree_in_recursive = lm->show_trees; + + argc = setup_revisions(argc, argv, &lm->rev, NULL); + if (argc > 1) { + error(_("unknown last-modified argument: %s"), argv[1]); + return argc; + } + + if (populate_paths_from_revs(lm) < 0) + return error(_("unable to setup last-modified")); + + return 0; +} + +int cmd_last_modified(int argc, const char **argv, const char *prefix, + struct repository *repo) +{ + int ret; + struct last_modified lm = { 0 }; + + const char * const last_modified_usage[] = { + N_("git last-modified [--recursive] [--show-trees] " + "[] [[--] ...]"), + NULL + }; + + struct option last_modified_options[] = { + OPT_BOOL('r', "recursive", &lm.recursive, + N_("recurse into subtrees")), + OPT_BOOL('t', "show-trees", &lm.show_trees, + N_("show tree entries when recursing into subtrees")), + OPT_END() + }; + + argc = parse_options(argc, argv, prefix, last_modified_options, + last_modified_usage, + PARSE_OPT_KEEP_ARGV0 | PARSE_OPT_KEEP_UNKNOWN_OPT); + + repo_config(repo, git_default_config, NULL); + + ret = last_modified_init(&lm, repo, prefix, argc, argv); + if (ret > 0) + usage_with_options(last_modified_usage, + last_modified_options); + if (ret) + goto out; + + ret = last_modified_run(&lm); + if (ret) + goto out; + +out: + last_modified_release(&lm); + + return ret; +} diff --git a/command-list.txt b/command-list.txt index b7ade3ab9f3..b715777b248 100644 --- a/command-list.txt +++ b/command-list.txt @@ -124,6 +124,7 @@ git-index-pack plumbingmanipulators git-init mainporcelain init git-instaweb ancillaryinterrogators complete git-interpret-trailers purehelpers +git-last-modified plumbinginterrogators git-log mainporcelain info git-ls-files plumbinginterrogators git-ls-remote plumbinginterrogators diff --git a/git.c b/git.c index 07a5fe39fb6..76a0b2a1a44 100644 --- a/git.c +++ b/git.c @@ -565,6 +565,7 @@ static struct cmd_struct commands[] = { { "init", cmd_init_db }, { "init-db", cmd_init_db }, { "interpret-trailers", cmd_interpret_trailers, RUN_SETUP_GENTLY }, + { "last-modified", cmd_last_modified, RUN_SETUP }, { "log", cmd_log, RUN_SETUP }, { "ls-files", cmd_ls_files, RUN_SETUP }, { "ls-remote", cmd_ls_remote, RUN_SETUP_GENTLY }, diff --git a/meson.build b/meson.build index 5dd299b4962..cff7125638d 100644 --- a/meson.build +++ b/meson.build @@ -607,6 +607,7 @@ builtin_sources = [ 'builtin/index-pack.c', 'builtin/init-db.c', 'builtin/interpret-trailers.c', + 'builtin/last-modified.c', 'builtin/log.c', 'builtin/ls-files.c', 'builtin/ls-remote.c', diff --git a/t/meson.build b/t/meson.build index bbeba1a8d50..68656fe08af 100644 --- a/t/meson.build +++ b/t/meson.build @@ -946,6 +946,7 @@ integration_tests = [ 't8012-blame-colors.sh', 't8013-blame-ignore-revs.sh', 't8014-blame-ignore-fuzzy.sh', + 't8020-last-modified.sh', 't9001-send-email.sh', 't9002-column.sh', 't9003-help-autocorrect.sh', diff --git a/t/t8020-last-modified.sh b/t/t8020-last-modified.sh new file mode 100755 index 00000000000..5eb4cef0359 --- /dev/null +++ b/t/t8020-last-modified.sh @@ -0,0 +1,210 @@ +#!/bin/sh + +test_description='last-modified tests' + +. ./test-lib.sh + +test_expect_success 'setup' ' + test_commit 1 file && + mkdir a && + test_commit 2 a/file && + mkdir a/b && + test_commit 3 a/b/file +' + +test_expect_success 'cannot run last-modified on two trees' ' + test_must_fail git last-modified HEAD HEAD~1 +' + +check_last_modified() { + local indir= && + while test $# != 0 + do + case "$1" in + -C) + indir="$2" + shift + ;; + *) + break + ;; + esac && + shift + done && + + cat >expect && + test_when_finished "rm -f tmp.*" && + git ${indir:+-C "$indir"} last-modified "$@" >tmp.1 && + git name-rev --annotate-stdin --name-only --tags \ + tmp.2 && + tr '\t' ' ' actual && + test_cmp expect actual +} + +test_expect_success 'last-modified non-recursive' ' + check_last_modified <<-\EOF + 3 a + 1 file + EOF +' + +test_expect_success 'last-modified recursive' ' + check_last_modified -r <<-\EOF + 3 a/b/file + 2 a/file + 1 file + EOF +' + +test_expect_success 'last-modified recursive with show-trees' ' + check_last_modified -r -t <<-\EOF + 3 a + 3 a/b + 3 a/b/file + 2 a/file + 1 file + EOF +' + +test_expect_success 'last-modified non-recursive with show-trees' ' + check_last_modified -t <<-\EOF + 3 a + 1 file + EOF +' + +test_expect_success 'last-modified subdir' ' + check_last_modified a <<-\EOF + 3 a + EOF +' + +test_expect_success 'last-modified subdir recursive' ' + check_last_modified -r a <<-\EOF + 3 a/b/file + 2 a/file + EOF +' + +test_expect_success 'last-modified from non-HEAD commit' ' + check_last_modified HEAD^ <<-\EOF + 2 a + 1 file + EOF +' + +test_expect_success 'last-modified from subdir defaults to root' ' + check_last_modified -C a <<-\EOF + 3 a + 1 file + EOF +' + +test_expect_success 'last-modified from subdir uses relative pathspecs' ' + check_last_modified -C a -r b <<-\EOF + 3 a/b/file + EOF +' + +test_expect_success 'limit last-modified traversal by count' ' + check_last_modified -1 <<-\EOF + 3 a + ^2 file + EOF +' + +test_expect_success 'limit last-modified traversal by commit' ' + check_last_modified HEAD~2..HEAD <<-\EOF + 3 a + ^1 file + EOF +' + +test_expect_success 'only last-modified files in the current tree' ' + git rm -rf a && + git commit -m "remove a" && + check_last_modified <<-\EOF + 1 file + EOF +' + +test_expect_success 'cross merge boundaries in blaming' ' + git checkout HEAD^0 && + git rm -rf . && + test_commit m1 && + git checkout HEAD^ && + git rm -rf . && + test_commit m2 && + git merge m1 && + check_last_modified <<-\EOF + m2 m2.t + m1 m1.t + EOF +' + +test_expect_success 'last-modified merge for resolved conflicts' ' + git checkout HEAD^0 && + git rm -rf . && + test_commit c1 conflict && + git checkout HEAD^ && + git rm -rf . && + test_commit c2 conflict && + test_must_fail git merge c1 && + test_commit resolved conflict && + check_last_modified conflict <<-\EOF + resolved conflict + EOF +' + + +# Consider `file` with this content through history: +# +# A---B---B-------B---B +# \ / +# C---D +test_expect_success 'last-modified merge ignores content from branch' ' + git checkout HEAD^0 && + git rm -rf . && + test_commit a1 file A && + test_commit a2 file B && + test_commit a3 file C && + test_commit a4 file D && + git checkout a2 && + git merge --no-commit --no-ff a4 && + git checkout a2 -- file && + git merge --continue && + check_last_modified <<-\EOF + a2 file + EOF +' + +# Consider `file` with this content through history: +# +# A---B---B---C---D---B---B +# \ / +# B-------B +test_expect_success 'last-modified merge undoes changes' ' + git checkout HEAD^0 && + git rm -rf . && + test_commit b1 file A && + test_commit b2 file B && + test_commit b3 file C && + test_commit b4 file D && + git checkout b2 && + test_commit b5 file2 2 && + git checkout b4 && + git merge --no-commit --no-ff b5 && + git checkout b2 -- file && + git merge --continue && + check_last_modified <<-\EOF + b5 file2 + b2 file + EOF +' + +test_expect_success 'last-modified complains about unknown arguments' ' + test_must_fail git last-modified --foo 2>err && + grep "unknown last-modified argument: --foo" err +' + +test_done -- GitLab From 7537f0e597a72c97df61beeaa0e81bafeb8be6d9 Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Mon, 30 Jun 2025 20:49:24 +0200 Subject: [PATCH 2/3] t/perf: add last-modified perf script This just runs some simple last-modified commands. We already test correctness in the regular suite, so this is just about finding performance regressions from one version to another. Based-on-patch-by: Jeff King Signed-off-by: Toon Claes --- t/meson.build | 1 + t/perf/p8020-last-modified.sh | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) create mode 100755 t/perf/p8020-last-modified.sh diff --git a/t/meson.build b/t/meson.build index 68656fe08af..21d5e99bf53 100644 --- a/t/meson.build +++ b/t/meson.build @@ -1140,6 +1140,7 @@ benchmarks = [ 'perf/p7820-grep-engines.sh', 'perf/p7821-grep-engines-fixed.sh', 'perf/p7822-grep-perl-character.sh', + 'perf/p8020-last-modified.sh', 'perf/p9210-scalar.sh', 'perf/p9300-fast-import-export.sh', ] diff --git a/t/perf/p8020-last-modified.sh b/t/perf/p8020-last-modified.sh new file mode 100755 index 00000000000..cb1f98d3db9 --- /dev/null +++ b/t/perf/p8020-last-modified.sh @@ -0,0 +1,22 @@ +#!/bin/sh + +test_description='last-modified perf tests' +. ./perf-lib.sh + +test_perf_default_repo + +test_perf 'top-level last-modified' ' + git last-modified HEAD +' + +test_perf 'top-level recursive last-modified' ' + git last-modified -r HEAD +' + +test_perf 'subdir last-modified' ' + git ls-tree -d HEAD >subtrees && + path="$(head -n 1 subtrees | cut -f2)" && + git last-modified -r HEAD -- "$path" +' + +test_done -- GitLab From ebc7b061df1ff0d9c9051e19fe84a771143488f0 Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Mon, 30 Jun 2025 20:49:25 +0200 Subject: [PATCH 3/3] last-modified: use Bloom filters when available Our 'git last-modified' performs a revision walk, and computes a diff at each point in the walk to figure out whether a given revision changed any of the paths it considers interesting. When changed-path Bloom filters are available, we can avoid computing many such diffs. Before computing a diff, we first check if any of the remaining paths of interest were possibly changed at a given commit by consulting its Bloom filter. If any of them are, we are resigned to compute the diff. If none of those queries returned "maybe", we know that the given commit doesn't contain any changed paths which are interesting to us. So, we can avoid computing it in this case. Comparing the perf test results on git.git: Test HEAD~ HEAD ------------------------------------------------------------------------------------ 8020.1: top-level last-modified 4.49(4.34+0.11) 2.22(2.05+0.09) -50.6% 8020.2: top-level recursive last-modified 5.64(5.45+0.11) 5.62(5.30+0.11) -0.4% 8020.3: subdir last-modified 0.11(0.06+0.04) 0.07(0.03+0.04) -36.4% Based-on-patch-by: Taylor Blau Signed-off-by: Toon Claes --- builtin/last-modified.c | 48 +++++++++++++++++++++++++++++++++++++++-- commit-graph.c | 7 +++++- 2 files changed, 52 insertions(+), 3 deletions(-) diff --git a/builtin/last-modified.c b/builtin/last-modified.c index 364493ac694..82c57398277 100644 --- a/builtin/last-modified.c +++ b/builtin/last-modified.c @@ -1,5 +1,7 @@ #include "git-compat-util.h" +#include "bloom.h" #include "builtin.h" +#include "commit-graph.h" #include "commit.h" #include "config.h" #include "diff.h" @@ -18,6 +20,7 @@ struct last_modified_entry { struct hashmap_entry hashent; struct object_id oid; + struct bloom_key key; const char path[FLEX_ARRAY]; }; @@ -42,6 +45,12 @@ struct last_modified { static void last_modified_release(struct last_modified *lm) { + struct hashmap_iter iter; + struct last_modified_entry *ent; + + hashmap_for_each_entry(&lm->paths, &iter, ent, hashent) + bloom_key_clear(&ent->key); + hashmap_clear_and_free(&lm->paths, struct last_modified_entry, hashent); release_revisions(&lm->rev); } @@ -63,6 +72,9 @@ static void add_path_from_diff(struct diff_queue_struct *q, FLEX_ALLOC_STR(ent, path, path); oidcpy(&ent->oid, &p->two->oid); + if (lm->rev.bloom_filter_settings) + bloom_key_fill(&ent->key, path, strlen(path), + lm->rev.bloom_filter_settings); hashmap_entry_init(&ent->hashent, strhash(ent->path)); hashmap_add(&lm->paths, &ent->hashent); } @@ -139,6 +151,7 @@ static void mark_path(const char *path, const struct object_id *oid, last_modified_emit(data->lm, path, data->commit); hashmap_remove(&data->lm->paths, &ent->hashent, path); + bloom_key_clear(&ent->key); free(ent); } @@ -182,6 +195,30 @@ static void last_modified_diff(struct diff_queue_struct *q, } } +static bool maybe_changed_path(struct last_modified *lm, struct commit *origin) +{ + struct bloom_filter *filter; + struct last_modified_entry *ent; + struct hashmap_iter iter; + + if (!lm->rev.bloom_filter_settings) + return true; + + if (commit_graph_generation(origin) == GENERATION_NUMBER_INFINITY) + return true; + + filter = get_bloom_filter(lm->rev.repo, origin); + if (!filter) + return true; + + hashmap_for_each_entry(&lm->paths, &iter, ent, hashent) { + if (bloom_filter_contains(filter, &ent->key, + lm->rev.bloom_filter_settings)) + return true; + } + return false; +} + static int last_modified_run(struct last_modified *lm) { struct last_modified_callback_data data = { .lm = lm }; @@ -202,9 +239,14 @@ static int last_modified_run(struct last_modified *lm) &data.commit->object.oid, "", &lm->rev.diffopt); diff_flush(&lm->rev.diffopt); - } else { - log_tree_commit(&lm->rev, data.commit); + + break; } + + if (!maybe_changed_path(lm, data.commit)) + continue; + + log_tree_commit(&lm->rev, data.commit); } return 0; @@ -231,6 +273,8 @@ static int last_modified_init(struct last_modified *lm, struct repository *r, return argc; } + lm->rev.bloom_filter_settings = get_bloom_filter_settings(lm->rev.repo); + if (populate_paths_from_revs(lm) < 0) return error(_("unable to setup last-modified")); diff --git a/commit-graph.c b/commit-graph.c index e0d92b816f5..a74ac342b3c 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -821,7 +821,12 @@ int corrected_commit_dates_enabled(struct repository *r) struct bloom_filter_settings *get_bloom_filter_settings(struct repository *r) { - struct commit_graph *g = r->objects->commit_graph; + struct commit_graph *g; + + if (!prepare_commit_graph(r)) + return NULL; + + g = r->objects->commit_graph; while (g) { if (g->bloom_filter_settings) return g->bloom_filter_settings; -- GitLab