From 6bde5d43b769509141d7ebc0b2476bc2035bc673 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Tue, 15 Jul 2025 13:28:26 +0200 Subject: [PATCH 01/11] refs: expose `ref_iterator` via 'refs.h' The `ref_iterator` is an internal structure to the 'refs/' sub-directory, which allows iteration over refs. All reference iteration is built on top of these iterators. External clients of the 'refs' subsystem use the various 'refs_for_each...()' functions to iterate over refs. However since these are wrapper functions, each combination of functionality requires a new wrapper function. This is not feasible as the functions pile up with the increase in requirements. Expose the internal reference iterator, so advanced users can mix and match options as needed. Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- refs.h | 147 +++++++++++++++++++++++++++++++++++++++++++ refs/refs-internal.h | 145 +----------------------------------------- 2 files changed, 149 insertions(+), 143 deletions(-) diff --git a/refs.h b/refs.h index 46a6008e07f..7c21aaef3db 100644 --- a/refs.h +++ b/refs.h @@ -1190,4 +1190,151 @@ int repo_migrate_ref_storage_format(struct repository *repo, unsigned int flags, struct strbuf *err); +/* + * Reference iterators + * + * A reference iterator encapsulates the state of an in-progress + * iteration over references. Create an instance of `struct + * ref_iterator` via one of the functions in this module. + * + * A freshly-created ref_iterator doesn't yet point at a reference. To + * advance the iterator, call ref_iterator_advance(). If successful, + * this sets the iterator's refname, oid, and flags fields to describe + * the next reference and returns ITER_OK. The data pointed at by + * refname and oid belong to the iterator; if you want to retain them + * after calling ref_iterator_advance() again or calling + * ref_iterator_free(), you must make a copy. When the iteration has + * been exhausted, ref_iterator_advance() releases any resources + * associated with the iteration, frees the ref_iterator object, and + * returns ITER_DONE. If you want to abort the iteration early, call + * ref_iterator_free(), which also frees the ref_iterator object and + * any associated resources. If there was an internal error advancing + * to the next entry, ref_iterator_advance() aborts the iteration, + * frees the ref_iterator, and returns ITER_ERROR. + * + * The reference currently being looked at can be peeled by calling + * ref_iterator_peel(). This function is often faster than peel_ref(), + * so it should be preferred when iterating over references. + * + * Putting it all together, a typical iteration looks like this: + * + * int ok; + * struct ref_iterator *iter = ...; + * + * while ((ok = ref_iterator_advance(iter)) == ITER_OK) { + * if (want_to_stop_iteration()) { + * ok = ITER_DONE; + * break; + * } + * + * // Access information about the current reference: + * if (!(iter->flags & REF_ISSYMREF)) + * printf("%s is %s\n", iter->refname, oid_to_hex(iter->oid)); + * + * // If you need to peel the reference: + * ref_iterator_peel(iter, &oid); + * } + * + * if (ok != ITER_DONE) + * handle_error(); + * ref_iterator_free(iter); + */ +struct ref_iterator; + +/* + * These flags are passed to refs_ref_iterator_begin() (and do_for_each_ref(), + * which feeds it). + */ +enum do_for_each_ref_flags { + /* + * Include broken references in a do_for_each_ref*() iteration, which + * would normally be omitted. This includes both refs that point to + * missing objects (a true repository corruption), ones with illegal + * names (which we prefer not to expose to callers), as well as + * dangling symbolic refs (i.e., those that point to a non-existent + * ref; this is not a corruption, but as they have no valid oid, we + * omit them from normal iteration results). + */ + DO_FOR_EACH_INCLUDE_BROKEN = (1 << 0), + + /* + * Only include per-worktree refs in a do_for_each_ref*() iteration. + * Normally this will be used with a files ref_store, since that's + * where all reference backends will presumably store their + * per-worktree refs. + */ + DO_FOR_EACH_PER_WORKTREE_ONLY = (1 << 1), + + /* + * Omit dangling symrefs from output; this only has an effect with + * INCLUDE_BROKEN, since they are otherwise not included at all. + */ + DO_FOR_EACH_OMIT_DANGLING_SYMREFS = (1 << 2), + + /* + * Include root refs i.e. HEAD and pseudorefs along with the regular + * refs. + */ + DO_FOR_EACH_INCLUDE_ROOT_REFS = (1 << 3), +}; + +/* + * Return an iterator that goes over each reference in `refs` for + * which the refname begins with prefix. If trim is non-zero, then + * trim that many characters off the beginning of each refname. + * The output is ordered by refname. + */ +struct ref_iterator *refs_ref_iterator_begin( + struct ref_store *refs, + const char *prefix, const char **exclude_patterns, + int trim, enum do_for_each_ref_flags flags); + +/* + * Advance the iterator to the first or next item and return ITER_OK. + * If the iteration is exhausted, free the resources associated with + * the ref_iterator and return ITER_DONE. On errors, free the iterator + * resources and return ITER_ERROR. It is a bug to use ref_iterator or + * call this function again after it has returned ITER_DONE or + * ITER_ERROR. + */ +int ref_iterator_advance(struct ref_iterator *ref_iterator); + +/* + * Seek the iterator to the first reference with the given prefix. + * The prefix is matched as a literal string, without regard for path + * separators. If prefix is NULL or the empty string, seek the iterator to the + * first reference again. + * + * This function is expected to behave as if a new ref iterator with the same + * prefix had been created, but allows reuse of iterators and thus may allow + * the backend to optimize. Parameters other than the prefix that have been + * passed when creating the iterator will remain unchanged. + * + * Returns 0 on success, a negative error code otherwise. + */ +int ref_iterator_seek(struct ref_iterator *ref_iterator, + const char *prefix); + +/* + * If possible, peel the reference currently being viewed by the + * iterator. Return 0 on success. + */ +int ref_iterator_peel(struct ref_iterator *ref_iterator, + struct object_id *peeled); + +/* Free the reference iterator and any associated resources. */ +void ref_iterator_free(struct ref_iterator *ref_iterator); + +/* + * The common backend for the for_each_*ref* functions. Call fn for + * each reference in iter. If the iterator itself ever returns + * ITER_ERROR, return -1. If fn ever returns a non-zero value, stop + * the iteration and return that value. Otherwise, return 0. In any + * case, free the iterator when done. This function is basically an + * adapter between the callback style of reference iteration and the + * iterator style. + */ +int do_for_each_ref_iterator(struct ref_iterator *iter, + each_ref_fn fn, void *cb_data); + #endif /* REFS_H */ diff --git a/refs/refs-internal.h b/refs/refs-internal.h index f8688708519..03f5df04d59 100644 --- a/refs/refs-internal.h +++ b/refs/refs-internal.h @@ -244,90 +244,8 @@ const char *find_descendant_ref(const char *dirname, #define SYMREF_MAXDEPTH 5 /* - * These flags are passed to refs_ref_iterator_begin() (and do_for_each_ref(), - * which feeds it). - */ -enum do_for_each_ref_flags { - /* - * Include broken references in a do_for_each_ref*() iteration, which - * would normally be omitted. This includes both refs that point to - * missing objects (a true repository corruption), ones with illegal - * names (which we prefer not to expose to callers), as well as - * dangling symbolic refs (i.e., those that point to a non-existent - * ref; this is not a corruption, but as they have no valid oid, we - * omit them from normal iteration results). - */ - DO_FOR_EACH_INCLUDE_BROKEN = (1 << 0), - - /* - * Only include per-worktree refs in a do_for_each_ref*() iteration. - * Normally this will be used with a files ref_store, since that's - * where all reference backends will presumably store their - * per-worktree refs. - */ - DO_FOR_EACH_PER_WORKTREE_ONLY = (1 << 1), - - /* - * Omit dangling symrefs from output; this only has an effect with - * INCLUDE_BROKEN, since they are otherwise not included at all. - */ - DO_FOR_EACH_OMIT_DANGLING_SYMREFS = (1 << 2), - - /* - * Include root refs i.e. HEAD and pseudorefs along with the regular - * refs. - */ - DO_FOR_EACH_INCLUDE_ROOT_REFS = (1 << 3), -}; - -/* - * Reference iterators - * - * A reference iterator encapsulates the state of an in-progress - * iteration over references. Create an instance of `struct - * ref_iterator` via one of the functions in this module. - * - * A freshly-created ref_iterator doesn't yet point at a reference. To - * advance the iterator, call ref_iterator_advance(). If successful, - * this sets the iterator's refname, oid, and flags fields to describe - * the next reference and returns ITER_OK. The data pointed at by - * refname and oid belong to the iterator; if you want to retain them - * after calling ref_iterator_advance() again or calling - * ref_iterator_free(), you must make a copy. When the iteration has - * been exhausted, ref_iterator_advance() releases any resources - * associated with the iteration, frees the ref_iterator object, and - * returns ITER_DONE. If you want to abort the iteration early, call - * ref_iterator_free(), which also frees the ref_iterator object and - * any associated resources. If there was an internal error advancing - * to the next entry, ref_iterator_advance() aborts the iteration, - * frees the ref_iterator, and returns ITER_ERROR. - * - * The reference currently being looked at can be peeled by calling - * ref_iterator_peel(). This function is often faster than peel_ref(), - * so it should be preferred when iterating over references. - * - * Putting it all together, a typical iteration looks like this: - * - * int ok; - * struct ref_iterator *iter = ...; - * - * while ((ok = ref_iterator_advance(iter)) == ITER_OK) { - * if (want_to_stop_iteration()) { - * ok = ITER_DONE; - * break; - * } - * - * // Access information about the current reference: - * if (!(iter->flags & REF_ISSYMREF)) - * printf("%s is %s\n", iter->refname, oid_to_hex(iter->oid)); - * - * // If you need to peel the reference: - * ref_iterator_peel(iter, &oid); - * } - * - * if (ok != ITER_DONE) - * handle_error(); - * ref_iterator_free(iter); + * Data structure for holding a reference iterator. See refs.h for + * more details and usage instructions. */ struct ref_iterator { struct ref_iterator_vtable *vtable; @@ -337,42 +255,6 @@ struct ref_iterator { unsigned int flags; }; -/* - * Advance the iterator to the first or next item and return ITER_OK. - * If the iteration is exhausted, free the resources associated with - * the ref_iterator and return ITER_DONE. On errors, free the iterator - * resources and return ITER_ERROR. It is a bug to use ref_iterator or - * call this function again after it has returned ITER_DONE or - * ITER_ERROR. - */ -int ref_iterator_advance(struct ref_iterator *ref_iterator); - -/* - * Seek the iterator to the first reference with the given prefix. - * The prefix is matched as a literal string, without regard for path - * separators. If prefix is NULL or the empty string, seek the iterator to the - * first reference again. - * - * This function is expected to behave as if a new ref iterator with the same - * prefix had been created, but allows reuse of iterators and thus may allow - * the backend to optimize. Parameters other than the prefix that have been - * passed when creating the iterator will remain unchanged. - * - * Returns 0 on success, a negative error code otherwise. - */ -int ref_iterator_seek(struct ref_iterator *ref_iterator, - const char *prefix); - -/* - * If possible, peel the reference currently being viewed by the - * iterator. Return 0 on success. - */ -int ref_iterator_peel(struct ref_iterator *ref_iterator, - struct object_id *peeled); - -/* Free the reference iterator and any associated resources. */ -void ref_iterator_free(struct ref_iterator *ref_iterator); - /* * An iterator over nothing (its first ref_iterator_advance() call * returns ITER_DONE). @@ -384,17 +266,6 @@ struct ref_iterator *empty_ref_iterator_begin(void); */ int is_empty_ref_iterator(struct ref_iterator *ref_iterator); -/* - * Return an iterator that goes over each reference in `refs` for - * which the refname begins with prefix. If trim is non-zero, then - * trim that many characters off the beginning of each refname. - * The output is ordered by refname. - */ -struct ref_iterator *refs_ref_iterator_begin( - struct ref_store *refs, - const char *prefix, const char **exclude_patterns, - int trim, enum do_for_each_ref_flags flags); - /* * A callback function used to instruct merge_ref_iterator how to * interleave the entries from iter0 and iter1. The function should @@ -520,18 +391,6 @@ struct ref_iterator_vtable { */ extern struct ref_iterator *current_ref_iter; -/* - * The common backend for the for_each_*ref* functions. Call fn for - * each reference in iter. If the iterator itself ever returns - * ITER_ERROR, return -1. If fn ever returns a non-zero value, stop - * the iteration and return that value. Otherwise, return 0. In any - * case, free the iterator when done. This function is basically an - * adapter between the callback style of reference iteration and the - * iterator style. - */ -int do_for_each_ref_iterator(struct ref_iterator *iter, - each_ref_fn fn, void *cb_data); - struct ref_store; /* refs backends */ -- GitLab From 883a7ea0543426d0ecb172c72c8f706348961605 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Tue, 15 Jul 2025 13:28:27 +0200 Subject: [PATCH 02/11] ref-cache: remove unused function 'find_ref_entry()' The 'find_ref_entry' function is no longer used, so remove it. Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- refs/ref-cache.c | 14 -------------- refs/ref-cache.h | 7 ------- 2 files changed, 21 deletions(-) diff --git a/refs/ref-cache.c b/refs/ref-cache.c index c1f1bab1d50..8aaffa8c6b8 100644 --- a/refs/ref-cache.c +++ b/refs/ref-cache.c @@ -194,20 +194,6 @@ static struct ref_dir *find_containing_dir(struct ref_dir *dir, return dir; } -struct ref_entry *find_ref_entry(struct ref_dir *dir, const char *refname) -{ - int entry_index; - struct ref_entry *entry; - dir = find_containing_dir(dir, refname); - if (!dir) - return NULL; - entry_index = search_ref_dir(dir, refname, strlen(refname)); - if (entry_index == -1) - return NULL; - entry = dir->entries[entry_index]; - return (entry->flag & REF_DIR) ? NULL : entry; -} - /* * Emit a warning and return true iff ref1 and ref2 have the same name * and the same oid. Die if they have the same name but different diff --git a/refs/ref-cache.h b/refs/ref-cache.h index 5f04e518c37..f635d2d8243 100644 --- a/refs/ref-cache.h +++ b/refs/ref-cache.h @@ -201,13 +201,6 @@ void free_ref_cache(struct ref_cache *cache); */ void add_entry_to_dir(struct ref_dir *dir, struct ref_entry *entry); -/* - * Find the value entry with the given name in dir, sorting ref_dirs - * and recursing into subdirectories as necessary. If the name is not - * found or it corresponds to a directory entry, return NULL. - */ -struct ref_entry *find_ref_entry(struct ref_dir *dir, const char *refname); - /* * Start iterating over references in `cache`. If `prefix` is * specified, only include references whose names start with that -- GitLab From 2b4648b9190b552e942d93363482bd617a510fc1 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Tue, 15 Jul 2025 13:28:28 +0200 Subject: [PATCH 03/11] refs: selectively set prefix in the seek functions The ref iterator exposes a `ref_iterator_seek()` function. The name suggests that this would seek the iterator to a specific reference in some ways similar to how `fseek()` works for the filesystem. However, the function actually sets the prefix for refs iteration. So further iteration would only yield references which match the particular prefix. This is a bit confusing. Let's add a 'flags' field to the function, which when set with the 'REF_ITERATOR_SEEK_SET_PREFIX' flag, will set the prefix for the iteration in-line with the existing behavior. Otherwise, the reference backends will simply seek to the specified reference and clears any previously set prefix. This allows users to start iteration from a specific reference. In the packed and reftable backend, since references are available in a sorted list, the changes are simply setting the prefix if needed. The changes on the files-backend are a little more involved, since the files backend uses the 'ref-cache' mechanism. We move out the existing logic within `cache_ref_iterator_seek()` to `cache_ref_iterator_set_prefix()` which is called when the 'REF_ITERATOR_SEEK_SET_PREFIX' flag is set. We then parse the provided seek string and set the required levels and their indexes to ensure that seeking is possible. Helped-by: Patrick Steinhardt Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- refs.c | 6 +-- refs.h | 26 ++++++++----- refs/debug.c | 7 ++-- refs/files-backend.c | 7 ++-- refs/iterator.c | 26 +++++++------ refs/packed-backend.c | 17 ++++++--- refs/ref-cache.c | 85 ++++++++++++++++++++++++++++++++++++++--- refs/refs-internal.h | 7 ++-- refs/reftable-backend.c | 21 ++++++---- 9 files changed, 152 insertions(+), 50 deletions(-) diff --git a/refs.c b/refs.c index dce5c49ca2b..243e6898b80 100644 --- a/refs.c +++ b/refs.c @@ -2666,12 +2666,12 @@ enum ref_transaction_error refs_verify_refnames_available(struct ref_store *refs if (!initial_transaction) { int ok; - if (!iter) { + if (!iter) iter = refs_ref_iterator_begin(refs, dirname.buf, NULL, 0, DO_FOR_EACH_INCLUDE_BROKEN); - } else if (ref_iterator_seek(iter, dirname.buf) < 0) { + else if (ref_iterator_seek(iter, dirname.buf, + REF_ITERATOR_SEEK_SET_PREFIX) < 0) goto cleanup; - } while ((ok = ref_iterator_advance(iter)) == ITER_OK) { if (skip && diff --git a/refs.h b/refs.h index 7c21aaef3db..e6780a8848b 100644 --- a/refs.h +++ b/refs.h @@ -1299,21 +1299,29 @@ struct ref_iterator *refs_ref_iterator_begin( */ int ref_iterator_advance(struct ref_iterator *ref_iterator); +enum ref_iterator_seek_flag { + /* + * When the REF_ITERATOR_SEEK_SET_PREFIX flag is set, the iterator's prefix is + * updated to match the provided string, affecting all subsequent iterations. If + * not, the iterator seeks to the specified reference and clears any previously + * set prefix. + */ + REF_ITERATOR_SEEK_SET_PREFIX = (1 << 0), +}; + /* - * Seek the iterator to the first reference with the given prefix. - * The prefix is matched as a literal string, without regard for path - * separators. If prefix is NULL or the empty string, seek the iterator to the + * Seek the iterator to the first reference matching the given seek string. + * The seek string is matched as a literal string, without regard for path + * separators. If seek is NULL or the empty string, seek the iterator to the * first reference again. * - * This function is expected to behave as if a new ref iterator with the same - * prefix had been created, but allows reuse of iterators and thus may allow - * the backend to optimize. Parameters other than the prefix that have been - * passed when creating the iterator will remain unchanged. + * This function is expected to behave as if a new ref iterator has been + * created, but allows reuse of existing iterators for optimization. * * Returns 0 on success, a negative error code otherwise. */ -int ref_iterator_seek(struct ref_iterator *ref_iterator, - const char *prefix); +int ref_iterator_seek(struct ref_iterator *ref_iterator, const char *refname, + unsigned int flags); /* * If possible, peel the reference currently being viewed by the diff --git a/refs/debug.c b/refs/debug.c index 485e3079d7a..da300efaf30 100644 --- a/refs/debug.c +++ b/refs/debug.c @@ -170,12 +170,13 @@ static int debug_ref_iterator_advance(struct ref_iterator *ref_iterator) } static int debug_ref_iterator_seek(struct ref_iterator *ref_iterator, - const char *prefix) + const char *refname, unsigned int flags) { struct debug_ref_iterator *diter = (struct debug_ref_iterator *)ref_iterator; - int res = diter->iter->vtable->seek(diter->iter, prefix); - trace_printf_key(&trace_refs, "iterator_seek: %s: %d\n", prefix ? prefix : "", res); + int res = diter->iter->vtable->seek(diter->iter, refname, flags); + trace_printf_key(&trace_refs, "iterator_seek: %s flags: %d: %d\n", + refname ? refname : "", flags, res); return res; } diff --git a/refs/files-backend.c b/refs/files-backend.c index bf6f89b1d19..8b282f2a60a 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -929,11 +929,11 @@ static int files_ref_iterator_advance(struct ref_iterator *ref_iterator) } static int files_ref_iterator_seek(struct ref_iterator *ref_iterator, - const char *prefix) + const char *refname, unsigned int flags) { struct files_ref_iterator *iter = (struct files_ref_iterator *)ref_iterator; - return ref_iterator_seek(iter->iter0, prefix); + return ref_iterator_seek(iter->iter0, refname, flags); } static int files_ref_iterator_peel(struct ref_iterator *ref_iterator, @@ -2316,7 +2316,8 @@ static int files_reflog_iterator_advance(struct ref_iterator *ref_iterator) } static int files_reflog_iterator_seek(struct ref_iterator *ref_iterator UNUSED, - const char *prefix UNUSED) + const char *refname UNUSED, + unsigned int flags UNUSED) { BUG("ref_iterator_seek() called for reflog_iterator"); } diff --git a/refs/iterator.c b/refs/iterator.c index 766d96e795c..17ef841d8a3 100644 --- a/refs/iterator.c +++ b/refs/iterator.c @@ -15,10 +15,10 @@ int ref_iterator_advance(struct ref_iterator *ref_iterator) return ref_iterator->vtable->advance(ref_iterator); } -int ref_iterator_seek(struct ref_iterator *ref_iterator, - const char *prefix) +int ref_iterator_seek(struct ref_iterator *ref_iterator, const char *refname, + unsigned int flags) { - return ref_iterator->vtable->seek(ref_iterator, prefix); + return ref_iterator->vtable->seek(ref_iterator, refname, flags); } int ref_iterator_peel(struct ref_iterator *ref_iterator, @@ -57,7 +57,8 @@ static int empty_ref_iterator_advance(struct ref_iterator *ref_iterator UNUSED) } static int empty_ref_iterator_seek(struct ref_iterator *ref_iterator UNUSED, - const char *prefix UNUSED) + const char *refname UNUSED, + unsigned int flags UNUSED) { return 0; } @@ -224,7 +225,7 @@ static int merge_ref_iterator_advance(struct ref_iterator *ref_iterator) } static int merge_ref_iterator_seek(struct ref_iterator *ref_iterator, - const char *prefix) + const char *refname, unsigned int flags) { struct merge_ref_iterator *iter = (struct merge_ref_iterator *)ref_iterator; @@ -234,11 +235,11 @@ static int merge_ref_iterator_seek(struct ref_iterator *ref_iterator, iter->iter0 = iter->iter0_owned; iter->iter1 = iter->iter1_owned; - ret = ref_iterator_seek(iter->iter0, prefix); + ret = ref_iterator_seek(iter->iter0, refname, flags); if (ret < 0) return ret; - ret = ref_iterator_seek(iter->iter1, prefix); + ret = ref_iterator_seek(iter->iter1, refname, flags); if (ret < 0) return ret; @@ -407,13 +408,16 @@ static int prefix_ref_iterator_advance(struct ref_iterator *ref_iterator) } static int prefix_ref_iterator_seek(struct ref_iterator *ref_iterator, - const char *prefix) + const char *refname, unsigned int flags) { struct prefix_ref_iterator *iter = (struct prefix_ref_iterator *)ref_iterator; - free(iter->prefix); - iter->prefix = xstrdup_or_null(prefix); - return ref_iterator_seek(iter->iter0, prefix); + + if (flags & REF_ITERATOR_SEEK_SET_PREFIX) { + free(iter->prefix); + iter->prefix = xstrdup_or_null(refname); + } + return ref_iterator_seek(iter->iter0, refname, flags); } static int prefix_ref_iterator_peel(struct ref_iterator *ref_iterator, diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 7fd73a0e6da..5fa4ae6655e 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -1004,19 +1004,23 @@ static int packed_ref_iterator_advance(struct ref_iterator *ref_iterator) } static int packed_ref_iterator_seek(struct ref_iterator *ref_iterator, - const char *prefix) + const char *refname, unsigned int flags) { struct packed_ref_iterator *iter = (struct packed_ref_iterator *)ref_iterator; const char *start; - if (prefix && *prefix) - start = find_reference_location(iter->snapshot, prefix, 0); + if (refname && *refname) + start = find_reference_location(iter->snapshot, refname, 0); else start = iter->snapshot->start; - free(iter->prefix); - iter->prefix = xstrdup_or_null(prefix); + /* Unset any previously set prefix */ + FREE_AND_NULL(iter->prefix); + + if (flags & REF_ITERATOR_SEEK_SET_PREFIX) + iter->prefix = xstrdup_or_null(refname); + iter->pos = start; iter->eof = iter->snapshot->eof; @@ -1194,7 +1198,8 @@ static struct ref_iterator *packed_ref_iterator_begin( iter->repo = ref_store->repo; iter->flags = flags; - if (packed_ref_iterator_seek(&iter->base, prefix) < 0) { + if (packed_ref_iterator_seek(&iter->base, prefix, + REF_ITERATOR_SEEK_SET_PREFIX) < 0) { ref_iterator_free(&iter->base); return NULL; } diff --git a/refs/ref-cache.c b/refs/ref-cache.c index 8aaffa8c6b8..1d95b56d400 100644 --- a/refs/ref-cache.c +++ b/refs/ref-cache.c @@ -434,11 +434,9 @@ static int cache_ref_iterator_advance(struct ref_iterator *ref_iterator) } } -static int cache_ref_iterator_seek(struct ref_iterator *ref_iterator, - const char *prefix) +static int cache_ref_iterator_set_prefix(struct cache_ref_iterator *iter, + const char *prefix) { - struct cache_ref_iterator *iter = - (struct cache_ref_iterator *)ref_iterator; struct cache_ref_iterator_level *level; struct ref_dir *dir; @@ -469,6 +467,82 @@ static int cache_ref_iterator_seek(struct ref_iterator *ref_iterator, return 0; } +static int cache_ref_iterator_seek(struct ref_iterator *ref_iterator, + const char *refname, unsigned int flags) +{ + struct cache_ref_iterator *iter = + (struct cache_ref_iterator *)ref_iterator; + + if (flags & REF_ITERATOR_SEEK_SET_PREFIX) { + return cache_ref_iterator_set_prefix(iter, refname); + } else if (refname && *refname) { + struct cache_ref_iterator_level *level; + const char *slash = refname; + struct ref_dir *dir; + + dir = get_ref_dir(iter->cache->root); + + if (iter->prime_dir) + prime_ref_dir(dir, refname); + + iter->levels_nr = 1; + level = &iter->levels[0]; + level->index = -1; + level->dir = dir; + + /* Unset any previously set prefix */ + FREE_AND_NULL(iter->prefix); + + /* + * Breakdown the provided seek path and assign the correct + * indexing to each level as needed. + */ + do { + int len, idx; + int cmp = 0; + + sort_ref_dir(dir); + + slash = strchr(slash, '/'); + len = slash ? slash - refname : (int)strlen(refname); + + for (idx = 0; idx < dir->nr; idx++) { + cmp = strncmp(refname, dir->entries[idx]->name, len); + if (cmp <= 0) + break; + } + /* don't overflow the index */ + idx = idx >= dir->nr ? dir->nr - 1 : idx; + + if (slash) + slash = slash + 1; + + level->index = idx; + if (dir->entries[idx]->flag & REF_DIR) { + /* push down a level */ + dir = get_ref_dir(dir->entries[idx]); + + ALLOC_GROW(iter->levels, iter->levels_nr + 1, + iter->levels_alloc); + level = &iter->levels[iter->levels_nr++]; + level->dir = dir; + level->index = -1; + } else { + /* reduce the index so the leaf node is iterated over */ + if (cmp <= 0 && !slash) + level->index = idx - 1; + /* + * while the seek path may not be exhausted, our + * match is exhausted at a leaf node. + */ + break; + } + } while (slash); + } + + return 0; +} + static int cache_ref_iterator_peel(struct ref_iterator *ref_iterator, struct object_id *peeled) { @@ -509,7 +583,8 @@ struct ref_iterator *cache_ref_iterator_begin(struct ref_cache *cache, iter->cache = cache; iter->prime_dir = prime_dir; - if (cache_ref_iterator_seek(&iter->base, prefix) < 0) { + if (cache_ref_iterator_seek(&iter->base, prefix, + REF_ITERATOR_SEEK_SET_PREFIX) < 0) { ref_iterator_free(&iter->base); return NULL; } diff --git a/refs/refs-internal.h b/refs/refs-internal.h index 03f5df04d59..40c1c0f93dc 100644 --- a/refs/refs-internal.h +++ b/refs/refs-internal.h @@ -353,11 +353,12 @@ void base_ref_iterator_init(struct ref_iterator *iter, typedef int ref_iterator_advance_fn(struct ref_iterator *ref_iterator); /* - * Seek the iterator to the first reference matching the given prefix. Should - * behave the same as if a new iterator was created with the same prefix. + * Seek the iterator to the first matching reference. If the + * REF_ITERATOR_SEEK_SET_PREFIX flag is set, it would behave the same as if a + * new iterator was created with the provided refname as prefix. */ typedef int ref_iterator_seek_fn(struct ref_iterator *ref_iterator, - const char *prefix); + const char *refname, unsigned int flags); /* * Peels the current ref, returning 0 for success or -1 for failure. diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 4c3817f4ec1..c3d48cc4120 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -719,15 +719,20 @@ static int reftable_ref_iterator_advance(struct ref_iterator *ref_iterator) } static int reftable_ref_iterator_seek(struct ref_iterator *ref_iterator, - const char *prefix) + const char *refname, unsigned int flags) { struct reftable_ref_iterator *iter = (struct reftable_ref_iterator *)ref_iterator; - free(iter->prefix); - iter->prefix = xstrdup_or_null(prefix); - iter->prefix_len = prefix ? strlen(prefix) : 0; - iter->err = reftable_iterator_seek_ref(&iter->iter, prefix); + /* Unset any previously set prefix */ + FREE_AND_NULL(iter->prefix); + iter->prefix_len = 0; + + if (flags & REF_ITERATOR_SEEK_SET_PREFIX) { + iter->prefix = xstrdup_or_null(refname); + iter->prefix_len = refname ? strlen(refname) : 0; + } + iter->err = reftable_iterator_seek_ref(&iter->iter, refname); return iter->err; } @@ -839,7 +844,8 @@ static struct reftable_ref_iterator *ref_iterator_for_stack(struct reftable_ref_ if (ret) goto done; - ret = reftable_ref_iterator_seek(&iter->base, prefix); + ret = reftable_ref_iterator_seek(&iter->base, prefix, + REF_ITERATOR_SEEK_SET_PREFIX); if (ret) goto done; @@ -2042,7 +2048,8 @@ static int reftable_reflog_iterator_advance(struct ref_iterator *ref_iterator) } static int reftable_reflog_iterator_seek(struct ref_iterator *ref_iterator UNUSED, - const char *prefix UNUSED) + const char *refname UNUSED, + unsigned int flags UNUSED) { BUG("reftable reflog iterator cannot be seeked"); return -1; -- GitLab From 526530a16a3c345643afb324107b4a216b8d37ff Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Tue, 15 Jul 2025 13:28:29 +0200 Subject: [PATCH 04/11] ref-filter: remove unnecessary else clause In 'ref-filter.c', there is an 'else' clause within `do_filter_refs()`. This is unnecessary since the 'if' clause calls `die()`, which would exit the program. So let's remove the unnecessary 'else' clause. This improves readability since the indentation is also reduced and flow is simpler. Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- ref-filter.c | 60 ++++++++++++++++++++++++++-------------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/ref-filter.c b/ref-filter.c index 7a274633cfc..da663c7ac89 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -3199,37 +3199,37 @@ static int do_filter_refs(struct ref_filter *filter, unsigned int type, each_ref /* Simple per-ref filtering */ if (!filter->kind) die("filter_refs: invalid type"); - else { - /* - * For common cases where we need only branches or remotes or tags, - * we only iterate through those refs. If a mix of refs is needed, - * we iterate over all refs and filter out required refs with the help - * of filter_ref_kind(). - */ - if (filter->kind == FILTER_REFS_BRANCHES) - ret = refs_for_each_fullref_in(get_main_ref_store(the_repository), - "refs/heads/", NULL, - fn, cb_data); - else if (filter->kind == FILTER_REFS_REMOTES) - ret = refs_for_each_fullref_in(get_main_ref_store(the_repository), - "refs/remotes/", NULL, - fn, cb_data); - else if (filter->kind == FILTER_REFS_TAGS) - ret = refs_for_each_fullref_in(get_main_ref_store(the_repository), - "refs/tags/", NULL, fn, - cb_data); - else if (filter->kind & FILTER_REFS_REGULAR) - ret = for_each_fullref_in_pattern(filter, fn, cb_data); - /* - * When printing all ref types, HEAD is already included, - * so we don't want to print HEAD again. - */ - if (!ret && !(filter->kind & FILTER_REFS_ROOT_REFS) && - (filter->kind & FILTER_REFS_DETACHED_HEAD)) - refs_head_ref(get_main_ref_store(the_repository), fn, - cb_data); - } + /* + * For common cases where we need only branches or remotes or tags, + * we only iterate through those refs. If a mix of refs is needed, + * we iterate over all refs and filter out required refs with the help + * of filter_ref_kind(). + */ + if (filter->kind == FILTER_REFS_BRANCHES) + ret = refs_for_each_fullref_in(get_main_ref_store(the_repository), + "refs/heads/", NULL, + fn, cb_data); + else if (filter->kind == FILTER_REFS_REMOTES) + ret = refs_for_each_fullref_in(get_main_ref_store(the_repository), + "refs/remotes/", NULL, + fn, cb_data); + else if (filter->kind == FILTER_REFS_TAGS) + ret = refs_for_each_fullref_in(get_main_ref_store(the_repository), + "refs/tags/", NULL, fn, + cb_data); + else if (filter->kind & FILTER_REFS_REGULAR) + ret = for_each_fullref_in_pattern(filter, fn, cb_data); + + /* + * When printing all ref types, HEAD is already included, + * so we don't want to print HEAD again. + */ + if (!ret && !(filter->kind & FILTER_REFS_ROOT_REFS) && + (filter->kind & FILTER_REFS_DETACHED_HEAD)) + refs_head_ref(get_main_ref_store(the_repository), fn, + cb_data); + clear_contains_cache(&filter->internal.contains_cache); clear_contains_cache(&filter->internal.no_contains_cache); -- GitLab From dabecb9db2b25a32d72c90832f338849b665fdf9 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Tue, 15 Jul 2025 13:28:30 +0200 Subject: [PATCH 05/11] for-each-ref: introduce a '--start-after' option The `git-for-each-ref(1)` command is used to iterate over references present in a repository. In large repositories with millions of references, it would be optimal to paginate this output such that we can start iteration from a given reference. This would avoid having to iterate over all references from the beginning each time when paginating through results. The previous commit added 'seek' functionality to the reference backends. Utilize this and expose a '--start-after' option in 'git-for-each-ref(1)'. When used, the reference iteration seeks to the lexicographically next reference and iterates from there onward. This enables efficient pagination workflows, where the calling script can remember the last provided reference and use that as the starting point for the next set of references: git for-each-ref --count=100 git for-each-ref --count=100 --start-after=refs/heads/branch-100 git for-each-ref --count=100 --start-after=refs/heads/branch-200 Since the reference iterators only allow seeking to a specified marker via the `ref_iterator_seek()`, we introduce a helper function `start_ref_iterator_after()`, which seeks to next reference by simply adding (char) 1 to the marker. We must note that pagination always continues from the provided marker, as such any concurrent reference updates lexicographically behind the marker will not be output. Document the same. Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- Documentation/git-for-each-ref.adoc | 10 +- builtin/for-each-ref.c | 8 ++ ref-filter.c | 78 ++++++++--- ref-filter.h | 1 + t/t6302-for-each-ref-filter.sh | 194 ++++++++++++++++++++++++++++ 5 files changed, 272 insertions(+), 19 deletions(-) diff --git a/Documentation/git-for-each-ref.adoc b/Documentation/git-for-each-ref.adoc index 5ef89fc0fe3..ae61ba642a6 100644 --- a/Documentation/git-for-each-ref.adoc +++ b/Documentation/git-for-each-ref.adoc @@ -14,7 +14,7 @@ SYNOPSIS [--points-at=] [--merged[=]] [--no-merged[=]] [--contains[=]] [--no-contains[=]] - [--exclude= ...] + [--exclude= ...] [--start-after=] DESCRIPTION ----------- @@ -108,6 +108,14 @@ TAB %(refname)`. --include-root-refs:: List root refs (HEAD and pseudorefs) apart from regular refs. +--start-after=:: + Allows paginating the output by skipping references up to and including the + specified marker. When paging, it should be noted that references may be + deleted, modified or added between invocations. Output will only yield those + references which follow the marker lexicographically. Output begins from the + first reference that would come after the marker alphabetically. Cannot be + used with general pattern matching or custom sort options. + FIELD NAMES ----------- diff --git a/builtin/for-each-ref.c b/builtin/for-each-ref.c index 3d2207ec773..3f21598046a 100644 --- a/builtin/for-each-ref.c +++ b/builtin/for-each-ref.c @@ -13,6 +13,7 @@ static char const * const for_each_ref_usage[] = { N_("git for-each-ref [--points-at ]"), N_("git for-each-ref [--merged []] [--no-merged []]"), N_("git for-each-ref [--contains []] [--no-contains []]"), + N_("git for-each-ref [--start-after ]"), NULL }; @@ -44,6 +45,7 @@ int cmd_for_each_ref(int argc, OPT_GROUP(""), OPT_INTEGER( 0 , "count", &format.array_opts.max_count, N_("show only matched refs")), OPT_STRING( 0 , "format", &format.format, N_("format"), N_("format to use for the output")), + OPT_STRING( 0 , "start-after", &filter.start_after, N_("start-start"), N_("start iteration after the provided marker")), OPT__COLOR(&format.use_color, N_("respect format colors")), OPT_REF_FILTER_EXCLUDE(&filter), OPT_REF_SORT(&sorting_options), @@ -79,6 +81,9 @@ int cmd_for_each_ref(int argc, if (verify_ref_format(&format)) usage_with_options(for_each_ref_usage, opts); + if (filter.start_after && sorting_options.nr > 1) + die(_("cannot use --start-after with custom sort options")); + sorting = ref_sorting_options(&sorting_options); ref_sorting_set_sort_flags_all(sorting, REF_SORTING_ICASE, icase); filter.ignore_case = icase; @@ -100,6 +105,9 @@ int cmd_for_each_ref(int argc, filter.name_patterns = argv; } + if (filter.start_after && filter.name_patterns && filter.name_patterns[0]) + die(_("cannot use --start-after with patterns")); + if (include_root_refs) flags |= FILTER_REFS_ROOT_REFS | FILTER_REFS_DETACHED_HEAD; diff --git a/ref-filter.c b/ref-filter.c index da663c7ac89..c8a6b7f1aff 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2683,6 +2683,41 @@ static int filter_exclude_match(struct ref_filter *filter, const char *refname) return match_pattern(filter->exclude.v, refname, filter->ignore_case); } +/* + * We need to seek to the reference right after a given marker but excluding any + * matching references. So we seek to the lexicographically next reference. + */ +static int start_ref_iterator_after(struct ref_iterator *iter, const char *marker) +{ + struct strbuf sb = STRBUF_INIT; + int ret; + + strbuf_addstr(&sb, marker); + strbuf_addch(&sb, 1); + + ret = ref_iterator_seek(iter, sb.buf, 0); + + strbuf_release(&sb); + return ret; +} + +static int for_each_fullref_with_seek(struct ref_filter *filter, each_ref_fn cb, + void *cb_data, unsigned int flags) +{ + struct ref_iterator *iter; + int ret = 0; + + iter = refs_ref_iterator_begin(get_main_ref_store(the_repository), "", + NULL, 0, flags); + if (filter->start_after) + ret = start_ref_iterator_after(iter, filter->start_after); + + if (ret) + return ret; + + return do_for_each_ref_iterator(iter, cb, cb_data); +} + /* * This is the same as for_each_fullref_in(), but it tries to iterate * only over the patterns we'll care about. Note that it _doesn't_ do a full @@ -2694,8 +2729,8 @@ static int for_each_fullref_in_pattern(struct ref_filter *filter, { if (filter->kind & FILTER_REFS_ROOT_REFS) { /* In this case, we want to print all refs including root refs. */ - return refs_for_each_include_root_refs(get_main_ref_store(the_repository), - cb, cb_data); + return for_each_fullref_with_seek(filter, cb, cb_data, + DO_FOR_EACH_INCLUDE_ROOT_REFS); } if (!filter->match_as_path) { @@ -2704,8 +2739,7 @@ static int for_each_fullref_in_pattern(struct ref_filter *filter, * prefixes like "refs/heads/" etc. are stripped off, * so we have to look at everything: */ - return refs_for_each_fullref_in(get_main_ref_store(the_repository), - "", NULL, cb, cb_data); + return for_each_fullref_with_seek(filter, cb, cb_data, 0); } if (filter->ignore_case) { @@ -2714,14 +2748,12 @@ static int for_each_fullref_in_pattern(struct ref_filter *filter, * so just return everything and let the caller * sort it out. */ - return refs_for_each_fullref_in(get_main_ref_store(the_repository), - "", NULL, cb, cb_data); + return for_each_fullref_with_seek(filter, cb, cb_data, 0); } if (!filter->name_patterns[0]) { /* no patterns; we have to look at everything */ - return refs_for_each_fullref_in(get_main_ref_store(the_repository), - "", filter->exclude.v, cb, cb_data); + return for_each_fullref_with_seek(filter, cb, cb_data, 0); } return refs_for_each_fullref_in_prefixes(get_main_ref_store(the_repository), @@ -3189,6 +3221,7 @@ void filter_is_base(struct repository *r, static int do_filter_refs(struct ref_filter *filter, unsigned int type, each_ref_fn fn, void *cb_data) { + const char *prefix = NULL; int ret = 0; filter->kind = type & FILTER_REFS_KIND_MASK; @@ -3207,19 +3240,28 @@ static int do_filter_refs(struct ref_filter *filter, unsigned int type, each_ref * of filter_ref_kind(). */ if (filter->kind == FILTER_REFS_BRANCHES) - ret = refs_for_each_fullref_in(get_main_ref_store(the_repository), - "refs/heads/", NULL, - fn, cb_data); + prefix = "refs/heads/"; else if (filter->kind == FILTER_REFS_REMOTES) - ret = refs_for_each_fullref_in(get_main_ref_store(the_repository), - "refs/remotes/", NULL, - fn, cb_data); + prefix = "refs/remotes/"; else if (filter->kind == FILTER_REFS_TAGS) - ret = refs_for_each_fullref_in(get_main_ref_store(the_repository), - "refs/tags/", NULL, fn, - cb_data); - else if (filter->kind & FILTER_REFS_REGULAR) + prefix = "refs/tags/"; + + if (prefix) { + struct ref_iterator *iter; + + iter = refs_ref_iterator_begin(get_main_ref_store(the_repository), + "", NULL, 0, 0); + + if (filter->start_after) + ret = start_ref_iterator_after(iter, filter->start_after); + else if (prefix) + ret = ref_iterator_seek(iter, prefix, 1); + + if (!ret) + ret = do_for_each_ref_iterator(iter, fn, cb_data); + } else if (filter->kind & FILTER_REFS_REGULAR) { ret = for_each_fullref_in_pattern(filter, fn, cb_data); + } /* * When printing all ref types, HEAD is already included, diff --git a/ref-filter.h b/ref-filter.h index c98c4fbd4c1..f22ca94b49d 100644 --- a/ref-filter.h +++ b/ref-filter.h @@ -64,6 +64,7 @@ struct ref_array { struct ref_filter { const char **name_patterns; + const char *start_after; struct strvec exclude; struct oid_array points_at; struct commit_list *with_commit; diff --git a/t/t6302-for-each-ref-filter.sh b/t/t6302-for-each-ref-filter.sh index bb02b86c163..e097db6b02f 100755 --- a/t/t6302-for-each-ref-filter.sh +++ b/t/t6302-for-each-ref-filter.sh @@ -541,4 +541,198 @@ test_expect_success 'validate worktree atom' ' test_cmp expect actual ' +test_expect_success 'start after with empty value' ' + cat >expect <<-\EOF && + refs/heads/main + refs/heads/main_worktree + refs/heads/side + refs/odd/spot + refs/tags/annotated-tag + refs/tags/doubly-annotated-tag + refs/tags/doubly-signed-tag + refs/tags/foo1.10 + refs/tags/foo1.3 + refs/tags/foo1.6 + refs/tags/four + refs/tags/one + refs/tags/signed-tag + refs/tags/three + refs/tags/two + EOF + git for-each-ref --format="%(refname)" --start-after="" >actual && + test_cmp expect actual +' + +test_expect_success 'start after a specific reference' ' + cat >expect <<-\EOF && + refs/tags/annotated-tag + refs/tags/doubly-annotated-tag + refs/tags/doubly-signed-tag + refs/tags/foo1.10 + refs/tags/foo1.3 + refs/tags/foo1.6 + refs/tags/four + refs/tags/one + refs/tags/signed-tag + refs/tags/three + refs/tags/two + EOF + git for-each-ref --format="%(refname)" --start-after=refs/odd/spot >actual && + test_cmp expect actual +' + +test_expect_success 'start after a specific reference with partial match' ' + cat >expect <<-\EOF && + refs/odd/spot + refs/tags/annotated-tag + refs/tags/doubly-annotated-tag + refs/tags/doubly-signed-tag + refs/tags/foo1.10 + refs/tags/foo1.3 + refs/tags/foo1.6 + refs/tags/four + refs/tags/one + refs/tags/signed-tag + refs/tags/three + refs/tags/two + EOF + git for-each-ref --format="%(refname)" --start-after=refs/odd/sp >actual && + test_cmp expect actual +' + +test_expect_success 'start after, just behind a specific reference' ' + cat >expect <<-\EOF && + refs/odd/spot + refs/tags/annotated-tag + refs/tags/doubly-annotated-tag + refs/tags/doubly-signed-tag + refs/tags/foo1.10 + refs/tags/foo1.3 + refs/tags/foo1.6 + refs/tags/four + refs/tags/one + refs/tags/signed-tag + refs/tags/three + refs/tags/two + EOF + git for-each-ref --format="%(refname)" --start-after=refs/odd/parrot >actual && + test_cmp expect actual +' + +test_expect_success 'start after with specific directory match' ' + cat >expect <<-\EOF && + refs/odd/spot + refs/tags/annotated-tag + refs/tags/doubly-annotated-tag + refs/tags/doubly-signed-tag + refs/tags/foo1.10 + refs/tags/foo1.3 + refs/tags/foo1.6 + refs/tags/four + refs/tags/one + refs/tags/signed-tag + refs/tags/three + refs/tags/two + EOF + git for-each-ref --format="%(refname)" --start-after=refs/odd >actual && + test_cmp expect actual +' + +test_expect_success 'start after with specific directory and trailing slash' ' + cat >expect <<-\EOF && + refs/odd/spot + refs/tags/annotated-tag + refs/tags/doubly-annotated-tag + refs/tags/doubly-signed-tag + refs/tags/foo1.10 + refs/tags/foo1.3 + refs/tags/foo1.6 + refs/tags/four + refs/tags/one + refs/tags/signed-tag + refs/tags/three + refs/tags/two + EOF + git for-each-ref --format="%(refname)" --start-after=refs/odd/ >actual && + test_cmp expect actual +' + +test_expect_success 'start after, just behind a specific directory' ' + cat >expect <<-\EOF && + refs/odd/spot + refs/tags/annotated-tag + refs/tags/doubly-annotated-tag + refs/tags/doubly-signed-tag + refs/tags/foo1.10 + refs/tags/foo1.3 + refs/tags/foo1.6 + refs/tags/four + refs/tags/one + refs/tags/signed-tag + refs/tags/three + refs/tags/two + EOF + git for-each-ref --format="%(refname)" --start-after=refs/lost >actual && + test_cmp expect actual +' + +test_expect_success 'start after, overflow specific reference length' ' + cat >expect <<-\EOF && + refs/tags/annotated-tag + refs/tags/doubly-annotated-tag + refs/tags/doubly-signed-tag + refs/tags/foo1.10 + refs/tags/foo1.3 + refs/tags/foo1.6 + refs/tags/four + refs/tags/one + refs/tags/signed-tag + refs/tags/three + refs/tags/two + EOF + git for-each-ref --format="%(refname)" --start-after=refs/odd/spotnew >actual && + test_cmp expect actual +' + +test_expect_success 'start after, overflow specific reference path' ' + cat >expect <<-\EOF && + refs/tags/annotated-tag + refs/tags/doubly-annotated-tag + refs/tags/doubly-signed-tag + refs/tags/foo1.10 + refs/tags/foo1.3 + refs/tags/foo1.6 + refs/tags/four + refs/tags/one + refs/tags/signed-tag + refs/tags/three + refs/tags/two + EOF + git for-each-ref --format="%(refname)" --start-after=refs/odd/spot/new >actual && + test_cmp expect actual +' + +test_expect_success 'start after, last reference' ' + cat >expect <<-\EOF && + EOF + git for-each-ref --format="%(refname)" --start-after=refs/tags/two >actual && + test_cmp expect actual +' + +test_expect_success 'start after used with a pattern' ' + cat >expect <<-\EOF && + fatal: cannot use --start-after with patterns + EOF + test_must_fail git for-each-ref --format="%(refname)" --start-after=refs/odd/spot refs/tags 2>actual && + test_cmp expect actual +' + +test_expect_success 'start after used with custom sort order' ' + cat >expect <<-\EOF && + fatal: cannot use --start-after with custom sort options + EOF + test_must_fail git for-each-ref --format="%(refname)" --start-after=refs/odd/spot --sort=author 2>actual && + test_cmp expect actual +' + test_done -- GitLab From 2cfaecfc67ffbb49660131aef4e2b51d665b502d Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Mon, 21 Jul 2025 15:57:10 +0200 Subject: [PATCH 06/11] ref-filter: small cleanups and fixes This series contains a few of the small fixes and comments which I've gathered from reviews of my earlier series [1] to add the '--start-after' flag to 'git-for-each-ref(1)'. Individually each patch doesn't hold too much weight on its own, but together these small improvements add up. That said, if these patches are too small for the noise generated, we could simply drop it or combine some commits together. This is based on top of 3f2a94875d (The twelfth batch, 2025-07-21) with 'kn/for-each-ref-skip' merged in. [1]: https://lore.kernel.org/r/20250701-306-git-for-each-ref-pagination-v1-0-4f0ae7c0688f@gmail.com Signed-off-by: Karthik Nayak --- Changes in v2: - In Patch 2/5, fix a whitespace issue and convert 'git-for-each-ref(1)' documentation to use the new synopsis block. - Link to v1: https://lore.kernel.org/r/20250724-kn-small-cleanups-v1-0-0c70f591de3e@gmail.com --- b4-submit-tracking --- { "series": { "revision": 2, "change-id": "20250721-kn-small-cleanups-a01fe5d2756d", "prefixes": [], "history": { "v1": [ "20250724-kn-small-cleanups-v1-0-0c70f591de3e@gmail.com" ] } } } -- GitLab From cb34545b3a3006ee56973aab7bcfe448af2e2a69 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Mon, 21 Jul 2025 17:33:23 +0200 Subject: [PATCH 07/11] ref-cache: use 'size_t' instead of int for length The commit 090eb5336c (refs: selectively set prefix in the seek functions, 2025-07-15) modified the ref-cache iterator to support seeking to a specified marker without setting the prefix. The commit adds and uses an integer 'len' to capture the length of the seek marker to compare with the entries of a given directory. Since the type of the variable is 'int', this is met with a typecast of converting a `strlen` to 'int' so it can be assigned to the 'len' variable. This is whole operation is a bit wrong: 1. Since the 'len' variable is eventually used in a 'strncmp', it should have been of type 'size_t'. 2. This also truncates the value provided from 'strlen' to an int, which could cause a large refname to produce a negative number. Let's do the correct thing here and simply use 'size_t' for `len`. Signed-off-by: Karthik Nayak --- refs/ref-cache.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/refs/ref-cache.c b/refs/ref-cache.c index 1d95b56d400..8df7ae43e56 100644 --- a/refs/ref-cache.c +++ b/refs/ref-cache.c @@ -498,13 +498,14 @@ static int cache_ref_iterator_seek(struct ref_iterator *ref_iterator, * indexing to each level as needed. */ do { - int len, idx; + int idx; + size_t len; int cmp = 0; sort_ref_dir(dir); slash = strchr(slash, '/'); - len = slash ? slash - refname : (int)strlen(refname); + len = slash ? (size_t)(slash - refname) : strlen(refname); for (idx = 0; idx < dir->nr; idx++) { cmp = strncmp(refname, dir->entries[idx]->name, len); -- GitLab From 3fab0eda31f19a8042a5d31821041634e71a6904 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Mon, 21 Jul 2025 20:10:37 +0200 Subject: [PATCH 08/11] for-each-ref: fix documentation argument ordering Improve the 'git-for-each-ref(1)' documentation with two corrections: 1. Add parentheses around `--exclude=` to indicate this option can be repeated as a complete unit. 2. Move `--stdin | ...` to the end, after all flags, since `` is a positional argument that should appear last in the argument list. While here, change to using the synopsis block which will automatically format placeholders in italics and keywords in monospace. Signed-off-by: Karthik Nayak --- Documentation/git-for-each-ref.adoc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/git-for-each-ref.adoc b/Documentation/git-for-each-ref.adoc index ae61ba642a6..ec3b10e14a2 100644 --- a/Documentation/git-for-each-ref.adoc +++ b/Documentation/git-for-each-ref.adoc @@ -7,14 +7,14 @@ git-for-each-ref - Output information on each ref SYNOPSIS -------- -[verse] -'git for-each-ref' [--count=] [--shell|--perl|--python|--tcl] +[synopsis] +git for-each-ref [--count=] [--shell|--perl|--python|--tcl] [(--sort=)...] [--format=] - [--include-root-refs] [ --stdin | ... ] - [--points-at=] + [--include-root-refs] [--points-at=] [--merged[=]] [--no-merged[=]] [--contains[=]] [--no-contains[=]] - [--exclude= ...] [--start-after=] + [(--exclude=)...] [--start-after=] + [ --stdin | ... ] DESCRIPTION ----------- -- GitLab From 49e5aa69f2a67649e6eb47591bf3a31cc00084b0 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Tue, 22 Jul 2025 09:58:27 +0200 Subject: [PATCH 09/11] for-each-ref: reword the documentation for '--start-after' The documentation for '--start-after' states that the flag cannot be used with general pattern matching. This is a bit vague, since there is no clear understanding about what 'general' means here. Rewrite the sentence to be more specific. While here, fix a typo in the 'OPT_STRING'. Helped-by: Junio C Hamano Signed-off-by: Karthik Nayak --- Documentation/git-for-each-ref.adoc | 3 ++- builtin/for-each-ref.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Documentation/git-for-each-ref.adoc b/Documentation/git-for-each-ref.adoc index ec3b10e14a2..060940904da 100644 --- a/Documentation/git-for-each-ref.adoc +++ b/Documentation/git-for-each-ref.adoc @@ -114,7 +114,8 @@ TAB %(refname)`. deleted, modified or added between invocations. Output will only yield those references which follow the marker lexicographically. Output begins from the first reference that would come after the marker alphabetically. Cannot be - used with general pattern matching or custom sort options. + used with `--sort=` or `--stdin` options, or the __ argument(s) + to limit the refs. FIELD NAMES ----------- diff --git a/builtin/for-each-ref.c b/builtin/for-each-ref.c index 3f21598046a..79a79212c92 100644 --- a/builtin/for-each-ref.c +++ b/builtin/for-each-ref.c @@ -45,7 +45,7 @@ int cmd_for_each_ref(int argc, OPT_GROUP(""), OPT_INTEGER( 0 , "count", &format.array_opts.max_count, N_("show only matched refs")), OPT_STRING( 0 , "format", &format.format, N_("format"), N_("format to use for the output")), - OPT_STRING( 0 , "start-after", &filter.start_after, N_("start-start"), N_("start iteration after the provided marker")), + OPT_STRING( 0 , "start-after", &filter.start_after, N_("start-after"), N_("start iteration after the provided marker")), OPT__COLOR(&format.use_color, N_("respect format colors")), OPT_REF_FILTER_EXCLUDE(&filter), OPT_REF_SORT(&sorting_options), -- GitLab From 93babd3e71e0de1601378f9938f650a7d3c1c749 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Tue, 22 Jul 2025 09:58:46 +0200 Subject: [PATCH 10/11] t6302: add test combining '--start-after' with '--exclude' The '--start-after' doesn't explicitly mention being compatible with the '--exclude' flag, generally only incompatibility is explicitly called out. However, it would be nice to test the compatibility between the two to avoid future regressions. Let's do that. Signed-off-by: Karthik Nayak --- t/t6302-for-each-ref-filter.sh | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/t/t6302-for-each-ref-filter.sh b/t/t6302-for-each-ref-filter.sh index e097db6b02f..9b80ea1e3b7 100755 --- a/t/t6302-for-each-ref-filter.sh +++ b/t/t6302-for-each-ref-filter.sh @@ -712,6 +712,25 @@ test_expect_success 'start after, overflow specific reference path' ' test_cmp expect actual ' +test_expect_success 'start after, with exclude pattern' ' + cat >expect <<-\EOF && + refs/tags/annotated-tag + refs/tags/doubly-annotated-tag + refs/tags/doubly-signed-tag + refs/tags/foo1.10 + refs/tags/foo1.3 + refs/tags/foo1.6 + refs/tags/four + refs/tags/one + refs/tags/signed-tag + refs/tags/three + refs/tags/two + EOF + git for-each-ref --format="%(refname)" --start-after=refs/odd/spot \ + --exclude=refs/tags/foo >actual && + test_cmp expect actual +' + test_expect_success 'start after, last reference' ' cat >expect <<-\EOF && EOF -- GitLab From ce90149919358b84779ffb70f29f8d4aa1072abe Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Tue, 22 Jul 2025 10:06:09 +0200 Subject: [PATCH 11/11] ref-filter: use REF_ITERATOR_SEEK_SET_PREFIX instead of '1' In the commit 51511d68f4 (for-each-ref: introduce a '--start-after' option, 2025-07-15), for introducing the '--start-after' flag, the `ref_iterator_seek()` was modified to also accept a flag. This was to allow the function to also set the prefix when 'REF_ITERATOR_SEEK_SET_PREFIX' was set. In `do_filter_refs()` instead of passing the flag, we pass in '1' which is the value of the flag. While this works, this is definitely hard to read and introduces inconsistency. Change it to use the flag. While here, remove the unnecessary 'if (prefix)' clause in the 'else' statement, since the block already checks for 'prefix'. Reported-by: Junio C Hamano Signed-off-by: Karthik Nayak --- ref-filter.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ref-filter.c b/ref-filter.c index d5a146de87f..4edf0df4cc2 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -3254,8 +3254,9 @@ static int do_filter_refs(struct ref_filter *filter, unsigned int type, each_ref if (filter->start_after) ret = start_ref_iterator_after(iter, filter->start_after); - else if (prefix) - ret = ref_iterator_seek(iter, prefix, 1); + else + ret = ref_iterator_seek(iter, prefix, + REF_ITERATOR_SEEK_SET_PREFIX); if (!ret) ret = do_for_each_ref_iterator(iter, fn, cb_data); -- GitLab