From 7e3c63163d9f4854480226845bdfa5f82e20bf6a Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Mon, 14 Jul 2025 11:35:16 +0200 Subject: [PATCH 1/8] refs/reftable: add consistency checks The reference subsystems allows for adding backend specific consistency checks. These checks are run as part of 'git refs verify'. While the files backend has some consistency checks added, the reftable backend currently has none. This series first tightens the reftable backend to make it a little more strict and then also adds the required infrastructure and some simple consistency checks. Since the reftable backend is treated as a library within the Git codebase, we don't want to spillover our internal fsck implementation into the library. At the same time, the fsck checks need to access internal structures of the reftable library which aren't exposed outside the library. So we solve this by adding a 'reftable/fsck.[ch]' which implements and exposes a checker for the reftable library and returns specific errors as defined by the library. We then add glue code within 'refs/reftable-backend.c' to map these errors to errors which Git's fsck implementation would understand. This allows us to separate concerns. We add the following consistency checks: 1. Check for validating the reftable table name. This is treated as a warning since the reftable specification only suggests a table name but doesn't enforce it. Also there is a difference in the table name used in Git vs that in jGit. We tighten the reftable backend by raising a REFTABLE_FORMAT_ERROR error when: 1. The 'tables.list' file doesn't have a trailing newline. --- Changes in v6: - In t/t0614-reftable-fsck.sh, create branches instead of root refs. This worked becuase we don't have reference level checks still implemented for reftables. Let's avoid confusion of a breaking test when we add reference level checks. - Link to v5: https://lore.kernel.org/r/20251006-228-reftable-introduce-consistency-checks-v5-0-f196d386214f@gmail.com Changes in v5: - Added documentation around the return value of 'parse_names()'. - Added a test to validate that 'git refs verify' doesn't barf against a clean working repository with multiple reftable tables. - Link to v4: https://lore.kernel.org/all/20250926-228-reftable-introduce-consistency-checks-v4-0-c96fd8551c0d@gmail.com Changes in v4: - The biggest change is to iterate over the tables in a reftable stack for consistency checks instead of all files inside the REFTABLE_DIR. This avoids all race conditions. Also, since we only check the tables in a stack, it no longer makes sense to check file type. - The discussion about update indices was concluded that tables indices in a stack must be strictly monotonically increasing. While modifying the code to do the same. I realized that we already have this check in 'reftable_addition_add()' where we check while adding a new table to the stack: `wr->min_update_index < add->next_update_index`. So I've dropped this patch from the series. - Change parse_names() to accept the output string array as an argument and return an error instead. This makes the flow a little easier to understand. - Link to v3: https://lore.kernel.org/r/20250918-228-reftable-introduce-consistency-checks-v3-0-271af03eb34d@gmail.com Changes in v3: - I took a long hiatus from this topic, mostly due to other priorities. This has been rebased on top of '92c87bdc40 (The eighth batch, 2025-09-12)' since there were conflicts. - Junio suggested that two of the consistency checks (trailing newlines, sequential update indices for tables in stack) should actually be checked during runtime. I have made that change in this version. - I've cleaned up the code and modularized the 'reftable/fsck.c' code. - Invalid table name emits a warning, since the reftable spec doesn't enforce it but only makes a suggestion. - Broken down the commits to make it easier to review. - Link to v2: https://lore.kernel.org/r/20250902-228-reftable-introduce-consistency-checks-v2-0-4f96b3834779@gmail.com Changes in v2: - Ensured that 'struct reftable_fsck_info' is passed around as a pointer, this provides a smaller footprint (pointer size vs struct size). - Run FSCK checks for other worktrees too, even if one of them fails. - Separate messaging for table name vs table check and add additional test. - Use the relative path in messages used. - Small style and typo fixes. - Link to v1: https://lore.kernel.org/r/20250819-228-reftable-introduce-consistency-checks-v1-0-8b8f6879fa9e@gmail.com --- b4-submit-tracking --- # This section is used internally by b4 prep for tracking purposes. { "series": { "revision": 6, "change-id": "20250714-228-reftable-introduce-consistency-checks-379ded93c544", "prefixes": [], "history": { "v1": [ "20250819-228-reftable-introduce-consistency-checks-v1-0-8b8f6879fa9e@gmail.com" ], "v2": [ "20250902-228-reftable-introduce-consistency-checks-v2-0-4f96b3834779@gmail.com" ], "v3": [ "20250918-228-reftable-introduce-consistency-checks-v3-0-271af03eb34d@gmail.com" ], "v5": [ "20251006-228-reftable-introduce-consistency-checks-v5-0-f196d386214f@gmail.com" ] } } } -- GitLab From 671a79a3af519ff9cb56a3c91255f08528c8dbf2 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Fri, 8 Aug 2025 15:56:03 +0200 Subject: [PATCH 2/8] refs: remove unused headers In the 'refs/' namespace, some of the included header files are not needed, let's remove them. Signed-off-by: Karthik Nayak --- refs/debug.c | 1 - refs/files-backend.c | 1 - refs/reftable-backend.c | 1 - 3 files changed, 3 deletions(-) diff --git a/refs/debug.c b/refs/debug.c index 1cb955961ee..697adbd0dc3 100644 --- a/refs/debug.c +++ b/refs/debug.c @@ -1,7 +1,6 @@ #include "git-compat-util.h" #include "hex.h" #include "refs-internal.h" -#include "string-list.h" #include "trace.h" static struct trace_key trace_refs = TRACE_KEY_INIT(REFS); diff --git a/refs/files-backend.c b/refs/files-backend.c index 1b3bf26add9..d4fb0334176 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -20,7 +20,6 @@ #include "../dir-iterator.h" #include "../lockfile.h" #include "../object.h" -#include "../object-file.h" #include "../path.h" #include "../dir.h" #include "../chdir-notify.h" diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 9e889da2ff4..2152349cb9e 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -11,7 +11,6 @@ #include "../hex.h" #include "../iterator.h" #include "../ident.h" -#include "../lockfile.h" #include "../object.h" #include "../path.h" #include "../refs.h" -- GitLab From dbf9df8d3c0a61f26573dc37c27f30f1421a0417 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Mon, 8 Sep 2025 21:11:27 +0200 Subject: [PATCH 3/8] refs: move consistency check msg to generic layer The files-backend prints a message before the consistency checks run. Move this to the generic layer so both the files and reftable backend can benefit from this message. Signed-off-by: Karthik Nayak --- refs.c | 4 ++++ refs/files-backend.c | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/refs.c b/refs.c index 4ff55cf24f6..4a7c394226e 100644 --- a/refs.c +++ b/refs.c @@ -32,6 +32,7 @@ #include "commit.h" #include "wildmatch.h" #include "ident.h" +#include "fsck.h" /* * List of all available backends @@ -323,6 +324,9 @@ int check_refname_format(const char *refname, int flags) int refs_fsck(struct ref_store *refs, struct fsck_options *o, struct worktree *wt) { + if (o->verbose) + fprintf_ln(stderr, _("Checking references consistency")); + return refs->be->fsck(refs, o, wt); } diff --git a/refs/files-backend.c b/refs/files-backend.c index d4fb0334176..603b1343d8b 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -3906,8 +3906,6 @@ static int files_fsck_refs(struct ref_store *ref_store, NULL, }; - if (o->verbose) - fprintf_ln(stderr, _("Checking references consistency")); return files_fsck_refs_dir(ref_store, o, "refs", wt, fsck_refs_fn); } -- GitLab From dbc478dbe65d35a067d536b549747ef21a110bfc Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Wed, 24 Sep 2025 20:53:34 +0200 Subject: [PATCH 4/8] reftable: check for trailing newline in 'tables.list' In the reftable format, the 'tables.list' file contains a newline separated list of tables. While we parse this file, we do not check or care about the last newline. Tighten the parser in `parse_names()` to return an appropriate error if the last newline is missing. This requires modification to `parse_names()` to now return the error while accepting the output as a third argument. Signed-off-by: Karthik Nayak --- reftable/basics.c | 37 +++++++++++++++++++++----------- reftable/basics.h | 7 +++--- reftable/stack.c | 7 +----- t/unit-tests/u-reftable-basics.c | 24 +++++++++++++++++---- 4 files changed, 49 insertions(+), 26 deletions(-) diff --git a/reftable/basics.c b/reftable/basics.c index 9988ebd635e..e969927b615 100644 --- a/reftable/basics.c +++ b/reftable/basics.c @@ -195,44 +195,55 @@ size_t names_length(const char **names) return p - names; } -char **parse_names(char *buf, int size) +int parse_names(char *buf, int size, char ***out) { char **names = NULL; size_t names_cap = 0; size_t names_len = 0; char *p = buf; char *end = buf + size; + int err = 0; while (p < end) { char *next = strchr(p, '\n'); - if (next && next < end) { - *next = 0; + if (!next) { + err = REFTABLE_FORMAT_ERROR; + goto done; + } else if (next < end) { + *next = '\0'; } else { next = end; } + if (p < next) { if (REFTABLE_ALLOC_GROW(names, names_len + 1, - names_cap)) - goto err; + names_cap)) { + err = REFTABLE_OUT_OF_MEMORY_ERROR; + goto done; + } names[names_len] = reftable_strdup(p); - if (!names[names_len++]) - goto err; + if (!names[names_len++]) { + err = REFTABLE_OUT_OF_MEMORY_ERROR; + goto done; + } } p = next + 1; } - if (REFTABLE_ALLOC_GROW(names, names_len + 1, names_cap)) - goto err; + if (REFTABLE_ALLOC_GROW(names, names_len + 1, names_cap)) { + err = REFTABLE_OUT_OF_MEMORY_ERROR; + goto done; + } names[names_len] = NULL; - return names; - -err: + *out = names; + return 0; +done: for (size_t i = 0; i < names_len; i++) reftable_free(names[i]); reftable_free(names); - return NULL; + return err; } int names_equal(const char **a, const char **b) diff --git a/reftable/basics.h b/reftable/basics.h index 7d22f962610..e4b83b2b03f 100644 --- a/reftable/basics.h +++ b/reftable/basics.h @@ -167,10 +167,11 @@ void free_names(char **a); /* * Parse a newline separated list of names. `size` is the length of the buffer, - * without terminating '\0'. Empty names are discarded. Returns a `NULL` - * pointer when allocations fail. + * without terminating '\0'. Empty names are discarded. + * + * Returns 0 on success, a reftable error code on error. */ -char **parse_names(char *buf, int size); +int parse_names(char *buf, int size, char ***out); /* compares two NULL-terminated arrays of strings. */ int names_equal(const char **a, const char **b); diff --git a/reftable/stack.c b/reftable/stack.c index f91ce50bcdd..65d89820bd0 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -109,12 +109,7 @@ static int fd_read_lines(int fd, char ***namesp) } buf[size] = 0; - *namesp = parse_names(buf, size); - if (!*namesp) { - err = REFTABLE_OUT_OF_MEMORY_ERROR; - goto done; - } - + err = parse_names(buf, size, namesp); done: reftable_free(buf); return err; diff --git a/t/unit-tests/u-reftable-basics.c b/t/unit-tests/u-reftable-basics.c index a0471083e73..73566ed0eb7 100644 --- a/t/unit-tests/u-reftable-basics.c +++ b/t/unit-tests/u-reftable-basics.c @@ -9,6 +9,7 @@ license that can be found in the LICENSE file or at #include "unit-test.h" #include "lib-reftable.h" #include "reftable/basics.h" +#include "reftable/reftable-error.h" struct integer_needle_lesseq_args { int needle; @@ -79,14 +80,18 @@ void test_reftable_basics__names_equal(void) void test_reftable_basics__parse_names(void) { char in1[] = "line\n"; - char in2[] = "a\nb\nc"; - char **out = parse_names(in1, strlen(in1)); + char in2[] = "a\nb\nc\n"; + char **out = NULL; + int err = parse_names(in1, strlen(in1), &out); + cl_assert(err == 0); cl_assert(out != NULL); cl_assert_equal_s(out[0], "line"); cl_assert(!out[1]); free_names(out); - out = parse_names(in2, strlen(in2)); + out = NULL; + err = parse_names(in2, strlen(in2), &out); + cl_assert(err == 0); cl_assert(out != NULL); cl_assert_equal_s(out[0], "a"); cl_assert_equal_s(out[1], "b"); @@ -95,10 +100,21 @@ void test_reftable_basics__parse_names(void) free_names(out); } +void test_reftable_basics__parse_names_missing_newline(void) +{ + char in1[] = "line\nline2"; + char **out = NULL; + int err = parse_names(in1, strlen(in1), &out); + cl_assert(err == REFTABLE_FORMAT_ERROR); + cl_assert(out == NULL); +} + void test_reftable_basics__parse_names_drop_empty_string(void) { char in[] = "a\n\nb\n"; - char **out = parse_names(in, strlen(in)); + char **out = NULL; + int err = parse_names(in, strlen(in), &out); + cl_assert(err == 0); cl_assert(out != NULL); cl_assert_equal_s(out[0], "a"); /* simply '\n' should be dropped as empty string */ -- GitLab From 062d66f7ed9e6df71a251dcb747c83f7743ee78f Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Mon, 8 Sep 2025 14:54:02 +0200 Subject: [PATCH 5/8] Documentation/fsck-msgids: remove duplicate msg id The `gitmodulesLarge` is repeated twice. Remove the second duplicate. Signed-off-by: Karthik Nayak --- Documentation/fsck-msgids.adoc | 3 --- 1 file changed, 3 deletions(-) diff --git a/Documentation/fsck-msgids.adoc b/Documentation/fsck-msgids.adoc index 0ba4f9a27e4..1c912615f99 100644 --- a/Documentation/fsck-msgids.adoc +++ b/Documentation/fsck-msgids.adoc @@ -104,9 +104,6 @@ `gitmodulesParse`:: (INFO) Could not parse `.gitmodules` blob. -`gitmodulesLarge`; - (ERROR) `.gitmodules` blob is too large to parse. - `gitmodulesPath`:: (ERROR) `.gitmodules` path is invalid. -- GitLab From a70974a39c997fc29f288cf3666ac9a95bbe2760 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Mon, 8 Sep 2025 14:57:11 +0200 Subject: [PATCH 6/8] fsck: order 'fsck_msg_type' alphabetically The list of 'fsck_msg_type' seem to be alphabetically ordered, but there are a few small misses. Fix this by sorting the sub-sections of the list to maintain alphabetical ordering. Signed-off-by: Karthik Nayak --- fsck.h | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/fsck.h b/fsck.h index dd7df3d5b36..6b0db235e02 100644 --- a/fsck.h +++ b/fsck.h @@ -33,15 +33,27 @@ enum fsck_msg_type { FUNC(BAD_PACKED_REF_ENTRY, ERROR) \ FUNC(BAD_PACKED_REF_HEADER, ERROR) \ FUNC(BAD_PARENT_SHA1, ERROR) \ + FUNC(BAD_REFERENT_NAME, ERROR) \ FUNC(BAD_REF_CONTENT, ERROR) \ FUNC(BAD_REF_FILETYPE, ERROR) \ FUNC(BAD_REF_NAME, ERROR) \ - FUNC(BAD_REFERENT_NAME, ERROR) \ FUNC(BAD_TIMEZONE, ERROR) \ FUNC(BAD_TREE, ERROR) \ FUNC(BAD_TREE_SHA1, ERROR) \ FUNC(BAD_TYPE, ERROR) \ FUNC(DUPLICATE_ENTRIES, ERROR) \ + FUNC(GITATTRIBUTES_BLOB, ERROR) \ + FUNC(GITATTRIBUTES_LARGE, ERROR) \ + FUNC(GITATTRIBUTES_LINE_LENGTH, ERROR) \ + FUNC(GITATTRIBUTES_MISSING, ERROR) \ + FUNC(GITMODULES_BLOB, ERROR) \ + FUNC(GITMODULES_LARGE, ERROR) \ + FUNC(GITMODULES_MISSING, ERROR) \ + FUNC(GITMODULES_NAME, ERROR) \ + FUNC(GITMODULES_PATH, ERROR) \ + FUNC(GITMODULES_SYMLINK, ERROR) \ + FUNC(GITMODULES_UPDATE, ERROR) \ + FUNC(GITMODULES_URL, ERROR) \ FUNC(MISSING_AUTHOR, ERROR) \ FUNC(MISSING_COMMITTER, ERROR) \ FUNC(MISSING_EMAIL, ERROR) \ @@ -60,39 +72,27 @@ enum fsck_msg_type { FUNC(TREE_NOT_SORTED, ERROR) \ FUNC(UNKNOWN_TYPE, ERROR) \ FUNC(ZERO_PADDED_DATE, ERROR) \ - FUNC(GITMODULES_MISSING, ERROR) \ - FUNC(GITMODULES_BLOB, ERROR) \ - FUNC(GITMODULES_LARGE, ERROR) \ - FUNC(GITMODULES_NAME, ERROR) \ - FUNC(GITMODULES_SYMLINK, ERROR) \ - FUNC(GITMODULES_URL, ERROR) \ - FUNC(GITMODULES_PATH, ERROR) \ - FUNC(GITMODULES_UPDATE, ERROR) \ - FUNC(GITATTRIBUTES_MISSING, ERROR) \ - FUNC(GITATTRIBUTES_LARGE, ERROR) \ - FUNC(GITATTRIBUTES_LINE_LENGTH, ERROR) \ - FUNC(GITATTRIBUTES_BLOB, ERROR) \ /* warnings */ \ FUNC(EMPTY_NAME, WARN) \ FUNC(FULL_PATHNAME, WARN) \ FUNC(HAS_DOT, WARN) \ FUNC(HAS_DOTDOT, WARN) \ FUNC(HAS_DOTGIT, WARN) \ + FUNC(LARGE_PATHNAME, WARN) \ FUNC(NULL_SHA1, WARN) \ - FUNC(ZERO_PADDED_FILEMODE, WARN) \ FUNC(NUL_IN_COMMIT, WARN) \ - FUNC(LARGE_PATHNAME, WARN) \ + FUNC(ZERO_PADDED_FILEMODE, WARN) \ /* infos (reported as warnings, but ignored by default) */ \ FUNC(BAD_FILEMODE, INFO) \ + FUNC(BAD_TAG_NAME, INFO) \ FUNC(EMPTY_PACKED_REFS_FILE, INFO) \ - FUNC(GITMODULES_PARSE, INFO) \ - FUNC(GITIGNORE_SYMLINK, INFO) \ FUNC(GITATTRIBUTES_SYMLINK, INFO) \ + FUNC(GITIGNORE_SYMLINK, INFO) \ + FUNC(GITMODULES_PARSE, INFO) \ FUNC(MAILMAP_SYMLINK, INFO) \ - FUNC(BAD_TAG_NAME, INFO) \ FUNC(MISSING_TAGGER_ENTRY, INFO) \ - FUNC(SYMLINK_REF, INFO) \ FUNC(REF_MISSING_NEWLINE, INFO) \ + FUNC(SYMLINK_REF, INFO) \ FUNC(SYMREF_TARGET_IS_NOT_A_REF, INFO) \ FUNC(TRAILING_REF_CONTENT, INFO) \ /* ignored (elevated when requested) */ \ -- GitLab From a1dea4335e4d6e4237066434df9d984049259c0c Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Fri, 8 Aug 2025 15:56:03 +0200 Subject: [PATCH 7/8] reftable: add code to facilitate consistency checks The `git refs verify` command is used to run consistency checks on the reference backends. This command is also invoked when users run 'git fsck'. While the files-backend has some fsck checks added, the reftable backend lacks such checks. Let's add the required infrastructure and a check to test for the files present in the reftable directory. Since the reftable library is treated as an independent library we should ensure that the library code works independently without knowledge about Git's internals. To do this, add both 'reftable/fsck.c' and 'reftable/reftable-fsck.h'. Which provide an entry point 'reftable_fsck_check' for running fsck checks over a provided reftable stack. The callee provides the function with callbacks to handle issue and information reporting. The added check, goes over all tables in the reftable stack validates that they have a valid name. It not, it raises an error. While here, move 'reftable/error.o' in the Makefile to retain lexicographic ordering. Signed-off-by: Karthik Nayak --- Makefile | 3 +- meson.build | 1 + reftable/fsck.c | 100 +++++++++++++++++++++++++++++++++++++++ reftable/reftable-fsck.h | 40 ++++++++++++++++ 4 files changed, 143 insertions(+), 1 deletion(-) create mode 100644 reftable/fsck.c create mode 100644 reftable/reftable-fsck.h diff --git a/Makefile b/Makefile index 4c95affadb5..03fbaf2b21a 100644 --- a/Makefile +++ b/Makefile @@ -2732,9 +2732,10 @@ XDIFF_OBJS += xdiff/xutils.o xdiff-objs: $(XDIFF_OBJS) REFTABLE_OBJS += reftable/basics.o -REFTABLE_OBJS += reftable/error.o REFTABLE_OBJS += reftable/block.o REFTABLE_OBJS += reftable/blocksource.o +REFTABLE_OBJS += reftable/error.o +REFTABLE_OBJS += reftable/fsck.o REFTABLE_OBJS += reftable/iter.o REFTABLE_OBJS += reftable/merged.o REFTABLE_OBJS += reftable/pq.o diff --git a/meson.build b/meson.build index b3dfcc04972..89142529105 100644 --- a/meson.build +++ b/meson.build @@ -452,6 +452,7 @@ libgit_sources = [ 'reftable/error.c', 'reftable/block.c', 'reftable/blocksource.c', + 'reftable/fsck.c', 'reftable/iter.c', 'reftable/merged.c', 'reftable/pq.c', diff --git a/reftable/fsck.c b/reftable/fsck.c new file mode 100644 index 00000000000..26b9115b14a --- /dev/null +++ b/reftable/fsck.c @@ -0,0 +1,100 @@ +#include "basics.h" +#include "reftable-fsck.h" +#include "reftable-table.h" +#include "stack.h" + +static bool table_has_valid_name(const char *name) +{ + const char *ptr = name; + char *endptr; + + /* strtoull doesn't set errno on success */ + errno = 0; + + strtoull(ptr, &endptr, 16); + if (errno) + return false; + ptr = endptr; + + if (*ptr != '-') + return false; + ptr++; + + strtoull(ptr, &endptr, 16); + if (errno) + return false; + ptr = endptr; + + if (*ptr != '-') + return false; + ptr++; + + strtoul(ptr, &endptr, 16); + if (errno) + return false; + ptr = endptr; + + if (strcmp(ptr, ".ref") && strcmp(ptr, ".log")) + return false; + + return true; +} + +typedef int (*table_check_fn)(struct reftable_table *table, + reftable_fsck_report_fn report_fn, + void *cb_data); + +static int table_check_name(struct reftable_table *table, + reftable_fsck_report_fn report_fn, + void *cb_data) +{ + if (!table_has_valid_name(table->name)) { + struct reftable_fsck_info info; + + info.error = REFTABLE_FSCK_ERROR_TABLE_NAME; + info.msg = "invalid reftable table name"; + info.path = table->name; + + return report_fn(&info, cb_data); + } + + return 0; +} + +static int table_checks(struct reftable_table *table, + reftable_fsck_report_fn report_fn, + reftable_fsck_verbose_fn verbose_fn UNUSED, + void *cb_data) +{ + table_check_fn table_check_fns[] = { + table_check_name, + NULL, + }; + int err = 0; + + for (size_t i = 0; table_check_fns[i]; i++) + err |= table_check_fns[i](table, report_fn, cb_data); + + return err; +} + +int reftable_fsck_check(struct reftable_stack *stack, + reftable_fsck_report_fn report_fn, + reftable_fsck_verbose_fn verbose_fn, + void *cb_data) +{ + struct reftable_buf msg = REFTABLE_BUF_INIT; + int err = 0; + + for (size_t i = 0; i < stack->tables_len; i++) { + reftable_buf_reset(&msg); + reftable_buf_addstr(&msg, "Checking table: "); + reftable_buf_addstr(&msg, stack->tables[i]->name); + verbose_fn(msg.buf, cb_data); + + err |= table_checks(stack->tables[i], report_fn, verbose_fn, cb_data); + } + + reftable_buf_release(&msg); + return err; +} diff --git a/reftable/reftable-fsck.h b/reftable/reftable-fsck.h new file mode 100644 index 00000000000..007a392cf90 --- /dev/null +++ b/reftable/reftable-fsck.h @@ -0,0 +1,40 @@ +#ifndef REFTABLE_FSCK_H +#define REFTABLE_FSCK_H + +#include "reftable-stack.h" + +enum reftable_fsck_error { + /* Invalid table name */ + REFTABLE_FSCK_ERROR_TABLE_NAME = 0, + /* Used for bounds checking, must be last */ + REFTABLE_FSCK_MAX_VALUE, +}; + +/* Represents an individual error encountered during the FSCK checks. */ +struct reftable_fsck_info { + enum reftable_fsck_error error; + const char *msg; + const char *path; +}; + +typedef int reftable_fsck_report_fn(struct reftable_fsck_info *info, + void *cb_data); +typedef void reftable_fsck_verbose_fn(const char *msg, void *cb_data); + +/* + * Given a reftable stack, perform consistency checks on the stack. + * + * If an issue is encountered, the issue is reported to the callee via the + * provided 'report_fn'. If the issue is non-recoverable the flow will not + * continue. If it is recoverable, the flow will continue and further issues + * will be reported as identified. + * + * The 'verbose_fn' will be invoked to provide verbose information about + * the progress and state of the consistency checks. + */ +int reftable_fsck_check(struct reftable_stack *stack, + reftable_fsck_report_fn report_fn, + reftable_fsck_verbose_fn verbose_fn, + void *cb_data); + +#endif /* REFTABLE_FSCK_H */ -- GitLab From dcd172827b3f0f9915a5409c224c06e238d0ff64 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Wed, 17 Sep 2025 11:01:04 +0200 Subject: [PATCH 8/8] refs/reftable: add fsck check for checking the table name Add glue code in 'refs/reftable-backend.c' which calls the reftable library to perform the fsck checks. Here we also map the reftable errors to Git' fsck errors. Introduce a check to validate table names for a given reftable stack. Also add 'badReftableTableName' as a corresponding error within Git. The reftable specification mentions: It suggested to use ${min_update_index}-${max_update_index}-${random}.ref as a naming convention. So treat non-conformant file names as warnings. While adding the fsck header to 'refs/reftable-backend.c', modify the list to maintain lexicographical ordering. Signed-off-by: Karthik Nayak --- Documentation/fsck-msgids.adoc | 3 ++ fsck.h | 1 + refs/reftable-backend.c | 57 ++++++++++++++++++++++++++++++--- t/meson.build | 1 + t/t0614-reftable-fsck.sh | 58 ++++++++++++++++++++++++++++++++++ 5 files changed, 115 insertions(+), 5 deletions(-) create mode 100755 t/t0614-reftable-fsck.sh diff --git a/Documentation/fsck-msgids.adoc b/Documentation/fsck-msgids.adoc index 1c912615f99..81f11ba125a 100644 --- a/Documentation/fsck-msgids.adoc +++ b/Documentation/fsck-msgids.adoc @@ -38,6 +38,9 @@ `badReferentName`:: (ERROR) The referent name of a symref is invalid. +`badReftableTableName`:: + (WARN) A reftable table has an invalid name. + `badTagName`:: (INFO) A tag has an invalid format. diff --git a/fsck.h b/fsck.h index 6b0db235e02..759df976556 100644 --- a/fsck.h +++ b/fsck.h @@ -73,6 +73,7 @@ enum fsck_msg_type { FUNC(UNKNOWN_TYPE, ERROR) \ FUNC(ZERO_PADDED_DATE, ERROR) \ /* warnings */ \ + FUNC(BAD_REFTABLE_TABLE_NAME, WARN) \ FUNC(EMPTY_NAME, WARN) \ FUNC(FULL_PATHNAME, WARN) \ FUNC(HAS_DOT, WARN) \ diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 2152349cb9e..b106fd8b53c 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -6,6 +6,7 @@ #include "../config.h" #include "../dir.h" #include "../environment.h" +#include "../fsck.h" #include "../gettext.h" #include "../hash.h" #include "../hex.h" @@ -15,10 +16,11 @@ #include "../path.h" #include "../refs.h" #include "../reftable/reftable-basics.h" -#include "../reftable/reftable-stack.h" -#include "../reftable/reftable-record.h" #include "../reftable/reftable-error.h" +#include "../reftable/reftable-fsck.h" #include "../reftable/reftable-iterator.h" +#include "../reftable/reftable-record.h" +#include "../reftable/reftable-stack.h" #include "../repo-settings.h" #include "../setup.h" #include "../strmap.h" @@ -2707,11 +2709,56 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store, return ret; } -static int reftable_be_fsck(struct ref_store *ref_store UNUSED, - struct fsck_options *o UNUSED, +static void reftable_fsck_verbose_handler(const char *msg, void *cb_data) +{ + struct fsck_options *o = cb_data; + + if (o->verbose) + fprintf_ln(stderr, "%s", msg); +} + +static const enum fsck_msg_id fsck_msg_id_map[] = { + [REFTABLE_FSCK_ERROR_TABLE_NAME] = FSCK_MSG_BAD_REFTABLE_TABLE_NAME, +}; + +static int reftable_fsck_error_handler(struct reftable_fsck_info *info, + void *cb_data) +{ + struct fsck_ref_report report = { .path = info->path }; + struct fsck_options *o = cb_data; + enum fsck_msg_id msg_id; + + if (info->error < 0 || info->error >= REFTABLE_FSCK_MAX_VALUE) + BUG("unknown fsck error: %d", (int)info->error); + + msg_id = fsck_msg_id_map[info->error]; + + if (!msg_id) + BUG("fsck_msg_id value missing for reftable error: %d", (int)info->error); + + return fsck_report_ref(o, &report, msg_id, "%s", info->msg); +} + +static int reftable_be_fsck(struct ref_store *ref_store, struct fsck_options *o, struct worktree *wt UNUSED) { - return 0; + struct reftable_ref_store *refs; + struct strmap_entry *entry; + struct hashmap_iter iter; + int ret = 0; + + refs = reftable_be_downcast(ref_store, REF_STORE_READ, "fsck"); + + ret |= reftable_fsck_check(refs->main_backend.stack, reftable_fsck_error_handler, + reftable_fsck_verbose_handler, o); + + strmap_for_each_entry(&refs->worktree_backends, &iter, entry) { + struct reftable_backend *b = (struct reftable_backend *)entry->value; + ret |= reftable_fsck_check(b->stack, reftable_fsck_error_handler, + reftable_fsck_verbose_handler, o); + } + + return ret; } struct ref_storage_be refs_be_reftable = { diff --git a/t/meson.build b/t/meson.build index 7974795fe48..ec1fc0b2a1b 100644 --- a/t/meson.build +++ b/t/meson.build @@ -146,6 +146,7 @@ integration_tests = [ 't0611-reftable-httpd.sh', 't0612-reftable-jgit-compatibility.sh', 't0613-reftable-write-options.sh', + 't0614-reftable-fsck.sh', 't1000-read-tree-m-3way.sh', 't1001-read-tree-m-2way.sh', 't1002-read-tree-m-u-2way.sh', diff --git a/t/t0614-reftable-fsck.sh b/t/t0614-reftable-fsck.sh new file mode 100755 index 00000000000..85cc47d67e1 --- /dev/null +++ b/t/t0614-reftable-fsck.sh @@ -0,0 +1,58 @@ +#!/bin/sh + +test_description='Test reftable backend consistency check' + +GIT_TEST_DEFAULT_REF_FORMAT=reftable +export GIT_TEST_DEFAULT_REF_FORMAT + +. ./test-lib.sh + +test_expect_success "no errors reported on a well formed repository" ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + git commit --allow-empty -m initial && + + for i in $(test_seq 20) + do + git update-ref refs/heads/branch-$i HEAD || return 1 + done && + + # The repository should end up with multiple tables. + test_line_count ">" 1 .git/reftable/tables.list && + + git refs verify 2>err && + test_must_be_empty err + ) +' + +for TABLE_NAME in "foo-bar-e4d12d59.ref" \ + "0x00000000zzzz-0x00000000zzzz-e4d12d59.ref" \ + "0x000000000001-0x000000000002-e4d12d59.abc" \ + "0x000000000001-0x000000000002-e4d12d59.refabc"; do + test_expect_success "table name $TABLE_NAME should be checked" ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + git commit --allow-empty -m initial && + + git refs verify 2>err && + test_must_be_empty err && + + EXISTING_TABLE=$(head -n1 .git/reftable/tables.list) && + mv ".git/reftable/$EXISTING_TABLE" ".git/reftable/$TABLE_NAME" && + sed "s/${EXISTING_TABLE}/${TABLE_NAME}/g" .git/reftable/tables.list > tables.list && + mv tables.list .git/reftable/tables.list && + + git refs verify 2>err && + cat >expect <<-EOF && + warning: ${TABLE_NAME}: badReftableTableName: invalid reftable table name + EOF + test_cmp expect err + ) + ' +done + +test_done -- GitLab