From 98518304c5761ba04cefb6d73c5698db7e46d1c2 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Fri, 22 Aug 2025 16:34:57 -0500 Subject: [PATCH 01/10] bulk-checkin: introduce object database transaction structure Object database transaction state is stored across several global variables in the bulk-checkin subsystem. Consolidate this state into a single `struct odb_transaction` global. In a subsequent commit, the transactional interfaces will be updated to wire this structure instead of relying on a global variable. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- bulk-checkin.c | 46 ++++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/bulk-checkin.c b/bulk-checkin.c index b2809ab0398..82a73da79e8 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -19,11 +19,7 @@ #include "object-file.h" #include "odb.h" -static int odb_transaction_nesting; - -static struct tmp_objdir *bulk_fsync_objdir; - -static struct bulk_checkin_packfile { +struct bulk_checkin_packfile { char *pack_tmp_name; struct hashfile *f; off_t offset; @@ -32,7 +28,13 @@ static struct bulk_checkin_packfile { struct pack_idx_entry **written; uint32_t alloc_written; uint32_t nr_written; -} bulk_checkin_packfile; +}; + +static struct odb_transaction { + int nesting; + struct tmp_objdir *objdir; + struct bulk_checkin_packfile packfile; +} transaction; static void finish_tmp_packfile(struct strbuf *basename, const char *pack_tmp_name, @@ -101,7 +103,7 @@ static void flush_batch_fsync(void) struct strbuf temp_path = STRBUF_INIT; struct tempfile *temp; - if (!bulk_fsync_objdir) + if (!transaction.objdir) return; /* @@ -123,8 +125,8 @@ static void flush_batch_fsync(void) * Make the object files visible in the primary ODB after their data is * fully durable. */ - tmp_objdir_migrate(bulk_fsync_objdir); - bulk_fsync_objdir = NULL; + tmp_objdir_migrate(transaction.objdir); + transaction.objdir = NULL; } static int already_written(struct bulk_checkin_packfile *state, struct object_id *oid) @@ -331,12 +333,12 @@ void prepare_loose_object_bulk_checkin(void) * callers may not know whether any objects will be * added at the time they call begin_odb_transaction. */ - if (!odb_transaction_nesting || bulk_fsync_objdir) + if (!transaction.nesting || transaction.objdir) return; - bulk_fsync_objdir = tmp_objdir_create(the_repository, "bulk-fsync"); - if (bulk_fsync_objdir) - tmp_objdir_replace_primary_odb(bulk_fsync_objdir, 0); + transaction.objdir = tmp_objdir_create(the_repository, "bulk-fsync"); + if (transaction.objdir) + tmp_objdir_replace_primary_odb(transaction.objdir, 0); } void fsync_loose_object_bulk_checkin(int fd, const char *filename) @@ -348,7 +350,7 @@ void fsync_loose_object_bulk_checkin(int fd, const char *filename) * before renaming the objects to their final names as part of * flush_batch_fsync. */ - if (!bulk_fsync_objdir || + if (!transaction.objdir || git_fsync(fd, FSYNC_WRITEOUT_ONLY) < 0) { if (errno == ENOSYS) warning(_("core.fsyncMethod = batch is unsupported on this platform")); @@ -360,31 +362,31 @@ int index_blob_bulk_checkin(struct object_id *oid, int fd, size_t size, const char *path, unsigned flags) { - int status = deflate_blob_to_pack(&bulk_checkin_packfile, oid, fd, size, + int status = deflate_blob_to_pack(&transaction.packfile, oid, fd, size, path, flags); - if (!odb_transaction_nesting) - flush_bulk_checkin_packfile(&bulk_checkin_packfile); + if (!transaction.nesting) + flush_bulk_checkin_packfile(&transaction.packfile); return status; } void begin_odb_transaction(void) { - odb_transaction_nesting += 1; + transaction.nesting += 1; } void flush_odb_transaction(void) { flush_batch_fsync(); - flush_bulk_checkin_packfile(&bulk_checkin_packfile); + flush_bulk_checkin_packfile(&transaction.packfile); } void end_odb_transaction(void) { - odb_transaction_nesting -= 1; - if (odb_transaction_nesting < 0) + transaction.nesting -= 1; + if (transaction.nesting < 0) BUG("Unbalanced ODB transaction nesting"); - if (odb_transaction_nesting) + if (transaction.nesting) return; flush_odb_transaction(); -- GitLab From b3361447256bb92a1dbdda910a33cfb1d6fc8f88 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Fri, 22 Aug 2025 16:34:58 -0500 Subject: [PATCH 02/10] bulk-checkin: remove global transaction state Object database transactions in the bulk-checkin subsystem rely on global state to track transaction status. Stop relying on global state and instead store the transaction in the `struct object_database`. Functions that operate on transactions are updated to now wire transaction state. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- builtin/add.c | 5 ++- builtin/unpack-objects.c | 5 ++- builtin/update-index.c | 7 ++-- bulk-checkin.c | 82 ++++++++++++++++++++++++++-------------- bulk-checkin.h | 18 +++++---- cache-tree.c | 5 ++- object-file.c | 11 +++--- odb.h | 8 ++++ read-cache.c | 5 ++- 9 files changed, 94 insertions(+), 52 deletions(-) diff --git a/builtin/add.c b/builtin/add.c index 0235854f809..740c7c45817 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -389,6 +389,7 @@ int cmd_add(int argc, char *seen = NULL; char *ps_matched = NULL; struct lock_file lock_file = LOCK_INIT; + struct odb_transaction *transaction; repo_config(repo, add_config, NULL); @@ -574,7 +575,7 @@ int cmd_add(int argc, string_list_clear(&only_match_skip_worktree, 0); } - begin_odb_transaction(); + transaction = begin_odb_transaction(repo->objects); ps_matched = xcalloc(pathspec.nr, 1); if (add_renormalize) @@ -593,7 +594,7 @@ int cmd_add(int argc, if (chmod_arg && pathspec.nr) exit_status |= chmod_pathspec(repo, &pathspec, chmod_arg[0], show_only); - end_odb_transaction(); + end_odb_transaction(transaction); finish: if (write_locked_index(repo->index, &lock_file, diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index 7ae7c82b6c0..28124b324d2 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -584,6 +584,7 @@ static void unpack_all(void) { int i; unsigned char *hdr = fill(sizeof(struct pack_header)); + struct odb_transaction *transaction; if (get_be32(hdr) != PACK_SIGNATURE) die("bad pack file"); @@ -599,12 +600,12 @@ static void unpack_all(void) progress = start_progress(the_repository, _("Unpacking objects"), nr_objects); CALLOC_ARRAY(obj_list, nr_objects); - begin_odb_transaction(); + transaction = begin_odb_transaction(the_repository->objects); for (i = 0; i < nr_objects; i++) { unpack_one(i); display_progress(progress, i + 1); } - end_odb_transaction(); + end_odb_transaction(transaction); stop_progress(&progress); if (delta_list) diff --git a/builtin/update-index.c b/builtin/update-index.c index 2380f3ccd68..2ba2d29c959 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -77,7 +77,7 @@ static void report(const char *fmt, ...) * objects invisible while a transaction is active, so flush the * transaction here before reporting a change made by update-index. */ - flush_odb_transaction(); + flush_odb_transaction(the_repository->objects->transaction); va_start(vp, fmt); vprintf(fmt, vp); putchar('\n'); @@ -940,6 +940,7 @@ int cmd_update_index(int argc, strbuf_getline_fn getline_fn; int parseopt_state = PARSE_OPT_UNKNOWN; struct repository *r = the_repository; + struct odb_transaction *transaction; struct option options[] = { OPT_BIT('q', NULL, &refresh_args.flags, N_("continue refresh even when index needs update"), @@ -1130,7 +1131,7 @@ int cmd_update_index(int argc, * Allow the object layer to optimize adding multiple objects in * a batch. */ - begin_odb_transaction(); + transaction = begin_odb_transaction(the_repository->objects); while (ctx.argc) { if (parseopt_state != PARSE_OPT_DONE) parseopt_state = parse_options_step(&ctx, options, @@ -1213,7 +1214,7 @@ int cmd_update_index(int argc, /* * By now we have added all of the new objects */ - end_odb_transaction(); + end_odb_transaction(transaction); if (split_index > 0) { if (repo_config_get_split_index(the_repository) == 0) diff --git a/bulk-checkin.c b/bulk-checkin.c index 82a73da79e8..53a20a2d92f 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -30,11 +30,13 @@ struct bulk_checkin_packfile { uint32_t nr_written; }; -static struct odb_transaction { +struct odb_transaction { + struct object_database *odb; + int nesting; struct tmp_objdir *objdir; struct bulk_checkin_packfile packfile; -} transaction; +}; static void finish_tmp_packfile(struct strbuf *basename, const char *pack_tmp_name, @@ -98,12 +100,12 @@ static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state) /* * Cleanup after batch-mode fsync_object_files. */ -static void flush_batch_fsync(void) +static void flush_batch_fsync(struct odb_transaction *transaction) { struct strbuf temp_path = STRBUF_INIT; struct tempfile *temp; - if (!transaction.objdir) + if (!transaction->objdir) return; /* @@ -125,8 +127,8 @@ static void flush_batch_fsync(void) * Make the object files visible in the primary ODB after their data is * fully durable. */ - tmp_objdir_migrate(transaction.objdir); - transaction.objdir = NULL; + tmp_objdir_migrate(transaction->objdir); + transaction->objdir = NULL; } static int already_written(struct bulk_checkin_packfile *state, struct object_id *oid) @@ -325,7 +327,7 @@ static int deflate_blob_to_pack(struct bulk_checkin_packfile *state, return 0; } -void prepare_loose_object_bulk_checkin(void) +void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction) { /* * We lazily create the temporary object directory @@ -333,15 +335,16 @@ void prepare_loose_object_bulk_checkin(void) * callers may not know whether any objects will be * added at the time they call begin_odb_transaction. */ - if (!transaction.nesting || transaction.objdir) + if (!transaction || transaction->objdir) return; - transaction.objdir = tmp_objdir_create(the_repository, "bulk-fsync"); - if (transaction.objdir) - tmp_objdir_replace_primary_odb(transaction.objdir, 0); + transaction->objdir = tmp_objdir_create(the_repository, "bulk-fsync"); + if (transaction->objdir) + tmp_objdir_replace_primary_odb(transaction->objdir, 0); } -void fsync_loose_object_bulk_checkin(int fd, const char *filename) +void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, + int fd, const char *filename) { /* * If we have an active ODB transaction, we issue a call that @@ -350,7 +353,7 @@ void fsync_loose_object_bulk_checkin(int fd, const char *filename) * before renaming the objects to their final names as part of * flush_batch_fsync. */ - if (!transaction.objdir || + if (!transaction || !transaction->objdir || git_fsync(fd, FSYNC_WRITEOUT_ONLY) < 0) { if (errno == ENOSYS) warning(_("core.fsyncMethod = batch is unsupported on this platform")); @@ -358,36 +361,57 @@ void fsync_loose_object_bulk_checkin(int fd, const char *filename) } } -int index_blob_bulk_checkin(struct object_id *oid, - int fd, size_t size, +int index_blob_bulk_checkin(struct odb_transaction *transaction, + struct object_id *oid, int fd, size_t size, const char *path, unsigned flags) { - int status = deflate_blob_to_pack(&transaction.packfile, oid, fd, size, - path, flags); - if (!transaction.nesting) - flush_bulk_checkin_packfile(&transaction.packfile); + int status; + + if (transaction) { + status = deflate_blob_to_pack(&transaction->packfile, oid, fd, + size, path, flags); + } else { + struct bulk_checkin_packfile state = { 0 }; + + status = deflate_blob_to_pack(&state, oid, fd, size, path, flags); + flush_bulk_checkin_packfile(&state); + } + return status; } -void begin_odb_transaction(void) +struct odb_transaction *begin_odb_transaction(struct object_database *odb) { - transaction.nesting += 1; + if (!odb->transaction) { + CALLOC_ARRAY(odb->transaction, 1); + odb->transaction->odb = odb; + } + + odb->transaction->nesting += 1; + + return odb->transaction; } -void flush_odb_transaction(void) +void flush_odb_transaction(struct odb_transaction *transaction) { - flush_batch_fsync(); - flush_bulk_checkin_packfile(&transaction.packfile); + if (!transaction) + return; + + flush_batch_fsync(transaction); + flush_bulk_checkin_packfile(&transaction->packfile); } -void end_odb_transaction(void) +void end_odb_transaction(struct odb_transaction *transaction) { - transaction.nesting -= 1; - if (transaction.nesting < 0) + if (!transaction || transaction->nesting == 0) BUG("Unbalanced ODB transaction nesting"); - if (transaction.nesting) + transaction->nesting -= 1; + + if (transaction->nesting) return; - flush_odb_transaction(); + flush_odb_transaction(transaction); + transaction->odb->transaction = NULL; + free(transaction); } diff --git a/bulk-checkin.h b/bulk-checkin.h index 7246ea58dcf..16254ce6a70 100644 --- a/bulk-checkin.h +++ b/bulk-checkin.h @@ -5,9 +5,13 @@ #define BULK_CHECKIN_H #include "object.h" +#include "odb.h" -void prepare_loose_object_bulk_checkin(void); -void fsync_loose_object_bulk_checkin(int fd, const char *filename); +struct odb_transaction; + +void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction); +void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, + int fd, const char *filename); /* * This creates one packfile per large blob unless bulk-checkin @@ -24,8 +28,8 @@ void fsync_loose_object_bulk_checkin(int fd, const char *filename); * binary blobs, they generally do not want to get any conversion, and * callers should avoid this code path when filters are requested. */ -int index_blob_bulk_checkin(struct object_id *oid, - int fd, size_t size, +int index_blob_bulk_checkin(struct odb_transaction *transaction, + struct object_id *oid, int fd, size_t size, const char *path, unsigned flags); /* @@ -35,20 +39,20 @@ int index_blob_bulk_checkin(struct object_id *oid, * and objects are only visible after the outermost transaction * is complete or the transaction is flushed. */ -void begin_odb_transaction(void); +struct odb_transaction *begin_odb_transaction(struct object_database *odb); /* * Make any objects that are currently part of a pending object * database transaction visible. It is valid to call this function * even if no transaction is active. */ -void flush_odb_transaction(void); +void flush_odb_transaction(struct odb_transaction *transaction); /* * Tell the object database to make any objects from the * current transaction visible if this is the final nested * transaction. */ -void end_odb_transaction(void); +void end_odb_transaction(struct odb_transaction *transaction); #endif diff --git a/cache-tree.c b/cache-tree.c index 66ef2becbe0..d225554eedd 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -474,6 +474,7 @@ static int update_one(struct cache_tree *it, int cache_tree_update(struct index_state *istate, int flags) { + struct odb_transaction *transaction; int skip, i; i = verify_cache(istate, flags); @@ -489,10 +490,10 @@ int cache_tree_update(struct index_state *istate, int flags) trace_performance_enter(); trace2_region_enter("cache_tree", "update", the_repository); - begin_odb_transaction(); + transaction = begin_odb_transaction(the_repository->objects); i = update_one(istate->cache_tree, istate->cache, istate->cache_nr, "", 0, &skip, flags); - end_odb_transaction(); + end_odb_transaction(transaction); trace2_region_leave("cache_tree", "update", the_repository); trace_performance_leave("cache_tree_update"); if (i < 0) diff --git a/object-file.c b/object-file.c index 2bc36ab3ee8..1740aa2b2e3 100644 --- a/object-file.c +++ b/object-file.c @@ -674,7 +674,7 @@ static void close_loose_object(struct odb_source *source, goto out; if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT)) - fsync_loose_object_bulk_checkin(fd, filename); + fsync_loose_object_bulk_checkin(source->odb->transaction, fd, filename); else if (fsync_object_files > 0) fsync_or_die(fd, filename); else @@ -852,7 +852,7 @@ static int write_loose_object(struct odb_source *source, static struct strbuf filename = STRBUF_INIT; if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT)) - prepare_loose_object_bulk_checkin(); + prepare_loose_object_bulk_checkin(source->odb->transaction); odb_loose_path(source, &filename, oid); @@ -941,7 +941,7 @@ int stream_loose_object(struct odb_source *source, int hdrlen; if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT)) - prepare_loose_object_bulk_checkin(); + prepare_loose_object_bulk_checkin(source->odb->transaction); /* Since oid is not determined, save tmp file to odb path. */ strbuf_addf(&filename, "%s/", source->path); @@ -1263,8 +1263,9 @@ int index_fd(struct index_state *istate, struct object_id *oid, ret = index_core(istate, oid, fd, xsize_t(st->st_size), type, path, flags); else - ret = index_blob_bulk_checkin(oid, fd, xsize_t(st->st_size), path, - flags); + ret = index_blob_bulk_checkin(the_repository->objects->transaction, + oid, fd, xsize_t(st->st_size), + path, flags); close(fd); return ret; } diff --git a/odb.h b/odb.h index 3dfc66d75a3..a89b2143909 100644 --- a/odb.h +++ b/odb.h @@ -84,6 +84,7 @@ struct odb_source { struct packed_git; struct cached_object_entry; +struct odb_transaction; /* * The object database encapsulates access to objects in a repository. It @@ -94,6 +95,13 @@ struct object_database { /* Repository that owns this database. */ struct repository *repo; + /* + * State of current current object database transaction. Only one + * transaction may be pending at a time. Is NULL when no transaction is + * configured. + */ + struct odb_transaction *transaction; + /* * Set of all object directories; the main directory is first (and * cannot be NULL after initialization). Subsequent directories are diff --git a/read-cache.c b/read-cache.c index 06ad74db228..229b8ef11c9 100644 --- a/read-cache.c +++ b/read-cache.c @@ -3947,6 +3947,7 @@ int add_files_to_cache(struct repository *repo, const char *prefix, const struct pathspec *pathspec, char *ps_matched, int include_sparse, int flags) { + struct odb_transaction *transaction; struct update_callback_data data; struct rev_info rev; @@ -3972,9 +3973,9 @@ int add_files_to_cache(struct repository *repo, const char *prefix, * This function is invoked from commands other than 'add', which * may not have their own transaction active. */ - begin_odb_transaction(); + transaction = begin_odb_transaction(repo->objects); run_diff_files(&rev, DIFF_RACY_IS_MODIFIED); - end_odb_transaction(); + end_odb_transaction(transaction); release_revisions(&rev); return !!data.add_errors; -- GitLab From aa4d81b53311fcdf099400beebad99c14be4b561 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Fri, 22 Aug 2025 16:34:59 -0500 Subject: [PATCH 03/10] bulk-checkin: require transaction for index_blob_bulk_checkin() The bulk-checkin subsystem provides a mechanism to write blobs directly to a packfile via `index_blob_bulk_checkin()`. If there is an ongoing transaction when invoked, objects written via this function are stored in the same packfile. The packfile is not flushed until the transaction itself is flushed. If there is no transaction, the single object is written to a packfile and immediately flushed. This complicates `index_blob_bulk_checkin()` as it cannot reliably use the provided transaction to get the associated repository. Update `index_blob_bulk_checkin()` to assume that a valid transaction is always provided. Callers are now expected to ensure a transaction is set up beforehand. With this simplification, `deflate_blob_bulk_checkin()` is no longer needed as a standalone internal function and is combined with `index_blob_bulk_checkin()`. The single call site in `object-file.c:index_fd()` is updated accordingly. Due to how `{begin,end}_odb_transaction()` handles nested transactions, a new transaction is only created and committed if there is not already an ongoing transaction. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- bulk-checkin.c | 27 ++++----------------------- bulk-checkin.h | 7 +++++-- object-file.c | 21 ++++++++++++++------- 3 files changed, 23 insertions(+), 32 deletions(-) diff --git a/bulk-checkin.c b/bulk-checkin.c index 53a20a2d92f..542d8125a86 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -254,11 +254,11 @@ static void prepare_to_stream(struct bulk_checkin_packfile *state, die_errno("unable to write pack header"); } -static int deflate_blob_to_pack(struct bulk_checkin_packfile *state, - struct object_id *result_oid, - int fd, size_t size, - const char *path, unsigned flags) +int index_blob_bulk_checkin(struct odb_transaction *transaction, + struct object_id *result_oid, int fd, size_t size, + const char *path, unsigned flags) { + struct bulk_checkin_packfile *state = &transaction->packfile; off_t seekback, already_hashed_to; struct git_hash_ctx ctx; unsigned char obuf[16384]; @@ -361,25 +361,6 @@ void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, } } -int index_blob_bulk_checkin(struct odb_transaction *transaction, - struct object_id *oid, int fd, size_t size, - const char *path, unsigned flags) -{ - int status; - - if (transaction) { - status = deflate_blob_to_pack(&transaction->packfile, oid, fd, - size, path, flags); - } else { - struct bulk_checkin_packfile state = { 0 }; - - status = deflate_blob_to_pack(&state, oid, fd, size, path, flags); - flush_bulk_checkin_packfile(&state); - } - - return status; -} - struct odb_transaction *begin_odb_transaction(struct object_database *odb) { if (!odb->transaction) { diff --git a/bulk-checkin.h b/bulk-checkin.h index 16254ce6a70..ac8887f476b 100644 --- a/bulk-checkin.h +++ b/bulk-checkin.h @@ -14,8 +14,11 @@ void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, int fd, const char *filename); /* - * This creates one packfile per large blob unless bulk-checkin - * machinery is "plugged". + * This writes the specified object to a packfile. Objects written here + * during the same transaction are written to the same packfile. The + * packfile is not flushed until the transaction is flushed. The caller + * is expected to ensure a valid transaction is setup for objects to be + * recorded to. * * This also bypasses the usual "convert-to-git" dance, and that is on * purpose. We could write a streaming version of the converting diff --git a/object-file.c b/object-file.c index 1740aa2b2e3..bc15af42450 100644 --- a/object-file.c +++ b/object-file.c @@ -1253,19 +1253,26 @@ int index_fd(struct index_state *istate, struct object_id *oid, * Call xsize_t() only when needed to avoid potentially unnecessary * die() for large files. */ - if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(istate, path)) + if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(istate, path)) { ret = index_stream_convert_blob(istate, oid, fd, path, flags); - else if (!S_ISREG(st->st_mode)) + } else if (!S_ISREG(st->st_mode)) { ret = index_pipe(istate, oid, fd, type, path, flags); - else if ((st->st_size >= 0 && (size_t) st->st_size <= repo_settings_get_big_file_threshold(istate->repo)) || - type != OBJ_BLOB || - (path && would_convert_to_git(istate, path))) + } else if ((st->st_size >= 0 && + (size_t)st->st_size <= repo_settings_get_big_file_threshold(istate->repo)) || + type != OBJ_BLOB || + (path && would_convert_to_git(istate, path))) { ret = index_core(istate, oid, fd, xsize_t(st->st_size), type, path, flags); - else - ret = index_blob_bulk_checkin(the_repository->objects->transaction, + } else { + struct odb_transaction *transaction; + + transaction = begin_odb_transaction(the_repository->objects); + ret = index_blob_bulk_checkin(transaction, oid, fd, xsize_t(st->st_size), path, flags); + end_odb_transaction(transaction); + } + close(fd); return ret; } -- GitLab From ddc0b56ad77d7c86145a6a1774f05f9d11bf2337 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Fri, 22 Aug 2025 16:35:00 -0500 Subject: [PATCH 04/10] bulk-checkin: use repository variable from transaction The bulk-checkin subsystem depends on `the_repository`. Adapt functions and call sites to access the repository through `struct odb_transaction` instead. The `USE_THE_REPOSITORY_VARIBALE` is still required as the `pack_compression_level` and `pack_size_limit_cfg` globals are still used. Also adapt functions using packfile state to instead access it through the transaction. This makes some function parameters redundant and go away. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- bulk-checkin.c | 67 +++++++++++++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 31 deletions(-) diff --git a/bulk-checkin.c b/bulk-checkin.c index 542d8125a86..124c4930676 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -38,25 +38,26 @@ struct odb_transaction { struct bulk_checkin_packfile packfile; }; -static void finish_tmp_packfile(struct strbuf *basename, - const char *pack_tmp_name, - struct pack_idx_entry **written_list, - uint32_t nr_written, - struct pack_idx_option *pack_idx_opts, +static void finish_tmp_packfile(struct odb_transaction *transaction, + struct strbuf *basename, unsigned char hash[]) { + struct bulk_checkin_packfile *state = &transaction->packfile; + struct repository *repo = transaction->odb->repo; char *idx_tmp_name = NULL; - stage_tmp_packfiles(the_repository, basename, pack_tmp_name, - written_list, nr_written, NULL, pack_idx_opts, hash, - &idx_tmp_name); - rename_tmp_packfile_idx(the_repository, basename, &idx_tmp_name); + stage_tmp_packfiles(repo, basename, state->pack_tmp_name, + state->written, state->nr_written, NULL, + &state->pack_idx_opts, hash, &idx_tmp_name); + rename_tmp_packfile_idx(repo, basename, &idx_tmp_name); free(idx_tmp_name); } -static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state) +static void flush_bulk_checkin_packfile(struct odb_transaction *transaction) { + struct bulk_checkin_packfile *state = &transaction->packfile; + struct repository *repo = transaction->odb->repo; unsigned char hash[GIT_MAX_RAWSZ]; struct strbuf packname = STRBUF_INIT; @@ -73,17 +74,17 @@ static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state) CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE); } else { int fd = finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, 0); - fixup_pack_header_footer(the_hash_algo, fd, hash, state->pack_tmp_name, + fixup_pack_header_footer(repo->hash_algo, fd, hash, state->pack_tmp_name, state->nr_written, hash, state->offset); close(fd); } - strbuf_addf(&packname, "%s/pack/pack-%s.", repo_get_object_directory(the_repository), - hash_to_hex(hash)); - finish_tmp_packfile(&packname, state->pack_tmp_name, - state->written, state->nr_written, - &state->pack_idx_opts, hash); + strbuf_addf(&packname, "%s/pack/pack-%s.", + repo_get_object_directory(transaction->odb->repo), + hash_to_hex_algop(hash, repo->hash_algo)); + + finish_tmp_packfile(transaction, &packname, hash); for (uint32_t i = 0; i < state->nr_written; i++) free(state->written[i]); @@ -94,7 +95,7 @@ static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state) strbuf_release(&packname); /* Make objects we just wrote available to ourselves */ - reprepare_packed_git(the_repository); + reprepare_packed_git(repo); } /* @@ -117,7 +118,8 @@ static void flush_batch_fsync(struct odb_transaction *transaction) * to ensure that the data in each new object file is durable before * the final name is visible. */ - strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX", repo_get_object_directory(the_repository)); + strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX", + repo_get_object_directory(transaction->odb->repo)); temp = xmks_tempfile(temp_path.buf); fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp)); delete_tempfile(&temp); @@ -131,16 +133,17 @@ static void flush_batch_fsync(struct odb_transaction *transaction) transaction->objdir = NULL; } -static int already_written(struct bulk_checkin_packfile *state, struct object_id *oid) +static int already_written(struct odb_transaction *transaction, + struct object_id *oid) { /* The object may already exist in the repository */ - if (odb_has_object(the_repository->objects, oid, + if (odb_has_object(transaction->odb, oid, HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) return 1; /* Might want to keep the list sorted */ - for (uint32_t i = 0; i < state->nr_written; i++) - if (oideq(&state->written[i]->oid, oid)) + for (uint32_t i = 0; i < transaction->packfile.nr_written; i++) + if (oideq(&transaction->packfile.written[i]->oid, oid)) return 1; /* This is a new object we need to keep */ @@ -239,13 +242,15 @@ static int stream_blob_to_pack(struct bulk_checkin_packfile *state, } /* Lazily create backing packfile for the state */ -static void prepare_to_stream(struct bulk_checkin_packfile *state, +static void prepare_to_stream(struct odb_transaction *transaction, unsigned flags) { + struct bulk_checkin_packfile *state = &transaction->packfile; if (!(flags & INDEX_WRITE_OBJECT) || state->f) return; - state->f = create_tmp_packfile(the_repository, &state->pack_tmp_name); + state->f = create_tmp_packfile(transaction->odb->repo, + &state->pack_tmp_name); reset_pack_idx_option(&state->pack_idx_opts); /* Pretend we are going to write only one object */ @@ -272,21 +277,21 @@ int index_blob_bulk_checkin(struct odb_transaction *transaction, header_len = format_object_header((char *)obuf, sizeof(obuf), OBJ_BLOB, size); - the_hash_algo->init_fn(&ctx); + transaction->odb->repo->hash_algo->init_fn(&ctx); git_hash_update(&ctx, obuf, header_len); /* Note: idx is non-NULL when we are writing */ if ((flags & INDEX_WRITE_OBJECT) != 0) { CALLOC_ARRAY(idx, 1); - prepare_to_stream(state, flags); + prepare_to_stream(transaction, flags); hashfile_checkpoint_init(state->f, &checkpoint); } already_hashed_to = 0; while (1) { - prepare_to_stream(state, flags); + prepare_to_stream(transaction, flags); if (idx) { hashfile_checkpoint(state->f, &checkpoint); idx->offset = state->offset; @@ -304,7 +309,7 @@ int index_blob_bulk_checkin(struct odb_transaction *transaction, BUG("should not happen"); hashfile_truncate(state->f, &checkpoint); state->offset = checkpoint.offset; - flush_bulk_checkin_packfile(state); + flush_bulk_checkin_packfile(transaction); if (lseek(fd, seekback, SEEK_SET) == (off_t) -1) return error("cannot seek back"); } @@ -313,7 +318,7 @@ int index_blob_bulk_checkin(struct odb_transaction *transaction, return 0; idx->crc32 = crc32_end(state->f); - if (already_written(state, result_oid)) { + if (already_written(transaction, result_oid)) { hashfile_truncate(state->f, &checkpoint); state->offset = checkpoint.offset; free(idx); @@ -338,7 +343,7 @@ void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction) if (!transaction || transaction->objdir) return; - transaction->objdir = tmp_objdir_create(the_repository, "bulk-fsync"); + transaction->objdir = tmp_objdir_create(transaction->odb->repo, "bulk-fsync"); if (transaction->objdir) tmp_objdir_replace_primary_odb(transaction->objdir, 0); } @@ -379,7 +384,7 @@ void flush_odb_transaction(struct odb_transaction *transaction) return; flush_batch_fsync(transaction); - flush_bulk_checkin_packfile(&transaction->packfile); + flush_bulk_checkin_packfile(transaction); } void end_odb_transaction(struct odb_transaction *transaction) -- GitLab From 9fe2ab419880b9e48ccef1f82109a2bb924decdd Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Mon, 8 Sep 2025 10:44:56 -0500 Subject: [PATCH 05/10] bulk-checkin: remove ODB transaction nesting ODB transactions support being nested. Only the outermost {begin,end}_odb_transaction() start and finish a transaction. This allows internal object write codepaths to be optimized with ODB transactions without worrying about whether a transaction is already active. When {begin,end}_odb_transaction() is invoked during an active transaction, these operations are essentially treated as no-ops. This can make the interface a bit awkward to use, as calling end_odb_transaction() does not guarantee that a transaction is actually ended. Thus, in situations where a transaction needs to be explicitly flushed, flush_odb_transaction() must be used. To remove the need for an explicit transaction flush operation via flush_odb_transaction() and better clarify transaction semantics, drop the transaction nesting mechanism in favor of begin_odb_transaction() returning a NULL transaction value to signal it was a no-op, and end_odb_transaction() behaving as a no-op when a NULL transaction value is passed. This is safe for existing callers as the transaction value wired to end_odb_transaction() already comes from begin_odb_transaction() and thus continues the same no-op behavior when a transaction is already pending. With this model, passing a pending transaction to end_odb_transaction() ensures it is committed at that point in time. Signed-off-by: Justin Tobler --- bulk-checkin.c | 22 ++++++++++------------ bulk-checkin.h | 8 +++----- object-file.c | 2 +- 3 files changed, 14 insertions(+), 18 deletions(-) diff --git a/bulk-checkin.c b/bulk-checkin.c index 124c4930676..eb6ef704c3c 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -33,7 +33,6 @@ struct bulk_checkin_packfile { struct odb_transaction { struct object_database *odb; - int nesting; struct tmp_objdir *objdir; struct bulk_checkin_packfile packfile; }; @@ -368,12 +367,11 @@ void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, struct odb_transaction *begin_odb_transaction(struct object_database *odb) { - if (!odb->transaction) { - CALLOC_ARRAY(odb->transaction, 1); - odb->transaction->odb = odb; - } + if (odb->transaction) + return NULL; - odb->transaction->nesting += 1; + CALLOC_ARRAY(odb->transaction, 1); + odb->transaction->odb = odb; return odb->transaction; } @@ -389,14 +387,14 @@ void flush_odb_transaction(struct odb_transaction *transaction) void end_odb_transaction(struct odb_transaction *transaction) { - if (!transaction || transaction->nesting == 0) - BUG("Unbalanced ODB transaction nesting"); - - transaction->nesting -= 1; - - if (transaction->nesting) + if (!transaction) return; + /* + * Ensure the transaction ending matches the pending transaction. + */ + ASSERT(transaction == transaction->odb->transaction); + flush_odb_transaction(transaction); transaction->odb->transaction = NULL; free(transaction); diff --git a/bulk-checkin.h b/bulk-checkin.h index ac8887f476b..51d0ac6134e 100644 --- a/bulk-checkin.h +++ b/bulk-checkin.h @@ -38,9 +38,8 @@ int index_blob_bulk_checkin(struct odb_transaction *transaction, /* * Tell the object database to optimize for adding * multiple objects. end_odb_transaction must be called - * to make new objects visible. Transactions can be nested, - * and objects are only visible after the outermost transaction - * is complete or the transaction is flushed. + * to make new objects visible. If a transaction is already + * pending, NULL is returned. */ struct odb_transaction *begin_odb_transaction(struct object_database *odb); @@ -53,8 +52,7 @@ void flush_odb_transaction(struct odb_transaction *transaction); /* * Tell the object database to make any objects from the - * current transaction visible if this is the final nested - * transaction. + * current transaction visible. */ void end_odb_transaction(struct odb_transaction *transaction); diff --git a/object-file.c b/object-file.c index bc15af42450..5e765735495 100644 --- a/object-file.c +++ b/object-file.c @@ -1267,7 +1267,7 @@ int index_fd(struct index_state *istate, struct object_id *oid, struct odb_transaction *transaction; transaction = begin_odb_transaction(the_repository->objects); - ret = index_blob_bulk_checkin(transaction, + ret = index_blob_bulk_checkin(the_repository->objects->transaction, oid, fd, xsize_t(st->st_size), path, flags); end_odb_transaction(transaction); -- GitLab From f24c28ec56e361c5143e4bb6c72d1435fef5ad7e Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Mon, 8 Sep 2025 14:49:35 -0500 Subject: [PATCH 06/10] builtin/update-index: end ODB transaction when --verbose is specified With 23a3a303 (update-index: use the bulk-checkin infrastructure, 2022-04-04), object database transactions were added to git-update-index(1) to facilitate writing objects in bulk. With transactions, newly added objects are instead written to a temporary object directory and migrated to the primary object database upon transaction commit. When the --verbose option is specified, the subsequent set of objects written are explicitly flushed via flush_odb_transaction() prior to reporting the update. Flushing the object database transaction migrates pending objects to the primary object database without marking the transaction as complete. This is done so objects are immediately visible to git-update-index(1) callers using the --verbose option and that rely on parsing verbose output to know when objects are written. Due to how git-update-index(1) parses arguments, options that come after a filename are not considered during the object update. Therefore, it may not be known ahead of time whether the --verbose option is present and thus object writes are considered transactional by default until a --verbose option is parsed. Flushing a transaction after individual object writes negates the benefit of writing objects to a transaction in the first place. Furthermore, the mechanism to flush a transaction without actually committing is rather awkward. Drop the call to flush_odb_transaction() in favor of ending the transaction altogether when the --verbose flag is encountered. Subsequent object writes occur outside of a transaction and are therefore immediately visible which matches the current behavior. Signed-off-by: Justin Tobler --- builtin/update-index.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/builtin/update-index.c b/builtin/update-index.c index 2ba2d29c959..d36bc557521 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -70,14 +70,6 @@ static void report(const char *fmt, ...) if (!verbose) return; - /* - * It is possible, though unlikely, that a caller could use the verbose - * output to synchronize with addition of objects to the object - * database. The current implementation of ODB transactions leaves - * objects invisible while a transaction is active, so flush the - * transaction here before reporting a change made by update-index. - */ - flush_odb_transaction(the_repository->objects->transaction); va_start(vp, fmt); vprintf(fmt, vp); putchar('\n'); @@ -1150,6 +1142,21 @@ int cmd_update_index(int argc, const char *path = ctx.argv[0]; char *p; + /* + * It is possible, though unlikely, that a caller could + * use the verbose output to synchronize with addition + * of objects to the object database. The current + * implementation of ODB transactions leaves objects + * invisible while a transaction is active, so end the + * transaction here early before processing the next + * update. All further updates are performed outside of + * a transaction. + */ + if (transaction && verbose) { + end_odb_transaction(transaction); + transaction = NULL; + } + setup_work_tree(); p = prefix_path(prefix, prefix_length, path); update_one(p); -- GitLab From 6268b2934aa79166810c0d5cfc8f8e4de7fe0582 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Mon, 8 Sep 2025 15:02:32 -0500 Subject: [PATCH 07/10] bulk-checkin: drop flush_odb_transaction() Object database transactions can be explicitly flushed via flush_odb_transaction() without actually completing the transaction. This makes the provided transactional interface a bit awkward. Now that there are no longer any flush_odb_transaction() call sites, drop the function to simplify the interface and further ensure that a transaction is only finalized when end_odb_transaction() is invoked. Signed-off-by: Justin Tobler --- bulk-checkin.c | 12 ++---------- bulk-checkin.h | 7 ------- 2 files changed, 2 insertions(+), 17 deletions(-) diff --git a/bulk-checkin.c b/bulk-checkin.c index eb6ef704c3c..5de848deffe 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -376,15 +376,6 @@ struct odb_transaction *begin_odb_transaction(struct object_database *odb) return odb->transaction; } -void flush_odb_transaction(struct odb_transaction *transaction) -{ - if (!transaction) - return; - - flush_batch_fsync(transaction); - flush_bulk_checkin_packfile(transaction); -} - void end_odb_transaction(struct odb_transaction *transaction) { if (!transaction) @@ -395,7 +386,8 @@ void end_odb_transaction(struct odb_transaction *transaction) */ ASSERT(transaction == transaction->odb->transaction); - flush_odb_transaction(transaction); + flush_batch_fsync(transaction); + flush_bulk_checkin_packfile(transaction); transaction->odb->transaction = NULL; free(transaction); } diff --git a/bulk-checkin.h b/bulk-checkin.h index 51d0ac6134e..eea728f0d41 100644 --- a/bulk-checkin.h +++ b/bulk-checkin.h @@ -43,13 +43,6 @@ int index_blob_bulk_checkin(struct odb_transaction *transaction, */ struct odb_transaction *begin_odb_transaction(struct object_database *odb); -/* - * Make any objects that are currently part of a pending object - * database transaction visible. It is valid to call this function - * even if no transaction is active. - */ -void flush_odb_transaction(struct odb_transaction *transaction); - /* * Tell the object database to make any objects from the * current transaction visible. -- GitLab From e5f1d9379777a21e9b3cdcb7d32bcd33de7ec51f Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Mon, 8 Sep 2025 15:58:00 -0500 Subject: [PATCH 08/10] object-file: relocate ODB transaction code The bulk-checkin subsystem provides various functions to manage ODB transactions. Apart from {begin,end}_odb_transaction(), these functions are only used by the object-file subsystem to manage aspects of a transaction implementation specific to the files object source. Relocate all the transaction code in bulk-checkin to object-file. This simplifies the exposed transaction interface by reducing it to only {begin,end}_odb_transaction(). Function and type names are adjusted in the subsequent commit to better fit the new location. Signed-off-by: Justin Tobler --- Makefile | 1 - builtin/add.c | 2 +- builtin/unpack-objects.c | 1 - builtin/update-index.c | 1 - bulk-checkin.c | 393 -------------------------------------- bulk-checkin.h | 52 ------ cache-tree.c | 1 - meson.build | 1 - object-file.c | 394 ++++++++++++++++++++++++++++++++++++++- object-file.h | 16 ++ read-cache.c | 1 - 11 files changed, 410 insertions(+), 453 deletions(-) delete mode 100644 bulk-checkin.c delete mode 100644 bulk-checkin.h diff --git a/Makefile b/Makefile index 4c95affadb5..d25d4255f8a 100644 --- a/Makefile +++ b/Makefile @@ -974,7 +974,6 @@ LIB_OBJS += blame.o LIB_OBJS += blob.o LIB_OBJS += bloom.o LIB_OBJS += branch.o -LIB_OBJS += bulk-checkin.o LIB_OBJS += bundle-uri.o LIB_OBJS += bundle.o LIB_OBJS += cache-tree.o diff --git a/builtin/add.c b/builtin/add.c index 740c7c45817..8294366d68a 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -14,13 +14,13 @@ #include "gettext.h" #include "pathspec.h" #include "run-command.h" +#include "object-file.h" #include "parse-options.h" #include "path.h" #include "preload-index.h" #include "diff.h" #include "read-cache.h" #include "revision.h" -#include "bulk-checkin.h" #include "strvec.h" #include "submodule.h" #include "add-interactive.h" diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index 28124b324d2..4596fff0dad 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -2,7 +2,6 @@ #define DISABLE_SIGN_COMPARE_WARNINGS #include "builtin.h" -#include "bulk-checkin.h" #include "config.h" #include "environment.h" #include "gettext.h" diff --git a/builtin/update-index.c b/builtin/update-index.c index d36bc557521..ee01c4e423d 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -8,7 +8,6 @@ #define DISABLE_SIGN_COMPARE_WARNINGS #include "builtin.h" -#include "bulk-checkin.h" #include "config.h" #include "environment.h" #include "gettext.h" diff --git a/bulk-checkin.c b/bulk-checkin.c deleted file mode 100644 index 5de848deffe..00000000000 --- a/bulk-checkin.c +++ /dev/null @@ -1,393 +0,0 @@ -/* - * Copyright (c) 2011, Google Inc. - */ - -#define USE_THE_REPOSITORY_VARIABLE - -#include "git-compat-util.h" -#include "bulk-checkin.h" -#include "environment.h" -#include "gettext.h" -#include "hex.h" -#include "lockfile.h" -#include "repository.h" -#include "csum-file.h" -#include "pack.h" -#include "strbuf.h" -#include "tmp-objdir.h" -#include "packfile.h" -#include "object-file.h" -#include "odb.h" - -struct bulk_checkin_packfile { - char *pack_tmp_name; - struct hashfile *f; - off_t offset; - struct pack_idx_option pack_idx_opts; - - struct pack_idx_entry **written; - uint32_t alloc_written; - uint32_t nr_written; -}; - -struct odb_transaction { - struct object_database *odb; - - struct tmp_objdir *objdir; - struct bulk_checkin_packfile packfile; -}; - -static void finish_tmp_packfile(struct odb_transaction *transaction, - struct strbuf *basename, - unsigned char hash[]) -{ - struct bulk_checkin_packfile *state = &transaction->packfile; - struct repository *repo = transaction->odb->repo; - char *idx_tmp_name = NULL; - - stage_tmp_packfiles(repo, basename, state->pack_tmp_name, - state->written, state->nr_written, NULL, - &state->pack_idx_opts, hash, &idx_tmp_name); - rename_tmp_packfile_idx(repo, basename, &idx_tmp_name); - - free(idx_tmp_name); -} - -static void flush_bulk_checkin_packfile(struct odb_transaction *transaction) -{ - struct bulk_checkin_packfile *state = &transaction->packfile; - struct repository *repo = transaction->odb->repo; - unsigned char hash[GIT_MAX_RAWSZ]; - struct strbuf packname = STRBUF_INIT; - - if (!state->f) - return; - - if (state->nr_written == 0) { - close(state->f->fd); - free_hashfile(state->f); - unlink(state->pack_tmp_name); - goto clear_exit; - } else if (state->nr_written == 1) { - finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, - CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE); - } else { - int fd = finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, 0); - fixup_pack_header_footer(repo->hash_algo, fd, hash, state->pack_tmp_name, - state->nr_written, hash, - state->offset); - close(fd); - } - - strbuf_addf(&packname, "%s/pack/pack-%s.", - repo_get_object_directory(transaction->odb->repo), - hash_to_hex_algop(hash, repo->hash_algo)); - - finish_tmp_packfile(transaction, &packname, hash); - for (uint32_t i = 0; i < state->nr_written; i++) - free(state->written[i]); - -clear_exit: - free(state->pack_tmp_name); - free(state->written); - memset(state, 0, sizeof(*state)); - - strbuf_release(&packname); - /* Make objects we just wrote available to ourselves */ - reprepare_packed_git(repo); -} - -/* - * Cleanup after batch-mode fsync_object_files. - */ -static void flush_batch_fsync(struct odb_transaction *transaction) -{ - struct strbuf temp_path = STRBUF_INIT; - struct tempfile *temp; - - if (!transaction->objdir) - return; - - /* - * Issue a full hardware flush against a temporary file to ensure - * that all objects are durable before any renames occur. The code in - * fsync_loose_object_bulk_checkin has already issued a writeout - * request, but it has not flushed any writeback cache in the storage - * hardware or any filesystem logs. This fsync call acts as a barrier - * to ensure that the data in each new object file is durable before - * the final name is visible. - */ - strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX", - repo_get_object_directory(transaction->odb->repo)); - temp = xmks_tempfile(temp_path.buf); - fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp)); - delete_tempfile(&temp); - strbuf_release(&temp_path); - - /* - * Make the object files visible in the primary ODB after their data is - * fully durable. - */ - tmp_objdir_migrate(transaction->objdir); - transaction->objdir = NULL; -} - -static int already_written(struct odb_transaction *transaction, - struct object_id *oid) -{ - /* The object may already exist in the repository */ - if (odb_has_object(transaction->odb, oid, - HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) - return 1; - - /* Might want to keep the list sorted */ - for (uint32_t i = 0; i < transaction->packfile.nr_written; i++) - if (oideq(&transaction->packfile.written[i]->oid, oid)) - return 1; - - /* This is a new object we need to keep */ - return 0; -} - -/* - * Read the contents from fd for size bytes, streaming it to the - * packfile in state while updating the hash in ctx. Signal a failure - * by returning a negative value when the resulting pack would exceed - * the pack size limit and this is not the first object in the pack, - * so that the caller can discard what we wrote from the current pack - * by truncating it and opening a new one. The caller will then call - * us again after rewinding the input fd. - * - * The already_hashed_to pointer is kept untouched by the caller to - * make sure we do not hash the same byte when we are called - * again. This way, the caller does not have to checkpoint its hash - * status before calling us just in case we ask it to call us again - * with a new pack. - */ -static int stream_blob_to_pack(struct bulk_checkin_packfile *state, - struct git_hash_ctx *ctx, off_t *already_hashed_to, - int fd, size_t size, const char *path, - unsigned flags) -{ - git_zstream s; - unsigned char ibuf[16384]; - unsigned char obuf[16384]; - unsigned hdrlen; - int status = Z_OK; - int write_object = (flags & INDEX_WRITE_OBJECT); - off_t offset = 0; - - git_deflate_init(&s, pack_compression_level); - - hdrlen = encode_in_pack_object_header(obuf, sizeof(obuf), OBJ_BLOB, size); - s.next_out = obuf + hdrlen; - s.avail_out = sizeof(obuf) - hdrlen; - - while (status != Z_STREAM_END) { - if (size && !s.avail_in) { - size_t rsize = size < sizeof(ibuf) ? size : sizeof(ibuf); - ssize_t read_result = read_in_full(fd, ibuf, rsize); - if (read_result < 0) - die_errno("failed to read from '%s'", path); - if ((size_t)read_result != rsize) - die("failed to read %u bytes from '%s'", - (unsigned)rsize, path); - offset += rsize; - if (*already_hashed_to < offset) { - size_t hsize = offset - *already_hashed_to; - if (rsize < hsize) - hsize = rsize; - if (hsize) - git_hash_update(ctx, ibuf, hsize); - *already_hashed_to = offset; - } - s.next_in = ibuf; - s.avail_in = rsize; - size -= rsize; - } - - status = git_deflate(&s, size ? 0 : Z_FINISH); - - if (!s.avail_out || status == Z_STREAM_END) { - if (write_object) { - size_t written = s.next_out - obuf; - - /* would we bust the size limit? */ - if (state->nr_written && - pack_size_limit_cfg && - pack_size_limit_cfg < state->offset + written) { - git_deflate_abort(&s); - return -1; - } - - hashwrite(state->f, obuf, written); - state->offset += written; - } - s.next_out = obuf; - s.avail_out = sizeof(obuf); - } - - switch (status) { - case Z_OK: - case Z_BUF_ERROR: - case Z_STREAM_END: - continue; - default: - die("unexpected deflate failure: %d", status); - } - } - git_deflate_end(&s); - return 0; -} - -/* Lazily create backing packfile for the state */ -static void prepare_to_stream(struct odb_transaction *transaction, - unsigned flags) -{ - struct bulk_checkin_packfile *state = &transaction->packfile; - if (!(flags & INDEX_WRITE_OBJECT) || state->f) - return; - - state->f = create_tmp_packfile(transaction->odb->repo, - &state->pack_tmp_name); - reset_pack_idx_option(&state->pack_idx_opts); - - /* Pretend we are going to write only one object */ - state->offset = write_pack_header(state->f, 1); - if (!state->offset) - die_errno("unable to write pack header"); -} - -int index_blob_bulk_checkin(struct odb_transaction *transaction, - struct object_id *result_oid, int fd, size_t size, - const char *path, unsigned flags) -{ - struct bulk_checkin_packfile *state = &transaction->packfile; - off_t seekback, already_hashed_to; - struct git_hash_ctx ctx; - unsigned char obuf[16384]; - unsigned header_len; - struct hashfile_checkpoint checkpoint; - struct pack_idx_entry *idx = NULL; - - seekback = lseek(fd, 0, SEEK_CUR); - if (seekback == (off_t) -1) - return error("cannot find the current offset"); - - header_len = format_object_header((char *)obuf, sizeof(obuf), - OBJ_BLOB, size); - transaction->odb->repo->hash_algo->init_fn(&ctx); - git_hash_update(&ctx, obuf, header_len); - - /* Note: idx is non-NULL when we are writing */ - if ((flags & INDEX_WRITE_OBJECT) != 0) { - CALLOC_ARRAY(idx, 1); - - prepare_to_stream(transaction, flags); - hashfile_checkpoint_init(state->f, &checkpoint); - } - - already_hashed_to = 0; - - while (1) { - prepare_to_stream(transaction, flags); - if (idx) { - hashfile_checkpoint(state->f, &checkpoint); - idx->offset = state->offset; - crc32_begin(state->f); - } - if (!stream_blob_to_pack(state, &ctx, &already_hashed_to, - fd, size, path, flags)) - break; - /* - * Writing this object to the current pack will make - * it too big; we need to truncate it, start a new - * pack, and write into it. - */ - if (!idx) - BUG("should not happen"); - hashfile_truncate(state->f, &checkpoint); - state->offset = checkpoint.offset; - flush_bulk_checkin_packfile(transaction); - if (lseek(fd, seekback, SEEK_SET) == (off_t) -1) - return error("cannot seek back"); - } - git_hash_final_oid(result_oid, &ctx); - if (!idx) - return 0; - - idx->crc32 = crc32_end(state->f); - if (already_written(transaction, result_oid)) { - hashfile_truncate(state->f, &checkpoint); - state->offset = checkpoint.offset; - free(idx); - } else { - oidcpy(&idx->oid, result_oid); - ALLOC_GROW(state->written, - state->nr_written + 1, - state->alloc_written); - state->written[state->nr_written++] = idx; - } - return 0; -} - -void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction) -{ - /* - * We lazily create the temporary object directory - * the first time an object might be added, since - * callers may not know whether any objects will be - * added at the time they call begin_odb_transaction. - */ - if (!transaction || transaction->objdir) - return; - - transaction->objdir = tmp_objdir_create(transaction->odb->repo, "bulk-fsync"); - if (transaction->objdir) - tmp_objdir_replace_primary_odb(transaction->objdir, 0); -} - -void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, - int fd, const char *filename) -{ - /* - * If we have an active ODB transaction, we issue a call that - * cleans the filesystem page cache but avoids a hardware flush - * command. Later on we will issue a single hardware flush - * before renaming the objects to their final names as part of - * flush_batch_fsync. - */ - if (!transaction || !transaction->objdir || - git_fsync(fd, FSYNC_WRITEOUT_ONLY) < 0) { - if (errno == ENOSYS) - warning(_("core.fsyncMethod = batch is unsupported on this platform")); - fsync_or_die(fd, filename); - } -} - -struct odb_transaction *begin_odb_transaction(struct object_database *odb) -{ - if (odb->transaction) - return NULL; - - CALLOC_ARRAY(odb->transaction, 1); - odb->transaction->odb = odb; - - return odb->transaction; -} - -void end_odb_transaction(struct odb_transaction *transaction) -{ - if (!transaction) - return; - - /* - * Ensure the transaction ending matches the pending transaction. - */ - ASSERT(transaction == transaction->odb->transaction); - - flush_batch_fsync(transaction); - flush_bulk_checkin_packfile(transaction); - transaction->odb->transaction = NULL; - free(transaction); -} diff --git a/bulk-checkin.h b/bulk-checkin.h deleted file mode 100644 index eea728f0d41..00000000000 --- a/bulk-checkin.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2011, Google Inc. - */ -#ifndef BULK_CHECKIN_H -#define BULK_CHECKIN_H - -#include "object.h" -#include "odb.h" - -struct odb_transaction; - -void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction); -void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, - int fd, const char *filename); - -/* - * This writes the specified object to a packfile. Objects written here - * during the same transaction are written to the same packfile. The - * packfile is not flushed until the transaction is flushed. The caller - * is expected to ensure a valid transaction is setup for objects to be - * recorded to. - * - * This also bypasses the usual "convert-to-git" dance, and that is on - * purpose. We could write a streaming version of the converting - * functions and insert that before feeding the data to fast-import - * (or equivalent in-core API described above). However, that is - * somewhat complicated, as we do not know the size of the filter - * result, which we need to know beforehand when writing a git object. - * Since the primary motivation for trying to stream from the working - * tree file and to avoid mmaping it in core is to deal with large - * binary blobs, they generally do not want to get any conversion, and - * callers should avoid this code path when filters are requested. - */ -int index_blob_bulk_checkin(struct odb_transaction *transaction, - struct object_id *oid, int fd, size_t size, - const char *path, unsigned flags); - -/* - * Tell the object database to optimize for adding - * multiple objects. end_odb_transaction must be called - * to make new objects visible. If a transaction is already - * pending, NULL is returned. - */ -struct odb_transaction *begin_odb_transaction(struct object_database *odb); - -/* - * Tell the object database to make any objects from the - * current transaction visible. - */ -void end_odb_transaction(struct odb_transaction *transaction); - -#endif diff --git a/cache-tree.c b/cache-tree.c index d225554eedd..79ddf6b7278 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -8,7 +8,6 @@ #include "tree.h" #include "tree-walk.h" #include "cache-tree.h" -#include "bulk-checkin.h" #include "object-file.h" #include "odb.h" #include "read-cache-ll.h" diff --git a/meson.build b/meson.build index b3dfcc04972..fccb6d2eeca 100644 --- a/meson.build +++ b/meson.build @@ -287,7 +287,6 @@ libgit_sources = [ 'blob.c', 'bloom.c', 'branch.c', - 'bulk-checkin.c', 'bundle-uri.c', 'bundle.c', 'cache-tree.c', diff --git a/object-file.c b/object-file.c index 5e765735495..03f9931b832 100644 --- a/object-file.c +++ b/object-file.c @@ -10,7 +10,6 @@ #define USE_THE_REPOSITORY_VARIABLE #include "git-compat-util.h" -#include "bulk-checkin.h" #include "convert.h" #include "dir.h" #include "environment.h" @@ -28,6 +27,8 @@ #include "read-cache-ll.h" #include "setup.h" #include "streaming.h" +#include "tempfile.h" +#include "tmp-objdir.h" /* The maximum size for an object header. */ #define MAX_HEADER_LEN 32 @@ -666,6 +667,93 @@ void hash_object_file(const struct git_hash_algo *algo, const void *buf, write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen); } +struct bulk_checkin_packfile { + char *pack_tmp_name; + struct hashfile *f; + off_t offset; + struct pack_idx_option pack_idx_opts; + + struct pack_idx_entry **written; + uint32_t alloc_written; + uint32_t nr_written; +}; + +struct odb_transaction { + struct object_database *odb; + + struct tmp_objdir *objdir; + struct bulk_checkin_packfile packfile; +}; + +static void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction) +{ + /* + * We lazily create the temporary object directory + * the first time an object might be added, since + * callers may not know whether any objects will be + * added at the time they call begin_odb_transaction. + */ + if (!transaction || transaction->objdir) + return; + + transaction->objdir = tmp_objdir_create(transaction->odb->repo, "bulk-fsync"); + if (transaction->objdir) + tmp_objdir_replace_primary_odb(transaction->objdir, 0); +} + +static void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, + int fd, const char *filename) +{ + /* + * If we have an active ODB transaction, we issue a call that + * cleans the filesystem page cache but avoids a hardware flush + * command. Later on we will issue a single hardware flush + * before renaming the objects to their final names as part of + * flush_batch_fsync. + */ + if (!transaction || !transaction->objdir || + git_fsync(fd, FSYNC_WRITEOUT_ONLY) < 0) { + if (errno == ENOSYS) + warning(_("core.fsyncMethod = batch is unsupported on this platform")); + fsync_or_die(fd, filename); + } +} + +/* + * Cleanup after batch-mode fsync_object_files. + */ +static void flush_batch_fsync(struct odb_transaction *transaction) +{ + struct strbuf temp_path = STRBUF_INIT; + struct tempfile *temp; + + if (!transaction->objdir) + return; + + /* + * Issue a full hardware flush against a temporary file to ensure + * that all objects are durable before any renames occur. The code in + * fsync_loose_object_bulk_checkin has already issued a writeout + * request, but it has not flushed any writeback cache in the storage + * hardware or any filesystem logs. This fsync call acts as a barrier + * to ensure that the data in each new object file is durable before + * the final name is visible. + */ + strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX", + repo_get_object_directory(transaction->odb->repo)); + temp = xmks_tempfile(temp_path.buf); + fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp)); + delete_tempfile(&temp); + strbuf_release(&temp_path); + + /* + * Make the object files visible in the primary ODB after their data is + * fully durable. + */ + tmp_objdir_migrate(transaction->objdir); + transaction->objdir = NULL; +} + /* Finalize a file on disk, and close it. */ static void close_loose_object(struct odb_source *source, int fd, const char *filename) @@ -1243,6 +1331,283 @@ static int index_core(struct index_state *istate, return ret; } +static int already_written(struct odb_transaction *transaction, + struct object_id *oid) +{ + /* The object may already exist in the repository */ + if (odb_has_object(transaction->odb, oid, + HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) + return 1; + + /* Might want to keep the list sorted */ + for (uint32_t i = 0; i < transaction->packfile.nr_written; i++) + if (oideq(&transaction->packfile.written[i]->oid, oid)) + return 1; + + /* This is a new object we need to keep */ + return 0; +} + +/* Lazily create backing packfile for the state */ +static void prepare_to_stream(struct odb_transaction *transaction, + unsigned flags) +{ + struct bulk_checkin_packfile *state = &transaction->packfile; + if (!(flags & INDEX_WRITE_OBJECT) || state->f) + return; + + state->f = create_tmp_packfile(transaction->odb->repo, + &state->pack_tmp_name); + reset_pack_idx_option(&state->pack_idx_opts); + + /* Pretend we are going to write only one object */ + state->offset = write_pack_header(state->f, 1); + if (!state->offset) + die_errno("unable to write pack header"); +} + +/* + * Read the contents from fd for size bytes, streaming it to the + * packfile in state while updating the hash in ctx. Signal a failure + * by returning a negative value when the resulting pack would exceed + * the pack size limit and this is not the first object in the pack, + * so that the caller can discard what we wrote from the current pack + * by truncating it and opening a new one. The caller will then call + * us again after rewinding the input fd. + * + * The already_hashed_to pointer is kept untouched by the caller to + * make sure we do not hash the same byte when we are called + * again. This way, the caller does not have to checkpoint its hash + * status before calling us just in case we ask it to call us again + * with a new pack. + */ +static int stream_blob_to_pack(struct bulk_checkin_packfile *state, + struct git_hash_ctx *ctx, off_t *already_hashed_to, + int fd, size_t size, const char *path, + unsigned flags) +{ + git_zstream s; + unsigned char ibuf[16384]; + unsigned char obuf[16384]; + unsigned hdrlen; + int status = Z_OK; + int write_object = (flags & INDEX_WRITE_OBJECT); + off_t offset = 0; + + git_deflate_init(&s, pack_compression_level); + + hdrlen = encode_in_pack_object_header(obuf, sizeof(obuf), OBJ_BLOB, size); + s.next_out = obuf + hdrlen; + s.avail_out = sizeof(obuf) - hdrlen; + + while (status != Z_STREAM_END) { + if (size && !s.avail_in) { + size_t rsize = size < sizeof(ibuf) ? size : sizeof(ibuf); + ssize_t read_result = read_in_full(fd, ibuf, rsize); + if (read_result < 0) + die_errno("failed to read from '%s'", path); + if ((size_t)read_result != rsize) + die("failed to read %u bytes from '%s'", + (unsigned)rsize, path); + offset += rsize; + if (*already_hashed_to < offset) { + size_t hsize = offset - *already_hashed_to; + if (rsize < hsize) + hsize = rsize; + if (hsize) + git_hash_update(ctx, ibuf, hsize); + *already_hashed_to = offset; + } + s.next_in = ibuf; + s.avail_in = rsize; + size -= rsize; + } + + status = git_deflate(&s, size ? 0 : Z_FINISH); + + if (!s.avail_out || status == Z_STREAM_END) { + if (write_object) { + size_t written = s.next_out - obuf; + + /* would we bust the size limit? */ + if (state->nr_written && + pack_size_limit_cfg && + pack_size_limit_cfg < state->offset + written) { + git_deflate_abort(&s); + return -1; + } + + hashwrite(state->f, obuf, written); + state->offset += written; + } + s.next_out = obuf; + s.avail_out = sizeof(obuf); + } + + switch (status) { + case Z_OK: + case Z_BUF_ERROR: + case Z_STREAM_END: + continue; + default: + die("unexpected deflate failure: %d", status); + } + } + git_deflate_end(&s); + return 0; +} + +static void finish_tmp_packfile(struct odb_transaction *transaction, + struct strbuf *basename, + unsigned char hash[]) +{ + struct bulk_checkin_packfile *state = &transaction->packfile; + struct repository *repo = transaction->odb->repo; + char *idx_tmp_name = NULL; + + stage_tmp_packfiles(repo, basename, state->pack_tmp_name, + state->written, state->nr_written, NULL, + &state->pack_idx_opts, hash, &idx_tmp_name); + rename_tmp_packfile_idx(repo, basename, &idx_tmp_name); + + free(idx_tmp_name); +} + +static void flush_bulk_checkin_packfile(struct odb_transaction *transaction) +{ + struct bulk_checkin_packfile *state = &transaction->packfile; + struct repository *repo = transaction->odb->repo; + unsigned char hash[GIT_MAX_RAWSZ]; + struct strbuf packname = STRBUF_INIT; + + if (!state->f) + return; + + if (state->nr_written == 0) { + close(state->f->fd); + free_hashfile(state->f); + unlink(state->pack_tmp_name); + goto clear_exit; + } else if (state->nr_written == 1) { + finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, + CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE); + } else { + int fd = finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, 0); + fixup_pack_header_footer(repo->hash_algo, fd, hash, state->pack_tmp_name, + state->nr_written, hash, + state->offset); + close(fd); + } + + strbuf_addf(&packname, "%s/pack/pack-%s.", + repo_get_object_directory(transaction->odb->repo), + hash_to_hex_algop(hash, repo->hash_algo)); + + finish_tmp_packfile(transaction, &packname, hash); + for (uint32_t i = 0; i < state->nr_written; i++) + free(state->written[i]); + +clear_exit: + free(state->pack_tmp_name); + free(state->written); + memset(state, 0, sizeof(*state)); + + strbuf_release(&packname); + /* Make objects we just wrote available to ourselves */ + reprepare_packed_git(repo); +} + +/* + * This writes the specified object to a packfile. Objects written here + * during the same transaction are written to the same packfile. The + * packfile is not flushed until the transaction is flushed. The caller + * is expected to ensure a valid transaction is setup for objects to be + * recorded to. + * + * This also bypasses the usual "convert-to-git" dance, and that is on + * purpose. We could write a streaming version of the converting + * functions and insert that before feeding the data to fast-import + * (or equivalent in-core API described above). However, that is + * somewhat complicated, as we do not know the size of the filter + * result, which we need to know beforehand when writing a git object. + * Since the primary motivation for trying to stream from the working + * tree file and to avoid mmaping it in core is to deal with large + * binary blobs, they generally do not want to get any conversion, and + * callers should avoid this code path when filters are requested. + */ +static int index_blob_bulk_checkin(struct odb_transaction *transaction, + struct object_id *result_oid, int fd, size_t size, + const char *path, unsigned flags) +{ + struct bulk_checkin_packfile *state = &transaction->packfile; + off_t seekback, already_hashed_to; + struct git_hash_ctx ctx; + unsigned char obuf[16384]; + unsigned header_len; + struct hashfile_checkpoint checkpoint; + struct pack_idx_entry *idx = NULL; + + seekback = lseek(fd, 0, SEEK_CUR); + if (seekback == (off_t)-1) + return error("cannot find the current offset"); + + header_len = format_object_header((char *)obuf, sizeof(obuf), + OBJ_BLOB, size); + transaction->odb->repo->hash_algo->init_fn(&ctx); + git_hash_update(&ctx, obuf, header_len); + + /* Note: idx is non-NULL when we are writing */ + if ((flags & INDEX_WRITE_OBJECT) != 0) { + CALLOC_ARRAY(idx, 1); + + prepare_to_stream(transaction, flags); + hashfile_checkpoint_init(state->f, &checkpoint); + } + + already_hashed_to = 0; + + while (1) { + prepare_to_stream(transaction, flags); + if (idx) { + hashfile_checkpoint(state->f, &checkpoint); + idx->offset = state->offset; + crc32_begin(state->f); + } + if (!stream_blob_to_pack(state, &ctx, &already_hashed_to, + fd, size, path, flags)) + break; + /* + * Writing this object to the current pack will make + * it too big; we need to truncate it, start a new + * pack, and write into it. + */ + if (!idx) + BUG("should not happen"); + hashfile_truncate(state->f, &checkpoint); + state->offset = checkpoint.offset; + flush_bulk_checkin_packfile(transaction); + if (lseek(fd, seekback, SEEK_SET) == (off_t)-1) + return error("cannot seek back"); + } + git_hash_final_oid(result_oid, &ctx); + if (!idx) + return 0; + + idx->crc32 = crc32_end(state->f); + if (already_written(transaction, result_oid)) { + hashfile_truncate(state->f, &checkpoint); + state->offset = checkpoint.offset; + free(idx); + } else { + oidcpy(&idx->oid, result_oid); + ALLOC_GROW(state->written, + state->nr_written + 1, + state->alloc_written); + state->written[state->nr_written++] = idx; + } + return 0; +} + int index_fd(struct index_state *istate, struct object_id *oid, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags) @@ -1609,3 +1974,30 @@ int read_loose_object(struct repository *repo, munmap(map, mapsize); return ret; } + +struct odb_transaction *begin_odb_transaction(struct object_database *odb) +{ + if (odb->transaction) + return NULL; + + CALLOC_ARRAY(odb->transaction, 1); + odb->transaction->odb = odb; + + return odb->transaction; +} + +void end_odb_transaction(struct odb_transaction *transaction) +{ + if (!transaction) + return; + + /* + * Ensure the transaction ending matches the pending transaction. + */ + ASSERT(transaction == transaction->odb->transaction); + + flush_batch_fsync(transaction); + flush_bulk_checkin_packfile(transaction); + transaction->odb->transaction = NULL; + free(transaction); +} diff --git a/object-file.h b/object-file.h index 15d97630d3b..6323d2e63c0 100644 --- a/object-file.h +++ b/object-file.h @@ -218,4 +218,20 @@ int read_loose_object(struct repository *repo, void **contents, struct object_info *oi); +struct odb_transaction; + +/* + * Tell the object database to optimize for adding + * multiple objects. end_odb_transaction must be called + * to make new objects visible. If a transaction is already + * pending, NULL is returned. + */ +struct odb_transaction *begin_odb_transaction(struct object_database *odb); + +/* + * Tell the object database to make any objects from the + * current transaction visible. + */ +void end_odb_transaction(struct odb_transaction *transaction); + #endif /* OBJECT_FILE_H */ diff --git a/read-cache.c b/read-cache.c index 229b8ef11c9..80591eecedc 100644 --- a/read-cache.c +++ b/read-cache.c @@ -8,7 +8,6 @@ #define DISABLE_SIGN_COMPARE_WARNINGS #include "git-compat-util.h" -#include "bulk-checkin.h" #include "config.h" #include "date.h" #include "diff.h" -- GitLab From 76eabfb6a193b8b70d00f520875f0788dee8ec8e Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Mon, 8 Sep 2025 16:38:53 -0500 Subject: [PATCH 09/10] object-file: update naming from bulk-checkin Update the names of several functions and types relocated from the bulk-checkin subsystem for better clarity. Also drop finish_tmp_packfile() as a standalone function in favor of embedding it in flush_packfile_transaction() directly. Signed-off-by: Justin Tobler --- object-file.c | 80 +++++++++++++++++++++++---------------------------- 1 file changed, 36 insertions(+), 44 deletions(-) diff --git a/object-file.c b/object-file.c index 03f9931b832..8103a2bf413 100644 --- a/object-file.c +++ b/object-file.c @@ -667,7 +667,7 @@ void hash_object_file(const struct git_hash_algo *algo, const void *buf, write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen); } -struct bulk_checkin_packfile { +struct transaction_packfile { char *pack_tmp_name; struct hashfile *f; off_t offset; @@ -682,10 +682,10 @@ struct odb_transaction { struct object_database *odb; struct tmp_objdir *objdir; - struct bulk_checkin_packfile packfile; + struct transaction_packfile packfile; }; -static void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction) +static void prepare_loose_object_transaction(struct odb_transaction *transaction) { /* * We lazily create the temporary object directory @@ -701,7 +701,7 @@ static void prepare_loose_object_bulk_checkin(struct odb_transaction *transactio tmp_objdir_replace_primary_odb(transaction->objdir, 0); } -static void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, +static void fsync_loose_object_transaction(struct odb_transaction *transaction, int fd, const char *filename) { /* @@ -722,7 +722,7 @@ static void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, /* * Cleanup after batch-mode fsync_object_files. */ -static void flush_batch_fsync(struct odb_transaction *transaction) +static void flush_loose_object_transaction(struct odb_transaction *transaction) { struct strbuf temp_path = STRBUF_INIT; struct tempfile *temp; @@ -733,7 +733,7 @@ static void flush_batch_fsync(struct odb_transaction *transaction) /* * Issue a full hardware flush against a temporary file to ensure * that all objects are durable before any renames occur. The code in - * fsync_loose_object_bulk_checkin has already issued a writeout + * fsync_loose_object_transaction has already issued a writeout * request, but it has not flushed any writeback cache in the storage * hardware or any filesystem logs. This fsync call acts as a barrier * to ensure that the data in each new object file is durable before @@ -762,7 +762,7 @@ static void close_loose_object(struct odb_source *source, goto out; if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT)) - fsync_loose_object_bulk_checkin(source->odb->transaction, fd, filename); + fsync_loose_object_transaction(source->odb->transaction, fd, filename); else if (fsync_object_files > 0) fsync_or_die(fd, filename); else @@ -940,7 +940,7 @@ static int write_loose_object(struct odb_source *source, static struct strbuf filename = STRBUF_INIT; if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT)) - prepare_loose_object_bulk_checkin(source->odb->transaction); + prepare_loose_object_transaction(source->odb->transaction); odb_loose_path(source, &filename, oid); @@ -1029,7 +1029,7 @@ int stream_loose_object(struct odb_source *source, int hdrlen; if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT)) - prepare_loose_object_bulk_checkin(source->odb->transaction); + prepare_loose_object_transaction(source->odb->transaction); /* Since oid is not determined, save tmp file to odb path. */ strbuf_addf(&filename, "%s/", source->path); @@ -1349,10 +1349,10 @@ static int already_written(struct odb_transaction *transaction, } /* Lazily create backing packfile for the state */ -static void prepare_to_stream(struct odb_transaction *transaction, - unsigned flags) +static void prepare_packfile_transaction(struct odb_transaction *transaction, + unsigned flags) { - struct bulk_checkin_packfile *state = &transaction->packfile; + struct transaction_packfile *state = &transaction->packfile; if (!(flags & INDEX_WRITE_OBJECT) || state->f) return; @@ -1381,7 +1381,7 @@ static void prepare_to_stream(struct odb_transaction *transaction, * status before calling us just in case we ask it to call us again * with a new pack. */ -static int stream_blob_to_pack(struct bulk_checkin_packfile *state, +static int stream_blob_to_pack(struct transaction_packfile *state, struct git_hash_ctx *ctx, off_t *already_hashed_to, int fd, size_t size, const char *path, unsigned flags) @@ -1457,28 +1457,13 @@ static int stream_blob_to_pack(struct bulk_checkin_packfile *state, return 0; } -static void finish_tmp_packfile(struct odb_transaction *transaction, - struct strbuf *basename, - unsigned char hash[]) +static void flush_packfile_transaction(struct odb_transaction *transaction) { - struct bulk_checkin_packfile *state = &transaction->packfile; - struct repository *repo = transaction->odb->repo; - char *idx_tmp_name = NULL; - - stage_tmp_packfiles(repo, basename, state->pack_tmp_name, - state->written, state->nr_written, NULL, - &state->pack_idx_opts, hash, &idx_tmp_name); - rename_tmp_packfile_idx(repo, basename, &idx_tmp_name); - - free(idx_tmp_name); -} - -static void flush_bulk_checkin_packfile(struct odb_transaction *transaction) -{ - struct bulk_checkin_packfile *state = &transaction->packfile; + struct transaction_packfile *state = &transaction->packfile; struct repository *repo = transaction->odb->repo; unsigned char hash[GIT_MAX_RAWSZ]; struct strbuf packname = STRBUF_INIT; + char *idx_tmp_name = NULL; if (!state->f) return; @@ -1503,11 +1488,16 @@ static void flush_bulk_checkin_packfile(struct odb_transaction *transaction) repo_get_object_directory(transaction->odb->repo), hash_to_hex_algop(hash, repo->hash_algo)); - finish_tmp_packfile(transaction, &packname, hash); + stage_tmp_packfiles(repo, &packname, state->pack_tmp_name, + state->written, state->nr_written, NULL, + &state->pack_idx_opts, hash, &idx_tmp_name); + rename_tmp_packfile_idx(repo, &packname, &idx_tmp_name); + for (uint32_t i = 0; i < state->nr_written; i++) free(state->written[i]); clear_exit: + free(idx_tmp_name); free(state->pack_tmp_name); free(state->written); memset(state, 0, sizeof(*state)); @@ -1535,11 +1525,12 @@ static void flush_bulk_checkin_packfile(struct odb_transaction *transaction) * binary blobs, they generally do not want to get any conversion, and * callers should avoid this code path when filters are requested. */ -static int index_blob_bulk_checkin(struct odb_transaction *transaction, - struct object_id *result_oid, int fd, size_t size, - const char *path, unsigned flags) +static int index_blob_packfile_transaction(struct odb_transaction *transaction, + struct object_id *result_oid, int fd, + size_t size, const char *path, + unsigned flags) { - struct bulk_checkin_packfile *state = &transaction->packfile; + struct transaction_packfile *state = &transaction->packfile; off_t seekback, already_hashed_to; struct git_hash_ctx ctx; unsigned char obuf[16384]; @@ -1560,14 +1551,14 @@ static int index_blob_bulk_checkin(struct odb_transaction *transaction, if ((flags & INDEX_WRITE_OBJECT) != 0) { CALLOC_ARRAY(idx, 1); - prepare_to_stream(transaction, flags); + prepare_packfile_transaction(transaction, flags); hashfile_checkpoint_init(state->f, &checkpoint); } already_hashed_to = 0; while (1) { - prepare_to_stream(transaction, flags); + prepare_packfile_transaction(transaction, flags); if (idx) { hashfile_checkpoint(state->f, &checkpoint); idx->offset = state->offset; @@ -1585,7 +1576,7 @@ static int index_blob_bulk_checkin(struct odb_transaction *transaction, BUG("should not happen"); hashfile_truncate(state->f, &checkpoint); state->offset = checkpoint.offset; - flush_bulk_checkin_packfile(transaction); + flush_packfile_transaction(transaction); if (lseek(fd, seekback, SEEK_SET) == (off_t)-1) return error("cannot seek back"); } @@ -1632,9 +1623,10 @@ int index_fd(struct index_state *istate, struct object_id *oid, struct odb_transaction *transaction; transaction = begin_odb_transaction(the_repository->objects); - ret = index_blob_bulk_checkin(the_repository->objects->transaction, - oid, fd, xsize_t(st->st_size), - path, flags); + ret = index_blob_packfile_transaction(the_repository->objects->transaction, + oid, fd, + xsize_t(st->st_size), + path, flags); end_odb_transaction(transaction); } @@ -1996,8 +1988,8 @@ void end_odb_transaction(struct odb_transaction *transaction) */ ASSERT(transaction == transaction->odb->transaction); - flush_batch_fsync(transaction); - flush_bulk_checkin_packfile(transaction); + flush_loose_object_transaction(transaction); + flush_packfile_transaction(transaction); transaction->odb->transaction = NULL; free(transaction); } -- GitLab From 9accbfdbf0cf99505f5371c51d9822cb8fadaf5b Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Mon, 8 Sep 2025 19:42:18 -0500 Subject: [PATCH 10/10] odb: add transaction interface Transactions are managed via the {begin,end}_odb_transaction() function in the object-file subsystem and its implementation is specific to the files object source. Introduce odb_transaction_{begin,commit}() in the odb subsystem to provide an eventual object source agnostic means to manage transactions. Update call sites to instead manage transactions through the odb subsystem. Also rename {begin,end}_odb_transaction() functions to object_file_transaction_{begin,commit}() to clarify the object source it supports. Signed-off-by: Justin Tobler --- builtin/add.c | 5 +++-- builtin/unpack-objects.c | 4 ++-- builtin/update-index.c | 7 ++++--- cache-tree.c | 4 ++-- object-file.c | 12 +++++++----- object-file.h | 6 +++--- odb.c | 10 ++++++++++ odb.h | 13 +++++++++++++ read-cache.c | 4 ++-- 9 files changed, 46 insertions(+), 19 deletions(-) diff --git a/builtin/add.c b/builtin/add.c index 8294366d68a..bf312c40be9 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -15,6 +15,7 @@ #include "pathspec.h" #include "run-command.h" #include "object-file.h" +#include "odb.h" #include "parse-options.h" #include "path.h" #include "preload-index.h" @@ -575,7 +576,7 @@ int cmd_add(int argc, string_list_clear(&only_match_skip_worktree, 0); } - transaction = begin_odb_transaction(repo->objects); + transaction = odb_transaction_begin(repo->objects); ps_matched = xcalloc(pathspec.nr, 1); if (add_renormalize) @@ -594,7 +595,7 @@ int cmd_add(int argc, if (chmod_arg && pathspec.nr) exit_status |= chmod_pathspec(repo, &pathspec, chmod_arg[0], show_only); - end_odb_transaction(transaction); + odb_transaction_commit(transaction); finish: if (write_locked_index(repo->index, &lock_file, diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index 4596fff0dad..ef79e43715d 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -599,12 +599,12 @@ static void unpack_all(void) progress = start_progress(the_repository, _("Unpacking objects"), nr_objects); CALLOC_ARRAY(obj_list, nr_objects); - transaction = begin_odb_transaction(the_repository->objects); + transaction = odb_transaction_begin(the_repository->objects); for (i = 0; i < nr_objects; i++) { unpack_one(i); display_progress(progress, i + 1); } - end_odb_transaction(transaction); + odb_transaction_commit(transaction); stop_progress(&progress); if (delta_list) diff --git a/builtin/update-index.c b/builtin/update-index.c index ee01c4e423d..8a5907767bf 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -18,6 +18,7 @@ #include "cache-tree.h" #include "tree-walk.h" #include "object-file.h" +#include "odb.h" #include "refs.h" #include "resolve-undo.h" #include "parse-options.h" @@ -1122,7 +1123,7 @@ int cmd_update_index(int argc, * Allow the object layer to optimize adding multiple objects in * a batch. */ - transaction = begin_odb_transaction(the_repository->objects); + transaction = odb_transaction_begin(the_repository->objects); while (ctx.argc) { if (parseopt_state != PARSE_OPT_DONE) parseopt_state = parse_options_step(&ctx, options, @@ -1152,7 +1153,7 @@ int cmd_update_index(int argc, * a transaction. */ if (transaction && verbose) { - end_odb_transaction(transaction); + odb_transaction_commit(transaction); transaction = NULL; } @@ -1220,7 +1221,7 @@ int cmd_update_index(int argc, /* * By now we have added all of the new objects */ - end_odb_transaction(transaction); + odb_transaction_commit(transaction); if (split_index > 0) { if (repo_config_get_split_index(the_repository) == 0) diff --git a/cache-tree.c b/cache-tree.c index 79ddf6b7278..2aba47060e9 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -489,10 +489,10 @@ int cache_tree_update(struct index_state *istate, int flags) trace_performance_enter(); trace2_region_enter("cache_tree", "update", the_repository); - transaction = begin_odb_transaction(the_repository->objects); + transaction = odb_transaction_begin(the_repository->objects); i = update_one(istate->cache_tree, istate->cache, istate->cache_nr, "", 0, &skip, flags); - end_odb_transaction(transaction); + odb_transaction_commit(transaction); trace2_region_leave("cache_tree", "update", the_repository); trace_performance_leave("cache_tree_update"); if (i < 0) diff --git a/object-file.c b/object-file.c index 8103a2bf413..17a236d2fe1 100644 --- a/object-file.c +++ b/object-file.c @@ -691,7 +691,7 @@ static void prepare_loose_object_transaction(struct odb_transaction *transaction * We lazily create the temporary object directory * the first time an object might be added, since * callers may not know whether any objects will be - * added at the time they call begin_odb_transaction. + * added at the time they call object_file_transaction_begin. */ if (!transaction || transaction->objdir) return; @@ -1622,12 +1622,12 @@ int index_fd(struct index_state *istate, struct object_id *oid, } else { struct odb_transaction *transaction; - transaction = begin_odb_transaction(the_repository->objects); + transaction = odb_transaction_begin(the_repository->objects); ret = index_blob_packfile_transaction(the_repository->objects->transaction, oid, fd, xsize_t(st->st_size), path, flags); - end_odb_transaction(transaction); + odb_transaction_commit(transaction); } close(fd); @@ -1967,8 +1967,10 @@ int read_loose_object(struct repository *repo, return ret; } -struct odb_transaction *begin_odb_transaction(struct object_database *odb) +struct odb_transaction *object_file_transaction_begin(struct odb_source *source) { + struct object_database *odb = source->odb; + if (odb->transaction) return NULL; @@ -1978,7 +1980,7 @@ struct odb_transaction *begin_odb_transaction(struct object_database *odb) return odb->transaction; } -void end_odb_transaction(struct odb_transaction *transaction) +void object_file_transaction_commit(struct odb_transaction *transaction) { if (!transaction) return; diff --git a/object-file.h b/object-file.h index 6323d2e63c0..3fd48dcafbf 100644 --- a/object-file.h +++ b/object-file.h @@ -222,16 +222,16 @@ struct odb_transaction; /* * Tell the object database to optimize for adding - * multiple objects. end_odb_transaction must be called + * multiple objects. object_file_transaction_commit must be called * to make new objects visible. If a transaction is already * pending, NULL is returned. */ -struct odb_transaction *begin_odb_transaction(struct object_database *odb); +struct odb_transaction *object_file_transaction_begin(struct odb_source *source); /* * Tell the object database to make any objects from the * current transaction visible. */ -void end_odb_transaction(struct odb_transaction *transaction); +void object_file_transaction_commit(struct odb_transaction *transaction); #endif /* OBJECT_FILE_H */ diff --git a/odb.c b/odb.c index 2a92a018c42..af9534bfe1c 100644 --- a/odb.c +++ b/odb.c @@ -1051,3 +1051,13 @@ void odb_clear(struct object_database *o) hashmap_clear(&o->pack_map); string_list_clear(&o->submodule_source_paths, 0); } + +struct odb_transaction *odb_transaction_begin(struct object_database *odb) +{ + return object_file_transaction_begin(odb->sources); +} + +void odb_transaction_commit(struct odb_transaction *transaction) +{ + object_file_transaction_commit(transaction); +} diff --git a/odb.h b/odb.h index a89b2143909..82093753c84 100644 --- a/odb.h +++ b/odb.h @@ -185,6 +185,19 @@ struct object_database { struct object_database *odb_new(struct repository *repo); void odb_clear(struct object_database *o); +/* + * Starts an ODB transaction. Subsequent objects are written to the transaction + * and not committed until odb_transaction_commit() is invoked on the + * transaction. If the ODB already has a pending transaction, NULL is returned. + */ +struct odb_transaction *odb_transaction_begin(struct object_database *odb); + +/* + * Commits an ODB transaction making the written objects visible. If the + * specified transaction is NULL, the function is a no-op. + */ +void odb_transaction_commit(struct odb_transaction *transaction); + /* * Find source by its object directory path. Dies in case the source couldn't * be found. diff --git a/read-cache.c b/read-cache.c index 80591eecedc..94098a38614 100644 --- a/read-cache.c +++ b/read-cache.c @@ -3972,9 +3972,9 @@ int add_files_to_cache(struct repository *repo, const char *prefix, * This function is invoked from commands other than 'add', which * may not have their own transaction active. */ - transaction = begin_odb_transaction(repo->objects); + transaction = odb_transaction_begin(repo->objects); run_diff_files(&rev, DIFF_RACY_IS_MODIFIED); - end_odb_transaction(transaction); + odb_transaction_commit(transaction); release_revisions(&rev); return !!data.add_errors; -- GitLab