diff --git a/Makefile b/Makefile
index 4c95affadb5e265a98ffcf485f8c13a6c3a7e167..d25d4255f8a7ebf8abfed5c5ae61a11baedc74c7 100644
--- a/Makefile
+++ b/Makefile
@@ -974,7 +974,6 @@ LIB_OBJS += blame.o
 LIB_OBJS += blob.o
 LIB_OBJS += bloom.o
 LIB_OBJS += branch.o
-LIB_OBJS += bulk-checkin.o
 LIB_OBJS += bundle-uri.o
 LIB_OBJS += bundle.o
 LIB_OBJS += cache-tree.o
diff --git a/builtin/add.c b/builtin/add.c
index 0235854f8099c49328a00aadad4adbbbdfc0579b..bf312c40be9789cf2bda329d48d53c49c0e12470 100644
--- a/builtin/add.c
+++ b/builtin/add.c
@@ -14,13 +14,14 @@
 #include "gettext.h"
 #include "pathspec.h"
 #include "run-command.h"
+#include "object-file.h"
+#include "odb.h"
 #include "parse-options.h"
 #include "path.h"
 #include "preload-index.h"
 #include "diff.h"
 #include "read-cache.h"
 #include "revision.h"
-#include "bulk-checkin.h"
 #include "strvec.h"
 #include "submodule.h"
 #include "add-interactive.h"
@@ -389,6 +390,7 @@ int cmd_add(int argc,
 	char *seen = NULL;
 	char *ps_matched = NULL;
 	struct lock_file lock_file = LOCK_INIT;
+	struct odb_transaction *transaction;
 
 	repo_config(repo, add_config, NULL);
 
@@ -574,7 +576,7 @@ int cmd_add(int argc,
 		string_list_clear(&only_match_skip_worktree, 0);
 	}
 
-	begin_odb_transaction();
+	transaction = odb_transaction_begin(repo->objects);
 
 	ps_matched = xcalloc(pathspec.nr, 1);
 	if (add_renormalize)
@@ -593,7 +595,7 @@ int cmd_add(int argc,
 	if (chmod_arg && pathspec.nr)
 		exit_status |= chmod_pathspec(repo, &pathspec, chmod_arg[0], show_only);
 
-	end_odb_transaction();
+	odb_transaction_commit(transaction);
 
 finish:
 	if (write_locked_index(repo->index, &lock_file,
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index 7ae7c82b6c05a6a6b09e77766ef7226c0c4ddcd4..ef79e43715d362d2907c23b5613d6fdaf9a96c80 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -2,7 +2,6 @@
 #define DISABLE_SIGN_COMPARE_WARNINGS
 
 #include "builtin.h"
-#include "bulk-checkin.h"
 #include "config.h"
 #include "environment.h"
 #include "gettext.h"
@@ -584,6 +583,7 @@ static void unpack_all(void)
 {
 	int i;
 	unsigned char *hdr = fill(sizeof(struct pack_header));
+	struct odb_transaction *transaction;
 
 	if (get_be32(hdr) != PACK_SIGNATURE)
 		die("bad pack file");
@@ -599,12 +599,12 @@ static void unpack_all(void)
 		progress = start_progress(the_repository,
 					  _("Unpacking objects"), nr_objects);
 	CALLOC_ARRAY(obj_list, nr_objects);
-	begin_odb_transaction();
+	transaction = odb_transaction_begin(the_repository->objects);
 	for (i = 0; i < nr_objects; i++) {
 		unpack_one(i);
 		display_progress(progress, i + 1);
 	}
-	end_odb_transaction();
+	odb_transaction_commit(transaction);
 	stop_progress(&progress);
 
 	if (delta_list)
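Note: the two builtins above show the whole shape of the conversion: the global begin/end pair becomes a handle returned by odb_transaction_begin() and consumed by odb_transaction_commit(). A minimal sketch of the calling convention, assuming a struct repository *repo in scope (write_objects_in_bulk() is a hypothetical stand-in for any batch of object writes):

	struct odb_transaction *transaction;

	/* Objects written inside the transaction stay invisible... */
	transaction = odb_transaction_begin(repo->objects);
	write_objects_in_bulk(repo);		/* hypothetical batched writes */
	odb_transaction_commit(transaction);	/* ...until this commit */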
diff --git a/builtin/update-index.c b/builtin/update-index.c
index 2380f3ccd68c8ce0ac4b221dbf84f18b9a430811..8a5907767bf297eefcb0c6de5441cabd567536fd 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -8,7 +8,6 @@
 #define DISABLE_SIGN_COMPARE_WARNINGS
 
 #include "builtin.h"
-#include "bulk-checkin.h"
 #include "config.h"
 #include "environment.h"
 #include "gettext.h"
@@ -19,6 +18,7 @@
 #include "cache-tree.h"
 #include "tree-walk.h"
 #include "object-file.h"
+#include "odb.h"
 #include "refs.h"
 #include "resolve-undo.h"
 #include "parse-options.h"
@@ -70,14 +70,6 @@ static void report(const char *fmt, ...)
 	if (!verbose)
 		return;
 
-	/*
-	 * It is possible, though unlikely, that a caller could use the verbose
-	 * output to synchronize with addition of objects to the object
-	 * database. The current implementation of ODB transactions leaves
-	 * objects invisible while a transaction is active, so flush the
-	 * transaction here before reporting a change made by update-index.
-	 */
-	flush_odb_transaction();
 	va_start(vp, fmt);
 	vprintf(fmt, vp);
 	putchar('\n');
@@ -940,6 +932,7 @@ int cmd_update_index(int argc,
 	strbuf_getline_fn getline_fn;
 	int parseopt_state = PARSE_OPT_UNKNOWN;
 	struct repository *r = the_repository;
+	struct odb_transaction *transaction;
 	struct option options[] = {
 		OPT_BIT('q', NULL, &refresh_args.flags,
 			N_("continue refresh even when index needs update"),
@@ -1130,7 +1123,7 @@ int cmd_update_index(int argc,
 	 * Allow the object layer to optimize adding multiple objects in
 	 * a batch.
 	 */
-	begin_odb_transaction();
+	transaction = odb_transaction_begin(the_repository->objects);
 	while (ctx.argc) {
 		if (parseopt_state != PARSE_OPT_DONE)
 			parseopt_state = parse_options_step(&ctx, options,
@@ -1149,6 +1142,21 @@ int cmd_update_index(int argc,
 			const char *path = ctx.argv[0];
 			char *p;
 
+			/*
+			 * It is possible, though unlikely, that a caller could
+			 * use the verbose output to synchronize with addition
+			 * of objects to the object database. The current
+			 * implementation of ODB transactions leaves objects
+			 * invisible while a transaction is active, so end the
+			 * transaction here early before processing the next
+			 * update. All further updates are performed outside of
+			 * a transaction.
+			 */
+			if (transaction && verbose) {
+				odb_transaction_commit(transaction);
+				transaction = NULL;
+			}
+
 			setup_work_tree();
 			p = prefix_path(prefix, prefix_length, path);
 			update_one(p);
@@ -1213,7 +1221,7 @@ int cmd_update_index(int argc,
 	/*
 	 * By now we have added all of the new objects
 	 */
-	end_odb_transaction();
+	odb_transaction_commit(transaction);
 
 	if (split_index > 0) {
 		if (repo_config_get_split_index(the_repository) == 0)
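Note: condensing the update-index hunks above into one view of the transaction's lifetime under --verbose (a sketch of the flow already shown, not new behavior):

	transaction = odb_transaction_begin(the_repository->objects);
	/* ... the first verbose path update arrives ... */
	if (transaction && verbose) {
		odb_transaction_commit(transaction);	/* objects become visible now */
		transaction = NULL;			/* later updates run unbatched */
	}
	/* ... remaining updates ... */
	odb_transaction_commit(transaction);		/* no-op once NULL */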
diff --git a/bulk-checkin.c b/bulk-checkin.c
deleted file mode 100644
index b2809ab0398136ea0bd18b5d1c90f6a5729415ab..0000000000000000000000000000000000000000
--- a/bulk-checkin.c
+++ /dev/null
@@ -1,391 +0,0 @@
-/*
- * Copyright (c) 2011, Google Inc.
- */
-
-#define USE_THE_REPOSITORY_VARIABLE
-
-#include "git-compat-util.h"
-#include "bulk-checkin.h"
-#include "environment.h"
-#include "gettext.h"
-#include "hex.h"
-#include "lockfile.h"
-#include "repository.h"
-#include "csum-file.h"
-#include "pack.h"
-#include "strbuf.h"
-#include "tmp-objdir.h"
-#include "packfile.h"
-#include "object-file.h"
-#include "odb.h"
-
-static int odb_transaction_nesting;
-
-static struct tmp_objdir *bulk_fsync_objdir;
-
-static struct bulk_checkin_packfile {
-	char *pack_tmp_name;
-	struct hashfile *f;
-	off_t offset;
-	struct pack_idx_option pack_idx_opts;
-
-	struct pack_idx_entry **written;
-	uint32_t alloc_written;
-	uint32_t nr_written;
-} bulk_checkin_packfile;
-
-static void finish_tmp_packfile(struct strbuf *basename,
-				const char *pack_tmp_name,
-				struct pack_idx_entry **written_list,
-				uint32_t nr_written,
-				struct pack_idx_option *pack_idx_opts,
-				unsigned char hash[])
-{
-	char *idx_tmp_name = NULL;
-
-	stage_tmp_packfiles(the_repository, basename, pack_tmp_name,
-			    written_list, nr_written, NULL, pack_idx_opts, hash,
-			    &idx_tmp_name);
-	rename_tmp_packfile_idx(the_repository, basename, &idx_tmp_name);
-
-	free(idx_tmp_name);
-}
-
-static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state)
-{
-	unsigned char hash[GIT_MAX_RAWSZ];
-	struct strbuf packname = STRBUF_INIT;
-
-	if (!state->f)
-		return;
-
-	if (state->nr_written == 0) {
-		close(state->f->fd);
-		free_hashfile(state->f);
-		unlink(state->pack_tmp_name);
-		goto clear_exit;
-	} else if (state->nr_written == 1) {
-		finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK,
-				  CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
-	} else {
-		int fd = finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, 0);
-		fixup_pack_header_footer(the_hash_algo, fd, hash, state->pack_tmp_name,
-					 state->nr_written, hash,
-					 state->offset);
-		close(fd);
-	}
-
-	strbuf_addf(&packname, "%s/pack/pack-%s.", repo_get_object_directory(the_repository),
-		    hash_to_hex(hash));
-	finish_tmp_packfile(&packname, state->pack_tmp_name,
-			    state->written, state->nr_written,
-			    &state->pack_idx_opts, hash);
-	for (uint32_t i = 0; i < state->nr_written; i++)
-		free(state->written[i]);
-
-clear_exit:
-	free(state->pack_tmp_name);
-	free(state->written);
-	memset(state, 0, sizeof(*state));
-
-	strbuf_release(&packname);
-	/* Make objects we just wrote available to ourselves */
-	reprepare_packed_git(the_repository);
-}
-
-/*
- * Cleanup after batch-mode fsync_object_files.
- */
-static void flush_batch_fsync(void)
-{
-	struct strbuf temp_path = STRBUF_INIT;
-	struct tempfile *temp;
-
-	if (!bulk_fsync_objdir)
-		return;
-
-	/*
-	 * Issue a full hardware flush against a temporary file to ensure
-	 * that all objects are durable before any renames occur. The code in
-	 * fsync_loose_object_bulk_checkin has already issued a writeout
-	 * request, but it has not flushed any writeback cache in the storage
-	 * hardware or any filesystem logs. This fsync call acts as a barrier
-	 * to ensure that the data in each new object file is durable before
-	 * the final name is visible.
-	 */
-	strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX", repo_get_object_directory(the_repository));
-	temp = xmks_tempfile(temp_path.buf);
-	fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp));
-	delete_tempfile(&temp);
-	strbuf_release(&temp_path);
-
-	/*
-	 * Make the object files visible in the primary ODB after their data is
-	 * fully durable.
-	 */
-	tmp_objdir_migrate(bulk_fsync_objdir);
-	bulk_fsync_objdir = NULL;
-}
-
-static int already_written(struct bulk_checkin_packfile *state, struct object_id *oid)
-{
-	/* The object may already exist in the repository */
-	if (odb_has_object(the_repository->objects, oid,
-			   HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR))
-		return 1;
-
-	/* Might want to keep the list sorted */
-	for (uint32_t i = 0; i < state->nr_written; i++)
-		if (oideq(&state->written[i]->oid, oid))
-			return 1;
-
-	/* This is a new object we need to keep */
-	return 0;
-}
-
-/*
- * Read the contents from fd for size bytes, streaming it to the
- * packfile in state while updating the hash in ctx. Signal a failure
- * by returning a negative value when the resulting pack would exceed
- * the pack size limit and this is not the first object in the pack,
- * so that the caller can discard what we wrote from the current pack
- * by truncating it and opening a new one. The caller will then call
- * us again after rewinding the input fd.
- *
- * The already_hashed_to pointer is kept untouched by the caller to
- * make sure we do not hash the same byte when we are called
- * again. This way, the caller does not have to checkpoint its hash
- * status before calling us just in case we ask it to call us again
- * with a new pack.
- */
-static int stream_blob_to_pack(struct bulk_checkin_packfile *state,
-			       struct git_hash_ctx *ctx, off_t *already_hashed_to,
-			       int fd, size_t size, const char *path,
-			       unsigned flags)
-{
-	git_zstream s;
-	unsigned char ibuf[16384];
-	unsigned char obuf[16384];
-	unsigned hdrlen;
-	int status = Z_OK;
-	int write_object = (flags & INDEX_WRITE_OBJECT);
-	off_t offset = 0;
-
-	git_deflate_init(&s, pack_compression_level);
-
-	hdrlen = encode_in_pack_object_header(obuf, sizeof(obuf), OBJ_BLOB, size);
-	s.next_out = obuf + hdrlen;
-	s.avail_out = sizeof(obuf) - hdrlen;
-
-	while (status != Z_STREAM_END) {
-		if (size && !s.avail_in) {
-			size_t rsize = size < sizeof(ibuf) ? size : sizeof(ibuf);
-			ssize_t read_result = read_in_full(fd, ibuf, rsize);
-			if (read_result < 0)
-				die_errno("failed to read from '%s'", path);
-			if ((size_t)read_result != rsize)
-				die("failed to read %u bytes from '%s'",
-				    (unsigned)rsize, path);
-			offset += rsize;
-			if (*already_hashed_to < offset) {
-				size_t hsize = offset - *already_hashed_to;
-				if (rsize < hsize)
-					hsize = rsize;
-				if (hsize)
-					git_hash_update(ctx, ibuf, hsize);
-				*already_hashed_to = offset;
-			}
-			s.next_in = ibuf;
-			s.avail_in = rsize;
-			size -= rsize;
-		}
-
-		status = git_deflate(&s, size ? 0 : Z_FINISH);
-
-		if (!s.avail_out || status == Z_STREAM_END) {
-			if (write_object) {
-				size_t written = s.next_out - obuf;
-
-				/* would we bust the size limit? */
-				if (state->nr_written &&
-				    pack_size_limit_cfg &&
-				    pack_size_limit_cfg < state->offset + written) {
-					git_deflate_abort(&s);
-					return -1;
-				}
-
-				hashwrite(state->f, obuf, written);
-				state->offset += written;
-			}
-			s.next_out = obuf;
-			s.avail_out = sizeof(obuf);
-		}
-
-		switch (status) {
-		case Z_OK:
-		case Z_BUF_ERROR:
-		case Z_STREAM_END:
-			continue;
-		default:
-			die("unexpected deflate failure: %d", status);
-		}
-	}
-	git_deflate_end(&s);
-	return 0;
-}
-
-/* Lazily create backing packfile for the state */
-static void prepare_to_stream(struct bulk_checkin_packfile *state,
-			      unsigned flags)
-{
-	if (!(flags & INDEX_WRITE_OBJECT) || state->f)
-		return;
-
-	state->f = create_tmp_packfile(the_repository, &state->pack_tmp_name);
-	reset_pack_idx_option(&state->pack_idx_opts);
-
-	/* Pretend we are going to write only one object */
-	state->offset = write_pack_header(state->f, 1);
-	if (!state->offset)
-		die_errno("unable to write pack header");
-}
-
-static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
-				struct object_id *result_oid,
-				int fd, size_t size,
-				const char *path, unsigned flags)
-{
-	off_t seekback, already_hashed_to;
-	struct git_hash_ctx ctx;
-	unsigned char obuf[16384];
-	unsigned header_len;
-	struct hashfile_checkpoint checkpoint;
-	struct pack_idx_entry *idx = NULL;
-
-	seekback = lseek(fd, 0, SEEK_CUR);
-	if (seekback == (off_t) -1)
-		return error("cannot find the current offset");
-
-	header_len = format_object_header((char *)obuf, sizeof(obuf),
-					  OBJ_BLOB, size);
-	the_hash_algo->init_fn(&ctx);
-	git_hash_update(&ctx, obuf, header_len);
-
-	/* Note: idx is non-NULL when we are writing */
-	if ((flags & INDEX_WRITE_OBJECT) != 0) {
-		CALLOC_ARRAY(idx, 1);
-
-		prepare_to_stream(state, flags);
-		hashfile_checkpoint_init(state->f, &checkpoint);
-	}
-
-	already_hashed_to = 0;
-
-	while (1) {
-		prepare_to_stream(state, flags);
-		if (idx) {
-			hashfile_checkpoint(state->f, &checkpoint);
-			idx->offset = state->offset;
-			crc32_begin(state->f);
-		}
-		if (!stream_blob_to_pack(state, &ctx, &already_hashed_to,
-					 fd, size, path, flags))
-			break;
-		/*
-		 * Writing this object to the current pack will make
-		 * it too big; we need to truncate it, start a new
-		 * pack, and write into it.
-		 */
-		if (!idx)
-			BUG("should not happen");
-		hashfile_truncate(state->f, &checkpoint);
-		state->offset = checkpoint.offset;
-		flush_bulk_checkin_packfile(state);
-		if (lseek(fd, seekback, SEEK_SET) == (off_t) -1)
-			return error("cannot seek back");
-	}
-	git_hash_final_oid(result_oid, &ctx);
-	if (!idx)
-		return 0;
-
-	idx->crc32 = crc32_end(state->f);
-	if (already_written(state, result_oid)) {
-		hashfile_truncate(state->f, &checkpoint);
-		state->offset = checkpoint.offset;
-		free(idx);
-	} else {
-		oidcpy(&idx->oid, result_oid);
-		ALLOC_GROW(state->written,
-			   state->nr_written + 1,
-			   state->alloc_written);
-		state->written[state->nr_written++] = idx;
-	}
-	return 0;
-}
-
-void prepare_loose_object_bulk_checkin(void)
-{
-	/*
-	 * We lazily create the temporary object directory
-	 * the first time an object might be added, since
-	 * callers may not know whether any objects will be
-	 * added at the time they call begin_odb_transaction.
-	 */
-	if (!odb_transaction_nesting || bulk_fsync_objdir)
-		return;
-
-	bulk_fsync_objdir = tmp_objdir_create(the_repository, "bulk-fsync");
-	if (bulk_fsync_objdir)
-		tmp_objdir_replace_primary_odb(bulk_fsync_objdir, 0);
-}
-
-void fsync_loose_object_bulk_checkin(int fd, const char *filename)
-{
-	/*
-	 * If we have an active ODB transaction, we issue a call that
-	 * cleans the filesystem page cache but avoids a hardware flush
-	 * command. Later on we will issue a single hardware flush
-	 * before renaming the objects to their final names as part of
-	 * flush_batch_fsync.
-	 */
-	if (!bulk_fsync_objdir ||
-	    git_fsync(fd, FSYNC_WRITEOUT_ONLY) < 0) {
-		if (errno == ENOSYS)
-			warning(_("core.fsyncMethod = batch is unsupported on this platform"));
-		fsync_or_die(fd, filename);
-	}
-}
-
-int index_blob_bulk_checkin(struct object_id *oid,
-			    int fd, size_t size,
-			    const char *path, unsigned flags)
-{
-	int status = deflate_blob_to_pack(&bulk_checkin_packfile, oid, fd, size,
-					  path, flags);
-	if (!odb_transaction_nesting)
-		flush_bulk_checkin_packfile(&bulk_checkin_packfile);
-	return status;
-}
-
-void begin_odb_transaction(void)
-{
-	odb_transaction_nesting += 1;
-}
-
-void flush_odb_transaction(void)
-{
-	flush_batch_fsync();
-	flush_bulk_checkin_packfile(&bulk_checkin_packfile);
-}
-
-void end_odb_transaction(void)
-{
-	odb_transaction_nesting -= 1;
-	if (odb_transaction_nesting < 0)
-		BUG("Unbalanced ODB transaction nesting");
-
-	if (odb_transaction_nesting)
-		return;
-
-	flush_odb_transaction();
-}
diff --git a/bulk-checkin.h b/bulk-checkin.h
deleted file mode 100644
index 7246ea58dcf3481e026560f987138b1efe112390..0000000000000000000000000000000000000000
--- a/bulk-checkin.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2011, Google Inc.
- */
-#ifndef BULK_CHECKIN_H
-#define BULK_CHECKIN_H
-
-#include "object.h"
-
-void prepare_loose_object_bulk_checkin(void);
-void fsync_loose_object_bulk_checkin(int fd, const char *filename);
-
-/*
- * This creates one packfile per large blob unless bulk-checkin
- * machinery is "plugged".
- *
- * This also bypasses the usual "convert-to-git" dance, and that is on
- * purpose. We could write a streaming version of the converting
- * functions and insert that before feeding the data to fast-import
- * (or equivalent in-core API described above). However, that is
- * somewhat complicated, as we do not know the size of the filter
- * result, which we need to know beforehand when writing a git object.
- * Since the primary motivation for trying to stream from the working
- * tree file and to avoid mmaping it in core is to deal with large
- * binary blobs, they generally do not want to get any conversion, and
- * callers should avoid this code path when filters are requested.
- */
-int index_blob_bulk_checkin(struct object_id *oid,
-			    int fd, size_t size,
-			    const char *path, unsigned flags);
-
-/*
- * Tell the object database to optimize for adding
- * multiple objects. end_odb_transaction must be called
- * to make new objects visible. Transactions can be nested,
- * and objects are only visible after the outermost transaction
- * is complete or the transaction is flushed.
- */
-void begin_odb_transaction(void);
-
-/*
- * Make any objects that are currently part of a pending object
- * database transaction visible. It is valid to call this function
- * even if no transaction is active.
- */
-void flush_odb_transaction(void);
-
-/*
- * Tell the object database to make any objects from the
- * current transaction visible if this is the final nested
- * transaction.
- */
-void end_odb_transaction(void);
-
-#endif
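Note: the deleted header above allowed nested begin/end pairs tracked by a counter; the replacement allows a single pending transaction per object database. A sketch of the behavioral difference, assuming a struct repository *repo in scope:

	/* Before: nesting counter; only the outermost end flushes. */
	begin_odb_transaction();
	begin_odb_transaction();	/* nests */
	end_odb_transaction();		/* no flush yet */
	end_odb_transaction();		/* outermost: flush, objects visible */

	/* After: a second begin on the same ODB returns NULL, and
	 * committing NULL is a no-op, so only the owner flushes. */
	struct odb_transaction *outer = odb_transaction_begin(repo->objects);
	struct odb_transaction *inner = odb_transaction_begin(repo->objects);
	odb_transaction_commit(inner);	/* inner == NULL: no-op */
	odb_transaction_commit(outer);	/* flush; objects visible */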
diff --git a/cache-tree.c b/cache-tree.c
index 66ef2becbe01a4bfe2d41de91d97389487c82f64..2aba47060e95d49463a5de78fc29fd601ae0c281 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -8,7 +8,6 @@
 #include "tree.h"
 #include "tree-walk.h"
 #include "cache-tree.h"
-#include "bulk-checkin.h"
 #include "object-file.h"
 #include "odb.h"
 #include "read-cache-ll.h"
@@ -474,6 +473,7 @@ static int update_one(struct cache_tree *it,
 
 int cache_tree_update(struct index_state *istate, int flags)
 {
+	struct odb_transaction *transaction;
 	int skip, i;
 
 	i = verify_cache(istate, flags);
@@ -489,10 +489,10 @@ int cache_tree_update(struct index_state *istate, int flags)
 
 	trace_performance_enter();
 	trace2_region_enter("cache_tree", "update", the_repository);
-	begin_odb_transaction();
+	transaction = odb_transaction_begin(the_repository->objects);
 	i = update_one(istate->cache_tree, istate->cache, istate->cache_nr,
 		       "", 0, &skip, flags);
-	end_odb_transaction();
+	odb_transaction_commit(transaction);
 	trace2_region_leave("cache_tree", "update", the_repository);
 	trace_performance_leave("cache_tree_update");
 	if (i < 0)
diff --git a/meson.build b/meson.build
index b3dfcc04972601cbc3d05222f72070ddf0c2356f..fccb6d2eeca050e72c0db0193fea3ae104376696 100644
--- a/meson.build
+++ b/meson.build
@@ -287,7 +287,6 @@ libgit_sources = [
   'blob.c',
   'bloom.c',
   'branch.c',
-  'bulk-checkin.c',
   'bundle-uri.c',
   'bundle.c',
   'cache-tree.c',
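Note: the object-file.c changes below carry the batched-fsync state in struct odb_transaction instead of file-local globals. The life cycle, summarized from the comments in the code below (no additional behavior):

	/*
	 * core.fsyncMethod = batch inside a transaction:
	 *
	 * odb_transaction_begin()                 no I/O yet
	 *   write_loose_object()                  lazily redirected into a
	 *                                         temporary object directory
	 *     git_fsync(fd, FSYNC_WRITEOUT_ONLY)  pagecache writeout, no barrier
	 * odb_transaction_commit()
	 *   fsync_or_die(tempfile)                one hardware flush as barrier
	 *   tmp_objdir_migrate()                  objects renamed into the ODB
	 */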
diff --git a/object-file.c b/object-file.c
index 2bc36ab3ee8cbf2d83c4b3204a7c5df132b934d6..17a236d2fe121bc447f73138c9db4a651b07ca22 100644
--- a/object-file.c
+++ b/object-file.c
@@ -10,7 +10,6 @@
 #define USE_THE_REPOSITORY_VARIABLE
 
 #include "git-compat-util.h"
-#include "bulk-checkin.h"
 #include "convert.h"
 #include "dir.h"
 #include "environment.h"
@@ -28,6 +27,8 @@
 #include "read-cache-ll.h"
 #include "setup.h"
 #include "streaming.h"
+#include "tempfile.h"
+#include "tmp-objdir.h"
 
 /* The maximum size for an object header. */
 #define MAX_HEADER_LEN 32
@@ -666,6 +667,93 @@ void hash_object_file(const struct git_hash_algo *algo, const void *buf,
 	write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen);
 }
 
+struct transaction_packfile {
+	char *pack_tmp_name;
+	struct hashfile *f;
+	off_t offset;
+	struct pack_idx_option pack_idx_opts;
+
+	struct pack_idx_entry **written;
+	uint32_t alloc_written;
+	uint32_t nr_written;
+};
+
+struct odb_transaction {
+	struct object_database *odb;
+
+	struct tmp_objdir *objdir;
+	struct transaction_packfile packfile;
+};
+
+static void prepare_loose_object_transaction(struct odb_transaction *transaction)
+{
+	/*
+	 * We lazily create the temporary object directory
+	 * the first time an object might be added, since
+	 * callers may not know whether any objects will be
+	 * added at the time they call object_file_transaction_begin.
+	 */
+	if (!transaction || transaction->objdir)
+		return;
+
+	transaction->objdir = tmp_objdir_create(transaction->odb->repo, "bulk-fsync");
+	if (transaction->objdir)
+		tmp_objdir_replace_primary_odb(transaction->objdir, 0);
+}
+
+static void fsync_loose_object_transaction(struct odb_transaction *transaction,
+					   int fd, const char *filename)
+{
+	/*
+	 * If we have an active ODB transaction, we issue a call that
+	 * cleans the filesystem page cache but avoids a hardware flush
+	 * command. Later on we will issue a single hardware flush
+	 * before renaming the objects to their final names as part of
+	 * flush_loose_object_transaction.
+	 */
+	if (!transaction || !transaction->objdir ||
+	    git_fsync(fd, FSYNC_WRITEOUT_ONLY) < 0) {
+		if (errno == ENOSYS)
+			warning(_("core.fsyncMethod = batch is unsupported on this platform"));
+		fsync_or_die(fd, filename);
+	}
+}
+
+/*
+ * Cleanup after batch-mode fsync_object_files.
+ */
+static void flush_loose_object_transaction(struct odb_transaction *transaction)
+{
+	struct strbuf temp_path = STRBUF_INIT;
+	struct tempfile *temp;
+
+	if (!transaction->objdir)
+		return;
+
+	/*
+	 * Issue a full hardware flush against a temporary file to ensure
+	 * that all objects are durable before any renames occur. The code in
+	 * fsync_loose_object_transaction has already issued a writeout
+	 * request, but it has not flushed any writeback cache in the storage
+	 * hardware or any filesystem logs. This fsync call acts as a barrier
+	 * to ensure that the data in each new object file is durable before
+	 * the final name is visible.
+	 */
+	strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX",
+		    repo_get_object_directory(transaction->odb->repo));
+	temp = xmks_tempfile(temp_path.buf);
+	fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp));
+	delete_tempfile(&temp);
+	strbuf_release(&temp_path);
+
+	/*
+	 * Make the object files visible in the primary ODB after their data is
+	 * fully durable.
+	 */
+	tmp_objdir_migrate(transaction->objdir);
+	transaction->objdir = NULL;
+}
+
 /* Finalize a file on disk, and close it. */
 static void close_loose_object(struct odb_source *source,
 			       int fd, const char *filename)
@@ -674,7 +762,7 @@ static void close_loose_object(struct odb_source *source,
 		goto out;
 
 	if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
-		fsync_loose_object_bulk_checkin(fd, filename);
+		fsync_loose_object_transaction(source->odb->transaction, fd, filename);
 	else if (fsync_object_files > 0)
 		fsync_or_die(fd, filename);
 	else
@@ -852,7 +940,7 @@ static int write_loose_object(struct odb_source *source,
 	static struct strbuf filename = STRBUF_INIT;
 
 	if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
-		prepare_loose_object_bulk_checkin();
+		prepare_loose_object_transaction(source->odb->transaction);
 
 	odb_loose_path(source, &filename, oid);
 
@@ -941,7 +1029,7 @@ int stream_loose_object(struct odb_source *source,
 	int hdrlen;
 
 	if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
-		prepare_loose_object_bulk_checkin();
+		prepare_loose_object_transaction(source->odb->transaction);
 
 	/* Since oid is not determined, save tmp file to odb path. */
 	strbuf_addf(&filename, "%s/", source->path);
@@ -1243,6 +1331,274 @@ static int index_core(struct index_state *istate,
 	return ret;
 }
 
+static int already_written(struct odb_transaction *transaction,
+			   struct object_id *oid)
+{
+	/* The object may already exist in the repository */
+	if (odb_has_object(transaction->odb, oid,
+			   HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR))
+		return 1;
+
+	/* Might want to keep the list sorted */
+	for (uint32_t i = 0; i < transaction->packfile.nr_written; i++)
+		if (oideq(&transaction->packfile.written[i]->oid, oid))
+			return 1;
+
+	/* This is a new object we need to keep */
+	return 0;
+}
+
+/* Lazily create backing packfile for the state */
+static void prepare_packfile_transaction(struct odb_transaction *transaction,
+					 unsigned flags)
+{
+	struct transaction_packfile *state = &transaction->packfile;
+	if (!(flags & INDEX_WRITE_OBJECT) || state->f)
+		return;
+
+	state->f = create_tmp_packfile(transaction->odb->repo,
+				       &state->pack_tmp_name);
+	reset_pack_idx_option(&state->pack_idx_opts);
+
+	/* Pretend we are going to write only one object */
+	state->offset = write_pack_header(state->f, 1);
+	if (!state->offset)
+		die_errno("unable to write pack header");
+}
+
+/*
+ * Read the contents from fd for size bytes, streaming it to the
+ * packfile in state while updating the hash in ctx. Signal a failure
+ * by returning a negative value when the resulting pack would exceed
+ * the pack size limit and this is not the first object in the pack,
+ * so that the caller can discard what we wrote from the current pack
+ * by truncating it and opening a new one. The caller will then call
+ * us again after rewinding the input fd.
+ *
+ * The already_hashed_to pointer is kept untouched by the caller to
+ * make sure we do not hash the same byte when we are called
+ * again. This way, the caller does not have to checkpoint its hash
+ * status before calling us just in case we ask it to call us again
+ * with a new pack.
+ */
+static int stream_blob_to_pack(struct transaction_packfile *state,
+			       struct git_hash_ctx *ctx, off_t *already_hashed_to,
+			       int fd, size_t size, const char *path,
+			       unsigned flags)
+{
+	git_zstream s;
+	unsigned char ibuf[16384];
+	unsigned char obuf[16384];
+	unsigned hdrlen;
+	int status = Z_OK;
+	int write_object = (flags & INDEX_WRITE_OBJECT);
+	off_t offset = 0;
+
+	git_deflate_init(&s, pack_compression_level);
+
+	hdrlen = encode_in_pack_object_header(obuf, sizeof(obuf), OBJ_BLOB, size);
+	s.next_out = obuf + hdrlen;
+	s.avail_out = sizeof(obuf) - hdrlen;
+
+	while (status != Z_STREAM_END) {
+		if (size && !s.avail_in) {
+			size_t rsize = size < sizeof(ibuf) ? size : sizeof(ibuf);
+			ssize_t read_result = read_in_full(fd, ibuf, rsize);
+			if (read_result < 0)
+				die_errno("failed to read from '%s'", path);
+			if ((size_t)read_result != rsize)
+				die("failed to read %u bytes from '%s'",
+				    (unsigned)rsize, path);
+			offset += rsize;
+			if (*already_hashed_to < offset) {
+				size_t hsize = offset - *already_hashed_to;
+				if (rsize < hsize)
+					hsize = rsize;
+				if (hsize)
+					git_hash_update(ctx, ibuf, hsize);
+				*already_hashed_to = offset;
+			}
+			s.next_in = ibuf;
+			s.avail_in = rsize;
+			size -= rsize;
+		}
+
+		status = git_deflate(&s, size ? 0 : Z_FINISH);
+
+		if (!s.avail_out || status == Z_STREAM_END) {
+			if (write_object) {
+				size_t written = s.next_out - obuf;
+
+				/* would we bust the size limit? */
+				if (state->nr_written &&
+				    pack_size_limit_cfg &&
+				    pack_size_limit_cfg < state->offset + written) {
+					git_deflate_abort(&s);
+					return -1;
+				}
+
+				hashwrite(state->f, obuf, written);
+				state->offset += written;
+			}
+			s.next_out = obuf;
+			s.avail_out = sizeof(obuf);
+		}
+
+		switch (status) {
+		case Z_OK:
+		case Z_BUF_ERROR:
+		case Z_STREAM_END:
+			continue;
+		default:
+			die("unexpected deflate failure: %d", status);
+		}
+	}
+	git_deflate_end(&s);
+	return 0;
+}
+
+static void flush_packfile_transaction(struct odb_transaction *transaction)
+{
+	struct transaction_packfile *state = &transaction->packfile;
+	struct repository *repo = transaction->odb->repo;
+	unsigned char hash[GIT_MAX_RAWSZ];
+	struct strbuf packname = STRBUF_INIT;
+	char *idx_tmp_name = NULL;
+
+	if (!state->f)
+		return;
+
+	if (state->nr_written == 0) {
+		close(state->f->fd);
+		free_hashfile(state->f);
+		unlink(state->pack_tmp_name);
+		goto clear_exit;
+	} else if (state->nr_written == 1) {
+		finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK,
+				  CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
+	} else {
+		int fd = finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, 0);
+		fixup_pack_header_footer(repo->hash_algo, fd, hash, state->pack_tmp_name,
+					 state->nr_written, hash,
+					 state->offset);
+		close(fd);
+	}
+
+	strbuf_addf(&packname, "%s/pack/pack-%s.",
+		    repo_get_object_directory(transaction->odb->repo),
+		    hash_to_hex_algop(hash, repo->hash_algo));
+
+	stage_tmp_packfiles(repo, &packname, state->pack_tmp_name,
+			    state->written, state->nr_written, NULL,
+			    &state->pack_idx_opts, hash, &idx_tmp_name);
+	rename_tmp_packfile_idx(repo, &packname, &idx_tmp_name);
+
+	for (uint32_t i = 0; i < state->nr_written; i++)
+		free(state->written[i]);
+
+clear_exit:
+	free(idx_tmp_name);
+	free(state->pack_tmp_name);
+	free(state->written);
+	memset(state, 0, sizeof(*state));
+
+	strbuf_release(&packname);
+	/* Make objects we just wrote available to ourselves */
+	reprepare_packed_git(repo);
+}
+
+/*
+ * This writes the specified object to a packfile. Objects written here
+ * during the same transaction are written to the same packfile. The
+ * packfile is not flushed until the transaction is flushed. The caller
+ * is expected to ensure a valid transaction is set up for objects to be
+ * recorded to.
+ *
+ * This also bypasses the usual "convert-to-git" dance, and that is on
+ * purpose. We could write a streaming version of the converting
+ * functions and insert that before feeding the data to fast-import
+ * (or equivalent in-core API described above). However, that is
+ * somewhat complicated, as we do not know the size of the filter
+ * result, which we need to know beforehand when writing a git object.
+ * Since the primary motivation for trying to stream from the working
+ * tree file and to avoid mmaping it in core is to deal with large
+ * binary blobs, they generally do not want to get any conversion, and
+ * callers should avoid this code path when filters are requested.
+ */
+static int index_blob_packfile_transaction(struct odb_transaction *transaction,
+					   struct object_id *result_oid, int fd,
+					   size_t size, const char *path,
+					   unsigned flags)
+{
+	struct transaction_packfile *state = &transaction->packfile;
+	off_t seekback, already_hashed_to;
+	struct git_hash_ctx ctx;
+	unsigned char obuf[16384];
+	unsigned header_len;
+	struct hashfile_checkpoint checkpoint;
+	struct pack_idx_entry *idx = NULL;
+
+	seekback = lseek(fd, 0, SEEK_CUR);
+	if (seekback == (off_t)-1)
+		return error("cannot find the current offset");
+
+	header_len = format_object_header((char *)obuf, sizeof(obuf),
+					  OBJ_BLOB, size);
+	transaction->odb->repo->hash_algo->init_fn(&ctx);
+	git_hash_update(&ctx, obuf, header_len);
+
+	/* Note: idx is non-NULL when we are writing */
+	if ((flags & INDEX_WRITE_OBJECT) != 0) {
+		CALLOC_ARRAY(idx, 1);
+
+		prepare_packfile_transaction(transaction, flags);
+		hashfile_checkpoint_init(state->f, &checkpoint);
+	}
+
+	already_hashed_to = 0;
+
+	while (1) {
+		prepare_packfile_transaction(transaction, flags);
+		if (idx) {
+			hashfile_checkpoint(state->f, &checkpoint);
+			idx->offset = state->offset;
+			crc32_begin(state->f);
+		}
+		if (!stream_blob_to_pack(state, &ctx, &already_hashed_to,
+					 fd, size, path, flags))
+			break;
+		/*
+		 * Writing this object to the current pack will make
+		 * it too big; we need to truncate it, start a new
+		 * pack, and write into it.
+		 */
+		if (!idx)
+			BUG("should not happen");
+		hashfile_truncate(state->f, &checkpoint);
+		state->offset = checkpoint.offset;
+		flush_packfile_transaction(transaction);
+		if (lseek(fd, seekback, SEEK_SET) == (off_t)-1)
+			return error("cannot seek back");
+	}
+	git_hash_final_oid(result_oid, &ctx);
+	if (!idx)
+		return 0;
+
+	idx->crc32 = crc32_end(state->f);
+	if (already_written(transaction, result_oid)) {
+		hashfile_truncate(state->f, &checkpoint);
+		state->offset = checkpoint.offset;
+		free(idx);
+	} else {
+		oidcpy(&idx->oid, result_oid);
+		ALLOC_GROW(state->written,
+			   state->nr_written + 1,
+			   state->alloc_written);
+		state->written[state->nr_written++] = idx;
+	}
+	return 0;
+}
+
 int index_fd(struct index_state *istate, struct object_id *oid,
 	     int fd, struct stat *st,
 	     enum object_type type, const char *path, unsigned flags)
@@ -1253,18 +1609,27 @@ int index_fd(struct index_state *istate, struct object_id *oid,
 	 * Call xsize_t() only when needed to avoid potentially unnecessary
 	 * die() for large files.
 	 */
-	if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(istate, path))
+	if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(istate, path)) {
 		ret = index_stream_convert_blob(istate, oid, fd, path, flags);
-	else if (!S_ISREG(st->st_mode))
+	} else if (!S_ISREG(st->st_mode)) {
 		ret = index_pipe(istate, oid, fd, type, path, flags);
-	else if ((st->st_size >= 0 && (size_t) st->st_size <= repo_settings_get_big_file_threshold(istate->repo)) ||
-		 type != OBJ_BLOB ||
-		 (path && would_convert_to_git(istate, path)))
+	} else if ((st->st_size >= 0 &&
+		    (size_t)st->st_size <= repo_settings_get_big_file_threshold(istate->repo)) ||
+		   type != OBJ_BLOB ||
+		   (path && would_convert_to_git(istate, path))) {
 		ret = index_core(istate, oid, fd, xsize_t(st->st_size),
 				 type, path, flags);
-	else
-		ret = index_blob_bulk_checkin(oid, fd, xsize_t(st->st_size), path,
-					      flags);
+	} else {
+		struct odb_transaction *transaction;
+
+		transaction = odb_transaction_begin(the_repository->objects);
+		ret = index_blob_packfile_transaction(the_repository->objects->transaction,
+						      oid, fd,
+						      xsize_t(st->st_size),
+						      path, flags);
+		odb_transaction_commit(transaction);
+	}
+
 	close(fd);
 	return ret;
 }
@@ -1601,3 +1966,32 @@ int read_loose_object(struct repository *repo,
 	munmap(map, mapsize);
 	return ret;
 }
+
+struct odb_transaction *object_file_transaction_begin(struct odb_source *source)
+{
+	struct object_database *odb = source->odb;
+
+	if (odb->transaction)
+		return NULL;
+
+	CALLOC_ARRAY(odb->transaction, 1);
+	odb->transaction->odb = odb;
+
+	return odb->transaction;
+}
+
+void object_file_transaction_commit(struct odb_transaction *transaction)
+{
+	if (!transaction)
+		return;
+
+	/*
+	 * Ensure the transaction ending matches the pending transaction.
+	 */
+	ASSERT(transaction == transaction->odb->transaction);
+
+	flush_loose_object_transaction(transaction);
+	flush_packfile_transaction(transaction);
+	transaction->odb->transaction = NULL;
+	free(transaction);
+}
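Note: the new else branch of index_fd() above is equivalent to the following sketch (names shortened; size stands for xsize_t(st->st_size)). If a caller already holds a transaction, begin returns NULL, the blob is streamed into the pending transaction's packfile, and committing NULL leaves the flush to the outer owner:

	struct odb_transaction *t = odb_transaction_begin(the_repository->objects);
	ret = index_blob_packfile_transaction(the_repository->objects->transaction,
					      oid, fd, size, path, flags);
	odb_transaction_commit(t);	/* no-op when t is NULL */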
diff --git a/object-file.h b/object-file.h
index 15d97630d3b11b5b4f94f9f6b9d39502928101df..3fd48dcafbf1dc797efabd489e68720e521a38d2 100644
--- a/object-file.h
+++ b/object-file.h
@@ -218,4 +218,20 @@ int read_loose_object(struct repository *repo,
 		      void **contents,
 		      struct object_info *oi);
 
+struct odb_transaction;
+
+/*
+ * Tell the object database to optimize for adding
+ * multiple objects. object_file_transaction_commit must be called
+ * to make new objects visible. If a transaction is already
+ * pending, NULL is returned.
+ */
+struct odb_transaction *object_file_transaction_begin(struct odb_source *source);
+
+/*
+ * Tell the object database to make any objects from the
+ * current transaction visible.
+ */
+void object_file_transaction_commit(struct odb_transaction *transaction);
+
 #endif /* OBJECT_FILE_H */
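Note: object-file.h above exposes the lower-level, per-source API; the commit side ASSERTs that it is handed the same transaction that begin returned. A sketch of direct use, assuming a struct repository *repo in scope:

	struct odb_transaction *t =
		object_file_transaction_begin(repo->objects->sources);
	if (t) {
		/* ... write objects ... */
		object_file_transaction_commit(t);	/* must be the pending one */
	}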
diff --git a/odb.c b/odb.c
index 2a92a018c42940f131edd4e7cacd3454e66ee370..af9534bfe1cdf548edb87e25d196ec89a453fac4 100644
--- a/odb.c
+++ b/odb.c
@@ -1051,3 +1051,13 @@ void odb_clear(struct object_database *o)
 	hashmap_clear(&o->pack_map);
 	string_list_clear(&o->submodule_source_paths, 0);
 }
+
+struct odb_transaction *odb_transaction_begin(struct object_database *odb)
+{
+	return object_file_transaction_begin(odb->sources);
+}
+
+void odb_transaction_commit(struct odb_transaction *transaction)
+{
+	object_file_transaction_commit(transaction);
+}
diff --git a/odb.h b/odb.h
index 3dfc66d75a3d20dd64d787d04e3ab4e5f9010e8a..82093753c84ca6125101eb2a7b9762aa30a92369 100644
--- a/odb.h
+++ b/odb.h
@@ -84,6 +84,7 @@ struct odb_source {
 
 struct packed_git;
 struct cached_object_entry;
+struct odb_transaction;
 
 /*
  * The object database encapsulates access to objects in a repository. It
@@ -94,6 +95,13 @@ struct object_database {
 	/* Repository that owns this database. */
 	struct repository *repo;
 
+	/*
+	 * State of the current object database transaction. Only one
+	 * transaction may be pending at a time. It is NULL when no transaction
+	 * is pending.
+	 */
+	struct odb_transaction *transaction;
+
 	/*
 	 * Set of all object directories; the main directory is first (and
 	 * cannot be NULL after initialization). Subsequent directories are
@@ -177,6 +185,19 @@ struct object_database {
 struct object_database *odb_new(struct repository *repo);
 void odb_clear(struct object_database *o);
 
+/*
+ * Starts an ODB transaction. Subsequent objects are written to the transaction
+ * and not committed until odb_transaction_commit() is invoked on the
+ * transaction. If the ODB already has a pending transaction, NULL is returned.
+ */
+struct odb_transaction *odb_transaction_begin(struct object_database *odb);
+
+/*
+ * Commits an ODB transaction, making the written objects visible. If the
+ * specified transaction is NULL, the function is a no-op.
+ */
+void odb_transaction_commit(struct odb_transaction *transaction);
+
 /*
  * Find source by its object directory path. Dies in case the source couldn't
  * be found.
diff --git a/read-cache.c b/read-cache.c
index 06ad74db2286aee617b0f7bf5eb3c15cc252a3bf..94098a3861403c400ff45eb87c51ba8cbfcafeab 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -8,7 +8,6 @@
 #define DISABLE_SIGN_COMPARE_WARNINGS
 
 #include "git-compat-util.h"
-#include "bulk-checkin.h"
 #include "config.h"
 #include "date.h"
 #include "diff.h"
@@ -3947,6 +3946,7 @@ int add_files_to_cache(struct repository *repo, const char *prefix,
 			const struct pathspec *pathspec, char *ps_matched,
 			int include_sparse, int flags)
 {
+	struct odb_transaction *transaction;
 	struct update_callback_data data;
 	struct rev_info rev;
 
@@ -3972,9 +3972,9 @@ int add_files_to_cache(struct repository *repo, const char *prefix,
 	 * This function is invoked from commands other than 'add', which
 	 * may not have their own transaction active.
 	 */
-	begin_odb_transaction();
+	transaction = odb_transaction_begin(repo->objects);
 	run_diff_files(&rev, DIFF_RACY_IS_MODIFIED);
-	end_odb_transaction();
+	odb_transaction_commit(transaction);
 	release_revisions(&rev);
 
 	return !!data.add_errors;
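Note: read-cache.c above is the model for library code that may run under a command which already opened a transaction: wrap the multi-object operation unconditionally and rely on begin returning NULL plus commit(NULL) being a no-op. A sketch of that pattern in a hypothetical caller:

	static void write_many_objects(struct repository *repo)
	{
		struct odb_transaction *transaction;

		transaction = odb_transaction_begin(repo->objects);
		/* ... operations that may add many objects to the ODB ... */
		odb_transaction_commit(transaction);
	}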