diff options
author | Junio C Hamano <gitster@pobox.com> | 2022-06-04 00:30:34 +0300 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2022-06-04 00:30:34 +0300 |
commit | 83937e9592832408670da38bfe6e96c90ad63521 (patch) | |
tree | 20c6895015e2c281780eba0fead78aef3862ed8e /bulk-checkin.c | |
parent | 377d347eb3b1a23ece080dc5e5b8df6958c56e96 (diff) | |
parent | 112a9fe60d7c5f02ee1a805d8730d54a458b7ad1 (diff) |
Merge branch 'ns/batch-fsync'
Introduce a filesystem-dependent mechanism to optimize the way the
bits for many loose object files are ensured to hit the disk
platter.
* ns/batch-fsync:
core.fsyncmethod: performance tests for batch mode
t/perf: add iteration setup mechanism to perf-lib
core.fsyncmethod: tests for batch mode
test-lib-functions: add parsing helpers for ls-files and ls-tree
core.fsync: use batch mode and sync loose objects by default on Windows
unpack-objects: use the bulk-checkin infrastructure
update-index: use the bulk-checkin infrastructure
builtin/add: add ODB transaction around add_files_to_cache
cache-tree: use ODB transaction around writing a tree
core.fsyncmethod: batched disk flushes for loose-objects
bulk-checkin: rebrand plug/unplug APIs as 'odb transactions'
bulk-checkin: rename 'state' variable and separate 'plugged' boolean
Diffstat (limited to 'bulk-checkin.c')
-rw-r--r-- | bulk-checkin.c | 117 |
1 files changed, 99 insertions, 18 deletions
diff --git a/bulk-checkin.c b/bulk-checkin.c index 6d6c37171c..bcf878460b 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -3,16 +3,21 @@ */ #include "cache.h" #include "bulk-checkin.h" +#include "lockfile.h" #include "repository.h" #include "csum-file.h" #include "pack.h" #include "strbuf.h" +#include "string-list.h" +#include "tmp-objdir.h" #include "packfile.h" #include "object-store.h" -static struct bulk_checkin_state { - unsigned plugged:1; +static int odb_transaction_nesting; +static struct tmp_objdir *bulk_fsync_objdir; + +static struct bulk_checkin_packfile { char *pack_tmp_name; struct hashfile *f; off_t offset; @@ -21,7 +26,7 @@ static struct bulk_checkin_state { struct pack_idx_entry **written; uint32_t alloc_written; uint32_t nr_written; -} state; +} bulk_checkin_packfile; static void finish_tmp_packfile(struct strbuf *basename, const char *pack_tmp_name, @@ -39,7 +44,7 @@ static void finish_tmp_packfile(struct strbuf *basename, free(idx_tmp_name); } -static void finish_bulk_checkin(struct bulk_checkin_state *state) +static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state) { unsigned char hash[GIT_MAX_RAWSZ]; struct strbuf packname = STRBUF_INIT; @@ -80,7 +85,41 @@ clear_exit: reprepare_packed_git(the_repository); } -static int already_written(struct bulk_checkin_state *state, struct object_id *oid) +/* + * Cleanup after batch-mode fsync_object_files. + */ +static void flush_batch_fsync(void) +{ + struct strbuf temp_path = STRBUF_INIT; + struct tempfile *temp; + + if (!bulk_fsync_objdir) + return; + + /* + * Issue a full hardware flush against a temporary file to ensure + * that all objects are durable before any renames occur. The code in + * fsync_loose_object_bulk_checkin has already issued a writeout + * request, but it has not flushed any writeback cache in the storage + * hardware or any filesystem logs. This fsync call acts as a barrier + * to ensure that the data in each new object file is durable before + * the final name is visible. + */ + strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX", get_object_directory()); + temp = xmks_tempfile(temp_path.buf); + fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp)); + delete_tempfile(&temp); + strbuf_release(&temp_path); + + /* + * Make the object files visible in the primary ODB after their data is + * fully durable. + */ + tmp_objdir_migrate(bulk_fsync_objdir); + bulk_fsync_objdir = NULL; +} + +static int already_written(struct bulk_checkin_packfile *state, struct object_id *oid) { int i; @@ -112,7 +151,7 @@ static int already_written(struct bulk_checkin_state *state, struct object_id *o * status before calling us just in case we ask it to call us again * with a new pack. */ -static int stream_to_pack(struct bulk_checkin_state *state, +static int stream_to_pack(struct bulk_checkin_packfile *state, git_hash_ctx *ctx, off_t *already_hashed_to, int fd, size_t size, enum object_type type, const char *path, unsigned flags) @@ -189,7 +228,7 @@ static int stream_to_pack(struct bulk_checkin_state *state, } /* Lazily create backing packfile for the state */ -static void prepare_to_stream(struct bulk_checkin_state *state, +static void prepare_to_stream(struct bulk_checkin_packfile *state, unsigned flags) { if (!(flags & HASH_WRITE_OBJECT) || state->f) @@ -204,7 +243,7 @@ static void prepare_to_stream(struct bulk_checkin_state *state, die_errno("unable to write pack header"); } -static int deflate_to_pack(struct bulk_checkin_state *state, +static int deflate_to_pack(struct bulk_checkin_packfile *state, struct object_id *result_oid, int fd, size_t size, enum object_type type, const char *path, @@ -251,7 +290,7 @@ static int deflate_to_pack(struct bulk_checkin_state *state, BUG("should not happen"); hashfile_truncate(state->f, &checkpoint); state->offset = checkpoint.offset; - finish_bulk_checkin(state); + flush_bulk_checkin_packfile(state); if (lseek(fd, seekback, SEEK_SET) == (off_t) -1) return error("cannot seek back"); } @@ -274,25 +313,67 @@ static int deflate_to_pack(struct bulk_checkin_state *state, return 0; } +void prepare_loose_object_bulk_checkin(void) +{ + /* + * We lazily create the temporary object directory + * the first time an object might be added, since + * callers may not know whether any objects will be + * added at the time they call begin_odb_transaction. + */ + if (!odb_transaction_nesting || bulk_fsync_objdir) + return; + + bulk_fsync_objdir = tmp_objdir_create("bulk-fsync"); + if (bulk_fsync_objdir) + tmp_objdir_replace_primary_odb(bulk_fsync_objdir, 0); +} + +void fsync_loose_object_bulk_checkin(int fd, const char *filename) +{ + /* + * If we have an active ODB transaction, we issue a call that + * cleans the filesystem page cache but avoids a hardware flush + * command. Later on we will issue a single hardware flush + * before renaming the objects to their final names as part of + * flush_batch_fsync. + */ + if (!bulk_fsync_objdir || + git_fsync(fd, FSYNC_WRITEOUT_ONLY) < 0) { + fsync_or_die(fd, filename); + } +} + int index_bulk_checkin(struct object_id *oid, int fd, size_t size, enum object_type type, const char *path, unsigned flags) { - int status = deflate_to_pack(&state, oid, fd, size, type, + int status = deflate_to_pack(&bulk_checkin_packfile, oid, fd, size, type, path, flags); - if (!state.plugged) - finish_bulk_checkin(&state); + if (!odb_transaction_nesting) + flush_bulk_checkin_packfile(&bulk_checkin_packfile); return status; } -void plug_bulk_checkin(void) +void begin_odb_transaction(void) { - state.plugged = 1; + odb_transaction_nesting += 1; } -void unplug_bulk_checkin(void) +void flush_odb_transaction(void) { - state.plugged = 0; - if (state.f) - finish_bulk_checkin(&state); + flush_batch_fsync(); + flush_bulk_checkin_packfile(&bulk_checkin_packfile); +} + +void end_odb_transaction(void) +{ + odb_transaction_nesting -= 1; + if (odb_transaction_nesting < 0) + BUG("Unbalanced ODB transaction nesting"); + + if (odb_transaction_nesting) + return; + + flush_odb_transaction(); } |