Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.kernel.org/pub/scm/git/git.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2024-01-23 03:08:35 +0300
committerJunio C Hamano <gitster@pobox.com>2024-01-23 03:08:35 +0300
commitb867e8b9a8135150527797216831a3ac3b1ee9c8 (patch)
tree6e413c4cd056e2ebabac72d1a4bb4ca0bc689e0f
parenta20d4a9a7fdb62f71e8b2be79dabf8d37ab34d7d (diff)
parent4f36b8597c8f1c20e465ade4159896fb592e34c0 (diff)
Merge branch 'ps/reftable-optimize-io' into next
Low-level I/O optimization for reftable. * ps/reftable-optimize-io: reftable/stack: fix race in up-to-date check reftable/stack: unconditionally reload stack after commit
-rw-r--r--reftable/stack.c101
-rw-r--r--reftable/stack.h4
-rw-r--r--reftable/system.h1
3 files changed, 96 insertions, 10 deletions
diff --git a/reftable/stack.c b/reftable/stack.c
index 09007d39fc..bf3869ce70 100644
--- a/reftable/stack.c
+++ b/reftable/stack.c
@@ -66,6 +66,7 @@ int reftable_new_stack(struct reftable_stack **dest, const char *dir,
strbuf_addstr(&list_file_name, "/tables.list");
p->list_file = strbuf_detach(&list_file_name, NULL);
+ p->list_fd = -1;
p->reftable_dir = xstrdup(dir);
p->config = config;
@@ -175,7 +176,12 @@ void reftable_stack_destroy(struct reftable_stack *st)
st->readers_len = 0;
FREE_AND_NULL(st->readers);
}
- stat_validity_clear(&st->list_validity);
+
+ if (st->list_fd >= 0) {
+ close(st->list_fd);
+ st->list_fd = -1;
+ }
+
FREE_AND_NULL(st->list_file);
FREE_AND_NULL(st->reftable_dir);
reftable_free(st);
@@ -375,11 +381,59 @@ static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st,
sleep_millisec(delay);
}
- stat_validity_update(&st->list_validity, fd);
-
out:
- if (err)
- stat_validity_clear(&st->list_validity);
+ /*
+ * Invalidate the stat cache. It is sufficient to only close the file
+ * descriptor and keep the cached stat info because we never use the
+ * latter when the former is negative.
+ */
+ if (st->list_fd >= 0) {
+ close(st->list_fd);
+ st->list_fd = -1;
+ }
+
+ /*
+ * Cache stat information in case it provides a useful signal to us.
+ * According to POSIX, "The st_ino and st_dev fields taken together
+ * uniquely identify the file within the system." That being said,
+ * Windows is not POSIX compliant and we do not have these fields
+ * available. So the information we have there is insufficient to
+ * determine whether two file descriptors point to the same file.
+ *
+ * While we could fall back to using other signals like the file's
+ * mtime, those are not sufficient to avoid races. We thus refrain from
+ * using the stat cache on such systems and fall back to the secondary
+ * caching mechanism, which is to check whether contents of the file
+ * have changed.
+ *
+ * On other systems which are POSIX compliant we must keep the file
+ * descriptor open. This is to avoid a race condition where two
+ * processes access the reftable stack at the same point in time:
+ *
+ * 1. A reads the reftable stack and caches its stat info.
+ *
+ * 2. B updates the stack, appending a new table to "tables.list".
+ * This will both use a new inode and result in a different file
+ * size, thus invalidating A's cache in theory.
+ *
+ * 3. B decides to auto-compact the stack and merges two tables. The
+ * file size now matches what A has cached again. Furthermore, the
+ * filesystem may decide to recycle the inode number of the file
+ * we have replaced in (2) because it is not in use anymore.
+ *
+ * 4. A reloads the reftable stack. Neither the inode number nor the
+ * file size changed. If the timestamps did not change either then
+ * we think the cached copy of our stack is up-to-date.
+ *
+ * By keeping the file descriptor open the inode number cannot be
+ * recycled, mitigating the race.
+ */
+ if (!err && fd >= 0 && !fstat(fd, &st->list_st) &&
+ st->list_st.st_dev && st->list_st.st_ino) {
+ st->list_fd = fd;
+ fd = -1;
+ }
+
if (fd >= 0)
close(fd);
free_names(names);
@@ -396,8 +450,39 @@ static int stack_uptodate(struct reftable_stack *st)
int err;
int i = 0;
- if (stat_validity_check(&st->list_validity, st->list_file))
- return 0;
+ /*
+ * When we have cached stat information available then we use it to
+ * verify whether the file has been rewritten.
+ *
+ * Note that we explicitly do not want to use `stat_validity_check()`
+ * and friends here because they may end up not comparing the `st_dev`
+ * and `st_ino` fields. These functions thus cannot guarantee that we
+ * indeed still have the same file.
+ */
+ if (st->list_fd >= 0) {
+ struct stat list_st;
+
+ if (stat(st->list_file, &list_st) < 0) {
+ /*
+ * It's fine for "tables.list" to not exist. In that
+ * case, we have to refresh when the loaded stack has
+ * any readers.
+ */
+ if (errno == ENOENT)
+ return !!st->readers_len;
+ return REFTABLE_IO_ERROR;
+ }
+
+ /*
+ * When "tables.list" refers to the same file we can assume
+ * that it didn't change. This is because we always use
+ * rename(3P) to update the file and never write to it
+ * directly.
+ */
+ if (st->list_st.st_dev == list_st.st_dev &&
+ st->list_st.st_ino == list_st.st_ino)
+ return 0;
+ }
err = read_lines(st->list_file, &names);
if (err < 0)
@@ -582,7 +667,7 @@ int reftable_addition_commit(struct reftable_addition *add)
add->new_tables = NULL;
add->new_tables_len = 0;
- err = reftable_stack_reload(add->stack);
+ err = reftable_stack_reload_maybe_reuse(add->stack, 1);
if (err)
goto done;
diff --git a/reftable/stack.h b/reftable/stack.h
index 3f80cc598a..c1e3efa899 100644
--- a/reftable/stack.h
+++ b/reftable/stack.h
@@ -14,8 +14,10 @@ https://developers.google.com/open-source/licenses/bsd
#include "reftable-stack.h"
struct reftable_stack {
- struct stat_validity list_validity;
+ struct stat list_st;
char *list_file;
+ int list_fd;
+
char *reftable_dir;
int disable_auto_compact;
diff --git a/reftable/system.h b/reftable/system.h
index 2cc7adf271..6b74a81514 100644
--- a/reftable/system.h
+++ b/reftable/system.h
@@ -12,7 +12,6 @@ https://developers.google.com/open-source/licenses/bsd
/* This header glues the reftable library to the rest of Git */
#include "git-compat-util.h"
-#include "statinfo.h"
#include "strbuf.h"
#include "hash-ll.h" /* hash ID, sizes.*/
#include "dir.h" /* remove_dir_recursively, for tests.*/