Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJacob Vosmaer <jacob@gitlab.com>2021-09-13 17:31:51 +0300
committerJacob Vosmaer <jacob@gitlab.com>2021-09-15 10:15:09 +0300
commit2f6f81300a7f27324c691557b8ea5b15252fb501 (patch)
tree6f2d192d8729fa832ea9faa5084b946480193bbf
parentccd8ea3e1436d6464ac5f67e6bebd1ae317f4d50 (diff)
Makefile: apply Git ref advertisement buffering patches
Prior to these patches, Git used unbuffered writes for upload-pack ref advertisements, with one or two write syscalls per ref. In repositories with many refs that adds up to a lot of syscalls. These patches add stdio buffering for the ref advertisement writes. Also see gitlab-com/gl-infra/scalability#1257. Changelog: performance
-rw-r--r--Makefile7
-rw-r--r--_support/git-patches/0016-pkt-line-add-stdio-packet-write-functions.patch126
-rw-r--r--_support/git-patches/0017-upload-pack-use-stdio-in-send_ref-callbacks.patch94
3 files changed, 226 insertions, 1 deletions
diff --git a/Makefile b/Makefile
index b84894484..fdf268d26 100644
--- a/Makefile
+++ b/Makefile
@@ -152,13 +152,18 @@ ifeq ($(origin GIT_PATCHES),undefined)
# 'ps/update-ref-batch-flush' into next, 2021-09-10).
GIT_PATCHES += 0015-update-ref-fix-streaming-of-status-updates.patch
+ # Buffer ref advertisement writes in upload-pack. Merged into next via
+ # c31d871c (Merge branch 'jv/pkt-line-batch' into next, 2021-09-10).
+ GIT_PATCHES += 0016-pkt-line-add-stdio-packet-write-functions.patch
+ GIT_PATCHES += 0017-upload-pack-use-stdio-in-send_ref-callbacks.patch
+
# This extra version has two intentions: first, it allows us to detect
# capabilities of the command at runtime. Second, it helps admins to
# discover which version is currently in use. As such, this version must be
# incremented whenever a new patch is added above. When no patches exist,
# then this should be undefined. Otherwise, it must be set to at least
# `gl1` given that `0` is the "default" GitLab patch level.
- GIT_EXTRA_VERSION := gl3
+ GIT_EXTRA_VERSION := gl4
endif
ifeq ($(origin GIT_BUILD_OPTIONS),undefined)
diff --git a/_support/git-patches/0016-pkt-line-add-stdio-packet-write-functions.patch b/_support/git-patches/0016-pkt-line-add-stdio-packet-write-functions.patch
new file mode 100644
index 000000000..ea4e221f3
--- /dev/null
+++ b/_support/git-patches/0016-pkt-line-add-stdio-packet-write-functions.patch
@@ -0,0 +1,126 @@
+From c7e2dd1a100170e1dbd204be68d54c0e230113df Mon Sep 17 00:00:00 2001
+From: Jacob Vosmaer <jacob@gitlab.com>
+Date: Wed, 1 Sep 2021 14:54:41 +0200
+Subject: [PATCH 16/17] pkt-line: add stdio packet write functions
+
+This adds three new functions to pkt-line.c: packet_fwrite,
+packet_fwrite_fmt and packet_fflush. Besides writing a pktline flush
+packet, packet_fflush also flushes the stdio buffer of the stream.
+
+Helped-by: Patrick Steinhardt <ps@pks.im>
+Helped-by: Jeff King <peff@peff.net>
+Signed-off-by: Jacob Vosmaer <jacob@gitlab.com>
+Signed-off-by: Junio C Hamano <gitster@pobox.com>
+---
+ cache.h | 2 ++
+ pkt-line.c | 37 +++++++++++++++++++++++++++++++++++++
+ pkt-line.h | 11 +++++++++++
+ write-or-die.c | 12 ++++++++++++
+ 4 files changed, 62 insertions(+)
+
+diff --git a/cache.h b/cache.h
+index bd4869beee..dcf2454c3b 100644
+--- a/cache.h
++++ b/cache.h
+@@ -1736,6 +1736,8 @@ extern const char *git_mailmap_blob;
+ void maybe_flush_or_die(FILE *, const char *);
+ __attribute__((format (printf, 2, 3)))
+ void fprintf_or_die(FILE *, const char *fmt, ...);
++void fwrite_or_die(FILE *f, const void *buf, size_t count);
++void fflush_or_die(FILE *f);
+
+ #define COPY_READ_ERROR (-2)
+ #define COPY_WRITE_ERROR (-3)
+diff --git a/pkt-line.c b/pkt-line.c
+index 9f63eae2e6..de4a94b437 100644
+--- a/pkt-line.c
++++ b/pkt-line.c
+@@ -243,6 +243,43 @@ void packet_write(int fd_out, const char *buf, size_t size)
+ die("%s", err.buf);
+ }
+
++void packet_fwrite(FILE *f, const char *buf, size_t size)
++{
++ size_t packet_size;
++ char header[4];
++
++ if (size > LARGE_PACKET_DATA_MAX)
++ die(_("packet write failed - data exceeds max packet size"));
++
++ packet_trace(buf, size, 1);
++ packet_size = size + 4;
++
++ set_packet_header(header, packet_size);
++ fwrite_or_die(f, header, 4);
++ fwrite_or_die(f, buf, size);
++}
++
++void packet_fwrite_fmt(FILE *fh, const char *fmt, ...)
++{
++ static struct strbuf buf = STRBUF_INIT;
++ va_list args;
++
++ strbuf_reset(&buf);
++
++ va_start(args, fmt);
++ format_packet(&buf, "", fmt, args);
++ va_end(args);
++
++ fwrite_or_die(fh, buf.buf, buf.len);
++}
++
++void packet_fflush(FILE *f)
++{
++ packet_trace("0000", 4, 1);
++ fwrite_or_die(f, "0000", 4);
++ fflush_or_die(f);
++}
++
+ void packet_buf_write(struct strbuf *buf, const char *fmt, ...)
+ {
+ va_list args;
+diff --git a/pkt-line.h b/pkt-line.h
+index 5af5f45687..82b95e4bdd 100644
+--- a/pkt-line.h
++++ b/pkt-line.h
+@@ -35,6 +35,17 @@ int packet_write_fmt_gently(int fd, const char *fmt, ...) __attribute__((format
+ int write_packetized_from_fd_no_flush(int fd_in, int fd_out);
+ int write_packetized_from_buf_no_flush(const char *src_in, size_t len, int fd_out);
+
++/*
++ * Stdio versions of packet_write functions. When mixing these with fd
++ * based functions, take care to call fflush(3) before doing fd writes or
++ * closing the fd.
++ */
++void packet_fwrite(FILE *f, const char *buf, size_t size);
++void packet_fwrite_fmt(FILE *f, const char *fmt, ...) __attribute__((format (printf, 2, 3)));
++
++/* packet_fflush writes a flush packet and flushes the stdio buffer of f */
++void packet_fflush(FILE *f);
++
+ /*
+ * Read a packetized line into the buffer, which must be at least size bytes
+ * long. The return value specifies the number of bytes read into the buffer.
+diff --git a/write-or-die.c b/write-or-die.c
+index d33e68f6ab..0b1ec8190b 100644
+--- a/write-or-die.c
++++ b/write-or-die.c
+@@ -70,3 +70,15 @@ void write_or_die(int fd, const void *buf, size_t count)
+ die_errno("write error");
+ }
+ }
++
++void fwrite_or_die(FILE *f, const void *buf, size_t count)
++{
++ if (fwrite(buf, 1, count, f) != count)
++ die_errno("fwrite error");
++}
++
++void fflush_or_die(FILE *f)
++{
++ if (fflush(f))
++ die_errno("fflush error");
++}
+--
+2.32.0
+
diff --git a/_support/git-patches/0017-upload-pack-use-stdio-in-send_ref-callbacks.patch b/_support/git-patches/0017-upload-pack-use-stdio-in-send_ref-callbacks.patch
new file mode 100644
index 000000000..83db05f80
--- /dev/null
+++ b/_support/git-patches/0017-upload-pack-use-stdio-in-send_ref-callbacks.patch
@@ -0,0 +1,94 @@
+From 66949334dbd9e0d36b0fa8e50ef120aa88e7c094 Mon Sep 17 00:00:00 2001
+From: Jacob Vosmaer <jacob@gitlab.com>
+Date: Wed, 1 Sep 2021 14:54:42 +0200
+Subject: [PATCH 17/17] upload-pack: use stdio in send_ref callbacks
+
+In both protocol v0 and v2, upload-pack writes one pktline packet per
+advertised ref to stdout. That means one or two write(2) syscalls per
+ref. This is problematic if these writes become network sends with
+high overhead.
+
+This commit changes both send_ref callbacks to use buffered IO using
+stdio.
+
+To give an example of the impact: I set up a single-threaded loop that
+calls ls-remote (with HTTP and protocol v2) on a local GitLab
+instance, on a repository with 11K refs. When I switch from Git
+v2.32.0 to this patch, I see a 40% reduction in CPU time for Git, and
+65% for Gitaly (GitLab's Git RPC service).
+
+So using buffered IO not only saves syscalls in upload-pack, it also
+saves time in things that consume upload-pack's output.
+
+Helped-by: Jeff King <peff@peff.net>
+Signed-off-by: Jacob Vosmaer <jacob@gitlab.com>
+Signed-off-by: Junio C Hamano <gitster@pobox.com>
+---
+ ls-refs.c | 4 ++--
+ upload-pack.c | 11 ++++++++---
+ 2 files changed, 10 insertions(+), 5 deletions(-)
+
+diff --git a/ls-refs.c b/ls-refs.c
+index 88f6c3f60d..e6a2dbd962 100644
+--- a/ls-refs.c
++++ b/ls-refs.c
+@@ -105,7 +105,7 @@ static int send_ref(const char *refname, const struct object_id *oid,
+ }
+
+ strbuf_addch(&refline, '\n');
+- packet_write(1, refline.buf, refline.len);
++ packet_fwrite(stdout, refline.buf, refline.len);
+
+ strbuf_release(&refline);
+ return 0;
+@@ -171,7 +171,7 @@ int ls_refs(struct repository *r, struct strvec *keys,
+ strvec_push(&data.prefixes, "");
+ for_each_fullref_in_prefixes(get_git_namespace(), data.prefixes.v,
+ send_ref, &data, 0);
+- packet_flush(1);
++ packet_fflush(stdout);
+ strvec_clear(&data.prefixes);
+ return 0;
+ }
+diff --git a/upload-pack.c b/upload-pack.c
+index 297b76fcb4..0ed377b1fb 100644
+--- a/upload-pack.c
++++ b/upload-pack.c
+@@ -1207,7 +1207,7 @@ static int send_ref(const char *refname, const struct object_id *oid,
+
+ format_symref_info(&symref_info, &data->symref);
+ format_session_id(&session_id, data);
+- packet_write_fmt(1, "%s %s%c%s%s%s%s%s%s%s object-format=%s agent=%s\n",
++ packet_fwrite_fmt(stdout, "%s %s%c%s%s%s%s%s%s%s object-format=%s agent=%s\n",
+ oid_to_hex(oid), refname_nons,
+ 0, capabilities,
+ (data->allow_uor & ALLOW_TIP_SHA1) ?
+@@ -1223,11 +1223,11 @@ static int send_ref(const char *refname, const struct object_id *oid,
+ strbuf_release(&symref_info);
+ strbuf_release(&session_id);
+ } else {
+- packet_write_fmt(1, "%s %s\n", oid_to_hex(oid), refname_nons);
++ packet_fwrite_fmt(stdout, "%s %s\n", oid_to_hex(oid), refname_nons);
+ }
+ capabilities = NULL;
+ if (!peel_iterated_oid(oid, &peeled))
+- packet_write_fmt(1, "%s %s^{}\n", oid_to_hex(&peeled), refname_nons);
++ packet_fwrite_fmt(stdout, "%s %s^{}\n", oid_to_hex(&peeled), refname_nons);
+ return 0;
+ }
+
+@@ -1348,6 +1348,11 @@ void upload_pack(struct upload_pack_options *options)
+ reset_timeout(data.timeout);
+ head_ref_namespaced(send_ref, &data);
+ for_each_namespaced_ref(send_ref, &data);
++ /*
++ * fflush stdout before calling advertise_shallow_grafts because send_ref
++ * uses stdio.
++ */
++ fflush_or_die(stdout);
+ advertise_shallow_grafts(1);
+ packet_flush(1);
+ } else {
+--
+2.32.0
+