diff options
author | Jacob Vosmaer <jacob@gitlab.com> | 2021-09-13 17:31:51 +0300 |
---|---|---|
committer | Jacob Vosmaer <jacob@gitlab.com> | 2021-09-15 10:15:09 +0300 |
commit | 2f6f81300a7f27324c691557b8ea5b15252fb501 (patch) | |
tree | 6f2d192d8729fa832ea9faa5084b946480193bbf | |
parent | ccd8ea3e1436d6464ac5f67e6bebd1ae317f4d50 (diff) |
Makefile: apply Git ref advertisement buffering patches
Prior to these patches, Git used unbuffered writes for upload-pack ref
advertisements, with one or two write syscalls per ref. In
repositories with many refs that adds up to a lot of syscalls. These
patches add stdio buffering for the ref advertisement writes.
Also see gitlab-com/gl-infra/scalability#1257.
Changelog: performance
-rw-r--r-- | Makefile | 7 | ||||
-rw-r--r-- | _support/git-patches/0016-pkt-line-add-stdio-packet-write-functions.patch | 126 | ||||
-rw-r--r-- | _support/git-patches/0017-upload-pack-use-stdio-in-send_ref-callbacks.patch | 94 |
3 files changed, 226 insertions, 1 deletions
@@ -152,13 +152,18 @@ ifeq ($(origin GIT_PATCHES),undefined) # 'ps/update-ref-batch-flush' into next, 2021-09-10). GIT_PATCHES += 0015-update-ref-fix-streaming-of-status-updates.patch + # Buffer ref advertisement writes in upload-pack. Merged into next via + # c31d871c (Merge branch 'jv/pkt-line-batch' into next, 2021-09-10). + GIT_PATCHES += 0016-pkt-line-add-stdio-packet-write-functions.patch + GIT_PATCHES += 0017-upload-pack-use-stdio-in-send_ref-callbacks.patch + # This extra version has two intentions: first, it allows us to detect # capabilities of the command at runtime. Second, it helps admins to # discover which version is currently in use. As such, this version must be # incremented whenever a new patch is added above. When no patches exist, # then this should be undefined. Otherwise, it must be set to at least # `gl1` given that `0` is the "default" GitLab patch level. - GIT_EXTRA_VERSION := gl3 + GIT_EXTRA_VERSION := gl4 endif ifeq ($(origin GIT_BUILD_OPTIONS),undefined) diff --git a/_support/git-patches/0016-pkt-line-add-stdio-packet-write-functions.patch b/_support/git-patches/0016-pkt-line-add-stdio-packet-write-functions.patch new file mode 100644 index 000000000..ea4e221f3 --- /dev/null +++ b/_support/git-patches/0016-pkt-line-add-stdio-packet-write-functions.patch @@ -0,0 +1,126 @@ +From c7e2dd1a100170e1dbd204be68d54c0e230113df Mon Sep 17 00:00:00 2001 +From: Jacob Vosmaer <jacob@gitlab.com> +Date: Wed, 1 Sep 2021 14:54:41 +0200 +Subject: [PATCH 16/17] pkt-line: add stdio packet write functions + +This adds three new functions to pkt-line.c: packet_fwrite, +packet_fwrite_fmt and packet_fflush. Besides writing a pktline flush +packet, packet_fflush also flushes the stdio buffer of the stream. + +Helped-by: Patrick Steinhardt <ps@pks.im> +Helped-by: Jeff King <peff@peff.net> +Signed-off-by: Jacob Vosmaer <jacob@gitlab.com> +Signed-off-by: Junio C Hamano <gitster@pobox.com> +--- + cache.h | 2 ++ + pkt-line.c | 37 +++++++++++++++++++++++++++++++++++++ + pkt-line.h | 11 +++++++++++ + write-or-die.c | 12 ++++++++++++ + 4 files changed, 62 insertions(+) + +diff --git a/cache.h b/cache.h +index bd4869beee..dcf2454c3b 100644 +--- a/cache.h ++++ b/cache.h +@@ -1736,6 +1736,8 @@ extern const char *git_mailmap_blob; + void maybe_flush_or_die(FILE *, const char *); + __attribute__((format (printf, 2, 3))) + void fprintf_or_die(FILE *, const char *fmt, ...); ++void fwrite_or_die(FILE *f, const void *buf, size_t count); ++void fflush_or_die(FILE *f); + + #define COPY_READ_ERROR (-2) + #define COPY_WRITE_ERROR (-3) +diff --git a/pkt-line.c b/pkt-line.c +index 9f63eae2e6..de4a94b437 100644 +--- a/pkt-line.c ++++ b/pkt-line.c +@@ -243,6 +243,43 @@ void packet_write(int fd_out, const char *buf, size_t size) + die("%s", err.buf); + } + ++void packet_fwrite(FILE *f, const char *buf, size_t size) ++{ ++ size_t packet_size; ++ char header[4]; ++ ++ if (size > LARGE_PACKET_DATA_MAX) ++ die(_("packet write failed - data exceeds max packet size")); ++ ++ packet_trace(buf, size, 1); ++ packet_size = size + 4; ++ ++ set_packet_header(header, packet_size); ++ fwrite_or_die(f, header, 4); ++ fwrite_or_die(f, buf, size); ++} ++ ++void packet_fwrite_fmt(FILE *fh, const char *fmt, ...) ++{ ++ static struct strbuf buf = STRBUF_INIT; ++ va_list args; ++ ++ strbuf_reset(&buf); ++ ++ va_start(args, fmt); ++ format_packet(&buf, "", fmt, args); ++ va_end(args); ++ ++ fwrite_or_die(fh, buf.buf, buf.len); ++} ++ ++void packet_fflush(FILE *f) ++{ ++ packet_trace("0000", 4, 1); ++ fwrite_or_die(f, "0000", 4); ++ fflush_or_die(f); ++} ++ + void packet_buf_write(struct strbuf *buf, const char *fmt, ...) + { + va_list args; +diff --git a/pkt-line.h b/pkt-line.h +index 5af5f45687..82b95e4bdd 100644 +--- a/pkt-line.h ++++ b/pkt-line.h +@@ -35,6 +35,17 @@ int packet_write_fmt_gently(int fd, const char *fmt, ...) __attribute__((format + int write_packetized_from_fd_no_flush(int fd_in, int fd_out); + int write_packetized_from_buf_no_flush(const char *src_in, size_t len, int fd_out); + ++/* ++ * Stdio versions of packet_write functions. When mixing these with fd ++ * based functions, take care to call fflush(3) before doing fd writes or ++ * closing the fd. ++ */ ++void packet_fwrite(FILE *f, const char *buf, size_t size); ++void packet_fwrite_fmt(FILE *f, const char *fmt, ...) __attribute__((format (printf, 2, 3))); ++ ++/* packet_fflush writes a flush packet and flushes the stdio buffer of f */ ++void packet_fflush(FILE *f); ++ + /* + * Read a packetized line into the buffer, which must be at least size bytes + * long. The return value specifies the number of bytes read into the buffer. +diff --git a/write-or-die.c b/write-or-die.c +index d33e68f6ab..0b1ec8190b 100644 +--- a/write-or-die.c ++++ b/write-or-die.c +@@ -70,3 +70,15 @@ void write_or_die(int fd, const void *buf, size_t count) + die_errno("write error"); + } + } ++ ++void fwrite_or_die(FILE *f, const void *buf, size_t count) ++{ ++ if (fwrite(buf, 1, count, f) != count) ++ die_errno("fwrite error"); ++} ++ ++void fflush_or_die(FILE *f) ++{ ++ if (fflush(f)) ++ die_errno("fflush error"); ++} +-- +2.32.0 + diff --git a/_support/git-patches/0017-upload-pack-use-stdio-in-send_ref-callbacks.patch b/_support/git-patches/0017-upload-pack-use-stdio-in-send_ref-callbacks.patch new file mode 100644 index 000000000..83db05f80 --- /dev/null +++ b/_support/git-patches/0017-upload-pack-use-stdio-in-send_ref-callbacks.patch @@ -0,0 +1,94 @@ +From 66949334dbd9e0d36b0fa8e50ef120aa88e7c094 Mon Sep 17 00:00:00 2001 +From: Jacob Vosmaer <jacob@gitlab.com> +Date: Wed, 1 Sep 2021 14:54:42 +0200 +Subject: [PATCH 17/17] upload-pack: use stdio in send_ref callbacks + +In both protocol v0 and v2, upload-pack writes one pktline packet per +advertised ref to stdout. That means one or two write(2) syscalls per +ref. This is problematic if these writes become network sends with +high overhead. + +This commit changes both send_ref callbacks to use buffered IO using +stdio. + +To give an example of the impact: I set up a single-threaded loop that +calls ls-remote (with HTTP and protocol v2) on a local GitLab +instance, on a repository with 11K refs. When I switch from Git +v2.32.0 to this patch, I see a 40% reduction in CPU time for Git, and +65% for Gitaly (GitLab's Git RPC service). + +So using buffered IO not only saves syscalls in upload-pack, it also +saves time in things that consume upload-pack's output. + +Helped-by: Jeff King <peff@peff.net> +Signed-off-by: Jacob Vosmaer <jacob@gitlab.com> +Signed-off-by: Junio C Hamano <gitster@pobox.com> +--- + ls-refs.c | 4 ++-- + upload-pack.c | 11 ++++++++--- + 2 files changed, 10 insertions(+), 5 deletions(-) + +diff --git a/ls-refs.c b/ls-refs.c +index 88f6c3f60d..e6a2dbd962 100644 +--- a/ls-refs.c ++++ b/ls-refs.c +@@ -105,7 +105,7 @@ static int send_ref(const char *refname, const struct object_id *oid, + } + + strbuf_addch(&refline, '\n'); +- packet_write(1, refline.buf, refline.len); ++ packet_fwrite(stdout, refline.buf, refline.len); + + strbuf_release(&refline); + return 0; +@@ -171,7 +171,7 @@ int ls_refs(struct repository *r, struct strvec *keys, + strvec_push(&data.prefixes, ""); + for_each_fullref_in_prefixes(get_git_namespace(), data.prefixes.v, + send_ref, &data, 0); +- packet_flush(1); ++ packet_fflush(stdout); + strvec_clear(&data.prefixes); + return 0; + } +diff --git a/upload-pack.c b/upload-pack.c +index 297b76fcb4..0ed377b1fb 100644 +--- a/upload-pack.c ++++ b/upload-pack.c +@@ -1207,7 +1207,7 @@ static int send_ref(const char *refname, const struct object_id *oid, + + format_symref_info(&symref_info, &data->symref); + format_session_id(&session_id, data); +- packet_write_fmt(1, "%s %s%c%s%s%s%s%s%s%s object-format=%s agent=%s\n", ++ packet_fwrite_fmt(stdout, "%s %s%c%s%s%s%s%s%s%s object-format=%s agent=%s\n", + oid_to_hex(oid), refname_nons, + 0, capabilities, + (data->allow_uor & ALLOW_TIP_SHA1) ? +@@ -1223,11 +1223,11 @@ static int send_ref(const char *refname, const struct object_id *oid, + strbuf_release(&symref_info); + strbuf_release(&session_id); + } else { +- packet_write_fmt(1, "%s %s\n", oid_to_hex(oid), refname_nons); ++ packet_fwrite_fmt(stdout, "%s %s\n", oid_to_hex(oid), refname_nons); + } + capabilities = NULL; + if (!peel_iterated_oid(oid, &peeled)) +- packet_write_fmt(1, "%s %s^{}\n", oid_to_hex(&peeled), refname_nons); ++ packet_fwrite_fmt(stdout, "%s %s^{}\n", oid_to_hex(&peeled), refname_nons); + return 0; + } + +@@ -1348,6 +1348,11 @@ void upload_pack(struct upload_pack_options *options) + reset_timeout(data.timeout); + head_ref_namespaced(send_ref, &data); + for_each_namespaced_ref(send_ref, &data); ++ /* ++ * fflush stdout before calling advertise_shallow_grafts because send_ref ++ * uses stdio. ++ */ ++ fflush_or_die(stdout); + advertise_shallow_grafts(1); + packet_flush(1); + } else { +-- +2.32.0 + |