From b7b021701cea6c9e54c826e1f6bc5faa9d3fef53 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 20 Feb 2013 14:53:33 -0500 Subject: upload-pack: use get_sha1_hex to parse "shallow" lines When we receive a line like "shallow " from the client, we feed the part to get_sha1. This is a mistake, as the argument on a shallow line is defined by Documentation/technical/pack-protocol.txt to contain an "obj-id". This is never defined in the BNF, but it is clear from the text and from the other uses that it is meant to be a hex sha1, not an arbitrary identifier (and that is what fetch-pack has always sent). We should be using get_sha1_hex instead, which doesn't allow the client to request arbitrary junk like "HEAD@{yesterday}". Because this is just marking shallow objects, the client couldn't actually do anything interesting (like fetching objects from unreachable reflog entries), but we should keep our parsing tight to be on the safe side. Because get_sha1 is for the most part a superset of get_sha1_hex, in theory the only behavior change should be disallowing non-hex object references. However, there is one interesting exception: get_sha1 will only parse a 40-character hex sha1 if the string has exactly 40 characters, whereas get_sha1_hex will just eat the first 40 characters, leaving the rest. That means that current versions of git-upload-pack will not accept a "shallow" packet that has a trailing newline, even though the protocol documentation is clear that newlines are allowed (even encouraged) in non-binary parts of the protocol. This never mattered in practice, though, because fetch-pack, contrary to the protocol documentation, does not include a newline in its shallow lines. JGit follows its lead (though it correctly is strict on the parsing end about wanting a hex object id). We do not adjust fetch-pack to send newlines here, as it would break communication with older versions of git (and there is no actual benefit to doing so, except for consistency with other parts of the protocol). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- upload-pack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'upload-pack.c') diff --git a/upload-pack.c b/upload-pack.c index 30146a04f7..b058e8de50 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -596,7 +596,7 @@ static void receive_needs(void) if (!prefixcmp(line, "shallow ")) { unsigned char sha1[20]; struct object *object; - if (get_sha1(line + 8, sha1)) + if (get_sha1_hex(line + 8, sha1)) die("invalid shallow line: %s", line); object = parse_object(sha1); if (!object) -- cgit v1.2.3 From e58e57e49eb990e38df19628a744c71b44397ef1 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 20 Feb 2013 14:54:57 -0500 Subject: upload-pack: do not add duplicate objects to shallow list When the client tells us it has a shallow object via "shallow ", we make sure we have the object, mark it with a flag, then add it to a dynamic array of shallow objects. This means that a client can get us to allocate arbitrary amounts of memory just by flooding us with shallow lines (whether they have the objects or not). You can demonstrate it easily with: yes '0035shallow e83c5163316f89bfbde7d9ab23ca2e25604af290' | git-upload-pack git.git We already protect against duplicates in want lines by checking if our flag is already set; let's do the same thing here. Note that a client can still get us to allocate some amount of memory by marking every object in the repo as "shallow" (or "want"). But this at least bounds it with the number of objects in the repository, which is not under the control of an upload-pack client. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- upload-pack.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'upload-pack.c') diff --git a/upload-pack.c b/upload-pack.c index b058e8de50..bd6f25519c 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -603,8 +603,10 @@ static void receive_needs(void) die("did not find object for %s", line); if (object->type != OBJ_COMMIT) die("invalid shallow object %s", sha1_to_hex(sha1)); - object->flags |= CLIENT_SHALLOW; - add_object_array(object, NULL, &shallows); + if (!(object->flags & CLIENT_SHALLOW)) { + object->flags |= CLIENT_SHALLOW; + add_object_array(object, NULL, &shallows); + } continue; } if (!prefixcmp(line, "deepen ")) { -- cgit v1.2.3 From 97a83fa839d818130bfeca060279c4c355036785 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 20 Feb 2013 14:55:28 -0500 Subject: upload-pack: remove packet debugging harness If you set the GIT_DEBUG_SEND_PACK environment variable, upload-pack will dump lines it receives in the receive_needs phase to a descriptor. This debugging harness is a strict subset of what GIT_TRACE_PACKET can do. Let's just drop it in favor of that. A few tests used GIT_DEBUG_SEND_PACK to confirm which objects get sent; we have to adapt them to the new output format. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- upload-pack.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'upload-pack.c') diff --git a/upload-pack.c b/upload-pack.c index bd6f25519c..0edc79e4a4 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -42,7 +42,6 @@ static unsigned int timeout; * otherwise maximum packet size (up to 65520 bytes). */ static int use_sideband; -static int debug_fd; static int advertise_refs; static int stateless_rpc; @@ -580,8 +579,6 @@ static void receive_needs(void) int has_non_tip = 0; shallow_nr = 0; - if (debug_fd) - write_str_in_full(debug_fd, "#S\n"); for (;;) { struct object *o; const char *features; @@ -590,8 +587,6 @@ static void receive_needs(void) reset_timeout(); if (!len) break; - if (debug_fd) - write_in_full(debug_fd, line, len); if (!prefixcmp(line, "shallow ")) { unsigned char sha1[20]; @@ -653,8 +648,6 @@ static void receive_needs(void) add_object_array(o, NULL, &want_obj); } } - if (debug_fd) - write_str_in_full(debug_fd, "#E\n"); /* * We have sent all our refs already, and the other end @@ -845,8 +838,6 @@ int main(int argc, char **argv) if (is_repository_shallow()) die("attempt to fetch/clone from a shallow repository"); git_config(upload_pack_config, NULL); - if (getenv("GIT_DEBUG_SEND_PACK")) - debug_fd = atoi(getenv("GIT_DEBUG_SEND_PACK")); upload_pack(); return 0; } -- cgit v1.2.3 From cdf4fb8e332f9641ac1ca95e999fe98251d31392 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 20 Feb 2013 15:01:56 -0500 Subject: pkt-line: drop safe_write function This is just write_or_die by another name. The one distinction is that write_or_die will treat EPIPE specially by suppressing error messages. That's fine, as we die by SIGPIPE anyway (and in the off chance that it is disabled, write_or_die will simulate it). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- upload-pack.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'upload-pack.c') diff --git a/upload-pack.c b/upload-pack.c index 0edc79e4a4..afc2d9279c 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -69,7 +69,8 @@ static ssize_t send_client_data(int fd, const char *data, ssize_t sz) xwrite(fd, data, sz); return sz; } - return safe_write(fd, data, sz); + write_or_die(fd, data, sz); + return sz; } static FILE *pack_pipe = NULL; -- cgit v1.2.3 From 819b929d3389f6007e1c469d9060e7876caeb97f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 20 Feb 2013 15:02:28 -0500 Subject: pkt-line: teach packet_read_line to chomp newlines The packets sent during ref negotiation are all terminated by newline; even though the code to chomp these newlines is short, we end up doing it in a lot of places. This patch teaches packet_read_line to auto-chomp the trailing newline; this lets us get rid of a lot of inline chomping code. As a result, some call-sites which are not reading line-oriented data (e.g., when reading chunks of packfiles alongside sideband) transition away from packet_read_line to the generic packet_read interface. This patch converts all of the existing callsites. Since the function signature of packet_read_line does not change (but its behavior does), there is a possibility of new callsites being introduced in later commits, silently introducing an incompatibility. However, since a later patch in this series will change the signature, such a commit would have to be merged directly into this commit, not to the tip of the series; we can therefore ignore the issue. This is an internal cleanup and should produce no change of behavior in the normal case. However, there is one corner case to note. Callers of packet_read_line have never been able to tell the difference between a flush packet ("0000") and an empty packet ("0004"), as both cause packet_read_line to return a length of 0. Readers treat them identically, even though Documentation/technical/protocol-common.txt says we must not; it also says that implementations should not send an empty pkt-line. By stripping out the newline before the result gets to the caller, we will now treat the newline-only packet ("0005\n") the same as an empty packet, which in turn gets treated like a flush packet. In practice this doesn't matter, as neither empty nor newline-only packets are part of git's protocols (at least not for the line-oriented bits, and readers who are not expecting line-oriented packets will be calling packet_read directly, anyway). But even if we do decide to care about the distinction later, it is orthogonal to this patch. The right place to tighten would be to stop treating empty packets as flush packets, and this change does not make doing so any harder. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- upload-pack.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'upload-pack.c') diff --git a/upload-pack.c b/upload-pack.c index afc2d9279c..6e6d166876 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -50,13 +50,6 @@ static void reset_timeout(void) alarm(timeout); } -static int strip(char *line, int len) -{ - if (len && line[len-1] == '\n') - line[--len] = 0; - return len; -} - static ssize_t send_client_data(int fd, const char *data, ssize_t sz) { if (use_sideband) @@ -447,7 +440,6 @@ static int get_common_commits(void) got_other = 0; continue; } - strip(line, len); if (!prefixcmp(line, "have ")) { switch (got_sha1(line+5, sha1)) { case -1: /* they have what we do not */ -- cgit v1.2.3 From 74543a0423c96130b3b07946c20b10735c3b5b15 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 20 Feb 2013 15:02:57 -0500 Subject: pkt-line: provide a LARGE_PACKET_MAX static buffer Most of the callers of packet_read_line just read into a static 1000-byte buffer (callers which handle arbitrary binary data already use LARGE_PACKET_MAX). This works fine in practice, because: 1. The only variable-sized data in these lines is a ref name, and refs tend to be a lot shorter than 1000 characters. 2. When sending ref lines, git-core always limits itself to 1000 byte packets. However, the only limit given in the protocol specification in Documentation/technical/protocol-common.txt is LARGE_PACKET_MAX; the 1000 byte limit is mentioned only in pack-protocol.txt, and then only describing what we write, not as a specific limit for readers. This patch lets us bump the 1000-byte limit to LARGE_PACKET_MAX. Even though git-core will never write a packet where this makes a difference, there are two good reasons to do this: 1. Other git implementations may have followed protocol-common.txt and used a larger maximum size. We don't bump into it in practice because it would involve very long ref names. 2. We may want to increase the 1000-byte limit one day. Since packets are transferred before any capabilities, it's difficult to do this in a backwards-compatible way. But if we bump the size of buffer the readers can handle, eventually older versions of git will be obsolete enough that we can justify bumping the writers, as well. We don't have plans to do this anytime soon, but there is no reason not to start the clock ticking now. Just bumping all of the reading bufs to LARGE_PACKET_MAX would waste memory. Instead, since most readers just read into a temporary buffer anyway, let's provide a single static buffer that all callers can use. We can further wrap this detail away by having the packet_read_line wrapper just use the buffer transparently and return a pointer to the static storage. That covers most of the cases, and the remaining ones already read into their own LARGE_PACKET_MAX buffers. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- upload-pack.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'upload-pack.c') diff --git a/upload-pack.c b/upload-pack.c index 6e6d166876..98ddb69581 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -408,7 +408,6 @@ static int ok_to_give_up(void) static int get_common_commits(void) { - static char line[1000]; unsigned char sha1[20]; char last_hex[41]; int got_common = 0; @@ -418,10 +417,10 @@ static int get_common_commits(void) save_commit_buffer = 0; for (;;) { - int len = packet_read_line(0, line, sizeof(line)); + char *line = packet_read_line(0, NULL); reset_timeout(); - if (!len) { + if (!line) { if (multi_ack == 2 && got_common && !got_other && ok_to_give_up()) { sent_ready = 1; @@ -567,8 +566,7 @@ error: static void receive_needs(void) { struct object_array shallows = OBJECT_ARRAY_INIT; - static char line[1000]; - int len, depth = 0; + int depth = 0; int has_non_tip = 0; shallow_nr = 0; @@ -576,9 +574,9 @@ static void receive_needs(void) struct object *o; const char *features; unsigned char sha1_buf[20]; - len = packet_read_line(0, line, sizeof(line)); + char *line = packet_read_line(0, NULL); reset_timeout(); - if (!len) + if (!line) break; if (!prefixcmp(line, "shallow ")) { -- cgit v1.2.3