From 926f1dd95486b596de1f6b8e108053a870da0e18 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 6 Jan 2012 14:17:40 -0500 Subject: upload-pack: avoid parsing objects during ref advertisement When we advertise a ref, the first thing we do is parse the pointed-to object. This gives us two things: 1. a "struct object" we can use to store flags 2. the type of the object, so we know whether we need to dereference it as a tag Instead, we can just use lookup_unknown_object to get an object struct, and then fill in just the type field using sha1_object_info (which, in the case of packed files, can find the information without actually inflating the object data). This can save time if you have a large number of refs, and the client isn't actually going to request those refs (e.g., because most of them are already up-to-date). The downside is that we are no longer verifying objects that we advertise by fully parsing them (however, we do still know we actually have them, because sha1_object_info must find them to get the type). While we might fail to detect a corrupt object here, if the client actually fetches the object, we will parse (and verify) it then. On a repository with 120K refs, the advertisement portion of upload-pack goes from ~3.4s to 3.2s (the failure to speed up more is largely due to the fact that most of these refs are tags, which need dereferenced to find the tag destination anyway). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- upload-pack.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'upload-pack.c') diff --git a/upload-pack.c b/upload-pack.c index 6f36f6255c..65cb0ff0fb 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -720,11 +720,14 @@ static int send_ref(const char *refname, const unsigned char *sha1, int flag, vo static const char *capabilities = "multi_ack thin-pack side-band" " side-band-64k ofs-delta shallow no-progress" " include-tag multi_ack_detailed"; - struct object *o = parse_object(sha1); + struct object *o = lookup_unknown_object(sha1); const char *refname_nons = strip_namespace(refname); - if (!o) - die("git upload-pack: cannot find object %s:", sha1_to_hex(sha1)); + if (o->type == OBJ_NONE) { + o->type = sha1_object_info(sha1, NULL); + if (o->type < 0) + die("git upload-pack: cannot find object %s:", sha1_to_hex(sha1)); + } if (capabilities) packet_write(1, "%s %s%c%s%s\n", sha1_to_hex(sha1), refname_nons, @@ -738,6 +741,7 @@ static int send_ref(const char *refname, const unsigned char *sha1, int flag, vo nr_our_refs++; } if (o->type == OBJ_TAG) { + o = parse_object(o->sha1); o = deref_tag(o, refname, 0); if (o) packet_write(1, "%s %s^{}\n", sha1_to_hex(o->sha1), refname_nons); -- cgit v1.2.3 From 90108a2441fb57f69c3f25d072d1952b306b77ab Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 6 Jan 2012 14:18:01 -0500 Subject: upload-pack: avoid parsing tag destinations When upload-pack advertises refs, it dereferences any tags it sees, and shows the resulting sha1 to the client. It does this by calling deref_tag. That function must load and parse each tag object to find the sha1 of the tagged object. However, it also ends up parsing the tagged object itself, which is not strictly necessary for upload-pack's use. Each tag produces two object loads (assuming it is not a recursive tag), when it could get away with only a single one. Dropping the second load halves the effort we spend. The downside is that we are no longer verifying the resulting object by loading it. In particular: 1. We never cross-check the "type" field given in the tag object with the type of the pointed-to object. If the tag says it points to a tag but doesn't, then we will keep peeling and realize the error. If the tag says it points to a non-tag but actually points to a tag, we will stop peeling and just advertise the pointed-to tag. 2. If we are missing the pointed-to object, we will not realize (because we never even look it up in the object db). However, both of these are errors in the object database, and both will be detected if a client actually requests the broken objects in question. So we are simply pushing the verification away from the advertising stage, and down to the actual fetching stage. On my test repo with 120K refs, this drops the time to advertise the refs from ~3.2s to ~2.0s. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- upload-pack.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'upload-pack.c') diff --git a/upload-pack.c b/upload-pack.c index 65cb0ff0fb..c01e161a9d 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -741,8 +741,7 @@ static int send_ref(const char *refname, const unsigned char *sha1, int flag, vo nr_our_refs++; } if (o->type == OBJ_TAG) { - o = parse_object(o->sha1); - o = deref_tag(o, refname, 0); + o = deref_tag_noverify(o); if (o) packet_write(1, "%s %s^{}\n", sha1_to_hex(o->sha1), refname_nons); } -- cgit v1.2.3