From 468887f0f8a0a9e465737c3ad23cb40c8d690f2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Kl=C3=B6tzke?= Date: Sat, 1 Jul 2023 22:57:02 +0200 Subject: ref-filter: handle nested tags in --points-at option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tags are dereferenced until reaching a different object type to handle nested tags, e.g. on checkout. In contrast, "git tag --points-at=..." fails to list such nested tags because only one level of indirection is obtained in filter_refs(). Implement the recursive dereferencing for the "--points-at" option when filtering refs to unify the behaviour. Signed-off-by: Jan Klötzke Signed-off-by: Junio C Hamano --- ref-filter.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'ref-filter.c') diff --git a/ref-filter.c b/ref-filter.c index 60919f375f..7b17128bef 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2332,10 +2332,7 @@ static int for_each_fullref_in_pattern(struct ref_filter *filter, * of oids. If the given ref is a tag, check if the given tag points * at one of the oids in the given oid array. * NEEDSWORK: - * 1. Only a single level of indirection is obtained, we might want to - * change this to account for multiple levels (e.g. annotated tags - * pointing to annotated tags pointing to a commit.) - * 2. As the refs are cached we might know what refname peels to without + * As the refs are cached we might know what refname peels to without * the need to parse the object via parse_object(). peel_ref() might be a * more efficient alternative to obtain the pointee. */ @@ -2343,18 +2340,19 @@ static const struct object_id *match_points_at(struct oid_array *points_at, const struct object_id *oid, const char *refname) { - const struct object_id *tagged_oid = NULL; struct object *obj; if (oid_array_lookup(points_at, oid) >= 0) return oid; obj = parse_object(the_repository, oid); + while (obj && obj->type == OBJ_TAG) { + oid = get_tagged_oid((struct tag *)obj); + if (oid_array_lookup(points_at, oid) >= 0) + return oid; + obj = parse_object(the_repository, oid); + } if (!obj) die(_("malformed object at '%s'"), refname); - if (obj->type == OBJ_TAG) - tagged_oid = get_tagged_oid((struct tag *)obj); - if (tagged_oid && oid_array_lookup(points_at, tagged_oid) >= 0) - return tagged_oid; return NULL; } -- cgit v1.2.3 From b9584c5858799d5603851af5f0dbad5e7af29b22 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 2 Jul 2023 18:35:40 -0400 Subject: ref-filter: avoid parsing tagged objects in match_points_at() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we peel tags to check if they match a --points-at oid, we recursively parse the tagged object to see if it is also a tag. But since the tag itself tells us the type of the object it points to (and even gives us the appropriate object struct via its "tagged" member), we can use that directly. We do still have to make sure to call parse_tag() before looking at each tag. This is redundant for the outermost tag (since we did call parse_object() to find its type), but that's OK; parse_tag() is smart enough to make this a noop when the tag has already been parsed. In my clone of linux.git, with 782 tags (and only 3 non-tags), this yields a significant speedup (bringing us back where we were before the commit before this one started recursively dereferencing tags): Benchmark 1: ./git.old for-each-ref --points-at=HEAD --format="%(refname)" Time (mean ± σ): 20.3 ms ± 0.5 ms [User: 11.1 ms, System: 9.1 ms] Range (min … max): 19.6 ms … 21.5 ms 141 runs Benchmark 2: ./git.new for-each-ref --points-at=HEAD --format="%(refname)" Time (mean ± σ): 11.4 ms ± 0.2 ms [User: 6.3 ms, System: 5.0 ms] Range (min … max): 11.0 ms … 12.2 ms 250 runs Summary './git.new for-each-ref --points-at=HEAD --format="%(refname)"' ran 1.79 ± 0.05 times faster than './git.old for-each-ref --points-at=HEAD --format="%(refname)"' Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- ref-filter.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'ref-filter.c') diff --git a/ref-filter.c b/ref-filter.c index 7b17128bef..2eb41accb3 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2346,10 +2346,18 @@ static const struct object_id *match_points_at(struct oid_array *points_at, return oid; obj = parse_object(the_repository, oid); while (obj && obj->type == OBJ_TAG) { - oid = get_tagged_oid((struct tag *)obj); + struct tag *tag = (struct tag *)obj; + + if (parse_tag(tag) < 0) { + obj = NULL; + break; + } + + oid = get_tagged_oid(tag); if (oid_array_lookup(points_at, oid) >= 0) return oid; - obj = parse_object(the_repository, oid); + + obj = tag->tagged; } if (!obj) die(_("malformed object at '%s'"), refname); -- cgit v1.2.3 From 870eb53ab20e9ff453e3b89b4927c154c2b7211a Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 2 Jul 2023 18:37:47 -0400 Subject: ref-filter: avoid parsing non-tags in match_points_at() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When handling --points-at, we have to try to peel each ref to see if it's a tag that points at a requested oid. We start this process by calling parse_object() on the oid pointed to by each ref. The cost of parsing each object adds up, especially in an output that doesn't otherwise need to open the objects at all. Ideally we'd use peel_iterated_oid() here, which uses the cached information in the packed-refs file. But we can't, because our --points-at must match not only the fully peeled value, but any interim values (so if tag A points to tag B which points to commit C, we should match --points-at=B, but peel_iterated_oid() will only tell us about C). So the best we can do (absent changes to the packed-refs peel traits) is to avoid parsing non-tags. The obvious way to do that is to call oid_object_info() to check the type before parsing. But there are a few gotchas there, like checking if the object has already been parsed. Instead we can just tell parse_object() that we are OK skipping the hash check, which lets it turn on several optimizations. Commits can be loaded via the commit graph (so it's both fast and we have the benefit of the parsed data if we need it later at the output stage). Blobs are not loaded at all. Trees are still loaded, but it's rather rare to have a ref point directly to a tree (and since this is just an optimization, kicking in 99% of the time is OK). Even though we're paying for an extra lookup, the cost to avoid parsing the non-tags is a net benefit. In my git.git repository with 941 tags and 1440 other refs pointing to commits, this significantly cuts the runtime: Benchmark 1: ./git.old for-each-ref --points-at=HEAD Time (mean ± σ): 26.8 ms ± 0.5 ms [User: 24.5 ms, System: 2.2 ms] Range (min … max): 25.9 ms … 29.2 ms 107 runs Benchmark 2: ./git.new for-each-ref --points-at=HEAD Time (mean ± σ): 9.1 ms ± 0.3 ms [User: 6.8 ms, System: 2.2 ms] Range (min … max): 8.6 ms … 10.2 ms 308 runs Summary './git.new for-each-ref --points-at=HEAD' ran 2.96 ± 0.10 times faster than './git.old for-each-ref --points-at=HEAD' In a repository that is mostly annotated tags, we'd expect less improvement (we might still skip a few object loads, but that's balanced by the extra lookups). In my clone of linux.git, which has 782 tags and 3 branches, the run-time is about the same (it's actually ~1% faster on average after this patch, but that's within the run-to-run noise). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- ref-filter.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'ref-filter.c') diff --git a/ref-filter.c b/ref-filter.c index 2eb41accb3..948a25335a 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2344,7 +2344,8 @@ static const struct object_id *match_points_at(struct oid_array *points_at, if (oid_array_lookup(points_at, oid) >= 0) return oid; - obj = parse_object(the_repository, oid); + obj = parse_object_with_flags(the_repository, oid, + PARSE_OBJECT_SKIP_HASH_CHECK); while (obj && obj->type == OBJ_TAG) { struct tag *tag = (struct tag *)obj; -- cgit v1.2.3 From d9e00621591549cd2d9989d290a7e0c63eadd03b Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 2 Jul 2023 18:38:29 -0400 Subject: ref-filter: simplify return type of match_points_at We return the oid that matched, but the sole caller only cares whether we matched anything at all. This is mostly academic, since there's only one caller, but the lifetime of the returned pointer is not immediately clear. Sometimes it points to an oid in a tag struct, which should live forever. And sometimes to the oid passed in, which only lives as long as the each_ref_fn callback we're called from. Simplify this to a boolean return which is more direct and obvious. As a bonus, this lets us avoid the weird pattern of overwriting our "oid" parameter in the loop (since we now only refer to the tagged oid one time, and can just inline the call to get it). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- ref-filter.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'ref-filter.c') diff --git a/ref-filter.c b/ref-filter.c index 948a25335a..e18e977b9e 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2330,20 +2330,22 @@ static int for_each_fullref_in_pattern(struct ref_filter *filter, /* * Given a ref (oid, refname), check if the ref belongs to the array * of oids. If the given ref is a tag, check if the given tag points - * at one of the oids in the given oid array. + * at one of the oids in the given oid array. Returns non-zero if a + * match is found. + * * NEEDSWORK: * As the refs are cached we might know what refname peels to without * the need to parse the object via parse_object(). peel_ref() might be a * more efficient alternative to obtain the pointee. */ -static const struct object_id *match_points_at(struct oid_array *points_at, - const struct object_id *oid, - const char *refname) +static int match_points_at(struct oid_array *points_at, + const struct object_id *oid, + const char *refname) { struct object *obj; if (oid_array_lookup(points_at, oid) >= 0) - return oid; + return 1; obj = parse_object_with_flags(the_repository, oid, PARSE_OBJECT_SKIP_HASH_CHECK); while (obj && obj->type == OBJ_TAG) { @@ -2354,15 +2356,14 @@ static const struct object_id *match_points_at(struct oid_array *points_at, break; } - oid = get_tagged_oid(tag); - if (oid_array_lookup(points_at, oid) >= 0) - return oid; + if (oid_array_lookup(points_at, get_tagged_oid(tag)) >= 0) + return 1; obj = tag->tagged; } if (!obj) die(_("malformed object at '%s'"), refname); - return NULL; + return 0; } /* -- cgit v1.2.3