From 4f33a6345f2bd6e47253d1dbefd01874d895ab2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Wed, 9 Mar 2022 16:01:38 +0000 Subject: list-objects: handle NULL function pointers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a caller to traverse_commit_list() specifies the options for the --objects flag but does not specify a show_object function pointer, the result is a segfault. This is currently visible by running 'git bundle create --objects HEAD'. We could fix this problem by supplying a no-op callback in builtin/bundle.c, but that only solves the problem for one builtin, leaving this segfault open for other callers. Replace all callers of the show_commit and show_object function pointers in list-objects.c to call helper functions show_commit() and show_object() which check that the given context has non-NULL functions before passing the necessary data. One extra benefit is that it reduces duplication due to passing ctx->show_data to every caller. Test that this segfault no longer occurs for 'git bundle'. Co-authored-by: Derrick Stolee Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- bundle.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'bundle.c') diff --git a/bundle.c b/bundle.c index a0bb687b0f..7ba60a573d 100644 --- a/bundle.c +++ b/bundle.c @@ -544,6 +544,8 @@ int create_bundle(struct repository *r, const char *path, die("revision walk setup failed"); bpi.fd = bundle_fd; bpi.pending = &revs_copy.pending; + + revs.blob_objects = revs.tree_objects = 0; traverse_commit_list(&revs, write_bundle_prerequisites, NULL, &bpi); object_array_remove_duplicates(&revs_copy.pending); -- cgit v1.2.3 From 105c6f14ad34b417c1e78bc9a8704dcda7b059f2 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 9 Mar 2022 16:01:39 +0000 Subject: bundle: parse filter capability The v3 bundle format has capabilities, allowing newer versions of Git to create bundles with newer features. Older versions that do not understand these new capabilities will fail with a helpful warning. Create a new capability allowing Git to understand that the contained pack-file is filtered according to some object filter. Typically, this filter will be "blob:none" for a blobless partial clone. This change teaches Git to parse this capability, place its value in the bundle header, and demonstrate this understanding by adding a message to 'git bundle verify'. Since we will use gently_parse_list_objects_filter() outside of list-objects-filter-options.c, make it an external method and move its API documentation to before its declaration. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- bundle.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'bundle.c') diff --git a/bundle.c b/bundle.c index 7ba60a573d..41e75efab9 100644 --- a/bundle.c +++ b/bundle.c @@ -11,7 +11,7 @@ #include "run-command.h" #include "refs.h" #include "strvec.h" - +#include "list-objects-filter-options.h" static const char v2_bundle_signature[] = "# v2 git bundle\n"; static const char v3_bundle_signature[] = "# v3 git bundle\n"; @@ -33,6 +33,7 @@ void bundle_header_release(struct bundle_header *header) { string_list_clear(&header->prerequisites, 1); string_list_clear(&header->references, 1); + list_objects_filter_release(&header->filter); } static int parse_capability(struct bundle_header *header, const char *capability) @@ -45,6 +46,10 @@ static int parse_capability(struct bundle_header *header, const char *capability header->hash_algo = &hash_algos[algo]; return 0; } + if (skip_prefix(capability, "filter=", &arg)) { + parse_list_objects_filter(&header->filter, arg); + return 0; + } return error(_("unknown capability '%s'"), capability); } @@ -220,6 +225,8 @@ int verify_bundle(struct repository *r, req_nr = revs.pending.nr; setup_revisions(2, argv, &revs, NULL); + list_objects_filter_copy(&revs.filter, &header->filter); + if (prepare_revision_walk(&revs)) die(_("revision walk setup failed")); @@ -259,6 +266,12 @@ int verify_bundle(struct repository *r, r->nr), r->nr); list_refs(r, 0, NULL); + + if (header->filter.choice) { + printf_ln("The bundle uses this filter: %s", + list_objects_filter_spec(&header->filter)); + } + r = &header->prerequisites; if (!r->nr) { printf_ln(_("The bundle records a complete history.")); -- cgit v1.2.3 From f18b512bbba8ee508fec64698f581920bfa5b46d Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 9 Mar 2022 16:01:41 +0000 Subject: bundle: create filtered bundles A previous change allowed Git to parse bundles with the 'filter' capability. Now, teach Git to create bundles with this option. Some rearranging of code is required to get the option parsing in the correct spot. There are now two reasons why we might need capabilities (a new hash algorithm or an object filter) so that is pulled out into a place where we can check both at the same time. The --filter option is parsed as part of setup_revisions(), but it expected the --objects flag, too. That flag is somewhat implied by 'git bundle' because it creates a pack-file walking objects, but there is also a walk that walks the revision range expecting only commits. Make this parsing work by setting 'revs.tree_objects' and 'revs.blob_objects' before the call to setup_revisions(). Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- bundle.c | 53 ++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 11 deletions(-) (limited to 'bundle.c') diff --git a/bundle.c b/bundle.c index 41e75efab9..9370a6e307 100644 --- a/bundle.c +++ b/bundle.c @@ -332,6 +332,9 @@ static int write_pack_data(int bundle_fd, struct rev_info *revs, struct strvec * "--stdout", "--thin", "--delta-base-offset", NULL); strvec_pushv(&pack_objects.args, pack_options->v); + if (revs->filter.choice) + strvec_pushf(&pack_objects.args, "--filter=%s", + list_objects_filter_spec(&revs->filter)); pack_objects.in = -1; pack_objects.out = bundle_fd; pack_objects.git_cmd = 1; @@ -499,10 +502,37 @@ int create_bundle(struct repository *r, const char *path, int bundle_to_stdout; int ref_count = 0; struct rev_info revs, revs_copy; - int min_version = the_hash_algo == &hash_algos[GIT_HASH_SHA1] ? 2 : 3; + int min_version = 2; struct bundle_prerequisites_info bpi; int i; + /* init revs to list objects for pack-objects later */ + save_commit_buffer = 0; + repo_init_revisions(r, &revs, NULL); + + /* + * Pre-initialize the '--objects' flag so we can parse a + * --filter option successfully. + */ + revs.tree_objects = revs.blob_objects = 1; + + argc = setup_revisions(argc, argv, &revs, NULL); + + /* + * Reasons to require version 3: + * + * 1. @object-format is required because our hash algorithm is not + * SHA1. + * 2. @filter is required because we parsed an object filter. + */ + if (the_hash_algo != &hash_algos[GIT_HASH_SHA1] || revs.filter.choice) + min_version = 3; + + if (argc > 1) { + error(_("unrecognized argument: %s"), argv[1]); + goto err; + } + bundle_to_stdout = !strcmp(path, "-"); if (bundle_to_stdout) bundle_fd = 1; @@ -525,17 +555,14 @@ int create_bundle(struct repository *r, const char *path, write_or_die(bundle_fd, capability, strlen(capability)); write_or_die(bundle_fd, the_hash_algo->name, strlen(the_hash_algo->name)); write_or_die(bundle_fd, "\n", 1); - } - - /* init revs to list objects for pack-objects later */ - save_commit_buffer = 0; - repo_init_revisions(r, &revs, NULL); - argc = setup_revisions(argc, argv, &revs, NULL); - - if (argc > 1) { - error(_("unrecognized argument: %s"), argv[1]); - goto err; + if (revs.filter.choice) { + const char *value = expand_list_objects_filter_spec(&revs.filter); + capability = "@filter="; + write_or_die(bundle_fd, capability, strlen(capability)); + write_or_die(bundle_fd, value, strlen(value)); + write_or_die(bundle_fd, "\n", 1); + } } /* save revs.pending in revs_copy for later use */ @@ -558,6 +585,10 @@ int create_bundle(struct repository *r, const char *path, bpi.fd = bundle_fd; bpi.pending = &revs_copy.pending; + /* + * Remove any object walking here. We only care about commits and + * tags here. The revs_copy has the right instances of these values. + */ revs.blob_objects = revs.tree_objects = 0; traverse_commit_list(&revs, write_bundle_prerequisites, NULL, &bpi); object_array_remove_duplicates(&revs_copy.pending); -- cgit v1.2.3 From 4f39eb031af8690eb86eb781e43b10141dd47da9 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 9 Mar 2022 16:01:42 +0000 Subject: bundle: unbundle promisor packs In order to have a valid pack-file after unbundling a bundle that has the 'filter' capability, we need to generate a .promisor file. The bundle does not promise _where_ the objects can be found, but we can expect that these bundles will be unbundled in repositories with appropriate promisor remotes that can find those missing objects. Use the 'git index-pack --promisor=' option to create this .promisor file. Add "from-bundle" as the message to help anyone diagnose issues with these promisor packs. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- bundle.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'bundle.c') diff --git a/bundle.c b/bundle.c index 9370a6e307..56681c2113 100644 --- a/bundle.c +++ b/bundle.c @@ -620,6 +620,10 @@ int unbundle(struct repository *r, struct bundle_header *header, struct child_process ip = CHILD_PROCESS_INIT; strvec_pushl(&ip.args, "index-pack", "--fix-thin", "--stdin", NULL); + /* If there is a filter, then we need to create the promisor pack. */ + if (header->filter.choice) + strvec_push(&ip.args, "--promisor=from-bundle"); + if (extra_index_pack_args) { strvec_pushv(&ip.args, extra_index_pack_args->v); strvec_clear(extra_index_pack_args); -- cgit v1.2.3