From 9cb3cab56063754d9ee5bb27886c616ca1aec134 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Wed, 10 Jun 2020 13:57:15 -0700 Subject: http: use --stdin when indexing dumb HTTP pack When Git fetches a pack using dumb HTTP, (among other things) it invokes index-pack on a ".pack.temp" packfile, specifying the filename as an argument. A future commit will require the aforementioned invocation of index-pack to also generate a "keep" file. To use this, we either have to use index-pack's naming convention (because --keep requires the pack's filename to end with ".pack") or to pass the pack through stdin. Of the two, it is simpler to pass the pack through stdin. Thus, teach http to pass --stdin to index-pack. As a bonus, the code is now simpler. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- http.c | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/http.c b/http.c index 62aa995245..39cbd56702 100644 --- a/http.c +++ b/http.c @@ -2270,9 +2270,9 @@ int finish_http_pack_request(struct http_pack_request *preq) { struct packed_git **lst; struct packed_git *p = preq->target; - char *tmp_idx; - size_t len; struct child_process ip = CHILD_PROCESS_INIT; + int tmpfile_fd; + int ret = 0; close_pack_index(p); @@ -2284,35 +2284,24 @@ int finish_http_pack_request(struct http_pack_request *preq) lst = &((*lst)->next); *lst = (*lst)->next; - if (!strip_suffix(preq->tmpfile.buf, ".pack.temp", &len)) - BUG("pack tmpfile does not end in .pack.temp?"); - tmp_idx = xstrfmt("%.*s.idx.temp", (int)len, preq->tmpfile.buf); + tmpfile_fd = xopen(preq->tmpfile.buf, O_RDONLY); argv_array_push(&ip.args, "index-pack"); - argv_array_pushl(&ip.args, "-o", tmp_idx, NULL); - argv_array_push(&ip.args, preq->tmpfile.buf); + argv_array_push(&ip.args, "--stdin"); ip.git_cmd = 1; - ip.no_stdin = 1; + ip.in = tmpfile_fd; ip.no_stdout = 1; if (run_command(&ip)) { - unlink(preq->tmpfile.buf); - unlink(tmp_idx); - free(tmp_idx); - return -1; - } - - unlink(sha1_pack_index_name(p->hash)); - - if (finalize_object_file(preq->tmpfile.buf, sha1_pack_name(p->hash)) - || finalize_object_file(tmp_idx, sha1_pack_index_name(p->hash))) { - free(tmp_idx); - return -1; + ret = -1; + goto cleanup; } install_packed_git(the_repository, p); - free(tmp_idx); - return 0; +cleanup: + close(tmpfile_fd); + unlink(preq->tmpfile.buf); + return ret; } struct http_pack_request *new_http_pack_request( -- cgit v1.2.3 From eb05349247415992644fc63ba0cf0c4821d4eef2 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Wed, 10 Jun 2020 13:57:16 -0700 Subject: http: refactor finish_http_pack_request() finish_http_pack_request() does multiple tasks, including some housekeeping on a struct packed_git - (1) closing its index, (2) removing it from a list, and (3) installing it. These concerns are independent of fetching a pack through HTTP: they are there only because (1) the calling code opens the pack's index before deciding to fetch it, (2) the calling code maintains a list of packfiles that can be fetched, and (3) the calling code fetches it in order to make use of its objects in the same process. In preparation for a subsequent commit, which adds a feature that does not need any of this housekeeping, remove (1), (2), and (3) from finish_http_pack_request(). (2) and (3) are now done by a helper function, and (1) is the responsibility of the caller (in this patch, done closer to the point where the pack index is opened). Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- http-push.c | 8 ++++++-- http-walker.c | 5 +++-- http.c | 31 ++++++++++++++++--------------- http.h | 13 ++++++++++--- 4 files changed, 35 insertions(+), 22 deletions(-) diff --git a/http-push.c b/http-push.c index 822f326599..ac7868ffee 100644 --- a/http-push.c +++ b/http-push.c @@ -117,6 +117,7 @@ enum transfer_state { struct transfer_request { struct object *obj; + struct packed_git *target; char *url; char *dest; struct remote_lock *lock; @@ -314,17 +315,18 @@ static void start_fetch_packed(struct transfer_request *request) release_request(request); return; } + close_pack_index(target); + request->target = target; fprintf(stderr, "Fetching pack %s\n", hash_to_hex(target->hash)); fprintf(stderr, " which contains %s\n", oid_to_hex(&request->obj->oid)); - preq = new_http_pack_request(target, repo->url); + preq = new_http_pack_request(target->hash, repo->url); if (preq == NULL) { repo->can_update_info_refs = 0; return; } - preq->lst = &repo->packs; /* Make sure there isn't another open request for this pack */ while (check_request) { @@ -597,6 +599,8 @@ static void finish_request(struct transfer_request *request) } if (fail) repo->can_update_info_refs = 0; + else + http_install_packfile(request->target, &repo->packs); release_request(request); } } diff --git a/http-walker.c b/http-walker.c index fe15e325fa..4fb1235cd4 100644 --- a/http-walker.c +++ b/http-walker.c @@ -439,6 +439,7 @@ static int http_fetch_pack(struct walker *walker, struct alt_base *repo, unsigne target = find_sha1_pack(sha1, repo->packs); if (!target) return -1; + close_pack_index(target); if (walker->get_verbosely) { fprintf(stderr, "Getting pack %s\n", @@ -447,10 +448,9 @@ static int http_fetch_pack(struct walker *walker, struct alt_base *repo, unsigne hash_to_hex(sha1)); } - preq = new_http_pack_request(target, repo->base); + preq = new_http_pack_request(target->hash, repo->base); if (preq == NULL) goto abort; - preq->lst = &repo->packs; preq->slot->results = &results; if (start_active_slot(preq->slot)) { @@ -469,6 +469,7 @@ static int http_fetch_pack(struct walker *walker, struct alt_base *repo, unsigne release_http_pack_request(preq); if (ret) return ret; + http_install_packfile(target, &repo->packs); return 0; diff --git a/http.c b/http.c index 39cbd56702..4f6e1fb018 100644 --- a/http.c +++ b/http.c @@ -2268,22 +2268,13 @@ void release_http_pack_request(struct http_pack_request *preq) int finish_http_pack_request(struct http_pack_request *preq) { - struct packed_git **lst; - struct packed_git *p = preq->target; struct child_process ip = CHILD_PROCESS_INIT; int tmpfile_fd; int ret = 0; - close_pack_index(p); - fclose(preq->packfile); preq->packfile = NULL; - lst = preq->lst; - while (*lst != p) - lst = &((*lst)->next); - *lst = (*lst)->next; - tmpfile_fd = xopen(preq->tmpfile.buf, O_RDONLY); argv_array_push(&ip.args, "index-pack"); @@ -2297,15 +2288,26 @@ int finish_http_pack_request(struct http_pack_request *preq) goto cleanup; } - install_packed_git(the_repository, p); cleanup: close(tmpfile_fd); unlink(preq->tmpfile.buf); return ret; } +void http_install_packfile(struct packed_git *p, + struct packed_git **list_to_remove_from) +{ + struct packed_git **lst = list_to_remove_from; + + while (*lst != p) + lst = &((*lst)->next); + *lst = (*lst)->next; + + install_packed_git(the_repository, p); +} + struct http_pack_request *new_http_pack_request( - struct packed_git *target, const char *base_url) + const unsigned char *packed_git_hash, const char *base_url) { off_t prev_posn = 0; struct strbuf buf = STRBUF_INIT; @@ -2313,14 +2315,13 @@ struct http_pack_request *new_http_pack_request( preq = xcalloc(1, sizeof(*preq)); strbuf_init(&preq->tmpfile, 0); - preq->target = target; end_url_with_slash(&buf, base_url); strbuf_addf(&buf, "objects/pack/pack-%s.pack", - hash_to_hex(target->hash)); + hash_to_hex(packed_git_hash)); preq->url = strbuf_detach(&buf, NULL); - strbuf_addf(&preq->tmpfile, "%s.temp", sha1_pack_name(target->hash)); + strbuf_addf(&preq->tmpfile, "%s.temp", sha1_pack_name(packed_git_hash)); preq->packfile = fopen(preq->tmpfile.buf, "a"); if (!preq->packfile) { error("Unable to open local file %s for pack", @@ -2344,7 +2345,7 @@ struct http_pack_request *new_http_pack_request( if (http_is_verbose) fprintf(stderr, "Resuming fetch of pack %s at byte %"PRIuMAX"\n", - hash_to_hex(target->hash), + hash_to_hex(packed_git_hash), (uintmax_t)prev_posn); http_opt_request_remainder(preq->slot->curl, prev_posn); } diff --git a/http.h b/http.h index 5e0ad724f9..bbc6b070f1 100644 --- a/http.h +++ b/http.h @@ -216,18 +216,25 @@ int http_get_info_packs(const char *base_url, struct http_pack_request { char *url; - struct packed_git *target; - struct packed_git **lst; FILE *packfile; struct strbuf tmpfile; struct active_request_slot *slot; }; struct http_pack_request *new_http_pack_request( - struct packed_git *target, const char *base_url); + const unsigned char *packed_git_hash, const char *base_url); int finish_http_pack_request(struct http_pack_request *preq); void release_http_pack_request(struct http_pack_request *preq); +/* + * Remove p from the given list, and invoke install_packed_git() on it. + * + * This is a convenience function for users that have obtained a list of packs + * from http_get_info_packs() and have chosen a specific pack to fetch. + */ +void http_install_packfile(struct packed_git *p, + struct packed_git **list_to_remove_from); + /* Helpers for fetching object */ struct http_object_request { char *url; -- cgit v1.2.3 From 8e6adb69e18b18de72f0114d153b47bed4560560 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Wed, 10 Jun 2020 13:57:17 -0700 Subject: http-fetch: refactor into function cmd_main() in http-fetch.c will grow in a future patch, so refactor the HTTP walking part into its own function. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- http-fetch.c | 69 ++++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 41 insertions(+), 28 deletions(-) diff --git a/http-fetch.c b/http-fetch.c index a32ac118d9..e538174bde 100644 --- a/http-fetch.c +++ b/http-fetch.c @@ -7,16 +7,49 @@ static const char http_fetch_usage[] = "git http-fetch " "[-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin] commit-id url"; -int cmd_main(int argc, const char **argv) +static int fetch_using_walker(const char *raw_url, int get_verbosely, + int get_recover, int commits, char **commit_id, + const char **write_ref, int commits_on_stdin) { + char *url = NULL; struct walker *walker; + int rc; + + str_end_url_with_slash(raw_url, &url); + + http_init(NULL, url, 0); + + walker = get_http_walker(url); + walker->get_verbosely = get_verbosely; + walker->get_recover = get_recover; + walker->get_progress = 0; + + rc = walker_fetch(walker, commits, commit_id, write_ref, url); + + if (commits_on_stdin) + walker_targets_free(commits, commit_id, write_ref); + + if (walker->corrupt_object_found) { + fprintf(stderr, +"Some loose object were found to be corrupt, but they might be just\n" +"a false '404 Not Found' error message sent with incorrect HTTP\n" +"status code. Suggest running 'git fsck'.\n"); + } + + walker_free(walker); + http_cleanup(); + free(url); + + return rc; +} + +int cmd_main(int argc, const char **argv) +{ int commits_on_stdin = 0; int commits; const char **write_ref = NULL; char **commit_id; - char *url = NULL; int arg = 1; - int rc = 0; int get_verbosely = 0; int get_recover = 0; @@ -47,34 +80,14 @@ int cmd_main(int argc, const char **argv) commits = 1; } - if (argv[arg]) - str_end_url_with_slash(argv[arg], &url); - setup_git_directory(); git_config(git_default_config, NULL); - http_init(NULL, url, 0); - walker = get_http_walker(url); - walker->get_verbosely = get_verbosely; - walker->get_recover = get_recover; - - rc = walker_fetch(walker, commits, commit_id, write_ref, url); - - if (commits_on_stdin) - walker_targets_free(commits, commit_id, write_ref); + if (!argv[arg]) + BUG("must have one arg remaining"); - if (walker->corrupt_object_found) { - fprintf(stderr, -"Some loose object were found to be corrupt, but they might be just\n" -"a false '404 Not Found' error message sent with incorrect HTTP\n" -"status code. Suggest running 'git fsck'.\n"); - } - - walker_free(walker); - http_cleanup(); - - free(url); - - return rc; + return fetch_using_walker(argv[arg], get_verbosely, get_recover, + commits, commit_id, write_ref, + commits_on_stdin); } -- cgit v1.2.3 From 8d5d2a34df4f82cd9cce913fa25f3a3c2c07d126 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Wed, 10 Jun 2020 13:57:18 -0700 Subject: http-fetch: support fetching packfiles by URL Teach http-fetch the ability to download packfiles directly, given a URL, and to verify them. The http_pack_request suite has been augmented with a function that takes a URL directly. With this function, the hash is only used to determine the name of the temporary file. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- Documentation/git-http-fetch.txt | 9 +++++- http-fetch.c | 63 +++++++++++++++++++++++++++++++++------- http.c | 28 +++++++++++++----- http.h | 11 +++++++ t/t5550-http-fetch-dumb.sh | 30 +++++++++++++++++++ 5 files changed, 123 insertions(+), 18 deletions(-) diff --git a/Documentation/git-http-fetch.txt b/Documentation/git-http-fetch.txt index 666b042679..4deb4893f5 100644 --- a/Documentation/git-http-fetch.txt +++ b/Documentation/git-http-fetch.txt @@ -9,7 +9,7 @@ git-http-fetch - Download from a remote Git repository via HTTP SYNOPSIS -------- [verse] -'git http-fetch' [-c] [-t] [-a] [-d] [-v] [-w filename] [--recover] [--stdin] +'git http-fetch' [-c] [-t] [-a] [-d] [-v] [-w filename] [--recover] [--stdin | --packfile= | ] DESCRIPTION ----------- @@ -40,6 +40,13 @@ commit-id:: ['\t'] +--packfile=:: + Instead of a commit id on the command line (which is not expected in + this case), 'git http-fetch' fetches the packfile directly at the given + URL and uses index-pack to generate corresponding .idx and .keep files. + The hash is used to determine the name of the temporary file and is + arbitrary. The output of index-pack is printed to stdout. + --recover:: Verify that everything reachable from target is fetched. Used after an earlier fetch is interrupted. diff --git a/http-fetch.c b/http-fetch.c index e538174bde..1df376e745 100644 --- a/http-fetch.c +++ b/http-fetch.c @@ -5,7 +5,7 @@ #include "walker.h" static const char http_fetch_usage[] = "git http-fetch " -"[-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin] commit-id url"; +"[-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin | --packfile=hash | commit-id] url"; static int fetch_using_walker(const char *raw_url, int get_verbosely, int get_recover, int commits, char **commit_id, @@ -43,6 +43,37 @@ static int fetch_using_walker(const char *raw_url, int get_verbosely, return rc; } +static void fetch_single_packfile(struct object_id *packfile_hash, + const char *url) { + struct http_pack_request *preq; + struct slot_results results; + int ret; + + http_init(NULL, url, 0); + + preq = new_direct_http_pack_request(packfile_hash->hash, xstrdup(url)); + if (preq == NULL) + die("couldn't create http pack request"); + preq->slot->results = &results; + preq->generate_keep = 1; + + if (start_active_slot(preq->slot)) { + run_active_slot(preq->slot); + if (results.curl_result != CURLE_OK) { + die("Unable to get pack file %s\n%s", preq->url, + curl_errorstr); + } + } else { + die("Unable to start request"); + } + + if ((ret = finish_http_pack_request(preq))) + die("finish_http_pack_request gave result %d", ret); + + release_http_pack_request(preq); + http_cleanup(); +} + int cmd_main(int argc, const char **argv) { int commits_on_stdin = 0; @@ -52,8 +83,12 @@ int cmd_main(int argc, const char **argv) int arg = 1; int get_verbosely = 0; int get_recover = 0; + int packfile = 0; + struct object_id packfile_hash; while (arg < argc && argv[arg][0] == '-') { + const char *p; + if (argv[arg][1] == 't') { } else if (argv[arg][1] == 'c') { } else if (argv[arg][1] == 'a') { @@ -68,25 +103,33 @@ int cmd_main(int argc, const char **argv) get_recover = 1; } else if (!strcmp(argv[arg], "--stdin")) { commits_on_stdin = 1; + } else if (skip_prefix(argv[arg], "--packfile=", &p)) { + const char *end; + + packfile = 1; + if (parse_oid_hex(p, &packfile_hash, &end) || *end) + die(_("argument to --packfile must be a valid hash (got '%s')"), p); } arg++; } - if (argc != arg + 2 - commits_on_stdin) + if (argc != arg + 2 - (commits_on_stdin || packfile)) usage(http_fetch_usage); - if (commits_on_stdin) { - commits = walker_targets_stdin(&commit_id, &write_ref); - } else { - commit_id = (char **) &argv[arg++]; - commits = 1; - } setup_git_directory(); git_config(git_default_config, NULL); - if (!argv[arg]) - BUG("must have one arg remaining"); + if (packfile) { + fetch_single_packfile(&packfile_hash, argv[arg]); + return 0; + } + if (commits_on_stdin) { + commits = walker_targets_stdin(&commit_id, &write_ref); + } else { + commit_id = (char **) &argv[arg++]; + commits = 1; + } return fetch_using_walker(argv[arg], get_verbosely, get_recover, commits, commit_id, write_ref, commits_on_stdin); diff --git a/http.c b/http.c index 4f6e1fb018..3aa0fa9fe6 100644 --- a/http.c +++ b/http.c @@ -2281,7 +2281,13 @@ int finish_http_pack_request(struct http_pack_request *preq) argv_array_push(&ip.args, "--stdin"); ip.git_cmd = 1; ip.in = tmpfile_fd; - ip.no_stdout = 1; + if (preq->generate_keep) { + argv_array_pushf(&ip.args, "--keep=git %"PRIuMAX, + (uintmax_t)getpid()); + ip.out = 0; + } else { + ip.no_stdout = 1; + } if (run_command(&ip)) { ret = -1; @@ -2307,19 +2313,27 @@ void http_install_packfile(struct packed_git *p, } struct http_pack_request *new_http_pack_request( - const unsigned char *packed_git_hash, const char *base_url) + const unsigned char *packed_git_hash, const char *base_url) { + + struct strbuf buf = STRBUF_INIT; + + end_url_with_slash(&buf, base_url); + strbuf_addf(&buf, "objects/pack/pack-%s.pack", + hash_to_hex(packed_git_hash)); + return new_direct_http_pack_request(packed_git_hash, + strbuf_detach(&buf, NULL)); +} + +struct http_pack_request *new_direct_http_pack_request( + const unsigned char *packed_git_hash, char *url) { off_t prev_posn = 0; - struct strbuf buf = STRBUF_INIT; struct http_pack_request *preq; preq = xcalloc(1, sizeof(*preq)); strbuf_init(&preq->tmpfile, 0); - end_url_with_slash(&buf, base_url); - strbuf_addf(&buf, "objects/pack/pack-%s.pack", - hash_to_hex(packed_git_hash)); - preq->url = strbuf_detach(&buf, NULL); + preq->url = url; strbuf_addf(&preq->tmpfile, "%s.temp", sha1_pack_name(packed_git_hash)); preq->packfile = fopen(preq->tmpfile.buf, "a"); diff --git a/http.h b/http.h index bbc6b070f1..dc49c60165 100644 --- a/http.h +++ b/http.h @@ -216,6 +216,15 @@ int http_get_info_packs(const char *base_url, struct http_pack_request { char *url; + + /* + * If this is true, finish_http_pack_request() will pass "--keep" to + * index-pack, resulting in the creation of a keep file, and will not + * suppress its stdout (that is, the "keep\t\n" line will be + * printed to stdout). + */ + unsigned generate_keep : 1; + FILE *packfile; struct strbuf tmpfile; struct active_request_slot *slot; @@ -223,6 +232,8 @@ struct http_pack_request { struct http_pack_request *new_http_pack_request( const unsigned char *packed_git_hash, const char *base_url); +struct http_pack_request *new_direct_http_pack_request( + const unsigned char *packed_git_hash, char *url); int finish_http_pack_request(struct http_pack_request *preq); void release_http_pack_request(struct http_pack_request *preq); diff --git a/t/t5550-http-fetch-dumb.sh b/t/t5550-http-fetch-dumb.sh index 50485300eb..ca2e8af022 100755 --- a/t/t5550-http-fetch-dumb.sh +++ b/t/t5550-http-fetch-dumb.sh @@ -199,6 +199,28 @@ test_expect_success 'fetch packed objects' ' git clone $HTTPD_URL/dumb/repo_pack.git ' +test_expect_success 'http-fetch --packfile' ' + # Arbitrary hash. Use rev-parse so that we get one of the correct + # length. + ARBITRARY=$(git -C "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git rev-parse HEAD) && + + git init packfileclient && + p=$(cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git && ls objects/pack/pack-*.pack) && + git -C packfileclient http-fetch --packfile=$ARBITRARY "$HTTPD_URL"/dumb/repo_pack.git/$p >out && + + grep "^keep.[0-9a-f]\{16,\}$" out && + cut -c6- out >packhash && + + # Ensure that the expected files are generated + test -e "packfileclient/.git/objects/pack/pack-$(cat packhash).pack" && + test -e "packfileclient/.git/objects/pack/pack-$(cat packhash).idx" && + test -e "packfileclient/.git/objects/pack/pack-$(cat packhash).keep" && + + # Ensure that it has the HEAD of repo_pack, at least + HASH=$(git -C "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git rev-parse HEAD) && + git -C packfileclient cat-file -e "$HASH" +' + test_expect_success 'fetch notices corrupt pack' ' cp -R "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad1.git && (cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad1.git && @@ -214,6 +236,14 @@ test_expect_success 'fetch notices corrupt pack' ' ) ' +test_expect_success 'http-fetch --packfile with corrupt pack' ' + rm -rf packfileclient && + git init packfileclient && + p=$(cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad1.git && ls objects/pack/pack-*.pack) && + test_must_fail git -C packfileclient http-fetch --packfile \ + "$HTTPD_URL"/dumb/repo_bad1.git/$p +' + test_expect_success 'fetch notices corrupt idx' ' cp -R "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad2.git && (cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad2.git && -- cgit v1.2.3 From fd194dd56ad4c21a561bce4182a61c38bdf4588c Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Wed, 10 Jun 2020 13:57:19 -0700 Subject: Documentation: order protocol v2 sections The current C Git implementation expects Git servers to follow a specific order of sections when transmitting protocol v2 responses, but this is not explicit in the documentation. Make the order explicit. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- Documentation/technical/protocol-v2.txt | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/Documentation/technical/protocol-v2.txt b/Documentation/technical/protocol-v2.txt index 7e3766cafb..995f07481e 100644 --- a/Documentation/technical/protocol-v2.txt +++ b/Documentation/technical/protocol-v2.txt @@ -325,11 +325,11 @@ included in the client's request: The response of `fetch` is broken into a number of sections separated by delimiter packets (0001), with each section beginning with its section -header. +header. Most sections are sent only when the packfile is sent. - output = *section - section = (acknowledgments | shallow-info | wanted-refs | packfile) - (flush-pkt | delim-pkt) + output = acknowledgements flush-pkt | + [acknowledgments delim-pkt] [shallow-info delim-pkt] + [wanted-refs delim-pkt] packfile flush-pkt acknowledgments = PKT-LINE("acknowledgments" LF) (nak | *ack) @@ -351,9 +351,10 @@ header. *PKT-LINE(%x01-03 *%x00-ff) acknowledgments section - * If the client determines that it is finished with negotiations - by sending a "done" line, the acknowledgments sections MUST be - omitted from the server's response. + * If the client determines that it is finished with negotiations by + sending a "done" line (thus requiring the server to send a packfile), + the acknowledgments sections MUST be omitted from the server's + response. * Always begins with the section header "acknowledgments" @@ -404,9 +405,6 @@ header. which the client has not indicated was shallow as a part of its request. - * This section is only included if a packfile section is also - included in the response. - wanted-refs section * This section is only included if the client has requested a ref using a 'want-ref' line and if a packfile section is also -- cgit v1.2.3 From cd8402e0fd8cfc0ec9fb10e22ffb6aabd992eae1 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Wed, 10 Jun 2020 13:57:20 -0700 Subject: Documentation: add Packfile URIs design doc Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- Documentation/technical/packfile-uri.txt | 78 ++++++++++++++++++++++++++++++++ Documentation/technical/protocol-v2.txt | 32 ++++++++++++- 2 files changed, 109 insertions(+), 1 deletion(-) create mode 100644 Documentation/technical/packfile-uri.txt diff --git a/Documentation/technical/packfile-uri.txt b/Documentation/technical/packfile-uri.txt new file mode 100644 index 0000000000..318713abc3 --- /dev/null +++ b/Documentation/technical/packfile-uri.txt @@ -0,0 +1,78 @@ +Packfile URIs +============= + +This feature allows servers to serve part of their packfile response as URIs. +This allows server designs that improve scalability in bandwidth and CPU usage +(for example, by serving some data through a CDN), and (in the future) provides +some measure of resumability to clients. + +This feature is available only in protocol version 2. + +Protocol +-------- + +The server advertises the `packfile-uris` capability. + +If the client then communicates which protocols (HTTPS, etc.) it supports with +a `packfile-uris` argument, the server MAY send a `packfile-uris` section +directly before the `packfile` section (right after `wanted-refs` if it is +sent) containing URIs of any of the given protocols. The URIs point to +packfiles that use only features that the client has declared that it supports +(e.g. ofs-delta and thin-pack). See protocol-v2.txt for the documentation of +this section. + +Clients should then download and index all the given URIs (in addition to +downloading and indexing the packfile given in the `packfile` section of the +response) before performing the connectivity check. + +Server design +------------- + +The server can be trivially made compatible with the proposed protocol by +having it advertise `packfile-uris`, tolerating the client sending +`packfile-uris`, and never sending any `packfile-uris` section. But we should +include some sort of non-trivial implementation in the Minimum Viable Product, +at least so that we can test the client. + +This is the implementation: a feature, marked experimental, that allows the +server to be configured by one or more `uploadpack.blobPackfileUri= +` entries. Whenever the list of objects to be sent is assembled, all such +blobs are excluded, replaced with URIs. The client will download those URIs, +expecting them to each point to packfiles containing single blobs. + +Client design +------------- + +The client has a config variable `fetch.uriprotocols` that determines which +protocols the end user is willing to use. By default, this is empty. + +When the client downloads the given URIs, it should store them with "keep" +files, just like it does with the packfile in the `packfile` section. These +additional "keep" files can only be removed after the refs have been updated - +just like the "keep" file for the packfile in the `packfile` section. + +The division of work (initial fetch + additional URIs) introduces convenient +points for resumption of an interrupted clone - such resumption can be done +after the Minimum Viable Product (see "Future work"). + +Future work +----------- + +The protocol design allows some evolution of the server and client without any +need for protocol changes, so only a small-scoped design is included here to +form the MVP. For example, the following can be done: + + * On the server, more sophisticated means of excluding objects (e.g. by + specifying a commit to represent that commit and all objects that it + references). + * On the client, resumption of clone. If a clone is interrupted, information + could be recorded in the repository's config and a "clone-resume" command + can resume the clone in progress. (Resumption of subsequent fetches is more + difficult because that must deal with the user wanting to use the repository + even after the fetch was interrupted.) + +There are some possible features that will require a change in protocol: + + * Additional HTTP headers (e.g. authentication) + * Byte range support + * Different file formats referenced by URIs (e.g. raw object) diff --git a/Documentation/technical/protocol-v2.txt b/Documentation/technical/protocol-v2.txt index 995f07481e..f9f4e4ddd0 100644 --- a/Documentation/technical/protocol-v2.txt +++ b/Documentation/technical/protocol-v2.txt @@ -323,13 +323,26 @@ included in the client's request: indicating its sideband (1, 2, or 3), and the server may send "0005\2" (a PKT-LINE of sideband 2 with no payload) as a keepalive packet. +If the 'packfile-uris' feature is advertised, the following argument +can be included in the client's request as well as the potential +addition of the 'packfile-uris' section in the server's response as +explained below. + + packfile-uris + Indicates to the server that the client is willing to receive + URIs of any of the given protocols in place of objects in the + sent packfile. Before performing the connectivity check, the + client should download from all given URIs. Currently, the + protocols supported are "http" and "https". + The response of `fetch` is broken into a number of sections separated by delimiter packets (0001), with each section beginning with its section header. Most sections are sent only when the packfile is sent. output = acknowledgements flush-pkt | [acknowledgments delim-pkt] [shallow-info delim-pkt] - [wanted-refs delim-pkt] packfile flush-pkt + [wanted-refs delim-pkt] [packfile-uris delim-pkt] + packfile flush-pkt acknowledgments = PKT-LINE("acknowledgments" LF) (nak | *ack) @@ -347,6 +360,9 @@ header. Most sections are sent only when the packfile is sent. *PKT-LINE(wanted-ref LF) wanted-ref = obj-id SP refname + packfile-uris = PKT-LINE("packfile-uris" LF) *packfile-uri + packfile-uri = PKT-LINE(40*(HEXDIGIT) SP *%x20-ff LF) + packfile = PKT-LINE("packfile" LF) *PKT-LINE(%x01-03 *%x00-ff) @@ -418,6 +434,20 @@ header. Most sections are sent only when the packfile is sent. * The server MUST NOT send any refs which were not requested using 'want-ref' lines. + packfile-uris section + * This section is only included if the client sent + 'packfile-uris' and the server has at least one such URI to + send. + + * Always begins with the section header "packfile-uris". + + * For each URI the server sends, it sends a hash of the pack's + contents (as output by git index-pack) followed by the URI. + + * The hashes are 40 hex characters long. When Git upgrades to a new + hash algorithm, this might need to be updated. (It should match + whatever index-pack outputs after "pack\t" or "keep\t". + packfile section * This section is only included if the client has sent 'want' lines in its request and either requested that no more -- cgit v1.2.3 From acaaca7d7030bb349441da10bf9ca6917f30cbb2 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Wed, 10 Jun 2020 13:57:21 -0700 Subject: upload-pack: refactor reading of pack-objects out Subsequent patches will change how the output of pack-objects is processed, so extract that processing into its own function. Currently, at most 1 character can be buffered (in the "buffered" local variable). One of those patches will require a larger buffer, so replace that "buffered" local variable with a buffer array. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- upload-pack.c | 84 ++++++++++++++++++++++++++++++++++------------------------- 1 file changed, 49 insertions(+), 35 deletions(-) diff --git a/upload-pack.c b/upload-pack.c index bc7e3ca19d..da1f749620 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -173,13 +173,52 @@ static int write_one_shallow(const struct commit_graft *graft, void *cb_data) return 0; } +struct output_state { + char buffer[8193]; + int used; +}; + +static int relay_pack_data(int pack_objects_out, struct output_state *os, + int use_sideband) +{ + /* + * We keep the last byte to ourselves + * in case we detect broken rev-list, so that we + * can leave the stream corrupted. This is + * unfortunate -- unpack-objects would happily + * accept a valid packdata with trailing garbage, + * so appending garbage after we pass all the + * pack data is not good enough to signal + * breakage to downstream. + */ + ssize_t readsz; + + readsz = xread(pack_objects_out, os->buffer + os->used, + sizeof(os->buffer) - os->used); + if (readsz < 0) { + return readsz; + } + os->used += readsz; + + if (os->used > 1) { + send_client_data(1, os->buffer, os->used - 1, use_sideband); + os->buffer[0] = os->buffer[os->used - 1]; + os->used = 1; + } else { + send_client_data(1, os->buffer, os->used, use_sideband); + os->used = 0; + } + + return readsz; +} + static void create_pack_file(struct upload_pack_data *pack_data) { struct child_process pack_objects = CHILD_PROCESS_INIT; - char data[8193], progress[128]; + struct output_state output_state = { { 0 } }; + char progress[128]; char abort_msg[] = "aborting due to possible repository " "corruption on the remote side."; - int buffered = -1; ssize_t sz; int i; FILE *pipe_fd; @@ -312,40 +351,16 @@ static void create_pack_file(struct upload_pack_data *pack_data) continue; } if (0 <= pu && (pfd[pu].revents & (POLLIN|POLLHUP))) { - /* Data ready; we keep the last byte to ourselves - * in case we detect broken rev-list, so that we - * can leave the stream corrupted. This is - * unfortunate -- unpack-objects would happily - * accept a valid packdata with trailing garbage, - * so appending garbage after we pass all the - * pack data is not good enough to signal - * breakage to downstream. - */ - char *cp = data; - ssize_t outsz = 0; - if (0 <= buffered) { - *cp++ = buffered; - outsz++; - } - sz = xread(pack_objects.out, cp, - sizeof(data) - outsz); - if (0 < sz) - ; - else if (sz == 0) { + int result = relay_pack_data(pack_objects.out, + &output_state, + pack_data->use_sideband); + + if (result == 0) { close(pack_objects.out); pack_objects.out = -1; - } - else + } else if (result < 0) { goto fail; - sz += outsz; - if (1 < sz) { - buffered = data[sz-1] & 0xFF; - sz--; } - else - buffered = -1; - send_client_data(1, data, sz, - pack_data->use_sideband); } /* @@ -370,9 +385,8 @@ static void create_pack_file(struct upload_pack_data *pack_data) } /* flush the data */ - if (0 <= buffered) { - data[0] = buffered; - send_client_data(1, data, 1, + if (output_state.used > 0) { + send_client_data(1, output_state.buffer, output_state.used, pack_data->use_sideband); fprintf(stderr, "flushed.\n"); } -- cgit v1.2.3 From 9da69a6539e98da1b7ed8832cb54b494961463cb Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Wed, 10 Jun 2020 13:57:22 -0700 Subject: fetch-pack: support more than one pack lockfile Whenever a fetch results in a packfile being downloaded, a .keep file is generated, so that the packfile can be preserved (from, say, a running "git repack") until refs are written referring to the contents of the packfile. In a subsequent patch, a successful fetch using protocol v2 may result in more than one .keep file being generated. Therefore, teach fetch_pack() and the transport mechanism to support multiple .keep files. Implementation notes: - builtin/fetch-pack.c normally does not generate .keep files, and thus is unaffected by this or future changes. However, it has an undocumented "--lock-pack" feature, used by remote-curl.c when implementing the "fetch" remote helper command. In keeping with the remote helper protocol, only one "lock" line will ever be written; the rest will result in warnings to stderr. However, in practice, warnings will never be written because the remote-curl.c "fetch" is only used for protocol v0/v1 (which will not generate multiple .keep files). (Protocol v2 uses the "stateless-connect" command, not the "fetch" command.) - connected.c has an optimization in that connectivity checks on a ref need not be done if the target object is in a pack known to be self-contained and connected. If there are multiple packfiles, this optimization can no longer be done. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- builtin/fetch-pack.c | 17 +++++++++++------ connected.c | 8 +++++--- fetch-pack.c | 29 +++++++++++++++-------------- fetch-pack.h | 2 +- transport-helper.c | 5 +++-- transport.c | 14 ++++++++------ transport.h | 6 +++--- 7 files changed, 46 insertions(+), 35 deletions(-) diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c index 4771100072..f66891b010 100644 --- a/builtin/fetch-pack.c +++ b/builtin/fetch-pack.c @@ -48,8 +48,8 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix) struct ref **sought = NULL; int nr_sought = 0, alloc_sought = 0; int fd[2]; - char *pack_lockfile = NULL; - char **pack_lockfile_ptr = NULL; + struct string_list pack_lockfiles = STRING_LIST_INIT_DUP; + struct string_list *pack_lockfiles_ptr = NULL; struct child_process *conn; struct fetch_pack_args args; struct oid_array shallow = OID_ARRAY_INIT; @@ -134,7 +134,7 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix) } if (!strcmp("--lock-pack", arg)) { args.lock_pack = 1; - pack_lockfile_ptr = &pack_lockfile; + pack_lockfiles_ptr = &pack_lockfiles; continue; } if (!strcmp("--check-self-contained-and-connected", arg)) { @@ -235,10 +235,15 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix) } ref = fetch_pack(&args, fd, ref, sought, nr_sought, - &shallow, pack_lockfile_ptr, version); - if (pack_lockfile) { - printf("lock %s\n", pack_lockfile); + &shallow, pack_lockfiles_ptr, version); + if (pack_lockfiles.nr) { + int i; + + printf("lock %s\n", pack_lockfiles.items[0].string); fflush(stdout); + for (i = 1; i < pack_lockfiles.nr; i++) + warning(_("Lockfile created but not reported: %s"), + pack_lockfiles.items[i].string); } if (args.check_self_contained_and_connected && args.self_contained_and_connected) { diff --git a/connected.c b/connected.c index 3135b71e19..937b4bae38 100644 --- a/connected.c +++ b/connected.c @@ -43,10 +43,12 @@ int check_connected(oid_iterate_fn fn, void *cb_data, if (transport && transport->smart_options && transport->smart_options->self_contained_and_connected && - transport->pack_lockfile && - strip_suffix(transport->pack_lockfile, ".keep", &base_len)) { + transport->pack_lockfiles.nr == 1 && + strip_suffix(transport->pack_lockfiles.items[0].string, + ".keep", &base_len)) { struct strbuf idx_file = STRBUF_INIT; - strbuf_add(&idx_file, transport->pack_lockfile, base_len); + strbuf_add(&idx_file, transport->pack_lockfiles.items[0].string, + base_len); strbuf_addstr(&idx_file, ".idx"); new_pack = add_packed_git(idx_file.buf, idx_file.len, 1); strbuf_release(&idx_file); diff --git a/fetch-pack.c b/fetch-pack.c index 7eaa19d7c1..bc3af3c726 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -794,7 +794,7 @@ static void write_promisor_file(const char *keep_name, } static int get_pack(struct fetch_pack_args *args, - int xd[2], char **pack_lockfile, + int xd[2], struct string_list *pack_lockfiles, struct ref **sought, int nr_sought) { struct async demux; @@ -838,7 +838,7 @@ static int get_pack(struct fetch_pack_args *args, } if (do_keep || args->from_promisor) { - if (pack_lockfile) + if (pack_lockfiles) cmd.out = -1; cmd_name = "index-pack"; argv_array_push(&cmd.args, cmd_name); @@ -863,7 +863,7 @@ static int get_pack(struct fetch_pack_args *args, * information below. If not, we need index-pack to do it for * us. */ - if (!(do_keep && pack_lockfile) && args->from_promisor) + if (!(do_keep && pack_lockfiles) && args->from_promisor) argv_array_push(&cmd.args, "--promisor"); } else { @@ -899,8 +899,9 @@ static int get_pack(struct fetch_pack_args *args, cmd.git_cmd = 1; if (start_command(&cmd)) die(_("fetch-pack: unable to fork off %s"), cmd_name); - if (do_keep && pack_lockfile) { - *pack_lockfile = index_pack_lockfile(cmd.out); + if (do_keep && pack_lockfiles) { + string_list_append_nodup(pack_lockfiles, + index_pack_lockfile(cmd.out)); close(cmd.out); } @@ -922,8 +923,8 @@ static int get_pack(struct fetch_pack_args *args, * Now that index-pack has succeeded, write the promisor file using the * obtained .keep filename if necessary */ - if (do_keep && pack_lockfile && args->from_promisor) - write_promisor_file(*pack_lockfile, sought, nr_sought); + if (do_keep && pack_lockfiles && pack_lockfiles->nr && args->from_promisor) + write_promisor_file(pack_lockfiles->items[0].string, sought, nr_sought); return 0; } @@ -940,7 +941,7 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args, const struct ref *orig_ref, struct ref **sought, int nr_sought, struct shallow_info *si, - char **pack_lockfile) + struct string_list *pack_lockfiles) { struct repository *r = the_repository; struct ref *ref = copy_ref_list(orig_ref); @@ -1067,7 +1068,7 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args, alternate_shallow_file = setup_temporary_shallow(si->shallow); else alternate_shallow_file = NULL; - if (get_pack(args, fd, pack_lockfile, sought, nr_sought)) + if (get_pack(args, fd, pack_lockfiles, sought, nr_sought)) die(_("git fetch-pack: fetch failed.")); all_done: @@ -1457,7 +1458,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, struct ref **sought, int nr_sought, struct oid_array *shallows, struct shallow_info *si, - char **pack_lockfile) + struct string_list *pack_lockfiles) { struct repository *r = the_repository; struct ref *ref = copy_ref_list(orig_ref); @@ -1559,7 +1560,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, /* get the pack */ process_section_header(&reader, "packfile", 0); - if (get_pack(args, fd, pack_lockfile, sought, nr_sought)) + if (get_pack(args, fd, pack_lockfiles, sought, nr_sought)) die(_("git fetch-pack: fetch failed.")); state = FETCH_DONE; @@ -1759,7 +1760,7 @@ struct ref *fetch_pack(struct fetch_pack_args *args, const struct ref *ref, struct ref **sought, int nr_sought, struct oid_array *shallow, - char **pack_lockfile, + struct string_list *pack_lockfiles, enum protocol_version version) { struct ref *ref_cpy; @@ -1794,11 +1795,11 @@ struct ref *fetch_pack(struct fetch_pack_args *args, memset(&si, 0, sizeof(si)); ref_cpy = do_fetch_pack_v2(args, fd, ref, sought, nr_sought, &shallows_scratch, &si, - pack_lockfile); + pack_lockfiles); } else { prepare_shallow_info(&si, shallow); ref_cpy = do_fetch_pack(args, fd, ref, sought, nr_sought, - &si, pack_lockfile); + &si, pack_lockfiles); } reprepare_packed_git(the_repository); diff --git a/fetch-pack.h b/fetch-pack.h index 67f684229a..85d1e39fe7 100644 --- a/fetch-pack.h +++ b/fetch-pack.h @@ -83,7 +83,7 @@ struct ref *fetch_pack(struct fetch_pack_args *args, struct ref **sought, int nr_sought, struct oid_array *shallow, - char **pack_lockfile, + struct string_list *pack_lockfiles, enum protocol_version version); /* diff --git a/transport-helper.c b/transport-helper.c index a46afcb69d..93a6f50793 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -410,10 +410,11 @@ static int fetch_with_fetch(struct transport *transport, exit(128); if (skip_prefix(buf.buf, "lock ", &name)) { - if (transport->pack_lockfile) + if (transport->pack_lockfiles.nr) warning(_("%s also locked %s"), data->name, name); else - transport->pack_lockfile = xstrdup(name); + string_list_append(&transport->pack_lockfiles, + name); } else if (data->check_connectivity && data->transport_options.check_self_contained_and_connected && diff --git a/transport.c b/transport.c index 15f5ba4e8f..a67e1990bf 100644 --- a/transport.c +++ b/transport.c @@ -374,7 +374,7 @@ static int fetch_refs_via_pack(struct transport *transport, refs = fetch_pack(&args, data->fd, refs_tmp ? refs_tmp : transport->remote_refs, to_fetch, nr_heads, &data->shallow, - &transport->pack_lockfile, data->version); + &transport->pack_lockfiles, data->version); break; case protocol_v1: case protocol_v0: @@ -382,7 +382,7 @@ static int fetch_refs_via_pack(struct transport *transport, refs = fetch_pack(&args, data->fd, refs_tmp ? refs_tmp : transport->remote_refs, to_fetch, nr_heads, &data->shallow, - &transport->pack_lockfile, data->version); + &transport->pack_lockfiles, data->version); break; case protocol_unknown_version: BUG("unknown protocol version"); @@ -929,6 +929,7 @@ struct transport *transport_get(struct remote *remote, const char *url) struct transport *ret = xcalloc(1, sizeof(*ret)); ret->progress = isatty(2); + string_list_init(&ret->pack_lockfiles, 1); if (!remote) BUG("No remote provided to transport_get()"); @@ -1324,10 +1325,11 @@ int transport_fetch_refs(struct transport *transport, struct ref *refs) void transport_unlock_pack(struct transport *transport) { - if (transport->pack_lockfile) { - unlink_or_warn(transport->pack_lockfile); - FREE_AND_NULL(transport->pack_lockfile); - } + int i; + + for (i = 0; i < transport->pack_lockfiles.nr; i++) + unlink_or_warn(transport->pack_lockfiles.items[i].string); + string_list_clear(&transport->pack_lockfiles, 0); } int transport_connect(struct transport *transport, const char *name, diff --git a/transport.h b/transport.h index 4298c855be..05efa72db1 100644 --- a/transport.h +++ b/transport.h @@ -5,8 +5,7 @@ #include "run-command.h" #include "remote.h" #include "list-objects-filter-options.h" - -struct string_list; +#include "string-list.h" struct git_transport_options { unsigned thin : 1; @@ -98,7 +97,8 @@ struct transport { */ const struct string_list *server_options; - char *pack_lockfile; + struct string_list pack_lockfiles; + signed verbose : 3; /** * Transports should not set this directly, and should use this -- cgit v1.2.3 From dd4b732df73b06878b81fce050e7dcac4366a38e Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Wed, 10 Jun 2020 13:57:23 -0700 Subject: upload-pack: send part of packfile response as uri Teach upload-pack to send part of its packfile response as URIs. An administrator may configure a repository with one or more "uploadpack.blobpackfileuri" lines, each line containing an OID, a pack hash, and a URI. A client may configure fetch.uriprotocols to be a comma-separated list of protocols that it is willing to use to fetch additional packfiles - this list will be sent to the server. Whenever an object with one of those OIDs would appear in the packfile transmitted by upload-pack, the server may exclude that object, and instead send the URI. The client will then download the packs referred to by those URIs before performing the connectivity check. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 76 +++++++++++++++++++++++++++++++++ fetch-pack.c | 112 +++++++++++++++++++++++++++++++++++++++++++++++-- t/t5702-protocol-v2.sh | 88 ++++++++++++++++++++++++++++++++++++++ upload-pack.c | 77 +++++++++++++++++++++++++++++++--- 4 files changed, 343 insertions(+), 10 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index c5b433a23f..7016b28485 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -117,6 +117,8 @@ static unsigned long window_memory_limit = 0; static struct list_objects_filter_options filter_options; +static struct string_list uri_protocols = STRING_LIST_INIT_NODUP; + enum missing_action { MA_ERROR = 0, /* fail if any missing objects are encountered */ MA_ALLOW_ANY, /* silently allow ALL missing objects */ @@ -125,6 +127,15 @@ enum missing_action { static enum missing_action arg_missing_action; static show_object_fn fn_show_object; +struct configured_exclusion { + struct oidmap_entry e; + char *pack_hash_hex; + char *uri; +}; +static struct oidmap configured_exclusions; + +static struct oidset excluded_by_config; + /* * stats */ @@ -969,6 +980,25 @@ static void write_reused_pack(struct hashfile *f) unuse_pack(&w_curs); } +static void write_excluded_by_configs(void) +{ + struct oidset_iter iter; + const struct object_id *oid; + + oidset_iter_init(&excluded_by_config, &iter); + while ((oid = oidset_iter_next(&iter))) { + struct configured_exclusion *ex = + oidmap_get(&configured_exclusions, oid); + + if (!ex) + BUG("configured exclusion wasn't configured"); + write_in_full(1, ex->pack_hash_hex, strlen(ex->pack_hash_hex)); + write_in_full(1, " ", 1); + write_in_full(1, ex->uri, strlen(ex->uri)); + write_in_full(1, "\n", 1); + } +} + static const char no_split_warning[] = N_( "disabling bitmap writing, packs are split due to pack.packSizeLimit" ); @@ -1266,6 +1296,25 @@ static int want_object_in_pack(const struct object_id *oid, } } + if (uri_protocols.nr) { + struct configured_exclusion *ex = + oidmap_get(&configured_exclusions, oid); + int i; + const char *p; + + if (ex) { + for (i = 0; i < uri_protocols.nr; i++) { + if (skip_prefix(ex->uri, + uri_protocols.items[i].string, + &p) && + *p == ':') { + oidset_insert(&excluded_by_config, oid); + return 0; + } + } + } + } + return 1; } @@ -2864,6 +2913,29 @@ static int git_pack_config(const char *k, const char *v, void *cb) pack_idx_opts.version); return 0; } + if (!strcmp(k, "uploadpack.blobpackfileuri")) { + struct configured_exclusion *ex = xmalloc(sizeof(*ex)); + const char *oid_end, *pack_end; + /* + * Stores the pack hash. This is not a true object ID, but is + * of the same form. + */ + struct object_id pack_hash; + + if (parse_oid_hex(v, &ex->e.oid, &oid_end) || + *oid_end != ' ' || + parse_oid_hex(oid_end + 1, &pack_hash, &pack_end) || + *pack_end != ' ') + die(_("value of uploadpack.blobpackfileuri must be " + "of the form ' ' (got '%s')"), v); + if (oidmap_get(&configured_exclusions, &ex->e.oid)) + die(_("object already configured in another " + "uploadpack.blobpackfileuri (got '%s')"), v); + ex->pack_hash_hex = xcalloc(1, pack_end - oid_end); + memcpy(ex->pack_hash_hex, oid_end + 1, pack_end - oid_end - 1); + ex->uri = xstrdup(pack_end + 1); + oidmap_put(&configured_exclusions, ex); + } return git_default_config(k, v, cb); } @@ -3462,6 +3534,9 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) N_("do not pack objects in promisor packfiles")), OPT_BOOL(0, "delta-islands", &use_delta_islands, N_("respect islands during delta compression")), + OPT_STRING_LIST(0, "uri-protocol", &uri_protocols, + N_("protocol"), + N_("exclude any configured uploadpack.blobpackfileuri with this protocol")), OPT_END(), }; @@ -3650,6 +3725,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) } trace2_region_enter("pack-objects", "write-pack-file", the_repository); + write_excluded_by_configs(); write_pack_file(); trace2_region_leave("pack-objects", "write-pack-file", the_repository); diff --git a/fetch-pack.c b/fetch-pack.c index bc3af3c726..ca2b101b8d 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -38,6 +38,7 @@ static int server_supports_filtering; static struct shallow_lock shallow_lock; static const char *alternate_shallow_file; static struct strbuf fsck_msg_types = STRBUF_INIT; +static struct string_list uri_protocols = STRING_LIST_INIT_DUP; /* Remember to update object flag allocation in object.h */ #define COMPLETE (1U << 0) @@ -795,6 +796,7 @@ static void write_promisor_file(const char *keep_name, static int get_pack(struct fetch_pack_args *args, int xd[2], struct string_list *pack_lockfiles, + int only_packfile, struct ref **sought, int nr_sought) { struct async demux; @@ -855,8 +857,15 @@ static int get_pack(struct fetch_pack_args *args, "--keep=fetch-pack %"PRIuMAX " on %s", (uintmax_t)getpid(), hostname); } - if (args->check_self_contained_and_connected) + if (only_packfile && args->check_self_contained_and_connected) argv_array_push(&cmd.args, "--check-self-contained-and-connected"); + else + /* + * We cannot perform any connectivity checks because + * not all packs have been downloaded; let the caller + * have this responsibility. + */ + args->check_self_contained_and_connected = 0; /* * If we're obtaining the filename of a lockfile, we'll use * that filename to write a .promisor file with more @@ -1068,7 +1077,7 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args, alternate_shallow_file = setup_temporary_shallow(si->shallow); else alternate_shallow_file = NULL; - if (get_pack(args, fd, pack_lockfiles, sought, nr_sought)) + if (get_pack(args, fd, pack_lockfiles, 1, sought, nr_sought)) die(_("git fetch-pack: fetch failed.")); all_done: @@ -1222,6 +1231,26 @@ static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out, warning("filtering not recognized by server, ignoring"); } + if (server_supports_feature("fetch", "packfile-uris", 0)) { + int i; + struct strbuf to_send = STRBUF_INIT; + + for (i = 0; i < uri_protocols.nr; i++) { + const char *s = uri_protocols.items[i].string; + + if (!strcmp(s, "https") || !strcmp(s, "http")) { + if (to_send.len) + strbuf_addch(&to_send, ','); + strbuf_addstr(&to_send, s); + } + } + if (to_send.len) { + packet_buf_write(&req_buf, "packfile-uris %s", + to_send.buf); + strbuf_release(&to_send); + } + } + /* add wants */ add_wants(args->no_dependents, wants, &req_buf); @@ -1444,6 +1473,21 @@ static void receive_wanted_refs(struct packet_reader *reader, die(_("error processing wanted refs: %d"), reader->status); } +static void receive_packfile_uris(struct packet_reader *reader, + struct string_list *uris) +{ + process_section_header(reader, "packfile-uris", 0); + while (packet_reader_read(reader) == PACKET_READ_NORMAL) { + if (reader->pktlen < the_hash_algo->hexsz || + reader->line[the_hash_algo->hexsz] != ' ') + die("expected ' ', got: %s\n", reader->line); + + string_list_append(uris, reader->line); + } + if (reader->status != PACKET_READ_DELIM) + die("expected DELIM"); +} + enum fetch_state { FETCH_CHECK_LOCAL = 0, FETCH_SEND_REQUEST, @@ -1470,6 +1514,8 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, struct fetch_negotiator negotiator_alloc; struct fetch_negotiator *negotiator; int seen_ack = 0; + struct string_list packfile_uris = STRING_LIST_INIT_DUP; + int i; if (args->no_dependents) { negotiator = NULL; @@ -1558,9 +1604,12 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, if (process_section_header(&reader, "wanted-refs", 1)) receive_wanted_refs(&reader, sought, nr_sought); - /* get the pack */ + /* get the pack(s) */ + if (process_section_header(&reader, "packfile-uris", 1)) + receive_packfile_uris(&reader, &packfile_uris); process_section_header(&reader, "packfile", 0); - if (get_pack(args, fd, pack_lockfiles, sought, nr_sought)) + if (get_pack(args, fd, pack_lockfiles, + !packfile_uris.nr, sought, nr_sought)) die(_("git fetch-pack: fetch failed.")); state = FETCH_DONE; @@ -1570,8 +1619,55 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, } } + for (i = 0; i < packfile_uris.nr; i++) { + struct child_process cmd = CHILD_PROCESS_INIT; + char packname[GIT_MAX_HEXSZ + 1]; + const char *uri = packfile_uris.items[i].string + + the_hash_algo->hexsz + 1; + + argv_array_push(&cmd.args, "http-fetch"); + argv_array_pushf(&cmd.args, "--packfile=%.*s", + (int) the_hash_algo->hexsz, + packfile_uris.items[i].string); + argv_array_push(&cmd.args, uri); + cmd.git_cmd = 1; + cmd.no_stdin = 1; + cmd.out = -1; + if (start_command(&cmd)) + die("fetch-pack: unable to spawn http-fetch"); + + if (read_in_full(cmd.out, packname, 5) < 0 || + memcmp(packname, "keep\t", 5)) + die("fetch-pack: expected keep then TAB at start of http-fetch output"); + + if (read_in_full(cmd.out, packname, + the_hash_algo->hexsz + 1) < 0 || + packname[the_hash_algo->hexsz] != '\n') + die("fetch-pack: expected hash then LF at end of http-fetch output"); + + packname[the_hash_algo->hexsz] = '\0'; + + close(cmd.out); + + if (finish_command(&cmd)) + die("fetch-pack: unable to finish http-fetch"); + + if (memcmp(packfile_uris.items[i].string, packname, + the_hash_algo->hexsz)) + die("fetch-pack: pack downloaded from %s does not match expected hash %.*s", + uri, (int) the_hash_algo->hexsz, + packfile_uris.items[i].string); + + string_list_append_nodup(pack_lockfiles, + xstrfmt("%s/pack/pack-%s.keep", + get_object_directory(), + packname)); + } + string_list_clear(&packfile_uris, 0); + if (negotiator) negotiator->release(negotiator); + oidset_clear(&common); return ref; } @@ -1608,6 +1704,14 @@ static void fetch_pack_config(void) git_config_get_bool("repack.usedeltabaseoffset", &prefer_ofs_delta); git_config_get_bool("fetch.fsckobjects", &fetch_fsck_objects); git_config_get_bool("transfer.fsckobjects", &transfer_fsck_objects); + if (!uri_protocols.nr) { + char *str; + + if (!git_config_get_string("fetch.uriprotocols", &str) && str) { + string_list_split(&uri_protocols, str, ',', -1); + free(str); + } + } git_config(fetch_pack_config_cb, NULL); } diff --git a/t/t5702-protocol-v2.sh b/t/t5702-protocol-v2.sh index 5039e66dc4..53bda39fb7 100755 --- a/t/t5702-protocol-v2.sh +++ b/t/t5702-protocol-v2.sh @@ -748,6 +748,94 @@ test_expect_success 'when server does not send "ready", expect FLUSH' ' test_i18ngrep "expected no other sections to be sent after no .ready." err ' +configure_exclusion () { + git -C "$1" hash-object "$2" >objh && + git -C "$1" pack-objects "$HTTPD_DOCUMENT_ROOT_PATH/mypack" packh && + git -C "$1" config --add \ + "uploadpack.blobpackfileuri" \ + "$(cat objh) $(cat packh) $HTTPD_URL/dumb/mypack-$(cat packh).pack" && + cat objh +} + +test_expect_success 'part of packfile response provided as URI' ' + P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + rm -rf "$P" http_child log && + + git init "$P" && + git -C "$P" config "uploadpack.allowsidebandall" "true" && + + echo my-blob >"$P/my-blob" && + git -C "$P" add my-blob && + echo other-blob >"$P/other-blob" && + git -C "$P" add other-blob && + git -C "$P" commit -m x && + + configure_exclusion "$P" my-blob >h && + configure_exclusion "$P" other-blob >h2 && + + GIT_TRACE=1 GIT_TRACE_PACKET="$(pwd)/log" GIT_TEST_SIDEBAND_ALL=1 \ + git -c protocol.version=2 \ + -c fetch.uriprotocols=http,https \ + clone "$HTTPD_URL/smart/http_parent" http_child && + + # Ensure that my-blob and other-blob are in separate packfiles. + for idx in http_child/.git/objects/pack/*.idx + do + git verify-pack --verbose $idx >out && + { + grep "^[0-9a-f]\{16,\} " out || : + } >out.objectlist && + if test_line_count = 1 out.objectlist + then + if grep $(cat h) out + then + >hfound + fi && + if grep $(cat h2) out + then + >h2found + fi + fi + done && + test -f hfound && + test -f h2found && + + # Ensure that there are exactly 6 files (3 .pack and 3 .idx). + ls http_child/.git/objects/pack/* >filelist && + test_line_count = 6 filelist +' + +test_expect_success 'fetching with valid packfile URI but invalid hash fails' ' + P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + rm -rf "$P" http_child log && + + git init "$P" && + git -C "$P" config "uploadpack.allowsidebandall" "true" && + + echo my-blob >"$P/my-blob" && + git -C "$P" add my-blob && + echo other-blob >"$P/other-blob" && + git -C "$P" add other-blob && + git -C "$P" commit -m x && + + configure_exclusion "$P" my-blob >h && + # Configure a URL for other-blob. Just reuse the hash of the object as + # the hash of the packfile, since the hash does not matter for this + # test as long as it is not the hash of the pack, and it is of the + # expected length. + git -C "$P" hash-object other-blob >objh && + git -C "$P" pack-objects "$HTTPD_DOCUMENT_ROOT_PATH/mypack" packh && + git -C "$P" config --add \ + "uploadpack.blobpackfileuri" \ + "$(cat objh) $(cat objh) $HTTPD_URL/dumb/mypack-$(cat packh).pack" && + + test_must_fail env GIT_TEST_SIDEBAND_ALL=1 \ + git -c protocol.version=2 \ + -c fetch.uriprotocols=http,https \ + clone "$HTTPD_URL/smart/http_parent" http_child 2>err && + test_i18ngrep "pack downloaded from.*does not match expected hash" err +' + # DO NOT add non-httpd-specific tests here, because the last part of this # test script is only executed when httpd is available and enabled. diff --git a/upload-pack.c b/upload-pack.c index da1f749620..764265ec40 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -83,6 +83,8 @@ struct upload_pack_data { /* 0 for no sideband, otherwise DEFAULT_PACKET_MAX or LARGE_PACKET_MAX */ int use_sideband; + struct string_list uri_protocols; + struct list_objects_filter_options filter_options; struct packet_writer writer; @@ -114,6 +116,7 @@ static void upload_pack_data_init(struct upload_pack_data *data) struct oid_array haves = OID_ARRAY_INIT; struct object_array shallows = OBJECT_ARRAY_INIT; struct string_list deepen_not = STRING_LIST_INIT_DUP; + struct string_list uri_protocols = STRING_LIST_INIT_DUP; memset(data, 0, sizeof(*data)); data->symref = symref; @@ -123,6 +126,7 @@ static void upload_pack_data_init(struct upload_pack_data *data) data->haves = haves; data->shallows = shallows; data->deepen_not = deepen_not; + data->uri_protocols = uri_protocols; packet_writer_init(&data->writer, 1); data->keepalive = 5; @@ -176,10 +180,12 @@ static int write_one_shallow(const struct commit_graft *graft, void *cb_data) struct output_state { char buffer[8193]; int used; + unsigned packfile_uris_started : 1; + unsigned packfile_started : 1; }; static int relay_pack_data(int pack_objects_out, struct output_state *os, - int use_sideband) + int use_sideband, int write_packfile_line) { /* * We keep the last byte to ourselves @@ -200,6 +206,37 @@ static int relay_pack_data(int pack_objects_out, struct output_state *os, } os->used += readsz; + while (!os->packfile_started) { + char *p; + if (os->used >= 4 && !memcmp(os->buffer, "PACK", 4)) { + os->packfile_started = 1; + if (write_packfile_line) { + if (os->packfile_uris_started) + packet_delim(1); + packet_write_fmt(1, "\1packfile\n"); + } + break; + } + if ((p = memchr(os->buffer, '\n', os->used))) { + if (!os->packfile_uris_started) { + os->packfile_uris_started = 1; + if (!write_packfile_line) + BUG("packfile_uris requires sideband-all"); + packet_write_fmt(1, "\1packfile-uris\n"); + } + *p = '\0'; + packet_write_fmt(1, "\1%s\n", os->buffer); + + os->used -= p - os->buffer + 1; + memmove(os->buffer, p + 1, os->used); + } else { + /* + * Incomplete line. + */ + return readsz; + } + } + if (os->used > 1) { send_client_data(1, os->buffer, os->used - 1, use_sideband); os->buffer[0] = os->buffer[os->used - 1]; @@ -212,7 +249,8 @@ static int relay_pack_data(int pack_objects_out, struct output_state *os, return readsz; } -static void create_pack_file(struct upload_pack_data *pack_data) +static void create_pack_file(struct upload_pack_data *pack_data, + const struct string_list *uri_protocols) { struct child_process pack_objects = CHILD_PROCESS_INIT; struct output_state output_state = { { 0 } }; @@ -262,6 +300,11 @@ static void create_pack_file(struct upload_pack_data *pack_data) spec); } } + if (uri_protocols) { + for (i = 0; i < uri_protocols->nr; i++) + argv_array_pushf(&pack_objects.args, "--uri-protocol=%s", + uri_protocols->items[i].string); + } pack_objects.in = -1; pack_objects.out = -1; @@ -353,7 +396,8 @@ static void create_pack_file(struct upload_pack_data *pack_data) if (0 <= pu && (pfd[pu].revents & (POLLIN|POLLHUP))) { int result = relay_pack_data(pack_objects.out, &output_state, - pack_data->use_sideband); + pack_data->use_sideband, + !!uri_protocols); if (result == 0) { close(pack_objects.out); @@ -1210,7 +1254,7 @@ void upload_pack(struct upload_pack_options *options) receive_needs(&data, &reader); if (data.want_obj.nr) { get_common_commits(&data, &reader); - create_pack_file(&data); + create_pack_file(&data, 0); } } @@ -1363,10 +1407,18 @@ static void process_args(struct packet_reader *request, continue; } + if (skip_prefix(arg, "packfile-uris ", &p)) { + string_list_split(&data->uri_protocols, p, ',', -1); + continue; + } + /* ignore unknown lines maybe? */ die("unexpected line: '%s'", arg); } + if (data->uri_protocols.nr && !data->writer.use_sideband) + string_list_clear(&data->uri_protocols, 0); + if (request->status != PACKET_READ_FLUSH) die(_("expected flush after fetch arguments")); } @@ -1553,8 +1605,12 @@ int upload_pack_v2(struct repository *r, struct argv_array *keys, send_wanted_ref_info(&data); send_shallow_info(&data); - packet_writer_write(&data.writer, "packfile\n"); - create_pack_file(&data); + if (data.uri_protocols.nr) { + create_pack_file(&data, &data.uri_protocols); + } else { + packet_writer_write(&data.writer, "packfile\n"); + create_pack_file(&data, NULL); + } state = FETCH_DONE; break; case FETCH_DONE: @@ -1573,6 +1629,7 @@ int upload_pack_advertise(struct repository *r, int allow_filter_value; int allow_ref_in_want; int allow_sideband_all_value; + char *str = NULL; strbuf_addstr(value, "shallow"); @@ -1594,6 +1651,14 @@ int upload_pack_advertise(struct repository *r, &allow_sideband_all_value) && allow_sideband_all_value)) strbuf_addstr(value, " sideband-all"); + + if (!repo_config_get_string(the_repository, + "uploadpack.blobpackfileuri", + &str) && + str) { + strbuf_addstr(value, " packfile-uris"); + free(str); + } } return 1; -- cgit v1.2.3 From cae2ee105505d45a92cf7abcc26f22890bce4202 Mon Sep 17 00:00:00 2001 From: Ramsay Jones Date: Tue, 16 Jun 2020 00:00:20 +0100 Subject: upload-pack: fix a sparse '0 as NULL pointer' warning Signed-off-by: Ramsay Jones Signed-off-by: Junio C Hamano --- upload-pack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/upload-pack.c b/upload-pack.c index 764265ec40..219e804dc1 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -1254,7 +1254,7 @@ void upload_pack(struct upload_pack_options *options) receive_needs(&data, &reader); if (data.want_obj.nr) { get_common_commits(&data, &reader); - create_pack_file(&data, 0); + create_pack_file(&data, NULL); } } -- cgit v1.2.3