From 94c0956b60969bbbb3ead19638d52591a59d713c Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 21 Nov 2019 22:04:33 +0000 Subject: sparse-checkout: create builtin with 'list' subcommand The sparse-checkout feature is mostly hidden to users, as its only documentation is supplementary information in the docs for 'git read-tree'. In addition, users need to know how to edit the .git/info/sparse-checkout file with the right patterns, then run the appropriate 'git read-tree -mu HEAD' command. Keeping the working directory in sync with the sparse-checkout file requires care. Begin an effort to make the sparse-checkout feature a porcelain feature by creating a new 'git sparse-checkout' builtin. This builtin will be the preferred mechanism for manipulating the sparse-checkout file and syncing the working directory. The documentation provided is adapted from the "git read-tree" documentation with a few edits for clarity in the new context. Extra sections are added to hint toward a future change to a more restricted pattern set. Helped-by: Elijah Newren Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- .gitignore | 1 + Documentation/git-read-tree.txt | 2 +- Documentation/git-sparse-checkout.txt | 89 +++++++++++++++++++++++++++++++++++ Makefile | 1 + builtin.h | 1 + builtin/sparse-checkout.c | 86 +++++++++++++++++++++++++++++++++ command-list.txt | 1 + git.c | 1 + t/t1091-sparse-checkout-builtin.sh | 45 ++++++++++++++++++ 9 files changed, 226 insertions(+), 1 deletion(-) create mode 100644 Documentation/git-sparse-checkout.txt create mode 100644 builtin/sparse-checkout.c create mode 100755 t/t1091-sparse-checkout-builtin.sh diff --git a/.gitignore b/.gitignore index 89b3b79c1a..aebe7c0908 100644 --- a/.gitignore +++ b/.gitignore @@ -158,6 +158,7 @@ /git-show-branch /git-show-index /git-show-ref +/git-sparse-checkout /git-stage /git-stash /git-status diff --git a/Documentation/git-read-tree.txt b/Documentation/git-read-tree.txt index d271842608..da33f84f33 100644 --- a/Documentation/git-read-tree.txt +++ b/Documentation/git-read-tree.txt @@ -436,7 +436,7 @@ support. SEE ALSO -------- linkgit:git-write-tree[1]; linkgit:git-ls-files[1]; -linkgit:gitignore[5] +linkgit:gitignore[5]; linkgit:git-sparse-checkout[1]; GIT --- diff --git a/Documentation/git-sparse-checkout.txt b/Documentation/git-sparse-checkout.txt new file mode 100644 index 0000000000..87ffcbbcb0 --- /dev/null +++ b/Documentation/git-sparse-checkout.txt @@ -0,0 +1,89 @@ +git-sparse-checkout(1) +====================== + +NAME +---- +git-sparse-checkout - Initialize and modify the sparse-checkout +configuration, which reduces the checkout to a set of paths +given by a list of atterns. + + +SYNOPSIS +-------- +[verse] +'git sparse-checkout [options]' + + +DESCRIPTION +----------- + +Initialize and modify the sparse-checkout configuration, which reduces +the checkout to a set of paths given by a list of patterns. + +THIS COMMAND IS EXPERIMENTAL. ITS BEHAVIOR, AND THE BEHAVIOR OF OTHER +COMMANDS IN THE PRESENCE OF SPARSE-CHECKOUTS, WILL LIKELY CHANGE IN +THE FUTURE. + + +COMMANDS +-------- +'list':: + Provide a list of the contents in the sparse-checkout file. + + +SPARSE CHECKOUT +--------------- + +"Sparse checkout" allows populating the working directory sparsely. +It uses the skip-worktree bit (see linkgit:git-update-index[1]) to tell +Git whether a file in the working directory is worth looking at. If +the skip-worktree bit is set, then the file is ignored in the working +directory. Git will not populate the contents of those files, which +makes a sparse checkout helpful when working in a repository with many +files, but only a few are important to the current user. + +The `$GIT_DIR/info/sparse-checkout` file is used to define the +skip-worktree reference bitmap. When Git updates the working +directory, it updates the skip-worktree bits in the index based +on this file. The files matching the patterns in the file will +appear in the working directory, and the rest will not. + +## FULL PATTERN SET + +By default, the sparse-checkout file uses the same syntax as `.gitignore` +files. + +While `$GIT_DIR/info/sparse-checkout` is usually used to specify what +files are included, you can also specify what files are _not_ included, +using negative patterns. For example, to remove the file `unwanted`: + +---------------- +/* +!unwanted +---------------- + +Another tricky thing is fully repopulating the working directory when you +no longer want sparse checkout. You cannot just disable "sparse +checkout" because skip-worktree bits are still in the index and your working +directory is still sparsely populated. You should re-populate the working +directory with the `$GIT_DIR/info/sparse-checkout` file content as +follows: + +---------------- +/* +---------------- + +Then you can disable sparse checkout. Sparse checkout support in 'git +checkout' and similar commands is disabled by default. You need to +set `core.sparseCheckout` to `true` in order to have sparse checkout +support. + +SEE ALSO +-------- + +linkgit:git-read-tree[1] +linkgit:gitignore[5] + +GIT +--- +Part of the linkgit:git[1] suite diff --git a/Makefile b/Makefile index 58b92af54b..254b07a6b6 100644 --- a/Makefile +++ b/Makefile @@ -1125,6 +1125,7 @@ BUILTIN_OBJS += builtin/shortlog.o BUILTIN_OBJS += builtin/show-branch.o BUILTIN_OBJS += builtin/show-index.o BUILTIN_OBJS += builtin/show-ref.o +BUILTIN_OBJS += builtin/sparse-checkout.o BUILTIN_OBJS += builtin/stash.o BUILTIN_OBJS += builtin/stripspace.o BUILTIN_OBJS += builtin/submodule--helper.o diff --git a/builtin.h b/builtin.h index 5cf5df69f7..2b25a80cde 100644 --- a/builtin.h +++ b/builtin.h @@ -225,6 +225,7 @@ int cmd_shortlog(int argc, const char **argv, const char *prefix); int cmd_show(int argc, const char **argv, const char *prefix); int cmd_show_branch(int argc, const char **argv, const char *prefix); int cmd_show_index(int argc, const char **argv, const char *prefix); +int cmd_sparse_checkout(int argc, const char **argv, const char *prefix); int cmd_status(int argc, const char **argv, const char *prefix); int cmd_stash(int argc, const char **argv, const char *prefix); int cmd_stripspace(int argc, const char **argv, const char *prefix); diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c new file mode 100644 index 0000000000..5717c9b2cb --- /dev/null +++ b/builtin/sparse-checkout.c @@ -0,0 +1,86 @@ +#include "builtin.h" +#include "config.h" +#include "dir.h" +#include "parse-options.h" +#include "pathspec.h" +#include "repository.h" +#include "run-command.h" +#include "strbuf.h" + +static char const * const builtin_sparse_checkout_usage[] = { + N_("git sparse-checkout list"), + NULL +}; + +static char *get_sparse_checkout_filename(void) +{ + return git_pathdup("info/sparse-checkout"); +} + +static void write_patterns_to_file(FILE *fp, struct pattern_list *pl) +{ + int i; + + for (i = 0; i < pl->nr; i++) { + struct path_pattern *p = pl->patterns[i]; + + if (p->flags & PATTERN_FLAG_NEGATIVE) + fprintf(fp, "!"); + + fprintf(fp, "%s", p->pattern); + + if (p->flags & PATTERN_FLAG_MUSTBEDIR) + fprintf(fp, "/"); + + fprintf(fp, "\n"); + } +} + +static int sparse_checkout_list(int argc, const char **argv) +{ + struct pattern_list pl; + char *sparse_filename; + int res; + + memset(&pl, 0, sizeof(pl)); + + sparse_filename = get_sparse_checkout_filename(); + res = add_patterns_from_file_to_list(sparse_filename, "", 0, &pl, NULL); + free(sparse_filename); + + if (res < 0) { + warning(_("this worktree is not sparse (sparse-checkout file may not exist)")); + return 0; + } + + write_patterns_to_file(stdout, &pl); + clear_pattern_list(&pl); + + return 0; +} + +int cmd_sparse_checkout(int argc, const char **argv, const char *prefix) +{ + static struct option builtin_sparse_checkout_options[] = { + OPT_END(), + }; + + if (argc == 2 && !strcmp(argv[1], "-h")) + usage_with_options(builtin_sparse_checkout_usage, + builtin_sparse_checkout_options); + + argc = parse_options(argc, argv, prefix, + builtin_sparse_checkout_options, + builtin_sparse_checkout_usage, + PARSE_OPT_STOP_AT_NON_OPTION); + + git_config(git_default_config, NULL); + + if (argc > 0) { + if (!strcmp(argv[0], "list")) + return sparse_checkout_list(argc, argv); + } + + usage_with_options(builtin_sparse_checkout_usage, + builtin_sparse_checkout_options); +} diff --git a/command-list.txt b/command-list.txt index a9ac72bef4..d3d28252b3 100644 --- a/command-list.txt +++ b/command-list.txt @@ -166,6 +166,7 @@ git-show-index plumbinginterrogators git-show-ref plumbinginterrogators git-sh-i18n purehelpers git-sh-setup purehelpers +git-sparse-checkout mainporcelain worktree git-stash mainporcelain git-stage complete git-status mainporcelain info diff --git a/git.c b/git.c index ce6ab0ece2..7be7ad34bd 100644 --- a/git.c +++ b/git.c @@ -572,6 +572,7 @@ static struct cmd_struct commands[] = { { "show-branch", cmd_show_branch, RUN_SETUP }, { "show-index", cmd_show_index }, { "show-ref", cmd_show_ref, RUN_SETUP }, + { "sparse-checkout", cmd_sparse_checkout, RUN_SETUP | NEED_WORK_TREE }, { "stage", cmd_add, RUN_SETUP | NEED_WORK_TREE }, /* * NEEDSWORK: Until the builtin stash is thoroughly robust and no diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh new file mode 100755 index 0000000000..9b73d44907 --- /dev/null +++ b/t/t1091-sparse-checkout-builtin.sh @@ -0,0 +1,45 @@ +#!/bin/sh + +test_description='sparse checkout builtin tests' + +. ./test-lib.sh + +test_expect_success 'setup' ' + git init repo && + ( + cd repo && + echo "initial" >a && + mkdir folder1 folder2 deep && + mkdir deep/deeper1 deep/deeper2 && + mkdir deep/deeper1/deepest && + cp a folder1 && + cp a folder2 && + cp a deep && + cp a deep/deeper1 && + cp a deep/deeper2 && + cp a deep/deeper1/deepest && + git add . && + git commit -m "initial commit" + ) +' + +test_expect_success 'git sparse-checkout list (empty)' ' + git -C repo sparse-checkout list >list 2>err && + test_must_be_empty list && + test_i18ngrep "this worktree is not sparse (sparse-checkout file may not exist)" err +' + +test_expect_success 'git sparse-checkout list (populated)' ' + test_when_finished rm -f repo/.git/info/sparse-checkout && + cat >repo/.git/info/sparse-checkout <<-EOF && + /folder1/* + /deep/ + **/a + !*bin* + EOF + cp repo/.git/info/sparse-checkout expect && + git -C repo sparse-checkout list >list && + test_cmp expect list +' + +test_done -- cgit v1.2.3 From bab3c3590879c5680060a5d5e2b93b4f97519025 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 21 Nov 2019 22:04:34 +0000 Subject: sparse-checkout: create 'init' subcommand Getting started with a sparse-checkout file can be daunting. Help users start their sparse enlistment using 'git sparse-checkout init'. This will set 'core.sparseCheckout=true' in their config, write an initial set of patterns to the sparse-checkout file, and update their working directory. Make sure to use the `extensions.worktreeConfig` setting and write the sparse checkout config to the worktree-specific config file. This avoids confusing interactions with other worktrees. The use of running another process for 'git read-tree' is sub- optimal. This will be removed in a later change. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/git-sparse-checkout.txt | 11 +++++ builtin/sparse-checkout.c | 75 ++++++++++++++++++++++++++++++++++- t/t1091-sparse-checkout-builtin.sh | 40 +++++++++++++++++++ 3 files changed, 125 insertions(+), 1 deletion(-) diff --git a/Documentation/git-sparse-checkout.txt b/Documentation/git-sparse-checkout.txt index 87ffcbbcb0..491be1345f 100644 --- a/Documentation/git-sparse-checkout.txt +++ b/Documentation/git-sparse-checkout.txt @@ -30,6 +30,17 @@ COMMANDS 'list':: Provide a list of the contents in the sparse-checkout file. +'init':: + Enable the `core.sparseCheckout` setting. If the + sparse-checkout file does not exist, then populate it with + patterns that match every file in the root directory and + no other directories, then will remove all directories tracked + by Git. Add patterns to the sparse-checkout file to + repopulate the working directory. ++ +To avoid interfering with other worktrees, it first enables the +`extensions.worktreeConfig` setting and makes sure to set the +`core.sparseCheckout` setting in the worktree-specific config file. SPARSE CHECKOUT --------------- diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 5717c9b2cb..fcf97e9df8 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -8,7 +8,7 @@ #include "strbuf.h" static char const * const builtin_sparse_checkout_usage[] = { - N_("git sparse-checkout list"), + N_("git sparse-checkout (init|list)"), NULL }; @@ -59,6 +59,77 @@ static int sparse_checkout_list(int argc, const char **argv) return 0; } +static int update_working_directory(void) +{ + struct argv_array argv = ARGV_ARRAY_INIT; + int result = 0; + argv_array_pushl(&argv, "read-tree", "-m", "-u", "HEAD", NULL); + + if (run_command_v_opt(argv.argv, RUN_GIT_CMD)) { + error(_("failed to update index with new sparse-checkout paths")); + result = 1; + } + + argv_array_clear(&argv); + return result; +} + +enum sparse_checkout_mode { + MODE_NO_PATTERNS = 0, + MODE_ALL_PATTERNS = 1, +}; + +static int set_config(enum sparse_checkout_mode mode) +{ + const char *config_path; + + if (git_config_set_gently("extensions.worktreeConfig", "true")) { + error(_("failed to set extensions.worktreeConfig setting")); + return 1; + } + + config_path = git_path("config.worktree"); + git_config_set_in_file_gently(config_path, + "core.sparseCheckout", + mode ? "true" : NULL); + + return 0; +} + +static int sparse_checkout_init(int argc, const char **argv) +{ + struct pattern_list pl; + char *sparse_filename; + FILE *fp; + int res; + + if (set_config(MODE_ALL_PATTERNS)) + return 1; + + memset(&pl, 0, sizeof(pl)); + + sparse_filename = get_sparse_checkout_filename(); + res = add_patterns_from_file_to_list(sparse_filename, "", 0, &pl, NULL); + + /* If we already have a sparse-checkout file, use it. */ + if (res >= 0) { + free(sparse_filename); + goto reset_dir; + } + + /* initial mode: all blobs at root */ + fp = xfopen(sparse_filename, "w"); + if (!fp) + die(_("failed to open '%s'"), sparse_filename); + + free(sparse_filename); + fprintf(fp, "/*\n!/*/\n"); + fclose(fp); + +reset_dir: + return update_working_directory(); +} + int cmd_sparse_checkout(int argc, const char **argv, const char *prefix) { static struct option builtin_sparse_checkout_options[] = { @@ -79,6 +150,8 @@ int cmd_sparse_checkout(int argc, const char **argv, const char *prefix) if (argc > 0) { if (!strcmp(argv[0], "list")) return sparse_checkout_list(argc, argv); + if (!strcmp(argv[0], "init")) + return sparse_checkout_init(argc, argv); } usage_with_options(builtin_sparse_checkout_usage, diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index 9b73d44907..21143c529c 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -42,4 +42,44 @@ test_expect_success 'git sparse-checkout list (populated)' ' test_cmp expect list ' +test_expect_success 'git sparse-checkout init' ' + git -C repo sparse-checkout init && + cat >expect <<-EOF && + /* + !/*/ + EOF + test_cmp expect repo/.git/info/sparse-checkout && + test_cmp_config -C repo true core.sparsecheckout && + ls repo >dir && + echo a >expect && + test_cmp expect dir +' + +test_expect_success 'git sparse-checkout list after init' ' + git -C repo sparse-checkout list >actual && + cat >expect <<-EOF && + /* + !/*/ + EOF + test_cmp expect actual +' + +test_expect_success 'init with existing sparse-checkout' ' + echo "*folder*" >> repo/.git/info/sparse-checkout && + git -C repo sparse-checkout init && + cat >expect <<-EOF && + /* + !/*/ + *folder* + EOF + test_cmp expect repo/.git/info/sparse-checkout && + ls repo >dir && + cat >expect <<-EOF && + a + folder1 + folder2 + EOF + test_cmp expect dir +' + test_done -- cgit v1.2.3 From d89f09c8289a764f0a974e02bd5b38cf60d1a7d7 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 21 Nov 2019 22:04:35 +0000 Subject: clone: add --sparse mode When someone wants to clone a large repository, but plans to work using a sparse-checkout file, they either need to do a full checkout first and then reduce the patterns they included, or clone with --no-checkout, set up their patterns, and then run a checkout manually. This requires knowing a lot about the repo shape and how sparse-checkout works. Add a new '--sparse' option to 'git clone' that initializes the sparse-checkout file to include the following patterns: /* !/*/ These patterns include every file in the root directory, but no directories. This allows a repo to include files like a README or a bootstrapping script to grow enlistments from that point. During the 'git sparse-checkout init' call, we must first look to see if HEAD is valid, since 'git clone' does not have a valid HEAD at the point where it initializes the sparse-checkout. The following checkout within the clone command will create the HEAD ref and update the working directory correctly. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/git-clone.txt | 8 +++++++- builtin/clone.c | 27 +++++++++++++++++++++++++++ builtin/sparse-checkout.c | 6 ++++++ t/t1091-sparse-checkout-builtin.sh | 13 +++++++++++++ 4 files changed, 53 insertions(+), 1 deletion(-) diff --git a/Documentation/git-clone.txt b/Documentation/git-clone.txt index 34011c2940..0fe91d2f04 100644 --- a/Documentation/git-clone.txt +++ b/Documentation/git-clone.txt @@ -15,7 +15,7 @@ SYNOPSIS [--dissociate] [--separate-git-dir ] [--depth ] [--[no-]single-branch] [--no-tags] [--recurse-submodules[=]] [--[no-]shallow-submodules] - [--[no-]remote-submodules] [--jobs ] [--] + [--[no-]remote-submodules] [--jobs ] [--sparse] [--] [] DESCRIPTION @@ -156,6 +156,12 @@ objects from the source repository into a pack in the cloned repository. used, neither remote-tracking branches nor the related configuration variables are created. +--sparse:: + Initialize the sparse-checkout file so the working + directory starts with only the files in the root + of the repository. The sparse-checkout file can be + modified to grow the working directory as needed. + --mirror:: Set up a mirror of the source repository. This implies `--bare`. Compared to `--bare`, `--mirror` not only maps local branches of the diff --git a/builtin/clone.c b/builtin/clone.c index c46ee29f0a..4348d962c9 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -59,6 +59,7 @@ static const char *real_git_dir; static char *option_upload_pack = "git-upload-pack"; static int option_verbosity; static int option_progress = -1; +static int option_sparse_checkout; static enum transport_family family; static struct string_list option_config = STRING_LIST_INIT_NODUP; static struct string_list option_required_reference = STRING_LIST_INIT_NODUP; @@ -146,6 +147,8 @@ static struct option builtin_clone_options[] = { OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options), OPT_BOOL(0, "remote-submodules", &option_remote_submodules, N_("any cloned submodules will use their remote-tracking branch")), + OPT_BOOL(0, "sparse", &option_sparse_checkout, + N_("initialize sparse-checkout file to include only files at root")), OPT_END() }; @@ -733,6 +736,27 @@ static void update_head(const struct ref *our, const struct ref *remote, } } +static int git_sparse_checkout_init(const char *repo) +{ + struct argv_array argv = ARGV_ARRAY_INIT; + int result = 0; + argv_array_pushl(&argv, "-C", repo, "sparse-checkout", "init", NULL); + + /* + * We must apply the setting in the current process + * for the later checkout to use the sparse-checkout file. + */ + core_apply_sparse_checkout = 1; + + if (run_command_v_opt(argv.argv, RUN_GIT_CMD)) { + error(_("failed to initialize sparse-checkout")); + result = 1; + } + + argv_array_clear(&argv); + return result; +} + static int checkout(int submodule_progress) { struct object_id oid; @@ -1106,6 +1130,9 @@ int cmd_clone(int argc, const char **argv, const char *prefix) if (option_required_reference.nr || option_optional_reference.nr) setup_reference(); + if (option_sparse_checkout && git_sparse_checkout_init(repo)) + return 1; + remote = remote_get(option_origin); strbuf_addf(&default_refspec, "+%s*:%s*", src_ref_prefix, diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index fcf97e9df8..e3418fbe2a 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -102,6 +102,7 @@ static int sparse_checkout_init(int argc, const char **argv) char *sparse_filename; FILE *fp; int res; + struct object_id oid; if (set_config(MODE_ALL_PATTERNS)) return 1; @@ -126,6 +127,11 @@ static int sparse_checkout_init(int argc, const char **argv) fprintf(fp, "/*\n!/*/\n"); fclose(fp); + if (get_oid("HEAD", &oid)) { + /* assume we are in a fresh repo */ + return 0; + } + reset_dir: return update_working_directory(); } diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index 21143c529c..78c20cb7e3 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -82,4 +82,17 @@ test_expect_success 'init with existing sparse-checkout' ' test_cmp expect dir ' +test_expect_success 'clone --sparse' ' + git clone --sparse repo clone && + git -C clone sparse-checkout list >actual && + cat >expect <<-EOF && + /* + !/*/ + EOF + test_cmp expect actual && + ls clone >dir && + echo a >expect && + test_cmp expect dir +' + test_done -- cgit v1.2.3 From f6039a9423d042d61fb4cfccb395bd04c4bd5322 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 21 Nov 2019 22:04:36 +0000 Subject: sparse-checkout: 'set' subcommand The 'git sparse-checkout set' subcommand takes a list of patterns as arguments and writes them to the sparse-checkout file. Then, it updates the working directory using 'git read-tree -mu HEAD'. The 'set' subcommand will replace the entire contents of the sparse-checkout file. The write_patterns_and_update() method is extracted from cmd_sparse_checkout() to make it easier to implement 'add' and/or 'remove' subcommands in the future. If the core.sparseCheckout config setting is disabled, then enable the config setting in the worktree config. If we set the config this way and the sparse-checkout fails, then re-disable the config setting. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/git-sparse-checkout.txt | 6 +++++ builtin/sparse-checkout.c | 47 +++++++++++++++++++++++++++++++++-- t/t1091-sparse-checkout-builtin.sh | 33 ++++++++++++++++++++++++ 3 files changed, 84 insertions(+), 2 deletions(-) diff --git a/Documentation/git-sparse-checkout.txt b/Documentation/git-sparse-checkout.txt index 491be1345f..ca62669b8c 100644 --- a/Documentation/git-sparse-checkout.txt +++ b/Documentation/git-sparse-checkout.txt @@ -42,6 +42,12 @@ To avoid interfering with other worktrees, it first enables the `extensions.worktreeConfig` setting and makes sure to set the `core.sparseCheckout` setting in the worktree-specific config file. +'set':: + Write a set of patterns to the sparse-checkout file, as given as + a list of arguments following the 'set' subcommand. Update the + working directory to match the new patterns. Enable the + core.sparseCheckout config setting if it is not already enabled. + SPARSE CHECKOUT --------------- diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index e3418fbe2a..95cbd0a42c 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -8,7 +8,7 @@ #include "strbuf.h" static char const * const builtin_sparse_checkout_usage[] = { - N_("git sparse-checkout (init|list)"), + N_("git sparse-checkout (init|list|set) "), NULL }; @@ -66,7 +66,7 @@ static int update_working_directory(void) argv_array_pushl(&argv, "read-tree", "-m", "-u", "HEAD", NULL); if (run_command_v_opt(argv.argv, RUN_GIT_CMD)) { - error(_("failed to update index with new sparse-checkout paths")); + error(_("failed to update index with new sparse-checkout patterns")); result = 1; } @@ -136,6 +136,47 @@ reset_dir: return update_working_directory(); } +static int write_patterns_and_update(struct pattern_list *pl) +{ + char *sparse_filename; + FILE *fp; + + sparse_filename = get_sparse_checkout_filename(); + fp = fopen(sparse_filename, "w"); + write_patterns_to_file(fp, pl); + fclose(fp); + free(sparse_filename); + + return update_working_directory(); +} + +static int sparse_checkout_set(int argc, const char **argv, const char *prefix) +{ + static const char *empty_base = ""; + int i; + struct pattern_list pl; + int result; + int changed_config = 0; + memset(&pl, 0, sizeof(pl)); + + for (i = 1; i < argc; i++) + add_pattern(argv[i], empty_base, 0, &pl, 0); + + if (!core_apply_sparse_checkout) { + set_config(MODE_ALL_PATTERNS); + core_apply_sparse_checkout = 1; + changed_config = 1; + } + + result = write_patterns_and_update(&pl); + + if (result && changed_config) + set_config(MODE_NO_PATTERNS); + + clear_pattern_list(&pl); + return result; +} + int cmd_sparse_checkout(int argc, const char **argv, const char *prefix) { static struct option builtin_sparse_checkout_options[] = { @@ -158,6 +199,8 @@ int cmd_sparse_checkout(int argc, const char **argv, const char *prefix) return sparse_checkout_list(argc, argv); if (!strcmp(argv[0], "init")) return sparse_checkout_init(argc, argv); + if (!strcmp(argv[0], "set")) + return sparse_checkout_set(argc, argv, prefix); } usage_with_options(builtin_sparse_checkout_usage, diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index 78c20cb7e3..72d8bc5c25 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -95,4 +95,37 @@ test_expect_success 'clone --sparse' ' test_cmp expect dir ' +test_expect_success 'set enables config' ' + git init empty-config && + ( + cd empty-config && + test_commit test file && + test_path_is_missing .git/config.worktree && + test_must_fail git sparse-checkout set nothing && + test_path_is_file .git/config.worktree && + test_must_fail git config core.sparseCheckout && + git sparse-checkout set "/*" && + test_cmp_config true core.sparseCheckout + ) +' + +test_expect_success 'set sparse-checkout using builtin' ' + git -C repo sparse-checkout set "/*" "!/*/" "*folder*" && + cat >expect <<-EOF && + /* + !/*/ + *folder* + EOF + git -C repo sparse-checkout list >actual && + test_cmp expect actual && + test_cmp expect repo/.git/info/sparse-checkout && + ls repo >dir && + cat >expect <<-EOF && + a + folder1 + folder2 + EOF + test_cmp expect dir +' + test_done -- cgit v1.2.3 From 7bffca95ea1ca4f55663374ea9b929b9df5be04b Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 21 Nov 2019 22:04:37 +0000 Subject: sparse-checkout: add '--stdin' option to set subcommand The 'git sparse-checkout set' subcommand takes a list of patterns and places them in the sparse-checkout file. Then, it updates the working directory to match those patterns. For a large list of patterns, the command-line call can get very cumbersome. Add a '--stdin' option to instead read patterns over standard in. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/git-sparse-checkout.txt | 3 +++ builtin/sparse-checkout.c | 34 ++++++++++++++++++++++++++++++++-- t/t1091-sparse-checkout-builtin.sh | 20 ++++++++++++++++++++ 3 files changed, 55 insertions(+), 2 deletions(-) diff --git a/Documentation/git-sparse-checkout.txt b/Documentation/git-sparse-checkout.txt index ca62669b8c..a724eae09c 100644 --- a/Documentation/git-sparse-checkout.txt +++ b/Documentation/git-sparse-checkout.txt @@ -47,6 +47,9 @@ To avoid interfering with other worktrees, it first enables the a list of arguments following the 'set' subcommand. Update the working directory to match the new patterns. Enable the core.sparseCheckout config setting if it is not already enabled. ++ +When the `--stdin` option is provided, the patterns are read from +standard in as a newline-delimited list instead of from the arguments. SPARSE CHECKOUT --------------- diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 95cbd0a42c..82bff0020d 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -150,6 +150,15 @@ static int write_patterns_and_update(struct pattern_list *pl) return update_working_directory(); } +static char const * const builtin_sparse_checkout_set_usage[] = { + N_("git sparse-checkout set (--stdin | )"), + NULL +}; + +static struct sparse_checkout_set_opts { + int use_stdin; +} set_opts; + static int sparse_checkout_set(int argc, const char **argv, const char *prefix) { static const char *empty_base = ""; @@ -157,10 +166,31 @@ static int sparse_checkout_set(int argc, const char **argv, const char *prefix) struct pattern_list pl; int result; int changed_config = 0; + + static struct option builtin_sparse_checkout_set_options[] = { + OPT_BOOL(0, "stdin", &set_opts.use_stdin, + N_("read patterns from standard in")), + OPT_END(), + }; + memset(&pl, 0, sizeof(pl)); - for (i = 1; i < argc; i++) - add_pattern(argv[i], empty_base, 0, &pl, 0); + argc = parse_options(argc, argv, prefix, + builtin_sparse_checkout_set_options, + builtin_sparse_checkout_set_usage, + PARSE_OPT_KEEP_UNKNOWN); + + if (set_opts.use_stdin) { + struct strbuf line = STRBUF_INIT; + + while (!strbuf_getline(&line, stdin)) { + char *buf = strbuf_detach(&line, NULL); + add_pattern(buf, empty_base, 0, &pl, 0); + } + } else { + for (i = 0; i < argc; i++) + add_pattern(argv[i], empty_base, 0, &pl, 0); + } if (!core_apply_sparse_checkout) { set_config(MODE_ALL_PATTERNS); diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index 72d8bc5c25..07e73b4674 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -128,4 +128,24 @@ test_expect_success 'set sparse-checkout using builtin' ' test_cmp expect dir ' +test_expect_success 'set sparse-checkout using --stdin' ' + cat >expect <<-EOF && + /* + !/*/ + /folder1/ + /folder2/ + EOF + git -C repo sparse-checkout set --stdin actual && + test_cmp expect actual && + test_cmp expect repo/.git/info/sparse-checkout && + ls repo >dir && + cat >expect <<-EOF && + a + folder1 + folder2 + EOF + test_cmp expect dir +' + test_done -- cgit v1.2.3 From 72918c1ad91504f56c395cc91c5072651125662a Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 21 Nov 2019 22:04:38 +0000 Subject: sparse-checkout: create 'disable' subcommand The instructions for disabling a sparse-checkout to a full working directory are complicated and non-intuitive. Add a subcommand, 'git sparse-checkout disable', to perform those steps for the user. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/git-sparse-checkout.txt | 27 ++++++++++++--------------- builtin/sparse-checkout.c | 26 +++++++++++++++++++++++++- t/t1091-sparse-checkout-builtin.sh | 15 +++++++++++++++ 3 files changed, 52 insertions(+), 16 deletions(-) diff --git a/Documentation/git-sparse-checkout.txt b/Documentation/git-sparse-checkout.txt index a724eae09c..c2cb19f80d 100644 --- a/Documentation/git-sparse-checkout.txt +++ b/Documentation/git-sparse-checkout.txt @@ -51,6 +51,10 @@ To avoid interfering with other worktrees, it first enables the When the `--stdin` option is provided, the patterns are read from standard in as a newline-delimited list instead of from the arguments. +'disable':: + Remove the sparse-checkout file, set `core.sparseCheckout` to + `false`, and restore the working directory to include all files. + SPARSE CHECKOUT --------------- @@ -68,6 +72,14 @@ directory, it updates the skip-worktree bits in the index based on this file. The files matching the patterns in the file will appear in the working directory, and the rest will not. +To enable the sparse-checkout feature, run `git sparse-checkout init` to +initialize a simple sparse-checkout file and enable the `core.sparseCheckout` +config setting. Then, run `git sparse-checkout set` to modify the patterns in +the sparse-checkout file. + +To repopulate the working directory with all files, use the +`git sparse-checkout disable` command. + ## FULL PATTERN SET By default, the sparse-checkout file uses the same syntax as `.gitignore` @@ -82,21 +94,6 @@ using negative patterns. For example, to remove the file `unwanted`: !unwanted ---------------- -Another tricky thing is fully repopulating the working directory when you -no longer want sparse checkout. You cannot just disable "sparse -checkout" because skip-worktree bits are still in the index and your working -directory is still sparsely populated. You should re-populate the working -directory with the `$GIT_DIR/info/sparse-checkout` file content as -follows: - ----------------- -/* ----------------- - -Then you can disable sparse checkout. Sparse checkout support in 'git -checkout' and similar commands is disabled by default. You need to -set `core.sparseCheckout` to `true` in order to have sparse checkout -support. SEE ALSO -------- diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 82bff0020d..e3a8d3460a 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -8,7 +8,7 @@ #include "strbuf.h" static char const * const builtin_sparse_checkout_usage[] = { - N_("git sparse-checkout (init|list|set) "), + N_("git sparse-checkout (init|list|set|disable) "), NULL }; @@ -207,6 +207,28 @@ static int sparse_checkout_set(int argc, const char **argv, const char *prefix) return result; } +static int sparse_checkout_disable(int argc, const char **argv) +{ + char *sparse_filename; + FILE *fp; + + if (set_config(MODE_ALL_PATTERNS)) + die(_("failed to change config")); + + sparse_filename = get_sparse_checkout_filename(); + fp = xfopen(sparse_filename, "w"); + fprintf(fp, "/*\n"); + fclose(fp); + + if (update_working_directory()) + die(_("error while refreshing working directory")); + + unlink(sparse_filename); + free(sparse_filename); + + return set_config(MODE_NO_PATTERNS); +} + int cmd_sparse_checkout(int argc, const char **argv, const char *prefix) { static struct option builtin_sparse_checkout_options[] = { @@ -231,6 +253,8 @@ int cmd_sparse_checkout(int argc, const char **argv, const char *prefix) return sparse_checkout_init(argc, argv); if (!strcmp(argv[0], "set")) return sparse_checkout_set(argc, argv, prefix); + if (!strcmp(argv[0], "disable")) + return sparse_checkout_disable(argc, argv); } usage_with_options(builtin_sparse_checkout_usage, diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index 07e73b4674..c385c62c92 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -148,4 +148,19 @@ test_expect_success 'set sparse-checkout using --stdin' ' test_cmp expect dir ' +test_expect_success 'sparse-checkout disable' ' + git -C repo sparse-checkout disable && + test_path_is_missing repo/.git/info/sparse-checkout && + git -C repo config --list >config && + test_must_fail git config core.sparseCheckout && + ls repo >dir && + cat >expect <<-EOF && + a + deep + folder1 + folder2 + EOF + test_cmp expect dir +' + test_done -- cgit v1.2.3 From e6152e35ff287ab58e2c17065f02cb1be9f4a0aa Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 21 Nov 2019 22:04:39 +0000 Subject: trace2: add region in clear_ce_flags When Git updates the working directory with the sparse-checkout feature enabled, the unpack_trees() method calls clear_ce_flags() to update the skip-wortree bits on the cache entries. This check can be expensive, depending on the patterns used. Add trace2 regions around the method, including some flag information, so we can get granular performance data during experiments. This data will be used to measure improvements to the pattern-matching algorithms for sparse-checkout. Signed-off-by: Jeff Hostetler Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- unpack-trees.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/unpack-trees.c b/unpack-trees.c index 33ea7810d8..01a05ff66d 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1407,15 +1407,23 @@ static int clear_ce_flags(struct index_state *istate, struct pattern_list *pl) { static struct strbuf prefix = STRBUF_INIT; + char label[100]; + int rval; strbuf_reset(&prefix); - return clear_ce_flags_1(istate, + xsnprintf(label, sizeof(label), "clear_ce_flags(0x%08lx,0x%08lx)", + (unsigned long)select_mask, (unsigned long)clear_mask); + trace2_region_enter("unpack_trees", label, the_repository); + rval = clear_ce_flags_1(istate, istate->cache, istate->cache_nr, &prefix, select_mask, clear_mask, pl, 0); + trace2_region_leave("unpack_trees", label, the_repository); + + return rval; } /* -- cgit v1.2.3 From 879321eb0bec25779386445d65242452825155be Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 21 Nov 2019 22:04:40 +0000 Subject: sparse-checkout: add 'cone' mode The sparse-checkout feature can have quadratic performance as the number of patterns and number of entries in the index grow. If there are 1,000 patterns and 1,000,000 entries, this time can be very significant. Create a new Boolean config option, core.sparseCheckoutCone, to indicate that we expect the sparse-checkout file to contain a more limited set of patterns. This is a separate config setting from core.sparseCheckout to avoid breaking older clients by introducing a tri-state option. The config option does nothing right now, but will be expanded upon in a later commit. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/config/core.txt | 10 +++++-- Documentation/git-sparse-checkout.txt | 55 ++++++++++++++++++++++++++++++++++- cache.h | 4 ++- config.c | 5 ++++ environment.c | 1 + t/t1091-sparse-checkout-builtin.sh | 14 +++++++++ 6 files changed, 85 insertions(+), 4 deletions(-) diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt index 852d2ba37a..bdbbee58b9 100644 --- a/Documentation/config/core.txt +++ b/Documentation/config/core.txt @@ -593,8 +593,14 @@ core.multiPackIndex:: multi-pack-index design document]. core.sparseCheckout:: - Enable "sparse checkout" feature. See section "Sparse checkout" in - linkgit:git-read-tree[1] for more information. + Enable "sparse checkout" feature. See linkgit:git-sparse-checkout[1] + for more information. + +core.sparseCheckoutCone:: + Enables the "cone mode" of the sparse checkout feature. When the + sparse-checkout file contains a limited set of patterns, then this + mode provides significant performance advantages. See + linkgit:git-sparse-checkout[1] for more information. core.abbrev:: Set the length object names are abbreviated to. If diff --git a/Documentation/git-sparse-checkout.txt b/Documentation/git-sparse-checkout.txt index c2cb19f80d..8535f0cf40 100644 --- a/Documentation/git-sparse-checkout.txt +++ b/Documentation/git-sparse-checkout.txt @@ -80,7 +80,9 @@ the sparse-checkout file. To repopulate the working directory with all files, use the `git sparse-checkout disable` command. -## FULL PATTERN SET + +FULL PATTERN SET +---------------- By default, the sparse-checkout file uses the same syntax as `.gitignore` files. @@ -95,6 +97,57 @@ using negative patterns. For example, to remove the file `unwanted`: ---------------- +CONE PATTERN SET +---------------- + +The full pattern set allows for arbitrary pattern matches and complicated +inclusion/exclusion rules. These can result in O(N*M) pattern matches when +updating the index, where N is the number of patterns and M is the number +of paths in the index. To combat this performance issue, a more restricted +pattern set is allowed when `core.spareCheckoutCone` is enabled. + +The accepted patterns in the cone pattern set are: + +1. *Recursive:* All paths inside a directory are included. + +2. *Parent:* All files immediately inside a directory are included. + +In addition to the above two patterns, we also expect that all files in the +root directory are included. If a recursive pattern is added, then all +leading directories are added as parent patterns. + +By default, when running `git sparse-checkout init`, the root directory is +added as a parent pattern. At this point, the sparse-checkout file contains +the following patterns: + +---------------- +/* +!/*/ +---------------- + +This says "include everything in root, but nothing two levels below root." +If we then add the folder `A/B/C` as a recursive pattern, the folders `A` and +`A/B` are added as parent patterns. The resulting sparse-checkout file is +now + +---------------- +/* +!/*/ +/A/ +!/A/*/ +/A/B/ +!/A/B/*/ +/A/B/C/ +---------------- + +Here, order matters, so the negative patterns are overridden by the positive +patterns that appear lower in the file. + +If `core.sparseCheckoutCone=true`, then Git will parse the sparse-checkout file +expecting patterns of these types. Git will warn if the patterns do not match. +If the patterns do match the expected format, then Git will use faster hash- +based algorithms to compute inclusion in the sparse-checkout. + SEE ALSO -------- diff --git a/cache.h b/cache.h index 04cabaac11..4980ee198e 100644 --- a/cache.h +++ b/cache.h @@ -918,12 +918,14 @@ extern char *git_replace_ref_base; extern int fsync_object_files; extern int core_preload_index; -extern int core_apply_sparse_checkout; extern int precomposed_unicode; extern int protect_hfs; extern int protect_ntfs; extern const char *core_fsmonitor; +int core_apply_sparse_checkout; +int core_sparse_checkout_cone; + /* * Include broken refs in all ref iterations, which will * generally choke dangerous operations rather than letting diff --git a/config.c b/config.c index e7052b3977..d75f88ca0c 100644 --- a/config.c +++ b/config.c @@ -1364,6 +1364,11 @@ static int git_default_core_config(const char *var, const char *value, void *cb) return 0; } + if (!strcmp(var, "core.sparsecheckoutcone")) { + core_sparse_checkout_cone = git_config_bool(var, value); + return 0; + } + if (!strcmp(var, "core.precomposeunicode")) { precomposed_unicode = git_config_bool(var, value); return 0; diff --git a/environment.c b/environment.c index efa072680a..2a1a866659 100644 --- a/environment.c +++ b/environment.c @@ -67,6 +67,7 @@ enum object_creation_mode object_creation_mode = OBJECT_CREATION_MODE; char *notes_ref_name; int grafts_replace_parents = 1; int core_apply_sparse_checkout; +int core_sparse_checkout_cone; int merge_log_config = -1; int precomposed_unicode = -1; /* see probe_utf8_pathname_composition() */ unsigned long pack_size_limit_cfg; diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index c385c62c92..0b2715db52 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -148,6 +148,20 @@ test_expect_success 'set sparse-checkout using --stdin' ' test_cmp expect dir ' +test_expect_success 'cone mode: match patterns' ' + git -C repo config --worktree core.sparseCheckoutCone true && + rm -rf repo/a repo/folder1 repo/folder2 && + git -C repo read-tree -mu HEAD && + git -C repo reset --hard && + ls repo >dir && + cat >expect <<-EOF && + a + folder1 + folder2 + EOF + test_cmp expect dir +' + test_expect_success 'sparse-checkout disable' ' git -C repo sparse-checkout disable && test_path_is_missing repo/.git/info/sparse-checkout && -- cgit v1.2.3 From 96cc8ab5318cd57c8bc203b8f064b35883b2386f Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 21 Nov 2019 22:04:41 +0000 Subject: sparse-checkout: use hashmaps for cone patterns The parent and recursive patterns allowed by the "cone mode" option in sparse-checkout are restrictive enough that we can avoid using the regex parsing. Everything is based on prefix matches, so we can use hashsets to store the prefixes from the sparse-checkout file. When checking a path, we can strip path entries from the path and check the hashset for an exact match. As a test, I created a cone-mode sparse-checkout file for the Linux repository that actually includes every file. This was constructed by taking every folder in the Linux repo and creating the pattern pairs here: /$folder/ !/$folder/*/ This resulted in a sparse-checkout file sith 8,296 patterns. Running 'git read-tree -mu HEAD' on this file had the following performance: core.sparseCheckout=false: 0.21 s (0.00 s) core.sparseCheckout=true: 3.75 s (3.50 s) core.sparseCheckoutCone=true: 0.23 s (0.01 s) The times in parentheses above correspond to the time spent in the first clear_ce_flags() call, according to the trace2 performance traces. While this example is contrived, it demonstrates how these patterns can slow the sparse-checkout feature. Helped-by: Eric Wong Helped-by: Johannes Schindelin Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- dir.c | 207 +++++++++++++++++++++++++++++++++++-- dir.h | 31 ++++++ t/t1091-sparse-checkout-builtin.sh | 11 +- unpack-trees.c | 1 + 4 files changed, 241 insertions(+), 9 deletions(-) diff --git a/dir.c b/dir.c index 61f559f980..dfabf9982f 100644 --- a/dir.c +++ b/dir.c @@ -611,6 +611,150 @@ void parse_path_pattern(const char **pattern, *patternlen = len; } +static int pl_hashmap_cmp(const void *unused_cmp_data, + const struct hashmap_entry *a, + const struct hashmap_entry *b, + const void *key) +{ + const struct pattern_entry *ee1 = + container_of(a, struct pattern_entry, ent); + const struct pattern_entry *ee2 = + container_of(b, struct pattern_entry, ent); + + size_t min_len = ee1->patternlen <= ee2->patternlen + ? ee1->patternlen + : ee2->patternlen; + + return strncmp(ee1->pattern, ee2->pattern, min_len); +} + +static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern *given) +{ + struct pattern_entry *translated; + char *truncated; + char *data = NULL; + + if (!pl->use_cone_patterns) + return; + + if (given->flags & PATTERN_FLAG_NEGATIVE && + given->flags & PATTERN_FLAG_MUSTBEDIR && + !strcmp(given->pattern, "/*")) { + pl->full_cone = 0; + return; + } + + if (!given->flags && !strcmp(given->pattern, "/*")) { + pl->full_cone = 1; + return; + } + + if (given->patternlen > 2 && + !strcmp(given->pattern + given->patternlen - 2, "/*")) { + if (!(given->flags & PATTERN_FLAG_NEGATIVE)) { + /* Not a cone pattern. */ + pl->use_cone_patterns = 0; + warning(_("unrecognized pattern: '%s'"), given->pattern); + goto clear_hashmaps; + } + + truncated = xstrdup(given->pattern); + truncated[given->patternlen - 2] = 0; + + translated = xmalloc(sizeof(struct pattern_entry)); + translated->pattern = truncated; + translated->patternlen = given->patternlen - 2; + hashmap_entry_init(&translated->ent, + memhash(translated->pattern, translated->patternlen)); + + if (!hashmap_get_entry(&pl->recursive_hashmap, + translated, ent, NULL)) { + /* We did not see the "parent" included */ + warning(_("unrecognized negative pattern: '%s'"), + given->pattern); + free(truncated); + free(translated); + goto clear_hashmaps; + } + + hashmap_add(&pl->parent_hashmap, &translated->ent); + hashmap_remove(&pl->recursive_hashmap, &translated->ent, &data); + free(data); + return; + } + + if (given->flags & PATTERN_FLAG_NEGATIVE) { + warning(_("unrecognized negative pattern: '%s'"), + given->pattern); + goto clear_hashmaps; + } + + translated = xmalloc(sizeof(struct pattern_entry)); + + translated->pattern = xstrdup(given->pattern); + translated->patternlen = given->patternlen; + hashmap_entry_init(&translated->ent, + memhash(translated->pattern, translated->patternlen)); + + hashmap_add(&pl->recursive_hashmap, &translated->ent); + + if (hashmap_get_entry(&pl->parent_hashmap, translated, ent, NULL)) { + /* we already included this at the parent level */ + warning(_("your sparse-checkout file may have issues: pattern '%s' is repeated"), + given->pattern); + hashmap_remove(&pl->parent_hashmap, &translated->ent, &data); + free(data); + free(translated); + } + + return; + +clear_hashmaps: + warning(_("disabling cone pattern matching")); + hashmap_free_entries(&pl->parent_hashmap, struct pattern_entry, ent); + hashmap_free_entries(&pl->recursive_hashmap, struct pattern_entry, ent); + pl->use_cone_patterns = 0; +} + +static int hashmap_contains_path(struct hashmap *map, + struct strbuf *pattern) +{ + struct pattern_entry p; + + /* Check straight mapping */ + p.pattern = pattern->buf; + p.patternlen = pattern->len; + hashmap_entry_init(&p.ent, memhash(p.pattern, p.patternlen)); + return !!hashmap_get_entry(map, &p, ent, NULL); +} + +int hashmap_contains_parent(struct hashmap *map, + const char *path, + struct strbuf *buffer) +{ + char *slash_pos; + + strbuf_setlen(buffer, 0); + + if (path[0] != '/') + strbuf_addch(buffer, '/'); + + strbuf_addstr(buffer, path); + + slash_pos = strrchr(buffer->buf, '/'); + + while (slash_pos > buffer->buf) { + strbuf_setlen(buffer, slash_pos - buffer->buf); + + if (hashmap_contains_path(map, buffer)) + return 1; + + slash_pos = strrchr(buffer->buf, '/'); + } + + return 0; +} + void add_pattern(const char *string, const char *base, int baselen, struct pattern_list *pl, int srcpos) { @@ -635,6 +779,8 @@ void add_pattern(const char *string, const char *base, ALLOC_GROW(pl->patterns, pl->nr + 1, pl->alloc); pl->patterns[pl->nr++] = pattern; pattern->pl = pl; + + add_pattern_to_hashsets(pl, pattern); } static int read_skip_worktree_file_from_index(const struct index_state *istate, @@ -860,6 +1006,9 @@ static int add_patterns_from_buffer(char *buf, size_t size, int i, lineno = 1; char *entry; + hashmap_init(&pl->recursive_hashmap, pl_hashmap_cmp, NULL, 0); + hashmap_init(&pl->parent_hashmap, pl_hashmap_cmp, NULL, 0); + pl->filebuf = buf; if (skip_utf8_bom(&buf, size)) @@ -1096,16 +1245,58 @@ enum pattern_match_result path_matches_pattern_list( struct index_state *istate) { struct path_pattern *pattern; - pattern = last_matching_pattern_from_list(pathname, pathlen, basename, - dtype, pl, istate); - if (pattern) { - if (pattern->flags & PATTERN_FLAG_NEGATIVE) - return NOT_MATCHED; - else - return MATCHED; + struct strbuf parent_pathname = STRBUF_INIT; + int result = NOT_MATCHED; + const char *slash_pos; + + if (!pl->use_cone_patterns) { + pattern = last_matching_pattern_from_list(pathname, pathlen, basename, + dtype, pl, istate); + if (pattern) { + if (pattern->flags & PATTERN_FLAG_NEGATIVE) + return NOT_MATCHED; + else + return MATCHED; + } + + return UNDECIDED; + } + + if (pl->full_cone) + return MATCHED; + + strbuf_addch(&parent_pathname, '/'); + strbuf_add(&parent_pathname, pathname, pathlen); + + if (hashmap_contains_path(&pl->recursive_hashmap, + &parent_pathname)) { + result = MATCHED; + goto done; + } + + slash_pos = strrchr(parent_pathname.buf, '/'); + + if (slash_pos == parent_pathname.buf) { + /* include every file in root */ + result = MATCHED; + goto done; } - return UNDECIDED; + strbuf_setlen(&parent_pathname, slash_pos - parent_pathname.buf); + + if (hashmap_contains_path(&pl->parent_hashmap, &parent_pathname)) { + result = MATCHED; + goto done; + } + + if (hashmap_contains_parent(&pl->recursive_hashmap, + pathname, + &parent_pathname)) + result = MATCHED; + +done: + strbuf_release(&parent_pathname); + return result; } static struct path_pattern *last_matching_pattern_from_lists( diff --git a/dir.h b/dir.h index 2fbdef014f..f8edbca72b 100644 --- a/dir.h +++ b/dir.h @@ -4,6 +4,7 @@ /* See Documentation/technical/api-directory-listing.txt */ #include "cache.h" +#include "hashmap.h" #include "strbuf.h" struct dir_entry { @@ -37,6 +38,13 @@ struct path_pattern { int srcpos; }; +/* used for hashmaps for cone patterns */ +struct pattern_entry { + struct hashmap_entry ent; + char *pattern; + size_t patternlen; +}; + /* * Each excludes file will be parsed into a fresh exclude_list which * is appended to the relevant exclude_list_group (either EXC_DIRS or @@ -55,6 +63,26 @@ struct pattern_list { const char *src; struct path_pattern **patterns; + + /* + * While scanning the excludes, we attempt to match the patterns + * with a more restricted set that allows us to use hashsets for + * matching logic, which is faster than the linear lookup in the + * excludes array above. If non-zero, that check succeeded. + */ + unsigned use_cone_patterns; + unsigned full_cone; + + /* + * Stores paths where everything starting with those paths + * is included. + */ + struct hashmap recursive_hashmap; + + /* + * Used to check single-level parents of blobs. + */ + struct hashmap parent_hashmap; }; /* @@ -271,6 +299,9 @@ int is_excluded(struct dir_struct *dir, struct index_state *istate, const char *name, int *dtype); +int hashmap_contains_parent(struct hashmap *map, + const char *path, + struct strbuf *buffer); struct pattern_list *add_pattern_list(struct dir_struct *dir, int group_type, const char *src); int add_patterns_from_file_to_list(const char *fname, const char *base, int baselen, diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index 0b2715db52..1ed003ac1d 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -151,7 +151,8 @@ test_expect_success 'set sparse-checkout using --stdin' ' test_expect_success 'cone mode: match patterns' ' git -C repo config --worktree core.sparseCheckoutCone true && rm -rf repo/a repo/folder1 repo/folder2 && - git -C repo read-tree -mu HEAD && + git -C repo read-tree -mu HEAD 2>err && + test_i18ngrep ! "disabling cone patterns" err && git -C repo reset --hard && ls repo >dir && cat >expect <<-EOF && @@ -162,6 +163,14 @@ test_expect_success 'cone mode: match patterns' ' test_cmp expect dir ' +test_expect_success 'cone mode: warn on bad pattern' ' + test_when_finished mv sparse-checkout repo/.git/info/ && + cp repo/.git/info/sparse-checkout . && + echo "!/deep/deeper/*" >>repo/.git/info/sparse-checkout && + git -C repo read-tree -mu HEAD 2>err && + test_i18ngrep "unrecognized negative pattern" err +' + test_expect_success 'sparse-checkout disable' ' git -C repo sparse-checkout disable && test_path_is_missing repo/.git/info/sparse-checkout && diff --git a/unpack-trees.c b/unpack-trees.c index 01a05ff66d..a90d71845d 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1482,6 +1482,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options o->skip_sparse_checkout = 1; if (!o->skip_sparse_checkout) { char *sparse = git_pathdup("info/sparse-checkout"); + pl.use_cone_patterns = core_sparse_checkout_cone; if (add_patterns_from_file_to_list(sparse, "", 0, &pl, NULL) < 0) o->skip_sparse_checkout = 1; else -- cgit v1.2.3 From af09ce24a9c79f6efc12d1d8f1052e1d1dbe5016 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 21 Nov 2019 22:04:42 +0000 Subject: sparse-checkout: init and set in cone mode To make the cone pattern set easy to use, update the behavior of 'git sparse-checkout (init|set)'. Add '--cone' flag to 'git sparse-checkout init' to set the config option 'core.sparseCheckoutCone=true'. When running 'git sparse-checkout set' in cone mode, a user only needs to supply a list of recursive folder matches. Git will automatically add the necessary parent matches for the leading directories. When testing 'git sparse-checkout set' in cone mode, check the error stream to ensure we do not see any errors. Specifically, we want to avoid the warning that the patterns do not match the cone-mode patterns. Helped-by: Eric Wong Helped-by: Johannes Schindelin Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/sparse-checkout.c | 163 +++++++++++++++++++++++++++++++++---- dir.c | 8 +- dir.h | 4 + t/t1091-sparse-checkout-builtin.sh | 51 ++++++++++++ 4 files changed, 206 insertions(+), 20 deletions(-) diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index e3a8d3460a..85cc801f03 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -6,6 +6,7 @@ #include "repository.h" #include "run-command.h" #include "strbuf.h" +#include "string-list.h" static char const * const builtin_sparse_checkout_usage[] = { N_("git sparse-checkout (init|list|set|disable) "), @@ -74,9 +75,65 @@ static int update_working_directory(void) return result; } +static void write_cone_to_file(FILE *fp, struct pattern_list *pl) +{ + int i; + struct pattern_entry *pe; + struct hashmap_iter iter; + struct string_list sl = STRING_LIST_INIT_DUP; + + hashmap_for_each_entry(&pl->parent_hashmap, &iter, pe, ent) + string_list_insert(&sl, pe->pattern); + + string_list_sort(&sl); + string_list_remove_duplicates(&sl, 0); + + fprintf(fp, "/*\n!/*/\n"); + + for (i = 0; i < sl.nr; i++) { + char *pattern = sl.items[i].string; + + if (strlen(pattern)) + fprintf(fp, "%s/\n!%s/*/\n", pattern, pattern); + } + + string_list_clear(&sl, 0); + + hashmap_for_each_entry(&pl->recursive_hashmap, &iter, pe, ent) + string_list_insert(&sl, pe->pattern); + + string_list_sort(&sl); + string_list_remove_duplicates(&sl, 0); + + for (i = 0; i < sl.nr; i++) { + char *pattern = sl.items[i].string; + fprintf(fp, "%s/\n", pattern); + } +} + +static int write_patterns_and_update(struct pattern_list *pl) +{ + char *sparse_filename; + FILE *fp; + + sparse_filename = get_sparse_checkout_filename(); + fp = fopen(sparse_filename, "w"); + + if (core_sparse_checkout_cone) + write_cone_to_file(fp, pl); + else + write_patterns_to_file(fp, pl); + + fclose(fp); + free(sparse_filename); + + return update_working_directory(); +} + enum sparse_checkout_mode { MODE_NO_PATTERNS = 0, MODE_ALL_PATTERNS = 1, + MODE_CONE_PATTERNS = 2, }; static int set_config(enum sparse_checkout_mode mode) @@ -93,9 +150,22 @@ static int set_config(enum sparse_checkout_mode mode) "core.sparseCheckout", mode ? "true" : NULL); + git_config_set_in_file_gently(config_path, + "core.sparseCheckoutCone", + mode == MODE_CONE_PATTERNS ? "true" : NULL); + return 0; } +static char const * const builtin_sparse_checkout_init_usage[] = { + N_("git sparse-checkout init [--cone]"), + NULL +}; + +static struct sparse_checkout_init_opts { + int cone_mode; +} init_opts; + static int sparse_checkout_init(int argc, const char **argv) { struct pattern_list pl; @@ -103,8 +173,21 @@ static int sparse_checkout_init(int argc, const char **argv) FILE *fp; int res; struct object_id oid; + int mode; - if (set_config(MODE_ALL_PATTERNS)) + static struct option builtin_sparse_checkout_init_options[] = { + OPT_BOOL(0, "cone", &init_opts.cone_mode, + N_("initialize the sparse-checkout in cone mode")), + OPT_END(), + }; + + argc = parse_options(argc, argv, NULL, + builtin_sparse_checkout_init_options, + builtin_sparse_checkout_init_usage, 0); + + mode = init_opts.cone_mode ? MODE_CONE_PATTERNS : MODE_ALL_PATTERNS; + + if (set_config(mode)) return 1; memset(&pl, 0, sizeof(pl)); @@ -136,18 +219,47 @@ reset_dir: return update_working_directory(); } -static int write_patterns_and_update(struct pattern_list *pl) +static void insert_recursive_pattern(struct pattern_list *pl, struct strbuf *path) { - char *sparse_filename; - FILE *fp; + struct pattern_entry *e = xmalloc(sizeof(*e)); + e->patternlen = path->len; + e->pattern = strbuf_detach(path, NULL); + hashmap_entry_init(&e->ent, memhash(e->pattern, e->patternlen)); - sparse_filename = get_sparse_checkout_filename(); - fp = fopen(sparse_filename, "w"); - write_patterns_to_file(fp, pl); - fclose(fp); - free(sparse_filename); + hashmap_add(&pl->recursive_hashmap, &e->ent); - return update_working_directory(); + while (e->patternlen) { + char *slash = strrchr(e->pattern, '/'); + char *oldpattern = e->pattern; + size_t newlen; + + if (slash == e->pattern) + break; + + newlen = slash - e->pattern; + e = xmalloc(sizeof(struct pattern_entry)); + e->patternlen = newlen; + e->pattern = xstrndup(oldpattern, newlen); + hashmap_entry_init(&e->ent, memhash(e->pattern, e->patternlen)); + + if (!hashmap_get_entry(&pl->parent_hashmap, e, ent, NULL)) + hashmap_add(&pl->parent_hashmap, &e->ent); + } +} + +static void strbuf_to_cone_pattern(struct strbuf *line, struct pattern_list *pl) +{ + strbuf_trim(line); + + strbuf_trim_trailing_dir_sep(line); + + if (!line->len) + return; + + if (line->buf[0] != '/') + strbuf_insert(line, 0, "/", 1); + + insert_recursive_pattern(pl, line); } static char const * const builtin_sparse_checkout_set_usage[] = { @@ -180,16 +292,35 @@ static int sparse_checkout_set(int argc, const char **argv, const char *prefix) builtin_sparse_checkout_set_usage, PARSE_OPT_KEEP_UNKNOWN); - if (set_opts.use_stdin) { + if (core_sparse_checkout_cone) { struct strbuf line = STRBUF_INIT; - while (!strbuf_getline(&line, stdin)) { - char *buf = strbuf_detach(&line, NULL); - add_pattern(buf, empty_base, 0, &pl, 0); + hashmap_init(&pl.recursive_hashmap, pl_hashmap_cmp, NULL, 0); + hashmap_init(&pl.parent_hashmap, pl_hashmap_cmp, NULL, 0); + + if (set_opts.use_stdin) { + while (!strbuf_getline(&line, stdin)) + strbuf_to_cone_pattern(&line, &pl); + } else { + for (i = 0; i < argc; i++) { + strbuf_setlen(&line, 0); + strbuf_addstr(&line, argv[i]); + strbuf_to_cone_pattern(&line, &pl); + } } } else { - for (i = 0; i < argc; i++) - add_pattern(argv[i], empty_base, 0, &pl, 0); + if (set_opts.use_stdin) { + struct strbuf line = STRBUF_INIT; + + while (!strbuf_getline(&line, stdin)) { + size_t len; + char *buf = strbuf_detach(&line, &len); + add_pattern(buf, empty_base, 0, &pl, 0); + } + } else { + for (i = 0; i < argc; i++) + add_pattern(argv[i], empty_base, 0, &pl, 0); + } } if (!core_apply_sparse_checkout) { diff --git a/dir.c b/dir.c index dfabf9982f..35c1ca9e24 100644 --- a/dir.c +++ b/dir.c @@ -611,10 +611,10 @@ void parse_path_pattern(const char **pattern, *patternlen = len; } -static int pl_hashmap_cmp(const void *unused_cmp_data, - const struct hashmap_entry *a, - const struct hashmap_entry *b, - const void *key) +int pl_hashmap_cmp(const void *unused_cmp_data, + const struct hashmap_entry *a, + const struct hashmap_entry *b, + const void *key) { const struct pattern_entry *ee1 = container_of(a, struct pattern_entry, ent); diff --git a/dir.h b/dir.h index f8edbca72b..8e232085cd 100644 --- a/dir.h +++ b/dir.h @@ -299,6 +299,10 @@ int is_excluded(struct dir_struct *dir, struct index_state *istate, const char *name, int *dtype); +int pl_hashmap_cmp(const void *unused_cmp_data, + const struct hashmap_entry *a, + const struct hashmap_entry *b, + const void *key); int hashmap_contains_parent(struct hashmap *map, const char *path, struct strbuf *buffer); diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index 1ed003ac1d..fbd46c3f61 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -186,4 +186,55 @@ test_expect_success 'sparse-checkout disable' ' test_cmp expect dir ' +test_expect_success 'cone mode: init and set' ' + git -C repo sparse-checkout init --cone && + git -C repo config --list >config && + test_i18ngrep "core.sparsecheckoutcone=true" config && + ls repo >dir && + echo a >expect && + test_cmp expect dir && + git -C repo sparse-checkout set deep/deeper1/deepest/ 2>err && + test_must_be_empty err && + ls repo >dir && + cat >expect <<-EOF && + a + deep + EOF + test_cmp expect dir && + ls repo/deep >dir && + cat >expect <<-EOF && + a + deeper1 + EOF + test_cmp expect dir && + ls repo/deep/deeper1 >dir && + cat >expect <<-EOF && + a + deepest + EOF + test_cmp expect dir && + cat >expect <<-EOF && + /* + !/*/ + /deep/ + !/deep/*/ + /deep/deeper1/ + !/deep/deeper1/*/ + /deep/deeper1/deepest/ + EOF + test_cmp expect repo/.git/info/sparse-checkout && + git -C repo sparse-checkout set --stdin 2>err <<-EOF && + folder1 + folder2 + EOF + test_must_be_empty err && + cat >expect <<-EOF && + a + folder1 + folder2 + EOF + ls repo >dir && + test_cmp expect dir +' + test_done -- cgit v1.2.3 From eb42feca974a333e58c2ca0f3cfa8bf0dd421402 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 21 Nov 2019 22:04:43 +0000 Subject: unpack-trees: hash less in cone mode The sparse-checkout feature in "cone mode" can use the fact that the recursive patterns are "connected" to the root via parent patterns to decide if a directory is entirely contained in the sparse-checkout or entirely removed. In these cases, we can skip hashing the paths within those directories and simply set the skipworktree bit to the correct value. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- dir.c | 4 ++-- dir.h | 1 + unpack-trees.c | 38 +++++++++++++++++++++++--------------- 3 files changed, 26 insertions(+), 17 deletions(-) diff --git a/dir.c b/dir.c index 35c1ca9e24..2ef92a50a0 100644 --- a/dir.c +++ b/dir.c @@ -1270,7 +1270,7 @@ enum pattern_match_result path_matches_pattern_list( if (hashmap_contains_path(&pl->recursive_hashmap, &parent_pathname)) { - result = MATCHED; + result = MATCHED_RECURSIVE; goto done; } @@ -1292,7 +1292,7 @@ enum pattern_match_result path_matches_pattern_list( if (hashmap_contains_parent(&pl->recursive_hashmap, pathname, &parent_pathname)) - result = MATCHED; + result = MATCHED_RECURSIVE; done: strbuf_release(&parent_pathname); diff --git a/dir.h b/dir.h index 8e232085cd..77a43dbf89 100644 --- a/dir.h +++ b/dir.h @@ -264,6 +264,7 @@ enum pattern_match_result { UNDECIDED = -1, NOT_MATCHED = 0, MATCHED = 1, + MATCHED_RECURSIVE = 2, }; /* diff --git a/unpack-trees.c b/unpack-trees.c index a90d71845d..c0dca20865 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1283,15 +1283,17 @@ static int clear_ce_flags_dir(struct index_state *istate, struct cache_entry **cache_end; int dtype = DT_DIR; int rc; - enum pattern_match_result ret; - ret = path_matches_pattern_list(prefix->buf, prefix->len, - basename, &dtype, pl, istate); + enum pattern_match_result ret, orig_ret; + orig_ret = path_matches_pattern_list(prefix->buf, prefix->len, + basename, &dtype, pl, istate); strbuf_addch(prefix, '/'); /* If undecided, use matching result of parent dir in defval */ - if (ret == UNDECIDED) + if (orig_ret == UNDECIDED) ret = default_match; + else + ret = orig_ret; for (cache_end = cache; cache_end != cache + nr; cache_end++) { struct cache_entry *ce = *cache_end; @@ -1299,17 +1301,23 @@ static int clear_ce_flags_dir(struct index_state *istate, break; } - /* - * TODO: check pl, if there are no patterns that may conflict - * with ret (iow, we know in advance the incl/excl - * decision for the entire directory), clear flag here without - * calling clear_ce_flags_1(). That function will call - * the expensive path_matches_pattern_list() on every entry. - */ - rc = clear_ce_flags_1(istate, cache, cache_end - cache, - prefix, - select_mask, clear_mask, - pl, ret); + if (pl->use_cone_patterns && orig_ret == MATCHED_RECURSIVE) { + struct cache_entry **ce = cache; + rc = (cache_end - cache) / sizeof(struct cache_entry *); + + while (ce < cache_end) { + (*ce)->ce_flags &= ~clear_mask; + ce++; + } + } else if (pl->use_cone_patterns && orig_ret == NOT_MATCHED) { + rc = (cache_end - cache) / sizeof(struct cache_entry *); + } else { + rc = clear_ce_flags_1(istate, cache, cache_end - cache, + prefix, + select_mask, clear_mask, + pl, ret); + } + strbuf_setlen(prefix, prefix->len - 1); return rc; } -- cgit v1.2.3 From 4dcd4def3ca46fc0e8cb28cbfc815a35dd2c3f1d Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 21 Nov 2019 22:04:44 +0000 Subject: unpack-trees: add progress to clear_ce_flags() When a large repository has many sparse-checkout patterns, the process for updating the skip-worktree bits can take long enough that a user gets confused why nothing is happening. Update the clear_ce_flags() method to write progress. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- cache.h | 2 ++ unpack-trees.c | 56 +++++++++++++++++++++++++++++++++++++++++--------------- 2 files changed, 43 insertions(+), 15 deletions(-) diff --git a/cache.h b/cache.h index 4980ee198e..d3c89e7a53 100644 --- a/cache.h +++ b/cache.h @@ -304,6 +304,7 @@ static inline unsigned int canon_mode(unsigned int mode) struct split_index; struct untracked_cache; +struct progress; struct index_state { struct cache_entry **cache; @@ -326,6 +327,7 @@ struct index_state { uint64_t fsmonitor_last_update; struct ewah_bitmap *fsmonitor_dirty; struct mem_pool *ce_mem_pool; + struct progress *progress; }; /* Name hashing */ diff --git a/unpack-trees.c b/unpack-trees.c index c0dca20865..8bb684ad62 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1269,7 +1269,8 @@ static int clear_ce_flags_1(struct index_state *istate, struct strbuf *prefix, int select_mask, int clear_mask, struct pattern_list *pl, - enum pattern_match_result default_match); + enum pattern_match_result default_match, + int progress_nr); /* Whole directory matching */ static int clear_ce_flags_dir(struct index_state *istate, @@ -1278,7 +1279,8 @@ static int clear_ce_flags_dir(struct index_state *istate, char *basename, int select_mask, int clear_mask, struct pattern_list *pl, - enum pattern_match_result default_match) + enum pattern_match_result default_match, + int progress_nr) { struct cache_entry **cache_end; int dtype = DT_DIR; @@ -1315,7 +1317,8 @@ static int clear_ce_flags_dir(struct index_state *istate, rc = clear_ce_flags_1(istate, cache, cache_end - cache, prefix, select_mask, clear_mask, - pl, ret); + pl, ret, + progress_nr); } strbuf_setlen(prefix, prefix->len - 1); @@ -1342,7 +1345,8 @@ static int clear_ce_flags_1(struct index_state *istate, struct strbuf *prefix, int select_mask, int clear_mask, struct pattern_list *pl, - enum pattern_match_result default_match) + enum pattern_match_result default_match, + int progress_nr) { struct cache_entry **cache_end = cache + nr; @@ -1356,8 +1360,11 @@ static int clear_ce_flags_1(struct index_state *istate, int len, dtype; enum pattern_match_result ret; + display_progress(istate->progress, progress_nr); + if (select_mask && !(ce->ce_flags & select_mask)) { cache++; + progress_nr++; continue; } @@ -1378,20 +1385,26 @@ static int clear_ce_flags_1(struct index_state *istate, prefix, prefix->buf + prefix->len - len, select_mask, clear_mask, - pl, default_match); + pl, default_match, + progress_nr); /* clear_c_f_dir eats a whole dir already? */ if (processed) { cache += processed; + progress_nr += processed; strbuf_setlen(prefix, prefix->len - len); continue; } strbuf_addch(prefix, '/'); - cache += clear_ce_flags_1(istate, cache, cache_end - cache, - prefix, - select_mask, clear_mask, pl, - default_match); + processed = clear_ce_flags_1(istate, cache, cache_end - cache, + prefix, + select_mask, clear_mask, pl, + default_match, progress_nr); + + cache += processed; + progress_nr += processed; + strbuf_setlen(prefix, prefix->len - len - 1); continue; } @@ -1406,19 +1419,27 @@ static int clear_ce_flags_1(struct index_state *istate, if (ret == MATCHED) ce->ce_flags &= ~clear_mask; cache++; + progress_nr++; } + + display_progress(istate->progress, progress_nr); return nr - (cache_end - cache); } static int clear_ce_flags(struct index_state *istate, int select_mask, int clear_mask, - struct pattern_list *pl) + struct pattern_list *pl, + int show_progress) { static struct strbuf prefix = STRBUF_INIT; char label[100]; int rval; strbuf_reset(&prefix); + if (show_progress) + istate->progress = start_delayed_progress( + _("Updating index flags"), + istate->cache_nr); xsnprintf(label, sizeof(label), "clear_ce_flags(0x%08lx,0x%08lx)", (unsigned long)select_mask, (unsigned long)clear_mask); @@ -1428,9 +1449,10 @@ static int clear_ce_flags(struct index_state *istate, istate->cache_nr, &prefix, select_mask, clear_mask, - pl, 0); + pl, 0, 0); trace2_region_leave("unpack_trees", label, the_repository); + stop_progress(&istate->progress); return rval; } @@ -1439,7 +1461,8 @@ static int clear_ce_flags(struct index_state *istate, */ static void mark_new_skip_worktree(struct pattern_list *pl, struct index_state *istate, - int select_flag, int skip_wt_flag) + int select_flag, int skip_wt_flag, + int show_progress) { int i; @@ -1463,7 +1486,7 @@ static void mark_new_skip_worktree(struct pattern_list *pl, * 2. Widen worktree according to sparse-checkout file. * Matched entries will have skip_wt_flag cleared (i.e. "in") */ - clear_ce_flags(istate, select_flag, skip_wt_flag, pl); + clear_ce_flags(istate, select_flag, skip_wt_flag, pl, show_progress); } static int verify_absent(const struct cache_entry *, @@ -1525,7 +1548,8 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options * Sparse checkout loop #1: set NEW_SKIP_WORKTREE on existing entries */ if (!o->skip_sparse_checkout) - mark_new_skip_worktree(o->pl, o->src_index, 0, CE_NEW_SKIP_WORKTREE); + mark_new_skip_worktree(o->pl, o->src_index, 0, + CE_NEW_SKIP_WORKTREE, o->verbose_update); if (!dfc) dfc = xcalloc(1, cache_entry_size(0)); @@ -1590,7 +1614,9 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options * If the will have NEW_SKIP_WORKTREE, also set CE_SKIP_WORKTREE * so apply_sparse_checkout() won't attempt to remove it from worktree */ - mark_new_skip_worktree(o->pl, &o->result, CE_ADDED, CE_SKIP_WORKTREE | CE_NEW_SKIP_WORKTREE); + mark_new_skip_worktree(o->pl, &o->result, + CE_ADDED, CE_SKIP_WORKTREE | CE_NEW_SKIP_WORKTREE, + o->verbose_update); ret = 0; for (i = 0; i < o->result.cache_nr; i++) { -- cgit v1.2.3 From e9de487aa36aa75b5c9068c6bd07cfb8bf2ee955 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 21 Nov 2019 22:04:45 +0000 Subject: sparse-checkout: sanitize for nested folders If a user provides folders A/ and A/B/ for inclusion in a cone-mode sparse-checkout file, the parsing logic will notice that A/ appears both as a "parent" type pattern and as a "recursive" type pattern. This is unexpected and hence will complain via a warning and revert to the old logic for checking sparse-checkout patterns. Prevent this from happening accidentally by sanitizing the folders for this type of inclusion in the 'git sparse-checkout' builtin. This happens in two ways: 1. Do not include any parent patterns that also appear as recursive patterns. 2. Do not include any recursive patterns deeper than other recursive patterns. In order to minimize duplicate code for scanning parents, create hashmap_contains_parent() method. It takes a strbuf buffer to avoid reallocating a buffer when calling in a tight loop. Helped-by: Eric Wong Helped-by: Johannes Schindelin Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/sparse-checkout.c | 22 ++++++++++++++++++---- t/t1091-sparse-checkout-builtin.sh | 11 +++++++++++ 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 85cc801f03..55b337ad8e 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -81,9 +81,17 @@ static void write_cone_to_file(FILE *fp, struct pattern_list *pl) struct pattern_entry *pe; struct hashmap_iter iter; struct string_list sl = STRING_LIST_INIT_DUP; + struct strbuf parent_pattern = STRBUF_INIT; - hashmap_for_each_entry(&pl->parent_hashmap, &iter, pe, ent) - string_list_insert(&sl, pe->pattern); + hashmap_for_each_entry(&pl->parent_hashmap, &iter, pe, ent) { + if (hashmap_get_entry(&pl->recursive_hashmap, pe, ent, NULL)) + continue; + + if (!hashmap_contains_parent(&pl->recursive_hashmap, + pe->pattern, + &parent_pattern)) + string_list_insert(&sl, pe->pattern); + } string_list_sort(&sl); string_list_remove_duplicates(&sl, 0); @@ -99,8 +107,14 @@ static void write_cone_to_file(FILE *fp, struct pattern_list *pl) string_list_clear(&sl, 0); - hashmap_for_each_entry(&pl->recursive_hashmap, &iter, pe, ent) - string_list_insert(&sl, pe->pattern); + hashmap_for_each_entry(&pl->recursive_hashmap, &iter, pe, ent) { + if (!hashmap_contains_parent(&pl->recursive_hashmap, + pe->pattern, + &parent_pattern)) + string_list_insert(&sl, pe->pattern); + } + + strbuf_release(&parent_pattern); string_list_sort(&sl); string_list_remove_duplicates(&sl, 0); diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index fbd46c3f61..b88d08da98 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -237,4 +237,15 @@ test_expect_success 'cone mode: init and set' ' test_cmp expect dir ' +test_expect_success 'cone mode: set with nested folders' ' + git -C repo sparse-checkout set deep deep/deeper1/deepest 2>err && + test_line_count = 0 err && + cat >expect <<-EOF && + /* + !/*/ + /deep/ + EOF + test_cmp repo/.git/info/sparse-checkout expect +' + test_done -- cgit v1.2.3 From e091228e17e88b1bc16cb50d5c3aff10dc5119d1 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 21 Nov 2019 22:04:46 +0000 Subject: sparse-checkout: update working directory in-process The sparse-checkout builtin used 'git read-tree -mu HEAD' to update the skip-worktree bits in the index and to update the working directory. This extra process is overly complex, and prone to failure. It also requires that we write our changes to the sparse-checkout file before trying to update the index. Remove this extra process call by creating a direct call to unpack_trees() in the same way 'git read-tree -mu HEAD' does. In addition, provide an in-memory list of patterns so we can avoid reading from the sparse-checkout file. This allows us to test a proposed change to the file before writing to it. An earlier version of this patch included a bug when the 'set' command failed due to the "Sparse checkout leaves no entry on working directory" error. It would not rollback the index.lock file, so the replay of the old sparse-checkout specification would fail. A test in t1091 now covers that scenario. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/read-tree.c | 2 +- builtin/sparse-checkout.c | 83 ++++++++++++++++++++++++++++++++------ t/t1091-sparse-checkout-builtin.sh | 28 +++++++++++++ unpack-trees.c | 5 ++- unpack-trees.h | 3 +- 5 files changed, 105 insertions(+), 16 deletions(-) diff --git a/builtin/read-tree.c b/builtin/read-tree.c index ca5e655d2f..af7424b94c 100644 --- a/builtin/read-tree.c +++ b/builtin/read-tree.c @@ -185,7 +185,7 @@ int cmd_read_tree(int argc, const char **argv, const char *cmd_prefix) if (opts.reset || opts.merge || opts.prefix) { if (read_cache_unmerged() && (opts.prefix || opts.merge)) - die("You need to resolve your current index first"); + die(_("You need to resolve your current index first")); stage = opts.merge = 1; } resolve_undo_clear(); diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 55b337ad8e..a5d32e4702 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -7,6 +7,11 @@ #include "run-command.h" #include "strbuf.h" #include "string-list.h" +#include "cache.h" +#include "cache-tree.h" +#include "lockfile.h" +#include "resolve-undo.h" +#include "unpack-trees.h" static char const * const builtin_sparse_checkout_usage[] = { N_("git sparse-checkout (init|list|set|disable) "), @@ -60,18 +65,54 @@ static int sparse_checkout_list(int argc, const char **argv) return 0; } -static int update_working_directory(void) +static int update_working_directory(struct pattern_list *pl) { - struct argv_array argv = ARGV_ARRAY_INIT; int result = 0; - argv_array_pushl(&argv, "read-tree", "-m", "-u", "HEAD", NULL); + struct unpack_trees_options o; + struct lock_file lock_file = LOCK_INIT; + struct object_id oid; + struct tree *tree; + struct tree_desc t; + struct repository *r = the_repository; - if (run_command_v_opt(argv.argv, RUN_GIT_CMD)) { - error(_("failed to update index with new sparse-checkout patterns")); - result = 1; - } + if (repo_read_index_unmerged(r)) + die(_("you need to resolve your current index first")); + + if (get_oid("HEAD", &oid)) + return 0; + + tree = parse_tree_indirect(&oid); + parse_tree(tree); + init_tree_desc(&t, tree->buffer, tree->size); + + memset(&o, 0, sizeof(o)); + o.verbose_update = isatty(2); + o.merge = 1; + o.update = 1; + o.fn = oneway_merge; + o.head_idx = -1; + o.src_index = r->index; + o.dst_index = r->index; + o.skip_sparse_checkout = 0; + o.pl = pl; + o.keep_pattern_list = !!pl; + + resolve_undo_clear_index(r->index); + setup_work_tree(); + + cache_tree_free(&r->index->cache_tree); + + repo_hold_locked_index(r, &lock_file, LOCK_DIE_ON_ERROR); + + core_apply_sparse_checkout = 1; + result = unpack_trees(1, &t, &o); + + if (!result) { + prime_cache_tree(r, r->index, tree); + write_locked_index(r->index, &lock_file, COMMIT_LOCK); + } else + rollback_lock_file(&lock_file); - argv_array_clear(&argv); return result; } @@ -129,6 +170,15 @@ static int write_patterns_and_update(struct pattern_list *pl) { char *sparse_filename; FILE *fp; + int result; + + result = update_working_directory(pl); + + if (result) { + clear_pattern_list(pl); + update_working_directory(NULL); + return result; + } sparse_filename = get_sparse_checkout_filename(); fp = fopen(sparse_filename, "w"); @@ -139,9 +189,11 @@ static int write_patterns_and_update(struct pattern_list *pl) write_patterns_to_file(fp, pl); fclose(fp); + free(sparse_filename); + clear_pattern_list(pl); - return update_working_directory(); + return 0; } enum sparse_checkout_mode { @@ -199,7 +251,11 @@ static int sparse_checkout_init(int argc, const char **argv) builtin_sparse_checkout_init_options, builtin_sparse_checkout_init_usage, 0); - mode = init_opts.cone_mode ? MODE_CONE_PATTERNS : MODE_ALL_PATTERNS; + if (init_opts.cone_mode) { + mode = MODE_CONE_PATTERNS; + core_sparse_checkout_cone = 1; + } else + mode = MODE_ALL_PATTERNS; if (set_config(mode)) return 1; @@ -230,7 +286,8 @@ static int sparse_checkout_init(int argc, const char **argv) } reset_dir: - return update_working_directory(); + core_apply_sparse_checkout = 1; + return update_working_directory(NULL); } static void insert_recursive_pattern(struct pattern_list *pl, struct strbuf *path) @@ -311,6 +368,7 @@ static int sparse_checkout_set(int argc, const char **argv, const char *prefix) hashmap_init(&pl.recursive_hashmap, pl_hashmap_cmp, NULL, 0); hashmap_init(&pl.parent_hashmap, pl_hashmap_cmp, NULL, 0); + pl.use_cone_patterns = 1; if (set_opts.use_stdin) { while (!strbuf_getline(&line, stdin)) @@ -365,7 +423,8 @@ static int sparse_checkout_disable(int argc, const char **argv) fprintf(fp, "/*\n"); fclose(fp); - if (update_working_directory()) + core_apply_sparse_checkout = 1; + if (update_working_directory(NULL)) die(_("error while refreshing working directory")); unlink(sparse_filename); diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index b88d08da98..53aeb5980f 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -248,4 +248,32 @@ test_expect_success 'cone mode: set with nested folders' ' test_cmp repo/.git/info/sparse-checkout expect ' +test_expect_success 'revert to old sparse-checkout on bad update' ' + echo update >repo/deep/deeper2/a && + cp repo/.git/info/sparse-checkout expect && + test_must_fail git -C repo sparse-checkout set deep/deeper1 2>err && + test_i18ngrep "Cannot update sparse checkout" err && + test_cmp repo/.git/info/sparse-checkout expect && + ls repo/deep >dir && + cat >expect <<-EOF && + a + deeper1 + deeper2 + EOF + test_cmp dir expect +' + +test_expect_success 'revert to old sparse-checkout on empty update' ' + git init empty-test && + ( + echo >file && + git add file && + git commit -m "test" && + test_must_fail git sparse-checkout set nothing 2>err && + test_i18ngrep "Sparse checkout leaves no entry on working directory" err && + test_i18ngrep ! ".git/index.lock" err && + git sparse-checkout set file + ) +' + test_done diff --git a/unpack-trees.c b/unpack-trees.c index 8bb684ad62..3789a22cf0 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1511,7 +1511,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options memset(&pl, 0, sizeof(pl)); if (!core_apply_sparse_checkout || !o->update) o->skip_sparse_checkout = 1; - if (!o->skip_sparse_checkout) { + if (!o->skip_sparse_checkout && !o->pl) { char *sparse = git_pathdup("info/sparse-checkout"); pl.use_cone_patterns = core_sparse_checkout_cone; if (add_patterns_from_file_to_list(sparse, "", 0, &pl, NULL) < 0) @@ -1684,7 +1684,8 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options done: trace_performance_leave("unpack_trees"); - clear_pattern_list(&pl); + if (!o->keep_pattern_list) + clear_pattern_list(&pl); return ret; return_failed: diff --git a/unpack-trees.h b/unpack-trees.h index f2eee0c7c5..ca94a421a5 100644 --- a/unpack-trees.h +++ b/unpack-trees.h @@ -59,7 +59,8 @@ struct unpack_trees_options { quiet, exiting_early, show_all_errors, - dry_run; + dry_run, + keep_pattern_list; const char *prefix; int cache_bottom; struct dir_struct *dir; -- cgit v1.2.3 From 99dfa6f9702ee81c44ef9382933e4e391ec5d6ee Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 21 Nov 2019 22:04:47 +0000 Subject: sparse-checkout: use in-process update for disable subcommand MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'git sparse-checkout disable' subcommand returns a user to a full working directory. The old process for doing this required updating the sparse-checkout file with the "/*" pattern and then updating the working directory with core.sparseCheckout enabled. Finally, the sparse-checkout file could be removed and the config setting disabled. However, it is valuable to keep a user's sparse-checkout file intact so they can re-enable the sparse-checkout they previously used with 'git sparse-checkout init'. This is now possible with the in-process mechanism for updating the working directory. Reported-by: Szeder Gábor Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/git-sparse-checkout.txt | 6 ++++-- builtin/sparse-checkout.c | 25 ++++++++++++------------- t/t1091-sparse-checkout-builtin.sh | 3 ++- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/Documentation/git-sparse-checkout.txt b/Documentation/git-sparse-checkout.txt index 8535f0cf40..b975285673 100644 --- a/Documentation/git-sparse-checkout.txt +++ b/Documentation/git-sparse-checkout.txt @@ -52,8 +52,10 @@ When the `--stdin` option is provided, the patterns are read from standard in as a newline-delimited list instead of from the arguments. 'disable':: - Remove the sparse-checkout file, set `core.sparseCheckout` to - `false`, and restore the working directory to include all files. + Disable the `core.sparseCheckout` config setting, and restore the + working directory to include all files. Leaves the sparse-checkout + file intact so a later 'git sparse-checkout init' command may + return the working directory to the same state. SPARSE CHECKOUT --------------- diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index a5d32e4702..a11ea65599 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -412,24 +412,23 @@ static int sparse_checkout_set(int argc, const char **argv, const char *prefix) static int sparse_checkout_disable(int argc, const char **argv) { - char *sparse_filename; - FILE *fp; + static const char *empty_base = ""; + struct pattern_list pl; + struct strbuf match_all = STRBUF_INIT; - if (set_config(MODE_ALL_PATTERNS)) - die(_("failed to change config")); + memset(&pl, 0, sizeof(pl)); + hashmap_init(&pl.recursive_hashmap, pl_hashmap_cmp, NULL, 0); + hashmap_init(&pl.parent_hashmap, pl_hashmap_cmp, NULL, 0); + pl.use_cone_patterns = 0; + core_apply_sparse_checkout = 1; - sparse_filename = get_sparse_checkout_filename(); - fp = xfopen(sparse_filename, "w"); - fprintf(fp, "/*\n"); - fclose(fp); + strbuf_addstr(&match_all, "/*"); + add_pattern(strbuf_detach(&match_all, NULL), empty_base, 0, &pl, 0); - core_apply_sparse_checkout = 1; - if (update_working_directory(NULL)) + if (update_working_directory(&pl)) die(_("error while refreshing working directory")); - unlink(sparse_filename); - free(sparse_filename); - + clear_pattern_list(&pl); return set_config(MODE_NO_PATTERNS); } diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index 53aeb5980f..b8f18e2a09 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -172,8 +172,9 @@ test_expect_success 'cone mode: warn on bad pattern' ' ' test_expect_success 'sparse-checkout disable' ' + test_when_finished rm -rf repo/.git/info/sparse-checkout && git -C repo sparse-checkout disable && - test_path_is_missing repo/.git/info/sparse-checkout && + test_path_is_file repo/.git/info/sparse-checkout && git -C repo config --list >config && test_must_fail git config core.sparseCheckout && ls repo >dir && -- cgit v1.2.3 From fb10ca5b54362e6f860e1e9049e03924fcf5f05b Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 21 Nov 2019 22:04:48 +0000 Subject: sparse-checkout: write using lockfile If two 'git sparse-checkout set' subcommands are launched at the same time, the behavior can be unexpected as they compete to write the sparse-checkout file and update the working directory. Take a lockfile around the writes to the sparse-checkout file. In addition, acquire this lock around the working directory update to avoid two commands updating the working directory in different ways. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/sparse-checkout.c | 15 +++++++++++---- t/t1091-sparse-checkout-builtin.sh | 7 +++++++ 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index a11ea65599..9a620ff014 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -170,25 +170,32 @@ static int write_patterns_and_update(struct pattern_list *pl) { char *sparse_filename; FILE *fp; + int fd; + struct lock_file lk = LOCK_INIT; int result; - result = update_working_directory(pl); + sparse_filename = get_sparse_checkout_filename(); + fd = hold_lock_file_for_update(&lk, sparse_filename, + LOCK_DIE_ON_ERROR); + result = update_working_directory(pl); if (result) { + rollback_lock_file(&lk); + free(sparse_filename); clear_pattern_list(pl); update_working_directory(NULL); return result; } - sparse_filename = get_sparse_checkout_filename(); - fp = fopen(sparse_filename, "w"); + fp = xfdopen(fd, "w"); if (core_sparse_checkout_cone) write_cone_to_file(fp, pl); else write_patterns_to_file(fp, pl); - fclose(fp); + fflush(fp); + commit_lock_file(&lk); free(sparse_filename); clear_pattern_list(pl); diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index b8f18e2a09..f074b7f3be 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -277,4 +277,11 @@ test_expect_success 'revert to old sparse-checkout on empty update' ' ) ' +test_expect_success 'fail when lock is taken' ' + test_when_finished rm -rf repo/.git/info/sparse-checkout.lock && + touch repo/.git/info/sparse-checkout.lock && + test_must_fail git -C repo sparse-checkout set deep 2>err && + test_i18ngrep "File exists" err +' + test_done -- cgit v1.2.3 From f75a69f88099689b510c7a7261d637bc7004a749 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 21 Nov 2019 22:04:49 +0000 Subject: sparse-checkout: cone mode should not interact with .gitignore During the development of the sparse-checkout "cone mode" feature, an incorrect placement of the initializer for "use_cone_patterns = 1" caused warnings to show up when a .gitignore file was present with non-cone-mode patterns. This was fixed in the original commit introducing the cone mode, but now we should add a test to avoid hitting this problem again in the future. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/t1091-sparse-checkout-builtin.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index f074b7f3be..e61ddb4ad5 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -284,4 +284,11 @@ test_expect_success 'fail when lock is taken' ' test_i18ngrep "File exists" err ' +test_expect_success '.gitignore should not warn about cone mode' ' + git -C repo config --worktree core.sparseCheckoutCone true && + echo "**/bin/*" >repo/.gitignore && + git -C repo reset --hard 2>err && + test_i18ngrep ! "disabling cone patterns" err +' + test_done -- cgit v1.2.3 From 416adc8711d16c5231693f19ca483dd3d59bef5d Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 21 Nov 2019 22:04:50 +0000 Subject: sparse-checkout: update working directory in-process for 'init' The 'git sparse-checkout init' subcommand previously wrote directly to the sparse-checkout file and then updated the working directory. This may fail if there are modified files not included in the initial pattern set. However, that left a populated sparse-checkout file. Use the in-process working directory update to guarantee that the init subcommand only changes the sparse-checkout file if the working directory update succeeds. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/sparse-checkout.c | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 9a620ff014..5dbfb2a7e0 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -13,6 +13,8 @@ #include "resolve-undo.h" #include "unpack-trees.h" +static const char *empty_base = ""; + static char const * const builtin_sparse_checkout_usage[] = { N_("git sparse-checkout (init|list|set|disable) "), NULL @@ -243,10 +245,10 @@ static int sparse_checkout_init(int argc, const char **argv) { struct pattern_list pl; char *sparse_filename; - FILE *fp; int res; struct object_id oid; int mode; + struct strbuf pattern = STRBUF_INIT; static struct option builtin_sparse_checkout_init_options[] = { OPT_BOOL(0, "cone", &init_opts.cone_mode, @@ -275,26 +277,30 @@ static int sparse_checkout_init(int argc, const char **argv) /* If we already have a sparse-checkout file, use it. */ if (res >= 0) { free(sparse_filename); - goto reset_dir; + core_apply_sparse_checkout = 1; + return update_working_directory(NULL); } - /* initial mode: all blobs at root */ - fp = xfopen(sparse_filename, "w"); - if (!fp) - die(_("failed to open '%s'"), sparse_filename); + if (get_oid("HEAD", &oid)) { + FILE *fp; - free(sparse_filename); - fprintf(fp, "/*\n!/*/\n"); - fclose(fp); + /* assume we are in a fresh repo, but update the sparse-checkout file */ + fp = xfopen(sparse_filename, "w"); + if (!fp) + die(_("failed to open '%s'"), sparse_filename); - if (get_oid("HEAD", &oid)) { - /* assume we are in a fresh repo */ + free(sparse_filename); + fprintf(fp, "/*\n!/*/\n"); + fclose(fp); return 0; } -reset_dir: - core_apply_sparse_checkout = 1; - return update_working_directory(NULL); + strbuf_addstr(&pattern, "/*"); + add_pattern(strbuf_detach(&pattern, NULL), empty_base, 0, &pl, 0); + strbuf_addstr(&pattern, "!/*/"); + add_pattern(strbuf_detach(&pattern, NULL), empty_base, 0, &pl, 0); + + return write_patterns_and_update(&pl); } static void insert_recursive_pattern(struct pattern_list *pl, struct strbuf *path) @@ -351,7 +357,6 @@ static struct sparse_checkout_set_opts { static int sparse_checkout_set(int argc, const char **argv, const char *prefix) { - static const char *empty_base = ""; int i; struct pattern_list pl; int result; @@ -419,7 +424,6 @@ static int sparse_checkout_set(int argc, const char **argv, const char *prefix) static int sparse_checkout_disable(int argc, const char **argv) { - static const char *empty_base = ""; struct pattern_list pl; struct strbuf match_all = STRBUF_INIT; -- cgit v1.2.3 From cff4e9138d8df45e3b6199171092ee781cdadaeb Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 21 Nov 2019 22:04:51 +0000 Subject: sparse-checkout: check for dirty status MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The index-merge performed by 'git sparse-checkout' will erase any staged changes, which can lead to data loss. Prevent these attempts by requiring a clean 'git status' output. Helped-by: Szeder Gábor Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/sparse-checkout.c | 13 +++++++++++++ t/t1091-sparse-checkout-builtin.sh | 15 ++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 5dbfb2a7e0..a542d617a5 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -12,6 +12,7 @@ #include "lockfile.h" #include "resolve-undo.h" #include "unpack-trees.h" +#include "wt-status.h" static const char *empty_base = ""; @@ -256,6 +257,10 @@ static int sparse_checkout_init(int argc, const char **argv) OPT_END(), }; + repo_read_index(the_repository); + require_clean_work_tree(the_repository, + N_("initialize sparse-checkout"), NULL, 1, 0); + argc = parse_options(argc, argv, NULL, builtin_sparse_checkout_init_options, builtin_sparse_checkout_init_usage, 0); @@ -368,6 +373,10 @@ static int sparse_checkout_set(int argc, const char **argv, const char *prefix) OPT_END(), }; + repo_read_index(the_repository); + require_clean_work_tree(the_repository, + N_("set sparse-checkout patterns"), NULL, 1, 0); + memset(&pl, 0, sizeof(pl)); argc = parse_options(argc, argv, prefix, @@ -427,6 +436,10 @@ static int sparse_checkout_disable(int argc, const char **argv) struct pattern_list pl; struct strbuf match_all = STRBUF_INIT; + repo_read_index(the_repository); + require_clean_work_tree(the_repository, + N_("disable sparse-checkout"), NULL, 1, 0); + memset(&pl, 0, sizeof(pl)); hashmap_init(&pl.recursive_hashmap, pl_hashmap_cmp, NULL, 0); hashmap_init(&pl.parent_hashmap, pl_hashmap_cmp, NULL, 0); diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index e61ddb4ad5..d5e2892526 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -250,10 +250,11 @@ test_expect_success 'cone mode: set with nested folders' ' ' test_expect_success 'revert to old sparse-checkout on bad update' ' + test_when_finished git -C repo reset --hard && echo update >repo/deep/deeper2/a && cp repo/.git/info/sparse-checkout expect && test_must_fail git -C repo sparse-checkout set deep/deeper1 2>err && - test_i18ngrep "Cannot update sparse checkout" err && + test_i18ngrep "cannot set sparse-checkout patterns" err && test_cmp repo/.git/info/sparse-checkout expect && ls repo/deep >dir && cat >expect <<-EOF && @@ -291,4 +292,16 @@ test_expect_success '.gitignore should not warn about cone mode' ' test_i18ngrep ! "disabling cone patterns" err ' +test_expect_success 'sparse-checkout (init|set|disable) fails with dirty status' ' + git clone repo dirty && + echo dirty >dirty/folder1/a && + test_must_fail git -C dirty sparse-checkout init && + test_must_fail git -C dirty sparse-checkout set /folder2/* /deep/deeper1/* && + test_must_fail git -C dirty sparse-checkout disable && + git -C dirty reset --hard && + git -C dirty sparse-checkout init && + git -C dirty sparse-checkout set /folder2/* /deep/deeper1/* && + git -C dirty sparse-checkout disable +' + test_done -- cgit v1.2.3 From 190a65f9db8db9d87d54351429f7879fcb4ad608 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 13 Dec 2019 18:09:53 +0000 Subject: sparse-checkout: respect core.ignoreCase in cone mode When a user uses the sparse-checkout feature in cone mode, they add patterns using "git sparse-checkout set ..." or by using "--stdin" to provide the directories line-by-line over stdin. This behaviour naturally looks a lot like the way a user would type "git add ..." If core.ignoreCase is enabled, then "git add" will match the input using a case-insensitive match. Do the same for the sparse-checkout feature. Perform case-insensitive checks while updating the skip-worktree bits during unpack_trees(). This is done by changing the hash algorithm and hashmap comparison methods to optionally use case- insensitive methods. When this is enabled, there is a small performance cost in the hashing algorithm. To tease out the worst possible case, the following was run on a repo with a deep directory structure: git ls-tree -d -r --name-only HEAD | git sparse-checkout set --stdin The 'set' command was timed with core.ignoreCase disabled or enabled. For the repo with a deep history, the numbers were core.ignoreCase=false: 62s core.ignoreCase=true: 74s (+19.3%) For reproducibility, the equivalent test on the Linux kernel repository had these numbers: core.ignoreCase=false: 3.1s core.ignoreCase=true: 3.6s (+16%) Now, this is not an entirely fair comparison, as most users will define their sparse cone using more shallow directories, and the performance improvement from eb42feca97 ("unpack-trees: hash less in cone mode" 2019-11-21) can remove most of the hash cost. For a more realistic test, drop the "-r" from the ls-tree command to store only the first-level directories. In that case, the Linux kernel repository takes 0.2-0.25s in each case, and the deep repository takes one second, plus or minus 0.05s, in each case. Thus, we _can_ demonstrate a cost to this change, but it is unlikely to matter to any reasonable sparse-checkout cone. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/git-sparse-checkout.txt | 5 +++++ builtin/sparse-checkout.c | 10 ++++++++-- dir.c | 15 ++++++++++++--- t/t1091-sparse-checkout-builtin.sh | 17 +++++++++++++++++ 4 files changed, 42 insertions(+), 5 deletions(-) diff --git a/Documentation/git-sparse-checkout.txt b/Documentation/git-sparse-checkout.txt index b975285673..9c3c66cc37 100644 --- a/Documentation/git-sparse-checkout.txt +++ b/Documentation/git-sparse-checkout.txt @@ -150,6 +150,11 @@ expecting patterns of these types. Git will warn if the patterns do not match. If the patterns do match the expected format, then Git will use faster hash- based algorithms to compute inclusion in the sparse-checkout. +If `core.ignoreCase=true`, then the pattern-matching algorithm will use a +case-insensitive check. This corrects for case mismatched filenames in the +'git sparse-checkout set' command to reflect the expected cone in the working +directory. + SEE ALSO -------- diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index a542d617a5..5d62f7a66d 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -313,7 +313,10 @@ static void insert_recursive_pattern(struct pattern_list *pl, struct strbuf *pat struct pattern_entry *e = xmalloc(sizeof(*e)); e->patternlen = path->len; e->pattern = strbuf_detach(path, NULL); - hashmap_entry_init(&e->ent, memhash(e->pattern, e->patternlen)); + hashmap_entry_init(&e->ent, + ignore_case ? + strihash(e->pattern) : + strhash(e->pattern)); hashmap_add(&pl->recursive_hashmap, &e->ent); @@ -329,7 +332,10 @@ static void insert_recursive_pattern(struct pattern_list *pl, struct strbuf *pat e = xmalloc(sizeof(struct pattern_entry)); e->patternlen = newlen; e->pattern = xstrndup(oldpattern, newlen); - hashmap_entry_init(&e->ent, memhash(e->pattern, e->patternlen)); + hashmap_entry_init(&e->ent, + ignore_case ? + strihash(e->pattern) : + strhash(e->pattern)); if (!hashmap_get_entry(&pl->parent_hashmap, e, ent, NULL)) hashmap_add(&pl->parent_hashmap, &e->ent); diff --git a/dir.c b/dir.c index 2ef92a50a0..22d08e61c2 100644 --- a/dir.c +++ b/dir.c @@ -625,6 +625,8 @@ int pl_hashmap_cmp(const void *unused_cmp_data, ? ee1->patternlen : ee2->patternlen; + if (ignore_case) + return strncasecmp(ee1->pattern, ee2->pattern, min_len); return strncmp(ee1->pattern, ee2->pattern, min_len); } @@ -665,7 +667,9 @@ static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern translated->pattern = truncated; translated->patternlen = given->patternlen - 2; hashmap_entry_init(&translated->ent, - memhash(translated->pattern, translated->patternlen)); + ignore_case ? + strihash(translated->pattern) : + strhash(translated->pattern)); if (!hashmap_get_entry(&pl->recursive_hashmap, translated, ent, NULL)) { @@ -694,7 +698,9 @@ static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern translated->pattern = xstrdup(given->pattern); translated->patternlen = given->patternlen; hashmap_entry_init(&translated->ent, - memhash(translated->pattern, translated->patternlen)); + ignore_case ? + strihash(translated->pattern) : + strhash(translated->pattern)); hashmap_add(&pl->recursive_hashmap, &translated->ent); @@ -724,7 +730,10 @@ static int hashmap_contains_path(struct hashmap *map, /* Check straight mapping */ p.pattern = pattern->buf; p.patternlen = pattern->len; - hashmap_entry_init(&p.ent, memhash(p.pattern, p.patternlen)); + hashmap_entry_init(&p.ent, + ignore_case ? + strihash(p.pattern) : + strhash(p.pattern)); return !!hashmap_get_entry(map, &p, ent, NULL); } diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index d5e2892526..cee98a1c8a 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -304,4 +304,21 @@ test_expect_success 'sparse-checkout (init|set|disable) fails with dirty status' git -C dirty sparse-checkout disable ' +test_expect_success 'cone mode: set with core.ignoreCase=true' ' + git -C repo sparse-checkout init --cone && + git -C repo -c core.ignoreCase=true sparse-checkout set folder1 && + cat >expect <<-EOF && + /* + !/*/ + /folder1/ + EOF + test_cmp expect repo/.git/info/sparse-checkout && + ls repo >dir && + cat >expect <<-EOF && + a + folder1 + EOF + test_cmp expect dir +' + test_done -- cgit v1.2.3 From 761e3d26bbe44c51f83c4f1ad198461f57029ebd Mon Sep 17 00:00:00 2001 From: Ed Maste Date: Fri, 20 Dec 2019 19:41:14 +0000 Subject: sparse-checkout: improve OS ls compatibility On FreeBSD, when executed by root ls enables the '-A' option: -A Include directory entries whose names begin with a dot (`.') except for . and ... Automatically set for the super-user unless -I is specified. As a result the .git directory appeared in the output when run as root. Simulate no-dotfile ls behaviour using a shell glob. Helped-by: Eric Wong Helped-by: Junio C Hamano Helped-by: Eric Sunshine Signed-off-by: Ed Maste Acked-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/t1091-sparse-checkout-builtin.sh | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index cee98a1c8a..6f7e2d0c9e 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -4,6 +4,14 @@ test_description='sparse checkout builtin tests' . ./test-lib.sh +list_files() { + # Do not replace this with 'ls "$1"', as "ls" with BSD-lineage + # enables "-A" by default for root and ends up including ".git" and + # such in its output. (Note, though, that running the test suite as + # root is generally not recommended.) + (cd "$1" && printf '%s\n' *) +} + test_expect_success 'setup' ' git init repo && ( @@ -50,7 +58,7 @@ test_expect_success 'git sparse-checkout init' ' EOF test_cmp expect repo/.git/info/sparse-checkout && test_cmp_config -C repo true core.sparsecheckout && - ls repo >dir && + list_files repo >dir && echo a >expect && test_cmp expect dir ' @@ -73,7 +81,7 @@ test_expect_success 'init with existing sparse-checkout' ' *folder* EOF test_cmp expect repo/.git/info/sparse-checkout && - ls repo >dir && + list_files repo >dir && cat >expect <<-EOF && a folder1 @@ -90,7 +98,7 @@ test_expect_success 'clone --sparse' ' !/*/ EOF test_cmp expect actual && - ls clone >dir && + list_files clone >dir && echo a >expect && test_cmp expect dir ' @@ -119,7 +127,7 @@ test_expect_success 'set sparse-checkout using builtin' ' git -C repo sparse-checkout list >actual && test_cmp expect actual && test_cmp expect repo/.git/info/sparse-checkout && - ls repo >dir && + list_files repo >dir && cat >expect <<-EOF && a folder1 @@ -139,7 +147,7 @@ test_expect_success 'set sparse-checkout using --stdin' ' git -C repo sparse-checkout list >actual && test_cmp expect actual && test_cmp expect repo/.git/info/sparse-checkout && - ls repo >dir && + list_files repo >dir && cat >expect <<-EOF && a folder1 @@ -154,7 +162,7 @@ test_expect_success 'cone mode: match patterns' ' git -C repo read-tree -mu HEAD 2>err && test_i18ngrep ! "disabling cone patterns" err && git -C repo reset --hard && - ls repo >dir && + list_files repo >dir && cat >expect <<-EOF && a folder1 @@ -177,7 +185,7 @@ test_expect_success 'sparse-checkout disable' ' test_path_is_file repo/.git/info/sparse-checkout && git -C repo config --list >config && test_must_fail git config core.sparseCheckout && - ls repo >dir && + list_files repo >dir && cat >expect <<-EOF && a deep @@ -191,24 +199,24 @@ test_expect_success 'cone mode: init and set' ' git -C repo sparse-checkout init --cone && git -C repo config --list >config && test_i18ngrep "core.sparsecheckoutcone=true" config && - ls repo >dir && + list_files repo >dir && echo a >expect && test_cmp expect dir && git -C repo sparse-checkout set deep/deeper1/deepest/ 2>err && test_must_be_empty err && - ls repo >dir && + list_files repo >dir && cat >expect <<-EOF && a deep EOF test_cmp expect dir && - ls repo/deep >dir && + list_files repo/deep >dir && cat >expect <<-EOF && a deeper1 EOF test_cmp expect dir && - ls repo/deep/deeper1 >dir && + list_files repo/deep/deeper1 >dir && cat >expect <<-EOF && a deepest @@ -234,7 +242,7 @@ test_expect_success 'cone mode: init and set' ' folder1 folder2 EOF - ls repo >dir && + list_files repo >dir && test_cmp expect dir ' @@ -256,7 +264,7 @@ test_expect_success 'revert to old sparse-checkout on bad update' ' test_must_fail git -C repo sparse-checkout set deep/deeper1 2>err && test_i18ngrep "cannot set sparse-checkout patterns" err && test_cmp repo/.git/info/sparse-checkout expect && - ls repo/deep >dir && + list_files repo/deep >dir && cat >expect <<-EOF && a deeper1 @@ -313,7 +321,7 @@ test_expect_success 'cone mode: set with core.ignoreCase=true' ' /folder1/ EOF test_cmp expect repo/.git/info/sparse-checkout && - ls repo >dir && + list_files repo >dir && cat >expect <<-EOF && a folder1 -- cgit v1.2.3