Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.kernel.org/pub/scm/git/git.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Johannes Schindelin <johannes.schindelin@gmx.de> 2019-09-16 14:26:40 +0300
committer Johannes Schindelin <johannes.schindelin@gmx.de> 2019-12-05 17:37:07 +0300
commit 7f3551dd686e2237490c17946335a675c4f59881 (patch)
tree 00b0bb3ed0584501a3638e7eaaf28d29bc792cf6
parent 4778452597027f6033db9f3ba6709913eadc3c8c (diff)
parent 91bd46588e6959e6903e275f78b10bd07830d547 (diff)
Merge branch 'disallow-dotgit-via-ntfs-alternate-data-streams'
This patch series plugs an attack vector we had overlooked in our December 2014 work on `core.protectNTFS`. Essentially, the path `.git::$INDEX_ALLOCATION/config` is interpreted as `.git/config` when NTFS Alternate Data Streams are available (which they are on Windows, and at least on network shares that are SMB-mounted on macOS). Needless to say: we don't want that. In fact, we want to stay on the very safe side and not even special-case the `$INDEX_ALLOCATION` stream type: let's just prevent Git from touching _any_ explicitly specified Alternate Data Stream of `.git`. In essence, we'll prevent Git from tracking, or writing to, any path with a segment of the form `.git:<anything>`. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
-rw-r--r-- fsck.c 11
-rw-r--r-- path.c 47
-rw-r--r-- read-cache.c 8
-rw-r--r-- t/helper/test-path-utils.c 96
-rwxr-xr-x t/t0060-path-utils.sh 7
-rwxr-xr-x t/t1014-read-tree-confusing.sh 1
6 files changed, 162 insertions, 8 deletions
diff --git a/fsck.c b/fsck.c
index b1579c7e28..d80a96f4be 100644
--- a/fsck.c
+++ b/fsck.c
@@ -551,7 +551,7 @@ static int fsck_tree(struct tree *item, struct fsck_options *options)
while (desc.size) {
unsigned mode;
- const char *name;
+ const char *name, *backslash;
const struct object_id *oid;
oid = tree_entry_extract(&desc, &name, &mode);
@@ -565,6 +565,15 @@ static int fsck_tree(struct tree *item, struct fsck_options *options)
is_hfs_dotgit(name) ||
is_ntfs_dotgit(name));
has_zero_pad |= *(char *)desc.buffer == '0';
+
+ if ((backslash = strchr(name, '\\'))) {
+ while (backslash) {
+ backslash++;
+ has_dotgit |= is_ntfs_dotgit(backslash);
+ backslash = strchr(backslash, '\\');
+ }
+ }
+
if (update_tree_entry_gently(&desc)) {
retval += report(options, &item->object, FSCK_MSG_BAD_TREE, "cannot be parsed as a tree");
break;
diff --git a/path.c b/path.c
index 9ac0531a29..2037e2d8c1 100644
--- a/path.c
+++ b/path.c
@@ -1302,22 +1302,57 @@ static int only_spaces_and_periods(const char *path, size_t len, size_t skip)
return 1;
}
+/*
+ * On NTFS, we need to be careful to disallow certain synonyms of the `.git/`
+ * directory:
+ *
+ * - For historical reasons, file names that end in spaces or periods are
+ * automatically trimmed. Therefore, `.git . . ./` is a valid way to refer
+ * to `.git/`.
+ *
+ * - For other historical reasons, file names that do not conform to the 8.3
+ * format (up to eight characters for the basename, three for the file
+ * extension, certain characters not allowed such as `+`, etc) are associated
+ * with a so-called "short name", at least on the `C:` drive by default.
+ * Which means that `git~1/` is a valid way to refer to `.git/`.
+ *
+ * Note: Technically, `.git/` could receive the short name `git~2` if the
+ * short name `git~1` were already used. In Git, however, we guarantee that
+ * `.git` is the first item in a directory, therefore it will be associated
+ * with the short name `git~1` (unless short names are disabled).
+ *
+ * - For yet other historical reasons, NTFS supports so-called "Alternate Data
+ * Streams", i.e. metadata associated with a given file, referred to via
+ * `<filename>:<stream-name>:<stream-type>`. There exists a default stream
+ * type for directories, allowing `.git/` to be accessed via
+ * `.git::$INDEX_ALLOCATION/`.
+ *
+ * When this function returns 1, it indicates that the specified file/directory
+ * name refers to a `.git` file or directory, or to any of these synonyms, and
+ * Git should therefore not track it.
+ *
+ * For performance reasons, _all_ Alternate Data Streams of `.git/` are
+ * forbidden, not just `::$INDEX_ALLOCATION`.
+ *
+ * This function is intended to be used by `git fsck` even on platforms where
+ * the backslash is a regular filename character, therefore it needs to handle
+ * backslash characters in the provided `name` specially: they are interpreted
+ * as directory separators.
+ */
int is_ntfs_dotgit(const char *name)
{
size_t len;
for (len = 0; ; len++)
- if (!name[len] || name[len] == '\\' || is_dir_sep(name[len])) {
+ if (!name[len] || name[len] == '\\' || is_dir_sep(name[len]) ||
+ name[len] == ':') {
if (only_spaces_and_periods(name, len, 4) &&
!strncasecmp(name, ".git", 4))
return 1;
if (only_spaces_and_periods(name, len, 5) &&
!strncasecmp(name, "git~1", 5))
return 1;
- if (name[len] != '\\')
- return 0;
- name += len + 1;
- len = -1;
+ return 0;
}
}
@@ -1334,7 +1369,7 @@ static int is_ntfs_dot_generic(const char *name,
only_spaces_and_periods:
for (;;) {
char c = name[i++];
- if (!c)
+ if (!c || c == ':')
return 1;
if (c != ' ' && c != '.')
return 0;
diff --git a/read-cache.c b/read-cache.c
index 5b57b369e8..bde1e70c51 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -874,7 +874,15 @@ inside:
if ((c == '.' && !verify_dotfile(path, mode)) ||
is_dir_sep(c) || c == '\0')
return 0;
+ } else if (c == '\\' && protect_ntfs) {
+ if (is_ntfs_dotgit(path))
+ return 0;
+ if (S_ISLNK(mode)) {
+ if (is_ntfs_dotgitmodules(path))
+ return 0;
+ }
}
+
c = *path++;
}
}
diff --git a/t/helper/test-path-utils.c b/t/helper/test-path-utils.c
index 94846550f7..16d8e689c8 100644
--- a/t/helper/test-path-utils.c
+++ b/t/helper/test-path-utils.c
@@ -176,6 +176,99 @@ static int is_dotgitmodules(const char *path)
return is_hfs_dotgitmodules(path) || is_ntfs_dotgitmodules(path);
}
+/*
+ * A very simple, reproducible pseudo-random generator. Copied from
+ * `test-genrandom.c`.
+ */
+static uint64_t my_random_value = 1234;
+
+static uint64_t my_random(void)
+{
+ my_random_value = my_random_value * 1103515245 + 12345;
+ return my_random_value;
+}
+
+/*
+ * A fast approximation of the square root, without requiring math.h.
+ *
+ * It uses Newton's method to approximate the solution of 0 = x^2 - value.
+ */
+static double my_sqrt(double value)
+{
+ const double epsilon = 1e-6;
+ double x = value;
+
+ if (value == 0)
+ return 0;
+
+ for (;;) {
+ double delta = (value / x - x) / 2;
+ if (delta < epsilon && delta > -epsilon)
+ return x + delta;
+ x += delta;
+ }
+}
+
+static int protect_ntfs_hfs_benchmark(int argc, const char **argv)
+{
+ size_t i, j, nr, min_len = 3, max_len = 20;
+ char **names;
+ int repetitions = 15, file_mode = 0100644;
+ uint64_t begin, end;
+ double m[3][2], v[3][2];
+ uint64_t cumul;
+ double cumul2;
+
+ if (argc > 1 && !strcmp(argv[1], "--with-symlink-mode")) {
+ file_mode = 0120000;
+ argc--;
+ argv++;
+ }
+
+ nr = argc > 1 ? strtoul(argv[1], NULL, 0) : 1000000;
+ ALLOC_ARRAY(names, nr);
+
+ if (argc > 2) {
+ min_len = strtoul(argv[2], NULL, 0);
+ if (argc > 3)
+ max_len = strtoul(argv[3], NULL, 0);
+ if (min_len > max_len)
+ die("min_len > max_len");
+ }
+
+ for (i = 0; i < nr; i++) {
+ size_t len = min_len + (my_random() % (max_len + 1 - min_len));
+
+ names[i] = xmallocz(len);
+ while (len > 0)
+ names[i][--len] = (char)(' ' + (my_random() % ('\x7f' - ' ')));
+ }
+
+ for (protect_ntfs = 0; protect_ntfs < 2; protect_ntfs++)
+ for (protect_hfs = 0; protect_hfs < 2; protect_hfs++) {
+ cumul = 0;
+ cumul2 = 0;
+ for (i = 0; i < repetitions; i++) {
+ begin = getnanotime();
+ for (j = 0; j < nr; j++)
+ verify_path(names[j], file_mode);
+ end = getnanotime();
+ printf("protect_ntfs = %d, protect_hfs = %d: %lfms\n", protect_ntfs, protect_hfs, (end-begin) / (double)1e6);
+ cumul += end - begin;
+ cumul2 += (end - begin) * (end - begin);
+ }
+ m[protect_ntfs][protect_hfs] = cumul / (double)repetitions;
+ v[protect_ntfs][protect_hfs] = my_sqrt(cumul2 / (double)repetitions - m[protect_ntfs][protect_hfs] * m[protect_ntfs][protect_hfs]);
+ printf("mean: %lfms, stddev: %lfms\n", m[protect_ntfs][protect_hfs] / (double)1e6, v[protect_ntfs][protect_hfs] / (double)1e6);
+ }
+
+ for (protect_ntfs = 0; protect_ntfs < 2; protect_ntfs++)
+ for (protect_hfs = 0; protect_hfs < 2; protect_hfs++)
+ printf("ntfs=%d/hfs=%d: %lf%% slower\n", protect_ntfs, protect_hfs, (m[protect_ntfs][protect_hfs] - m[0][0]) * 100 / m[0][0]);
+
+ return 0;
+}
+
int cmd_main(int argc, const char **argv)
{
if (argc == 3 && !strcmp(argv[1], "normalize_path_copy")) {
@@ -290,6 +383,9 @@ int cmd_main(int argc, const char **argv)
return !!res;
}
+ if (argc > 1 && !strcmp(argv[1], "protect_ntfs_hfs"))
+ return !!protect_ntfs_hfs_benchmark(argc - 1, argv + 1);
+
fprintf(stderr, "%s: unknown function name: %s\n", argv[0],
argv[1] ? argv[1] : "(there was none)");
return 1;
diff --git a/t/t0060-path-utils.sh b/t/t0060-path-utils.sh
index 3f3357ed9f..2b8589e921 100755
--- a/t/t0060-path-utils.sh
+++ b/t/t0060-path-utils.sh
@@ -408,6 +408,9 @@ test_expect_success 'match .gitmodules' '
~1000000 \
~9999999 \
\
+ .gitmodules:\$DATA \
+ "gitmod~4 . :\$DATA" \
+ \
--not \
".gitmodules x" \
".gitmodules .x" \
@@ -432,7 +435,9 @@ test_expect_success 'match .gitmodules' '
\
GI7EB~1 \
GI7EB~01 \
- GI7EB~1X
+ GI7EB~1X \
+ \
+ .gitmodules,:\$DATA
'
test_done
diff --git a/t/t1014-read-tree-confusing.sh b/t/t1014-read-tree-confusing.sh
index 2f5a25d503..da3376b3bb 100755
--- a/t/t1014-read-tree-confusing.sh
+++ b/t/t1014-read-tree-confusing.sh
@@ -49,6 +49,7 @@ git~1
.git.SPACE .git.{space}
.\\\\.GIT\\\\foobar backslashes
.git\\\\foobar backslashes2
+.git...:alternate-stream
EOF
test_expect_success 'utf-8 paths allowed with core.protectHFS off' '