From 0f871cf56e83d13116b021295688e57f26bbf93d Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 28 Feb 2012 14:20:53 -0800 Subject: grep: use static trans-case table In order to prepare the kwset machinery for a case-insensitive search, we used to use a static table of 256 elements and filled it every time before calling kwsalloc(). Because the kwset machinery will never modify this table, just allocate a single instance globally and fill it at the compile time. Signed-off-by: Junio C Hamano --- cache.h | 3 +++ ctype.c | 36 ++++++++++++++++++++++++++++++++++++ grep.c | 11 +++-------- 3 files changed, 42 insertions(+), 8 deletions(-) diff --git a/cache.h b/cache.h index 79c612fc2f..79dc30574e 100644 --- a/cache.h +++ b/cache.h @@ -1258,4 +1258,7 @@ extern struct startup_info *startup_info; /* builtin/merge.c */ int checkout_fast_forward(const unsigned char *from, const unsigned char *to); +/* in ctype.c, for kwset users */ +extern const char tolower_trans_tbl[256]; + #endif /* CACHE_H */ diff --git a/ctype.c b/ctype.c index b5d856fd26..7c14d85c15 100644 --- a/ctype.c +++ b/ctype.c @@ -25,3 +25,39 @@ unsigned char sane_ctype[256] = { A, A, A, A, A, A, A, A, A, A, A, R, R, 0, P, 0, /* 112..127 */ /* Nothing in the 128.. 
range */ }; + +/* For case-insensitive kwset */ +const char tolower_trans_tbl[256] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + ' ', '!', '"', '#', '$', '%', '&', 0x27, + '(', ')', '*', '+', ',', '-', '.', '/', + '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', ':', ';', '<', '=', '>', '?', + '@', 'a', 'b', 'c', 'd', 'e', 'f', 'g', + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', + 'x', 'y', 'z', '[', 0x5c, ']', '^', '_', + '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', + 'x', 'y', 'z', '{', '|', '}', '~', 0x7f, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, + 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, + 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, + 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, +}; diff --git a/grep.c b/grep.c index b29d09c7f6..1030f38f53 100644 --- a/grep.c +++ b/grep.c @@ -168,15 +168,10 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) p->fixed = 0; if (p->fixed) { - if (opt->regflags & REG_ICASE || p->ignore_case) { - static char trans[256]; - int i; - for (i = 0; i < 256; i++) - trans[i] = tolower(i); - p->kws = kwsalloc(trans); - } else { + if 
(opt->regflags & REG_ICASE || p->ignore_case) + p->kws = kwsalloc(tolower_trans_tbl); + else p->kws = kwsalloc(NULL); - } kwsincr(p->kws, p->pattern, p->patternlen); kwsprep(p->kws); return; -- cgit v1.2.3 From accccde483c3cfd55ef55037e8802ca0baaee5a1 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 21 Feb 2012 01:02:46 -0800 Subject: pickaxe: allow -i to search in patch case-insensitively "git log -S<string>" is a useful way to find the last commit in the codebase that touched the <string>. As it was designed to be used by a porcelain script to dig the history starting from a block of text that appears in the starting commit, it never had to look for anything but an exact match. When used by an end user who wants to look for the last commit that removed a string (e.g. name of a variable) that he vaguely remembers, however, it is useful to support case insensitive match. When given the "--regexp-ignore-case" (or "-i") option, which originally was designed to affect case sensitivity of the search done in the commit log part, e.g. "log --grep", the matches made with the -S/-G pickaxe search are now done case-insensitively. 
Signed-off-by: Junio C Hamano --- diff.h | 1 + diffcore-pickaxe.c | 9 +++- revision.c | 1 + t/t4209-log-pickaxe.sh | 119 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 128 insertions(+), 2 deletions(-) create mode 100755 t/t4209-log-pickaxe.sh diff --git a/diff.h b/diff.h index 0c51724493..436b574a23 100644 --- a/diff.h +++ b/diff.h @@ -80,6 +80,7 @@ typedef struct strbuf *(*diff_prefix_fn_t)(struct diff_options *opt, void *data) #define DIFF_OPT_OVERRIDE_SUBMODULE_CONFIG (1 << 27) #define DIFF_OPT_DIRSTAT_BY_LINE (1 << 28) #define DIFF_OPT_FUNCCONTEXT (1 << 29) +#define DIFF_OPT_PICKAXE_IGNORE_CASE (1 << 30) #define DIFF_OPT_TST(opts, flag) ((opts)->flags & DIFF_OPT_##flag) #define DIFF_OPT_SET(opts, flag) ((opts)->flags |= DIFF_OPT_##flag) diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c index 380a837b5b..ed23eb4bdd 100644 --- a/diffcore-pickaxe.c +++ b/diffcore-pickaxe.c @@ -138,8 +138,12 @@ static void diffcore_pickaxe_grep(struct diff_options *o) { int err; regex_t regex; + int cflags = REG_EXTENDED | REG_NEWLINE; - err = regcomp(&regex, o->pickaxe, REG_EXTENDED | REG_NEWLINE); + if (DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE)) + cflags |= REG_ICASE; + + err = regcomp(&regex, o->pickaxe, cflags); if (err) { char errbuf[1024]; regerror(err, &regex, errbuf, 1024); @@ -237,7 +241,8 @@ static void diffcore_pickaxe_count(struct diff_options *o) } regexp = &regex; } else { - kws = kwsalloc(NULL); + kws = kwsalloc(DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE) + ? 
tolower_trans_tbl : NULL); kwsincr(kws, needle, len); kwsprep(kws); } diff --git a/revision.c b/revision.c index 8764dde381..971b7dc98d 100644 --- a/revision.c +++ b/revision.c @@ -1559,6 +1559,7 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg revs->grep_filter.regflags |= REG_EXTENDED; } else if (!strcmp(arg, "--regexp-ignore-case") || !strcmp(arg, "-i")) { revs->grep_filter.regflags |= REG_ICASE; + DIFF_OPT_SET(&revs->diffopt, PICKAXE_IGNORE_CASE); } else if (!strcmp(arg, "--fixed-strings") || !strcmp(arg, "-F")) { revs->grep_filter.fixed = 1; } else if (!strcmp(arg, "--all-match")) { diff --git a/t/t4209-log-pickaxe.sh b/t/t4209-log-pickaxe.sh new file mode 100755 index 0000000000..eed727341d --- /dev/null +++ b/t/t4209-log-pickaxe.sh @@ -0,0 +1,119 @@ +#!/bin/sh + +test_description='log --grep/--author/--regexp-ignore-case/-S/-G' +. ./test-lib.sh + +test_expect_success setup ' + >file && + git add file && + test_tick && + git commit -m initial && + + echo Picked >file && + test_tick && + git commit -a --author="Another Person " -m second +' + +test_expect_success 'log --grep' ' + git log --grep=initial --format=%H >actual && + git rev-parse --verify HEAD^ >expect && + test_cmp expect actual +' + +test_expect_success 'log --grep --regexp-ignore-case' ' + git log --regexp-ignore-case --grep=InItial --format=%H >actual && + git rev-parse --verify HEAD^ >expect && + test_cmp expect actual +' + +test_expect_success 'log --grep -i' ' + git log -i --grep=InItial --format=%H >actual && + git rev-parse --verify HEAD^ >expect && + test_cmp expect actual +' + +test_expect_success 'log --author --regexp-ignore-case' ' + git log --regexp-ignore-case --author=person --format=%H >actual && + git rev-parse --verify HEAD >expect && + test_cmp expect actual +' + +test_expect_success 'log --author -i' ' + git log -i --author=person --format=%H >actual && + git rev-parse --verify HEAD >expect && + test_cmp expect actual +' + +test_expect_success 
'log -G (nomatch)' ' + git log -Gpicked --format=%H >actual && + >expect && + test_cmp expect actual +' + +test_expect_success 'log -G (match)' ' + git log -GPicked --format=%H >actual && + git rev-parse --verify HEAD >expect && + test_cmp expect actual +' + +test_expect_success 'log -G --regexp-ignore-case (nomatch)' ' + git log --regexp-ignore-case -Gpickle --format=%H >actual && + >expect && + test_cmp expect actual +' + +test_expect_success 'log -G -i (nomatch)' ' + git log -i -Gpickle --format=%H >actual && + >expect && + test_cmp expect actual +' + +test_expect_success 'log -G --regexp-ignore-case (match)' ' + git log --regexp-ignore-case -Gpicked --format=%H >actual && + git rev-parse --verify HEAD >expect && + test_cmp expect actual +' + +test_expect_success 'log -G -i (match)' ' + git log -i -Gpicked --format=%H >actual && + git rev-parse --verify HEAD >expect && + test_cmp expect actual +' + +test_expect_success 'log -S (nomatch)' ' + git log -Spicked --format=%H >actual && + >expect && + test_cmp expect actual +' + +test_expect_success 'log -S (match)' ' + git log -SPicked --format=%H >actual && + git rev-parse --verify HEAD >expect && + test_cmp expect actual +' + +test_expect_success 'log -S --regexp-ignore-case (match)' ' + git log --regexp-ignore-case -Spicked --format=%H >actual && + git rev-parse --verify HEAD >expect && + test_cmp expect actual +' + +test_expect_success 'log -S -i (match)' ' + git log -i -Spicked --format=%H >actual && + git rev-parse --verify HEAD >expect && + test_cmp expect actual +' + +test_expect_success 'log -S --regexp-ignore-case (nomatch)' ' + git log --regexp-ignore-case -Spickle --format=%H >actual && + >expect && + test_cmp expect actual +' + +test_expect_success 'log -S -i (nomatch)' ' + git log -i -Spickle --format=%H >actual && + >expect && + test_cmp expect actual +' + +test_done -- cgit v1.2.3 From f1589d100796c58615033dde10c1c6446b814357 Mon Sep 17 00:00:00 2001 From: Ramsay Jones Date: Sun, 4 Mar 2012 19:10:57 
+0000 Subject: ctype.c: Fix a sparse warning In particular, sparse complains as follows: SP ctype.c ctype.c:30:12: warning: symbol 'tolower_trans_tbl' was not declared.\ Should it be static? An appropriate extern declaration for the 'tolower_trans_tbl' symbol is included in the "cache.h" header file. In order to suppress the warning, therefore, we could replace the "git-compat-util.h" header inclusion with "cache.h", since "cache.h" includes "git-compat-util.h" in turn. Here, however, we choose to move the extern declaration for 'tolower_trans_tbl' into "git-compat-util.h", alongside the other extern declaration from ctype.c for 'sane_ctype'. Signed-off-by: Ramsay Jones Signed-off-by: Junio C Hamano --- cache.h | 3 --- git-compat-util.h | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cache.h b/cache.h index 79dc30574e..79c612fc2f 100644 --- a/cache.h +++ b/cache.h @@ -1258,7 +1258,4 @@ extern struct startup_info *startup_info; /* builtin/merge.c */ int checkout_fast_forward(const unsigned char *from, const unsigned char *to); -/* in ctype.c, for kwset users */ -extern const char tolower_trans_tbl[256]; - #endif /* CACHE_H */ diff --git a/git-compat-util.h b/git-compat-util.h index 230e198fc3..ac0a87bdb9 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -457,6 +457,9 @@ static inline int has_extension(const char *filename, const char *ext) return len > extlen && !memcmp(filename + len - extlen, ext, extlen); } +/* in ctype.c, for kwset users */ +extern const char tolower_trans_tbl[256]; + /* Sane ctype - no locale, and works with signed chars */ #undef isascii #undef isspace -- cgit v1.2.3