blob: 65665a2518bb9958e6e8425350c40883723620ea (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
|
//
// assembly: System
// namespace: System.Text.RegularExpressions
// file: quicksearch.cs
//
// author: Dan Lewis (dlewis@gmx.co.uk)
// (c) 2002
using System;
namespace System.Text.RegularExpressions {
// TODO use simple test for single character strings
class QuickSearch {
// simplified boyer-moore for fast substring matching
public QuickSearch (string str, bool ignore) {
this.str = str;
this.len = str.Length;
this.ignore = ignore;
Setup ();
}
public string String {
get { return str; }
}
public int Length {
get { return len; }
}
public bool IgnoreCase {
get { return ignore; }
}
public int Search (string text, int start, int end) {
if (end > text.Length - len)
end = text.Length - len;
int ptr = start;
if (!ignore) {
while (ptr <= end) {
int i = len - 1;
while (str[i] == text[ptr + i]) {
if (-- i < 0)
return ptr;
}
if (ptr < end)
ptr += shift[text[ptr + len]];
else
break;
}
}
else {
// ignore case: same as above, but we convert text
// to lower case before doing the string compare
while (ptr <= end) {
int i = len - 1;
while (str[i] == Char.ToLower (text[ptr + i])) {
if (-- i < 0)
return ptr;
}
if (ptr < end)
ptr += shift[text[ptr + len]];
else
break;
}
}
return -1;
}
// private
private void Setup () {
if (ignore)
str = str.ToLower ();
// this is a 64k entry shift table. that's 128kb per pattern!
// is it worth compressing this by only storing shifts within
// a (lo, hi) character range? for most substrings this would
// be around 50 bytes...
shift = new int[0x1000];
for (int i = 0; i < 0x1000; ++ i)
shift[i] = len + 1;
for (int i = 0; i < len; ++ i) {
char c = str[i];
shift[c] = len - i;
if (ignore)
shift[Char.ToUpper (c)] = len - i;
}
}
private string str;
private int len;
private bool ignore;
private int[] shift;
}
}
|