Welcome to mirror list, hosted at ThFree Co, Russian Federation.

quicksearch.cs « System.Text.RegularExpressions « System « class « mcs - github.com/mono/mono.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 65665a2518bb9958e6e8425350c40883723620ea (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
//
// assembly:	System
// namespace:	System.Text.RegularExpressions
// file:	quicksearch.cs
//
// author:	Dan Lewis (dlewis@gmx.co.uk)
// 		(c) 2002

using System;

namespace System.Text.RegularExpressions {

	// TODO use simple test for single character strings

	class QuickSearch {
		// simplified boyer-moore for fast substring matching
	
		public QuickSearch (string str, bool ignore) {
			this.str = str;
			this.len = str.Length;
			this.ignore = ignore;
		
			Setup ();
		}
		
		public string String {
			get { return str; }
		}

		public int Length {
			get { return len; }
		}

		public bool IgnoreCase {
			get { return ignore; }
		}

		public int Search (string text, int start, int end) {
			if (end > text.Length - len)
				end = text.Length - len;
		
			int ptr = start;
			if (!ignore) {
				while (ptr <= end) {
					int i = len - 1;
					while (str[i] == text[ptr + i]) {
						if (-- i < 0)
							return ptr;
					}

					if (ptr < end)
						ptr += shift[text[ptr + len]];
					else
						break;
				}
			}
			else {
				// ignore case: same as above, but we convert text
				// to lower case before doing the string compare
			
				while (ptr <= end) {
					int i = len - 1;
					while (str[i] == Char.ToLower (text[ptr + i])) {
						if (-- i < 0)
							return ptr;
					}

					if (ptr < end)
						ptr += shift[text[ptr + len]];
					else
						break;
				}
			}

			return -1;
		}

		// private

		private void Setup () {
			if (ignore)
				str = str.ToLower ();

			// this is a 64k entry shift table. that's 128kb per pattern!
			// is it worth compressing this by only storing shifts within
			// a (lo, hi) character range? for most substrings this would
			// be around 50 bytes...

			shift = new int[0x1000];
			for (int i = 0; i < 0x1000; ++ i)
				shift[i] = len + 1;

			for (int i = 0; i < len; ++ i) {
				char c = str[i];

				shift[c] = len - i;
				if (ignore)
					shift[Char.ToUpper (c)] = len - i;
			}
		}

		private string str;
		private int len;
		private bool ignore;

		private int[] shift;
	}
}