diff options
Diffstat (limited to 'src/core/Analysis')
49 files changed, 11040 insertions, 0 deletions
diff --git a/src/core/Analysis/ASCIIFoldingFilter.cs b/src/core/Analysis/ASCIIFoldingFilter.cs new file mode 100644 index 0000000..6133870 --- /dev/null +++ b/src/core/Analysis/ASCIIFoldingFilter.cs @@ -0,0 +1,3285 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using Lucene.Net.Analysis.Tokenattributes; +using ArrayUtil = Lucene.Net.Util.ArrayUtil; + +namespace Lucene.Net.Analysis +{ + + /// <summary> This class converts alphabetic, numeric, and symbolic Unicode characters + /// which are not in the first 127 ASCII characters (the "Basic Latin" Unicode + /// block) into their ASCII equivalents, if one exists. 
+ /// + /// Characters from the following Unicode blocks are converted; however, only + /// those characters with reasonable ASCII alternatives are converted: + /// + /// <list type="bullet"> + /// <item>C1 Controls and Latin-1 Supplement: <a href="http://www.unicode.org/charts/PDF/U0080.pdf">http://www.unicode.org/charts/PDF/U0080.pdf</a></item> + /// <item>Latin Extended-A: <a href="http://www.unicode.org/charts/PDF/U0100.pdf">http://www.unicode.org/charts/PDF/U0100.pdf</a></item> + /// <item>Latin Extended-B: <a href="http://www.unicode.org/charts/PDF/U0180.pdf">http://www.unicode.org/charts/PDF/U0180.pdf</a></item> + /// <item>Latin Extended Additional: <a href="http://www.unicode.org/charts/PDF/U1E00.pdf">http://www.unicode.org/charts/PDF/U1E00.pdf</a></item> + /// <item>Latin Extended-C: <a href="http://www.unicode.org/charts/PDF/U2C60.pdf">http://www.unicode.org/charts/PDF/U2C60.pdf</a></item> + /// <item>Latin Extended-D: <a href="http://www.unicode.org/charts/PDF/UA720.pdf">http://www.unicode.org/charts/PDF/UA720.pdf</a></item> + /// <item>IPA Extensions: <a href="http://www.unicode.org/charts/PDF/U0250.pdf">http://www.unicode.org/charts/PDF/U0250.pdf</a></item> + /// <item>Phonetic Extensions: <a href="http://www.unicode.org/charts/PDF/U1D00.pdf">http://www.unicode.org/charts/PDF/U1D00.pdf</a></item> + /// <item>Phonetic Extensions Supplement: <a href="http://www.unicode.org/charts/PDF/U1D80.pdf">http://www.unicode.org/charts/PDF/U1D80.pdf</a></item> + /// <item>General Punctuation: <a href="http://www.unicode.org/charts/PDF/U2000.pdf">http://www.unicode.org/charts/PDF/U2000.pdf</a></item> + /// <item>Superscripts and Subscripts: <a href="http://www.unicode.org/charts/PDF/U2070.pdf">http://www.unicode.org/charts/PDF/U2070.pdf</a></item> + /// <item>Enclosed Alphanumerics: <a href="http://www.unicode.org/charts/PDF/U2460.pdf">http://www.unicode.org/charts/PDF/U2460.pdf</a></item> + /// <item>Dingbats: <a 
href="http://www.unicode.org/charts/PDF/U2700.pdf">http://www.unicode.org/charts/PDF/U2700.pdf</a></item> + /// <item>Supplemental Punctuation: <a href="http://www.unicode.org/charts/PDF/U2E00.pdf">http://www.unicode.org/charts/PDF/U2E00.pdf</a></item> + /// <item>Alphabetic Presentation Forms: <a href="http://www.unicode.org/charts/PDF/UFB00.pdf">http://www.unicode.org/charts/PDF/UFB00.pdf</a></item> + /// <item>Halfwidth and Fullwidth Forms: <a href="http://www.unicode.org/charts/PDF/UFF00.pdf">http://www.unicode.org/charts/PDF/UFF00.pdf</a></item> + /// </list> + /// + /// See: <a href="http://en.wikipedia.org/wiki/Latin_characters_in_Unicode">http://en.wikipedia.org/wiki/Latin_characters_in_Unicode</a> + /// + /// The set of character conversions supported by this class is a superset of + /// those supported by Lucene's <see cref="ISOLatin1AccentFilter" /> which strips + /// accents from Latin1 characters. For example, 'À' will be replaced by + /// 'a'. + /// </summary> + public sealed class ASCIIFoldingFilter : TokenFilter + { + public ASCIIFoldingFilter(TokenStream input):base(input) + { + termAtt = AddAttribute<ITermAttribute>(); + } + + private char[] output = new char[512]; + private int outputPos; + private ITermAttribute termAtt; + + public override bool IncrementToken() + { + if (input.IncrementToken()) + { + char[] buffer = termAtt.TermBuffer(); + int length = termAtt.TermLength(); + + // If no characters actually require rewriting then we + // just return token as-is: + for (int i = 0; i < length; ++i) + { + char c = buffer[i]; + if (c >= '\u0080') + { + FoldToASCII(buffer, length); + termAtt.SetTermBuffer(output, 0, outputPos); + break; + } + } + return true; + } + else + { + return false; + } + } + + /// <summary> Converts characters above ASCII to their ASCII equivalents. For example, + /// accents are removed from accented characters. 
+ /// </summary> + /// <param name="input">The string to fold + /// </param> + /// <param name="length">The number of characters in the input string + /// </param> + public void FoldToASCII(char[] input, int length) + { + // Worst-case length required: + int maxSizeNeeded = 4 * length; + if (output.Length < maxSizeNeeded) + { + output = new char[ArrayUtil.GetNextSize(maxSizeNeeded)]; + } + + outputPos = 0; + + for (int pos = 0; pos < length; ++pos) + { + char c = input[pos]; + + // Quick test: if it's not in range then just keep current character + if (c < '\u0080') + { + output[outputPos++] = c; + } + else + { + switch (c) + { + + case '\u00C0': + // À [LATIN CAPITAL LETTER A WITH GRAVE] + case '\u00C1': + // � [LATIN CAPITAL LETTER A WITH ACUTE] + case '\u00C2': + //  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX] + case '\u00C3': + // à [LATIN CAPITAL LETTER A WITH TILDE] + case '\u00C4': + // Ä [LATIN CAPITAL LETTER A WITH DIAERESIS] + case '\u00C5': + // Ã… [LATIN CAPITAL LETTER A WITH RING ABOVE] + case '\u0100': + // Ä€ [LATIN CAPITAL LETTER A WITH MACRON] + case '\u0102': + // Ä‚ [LATIN CAPITAL LETTER A WITH BREVE] + case '\u0104': + // Ä„ [LATIN CAPITAL LETTER A WITH OGONEK] + case '\u018F': + // � http://en.wikipedia.org/wiki/Schwa [LATIN CAPITAL LETTER SCHWA] + case '\u01CD': + // � [LATIN CAPITAL LETTER A WITH CARON] + case '\u01DE': + // Çž [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON] + case '\u01E0': + // Ç [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON] + case '\u01FA': + // Ǻ [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE] + case '\u0200': + // È€ [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE] + case '\u0202': + // È‚ [LATIN CAPITAL LETTER A WITH INVERTED BREVE] + case '\u0226': + // Ȧ [LATIN CAPITAL LETTER A WITH DOT ABOVE] + case '\u023A': + // Ⱥ [LATIN CAPITAL LETTER A WITH STROKE] + case '\u1D00': + // á´€ [LATIN LETTER SMALL CAPITAL A] + case '\u1E00': + // Ḁ [LATIN CAPITAL LETTER A WITH RING BELOW] + case '\u1EA0': + // Ạ[LATIN CAPITAL 
LETTER A WITH DOT BELOW] + case '\u1EA2': + // Ả [LATIN CAPITAL LETTER A WITH HOOK ABOVE] + case '\u1EA4': + // Ấ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE] + case '\u1EA6': + // Ầ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE] + case '\u1EA8': + // Ẩ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE] + case '\u1EAA': + // Ẫ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE] + case '\u1EAC': + // Ậ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW] + case '\u1EAE': + // Ắ [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE] + case '\u1EB0': + // Ằ [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE] + case '\u1EB2': + // Ẳ [LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE] + case '\u1EB4': + // Ẵ [LATIN CAPITAL LETTER A WITH BREVE AND TILDE] + case '\u1EB6': + // Ặ [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW] + case '\u24B6': + // â’¶ [CIRCLED LATIN CAPITAL LETTER A] + case '\uFF21': // A [FULLWIDTH LATIN CAPITAL LETTER A] + output[outputPos++] = 'A'; + break; + + case '\u00E0': + // à[LATIN SMALL LETTER A WITH GRAVE] + case '\u00E1': + // á [LATIN SMALL LETTER A WITH ACUTE] + case '\u00E2': + // â [LATIN SMALL LETTER A WITH CIRCUMFLEX] + case '\u00E3': + // ã [LATIN SMALL LETTER A WITH TILDE] + case '\u00E4': + // ä [LATIN SMALL LETTER A WITH DIAERESIS] + case '\u00E5': + // Ã¥ [LATIN SMALL LETTER A WITH RING ABOVE] + case '\u0101': + // � [LATIN SMALL LETTER A WITH MACRON] + case '\u0103': + // ă [LATIN SMALL LETTER A WITH BREVE] + case '\u0105': + // Ä… [LATIN SMALL LETTER A WITH OGONEK] + case '\u01CE': + // ÇŽ [LATIN SMALL LETTER A WITH CARON] + case '\u01DF': + // ÇŸ [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON] + case '\u01E1': + // Ç¡ [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON] + case '\u01FB': + // Ç» [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE] + case '\u0201': + // � [LATIN SMALL LETTER A WITH DOUBLE GRAVE] + case '\u0203': + // ȃ [LATIN SMALL LETTER A WITH INVERTED BREVE] + case '\u0227': + // ȧ [LATIN SMALL LETTER A WITH DOT 
ABOVE] + case '\u0250': + // � [LATIN SMALL LETTER TURNED A] + case '\u0259': + // É™ [LATIN SMALL LETTER SCHWA] + case '\u025A': + // Éš [LATIN SMALL LETTER SCHWA WITH HOOK] + case '\u1D8F': + // � [LATIN SMALL LETTER A WITH RETROFLEX HOOK] + case '\u1D95': + // ᶕ [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK] + case '\u1E01': + // ạ [LATIN SMALL LETTER A WITH RING BELOW] + case '\u1E9A': + // ả [LATIN SMALL LETTER A WITH RIGHT HALF RING] + case '\u1EA1': + // ạ [LATIN SMALL LETTER A WITH DOT BELOW] + case '\u1EA3': + // ả [LATIN SMALL LETTER A WITH HOOK ABOVE] + case '\u1EA5': + // ấ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE] + case '\u1EA7': + // ầ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE] + case '\u1EA9': + // ẩ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE] + case '\u1EAB': + // ẫ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE] + case '\u1EAD': + // Ạ[LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW] + case '\u1EAF': + // ắ [LATIN SMALL LETTER A WITH BREVE AND ACUTE] + case '\u1EB1': + // ằ [LATIN SMALL LETTER A WITH BREVE AND GRAVE] + case '\u1EB3': + // ẳ [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE] + case '\u1EB5': + // ẵ [LATIN SMALL LETTER A WITH BREVE AND TILDE] + case '\u1EB7': + // ặ [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW] + case '\u2090': + // � [LATIN SUBSCRIPT SMALL LETTER A] + case '\u2094': + // �? 
[LATIN SUBSCRIPT SMALL LETTER SCHWA] + case '\u24D0': + // � [CIRCLED LATIN SMALL LETTER A] + case '\u2C65': + // â±¥ [LATIN SMALL LETTER A WITH STROKE] + case '\u2C6F': + // Ɐ [LATIN CAPITAL LETTER TURNED A] + case '\uFF41': // � [FULLWIDTH LATIN SMALL LETTER A] + output[outputPos++] = 'a'; + break; + + case '\uA732': // Ꜳ [LATIN CAPITAL LETTER AA] + output[outputPos++] = 'A'; + output[outputPos++] = 'A'; + break; + + case '\u00C6': + // Æ[LATIN CAPITAL LETTER AE] + case '\u01E2': + // Ç¢ [LATIN CAPITAL LETTER AE WITH MACRON] + case '\u01FC': + // Ǽ [LATIN CAPITAL LETTER AE WITH ACUTE] + case '\u1D01': // á´� [LATIN LETTER SMALL CAPITAL AE] + output[outputPos++] = 'A'; + output[outputPos++] = 'E'; + break; + + case '\uA734': // Ꜵ [LATIN CAPITAL LETTER AO] + output[outputPos++] = 'A'; + output[outputPos++] = 'O'; + break; + + case '\uA736': // Ꜷ [LATIN CAPITAL LETTER AU] + output[outputPos++] = 'A'; + output[outputPos++] = 'U'; + break; + + case '\uA738': + // Ꜹ [LATIN CAPITAL LETTER AV] + case '\uA73A': // Ꜻ [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR] + output[outputPos++] = 'A'; + output[outputPos++] = 'V'; + break; + + case '\uA73C': // Ꜽ [LATIN CAPITAL LETTER AY] + output[outputPos++] = 'A'; + output[outputPos++] = 'Y'; + break; + + case '\u249C': // â’œ [PARENTHESIZED LATIN SMALL LETTER A] + output[outputPos++] = '('; + output[outputPos++] = 'a'; + output[outputPos++] = ')'; + break; + + case '\uA733': // ꜳ [LATIN SMALL LETTER AA] + output[outputPos++] = 'a'; + output[outputPos++] = 'a'; + break; + + case '\u00E6': + // æ [LATIN SMALL LETTER AE] + case '\u01E3': + // Ç£ [LATIN SMALL LETTER AE WITH MACRON] + case '\u01FD': + // ǽ [LATIN SMALL LETTER AE WITH ACUTE] + case '\u1D02': // á´‚ [LATIN SMALL LETTER TURNED AE] + output[outputPos++] = 'a'; + output[outputPos++] = 'e'; + break; + + case '\uA735': // ꜵ [LATIN SMALL LETTER AO] + output[outputPos++] = 'a'; + output[outputPos++] = 'o'; + break; + + case '\uA737': // ꜷ [LATIN SMALL LETTER AU] + 
output[outputPos++] = 'a'; + output[outputPos++] = 'u'; + break; + + case '\uA739': + // ꜹ [LATIN SMALL LETTER AV] + case '\uA73B': // ꜻ [LATIN SMALL LETTER AV WITH HORIZONTAL BAR] + output[outputPos++] = 'a'; + output[outputPos++] = 'v'; + break; + + case '\uA73D': // ꜽ [LATIN SMALL LETTER AY] + output[outputPos++] = 'a'; + output[outputPos++] = 'y'; + break; + + case '\u0181': + // � [LATIN CAPITAL LETTER B WITH HOOK] + case '\u0182': + // Æ‚ [LATIN CAPITAL LETTER B WITH TOPBAR] + case '\u0243': + // Ƀ [LATIN CAPITAL LETTER B WITH STROKE] + case '\u0299': + // Ê™ [LATIN LETTER SMALL CAPITAL B] + case '\u1D03': + // á´ƒ [LATIN LETTER SMALL CAPITAL BARRED B] + case '\u1E02': + // Ḃ [LATIN CAPITAL LETTER B WITH DOT ABOVE] + case '\u1E04': + // Ḅ [LATIN CAPITAL LETTER B WITH DOT BELOW] + case '\u1E06': + // Ḇ[LATIN CAPITAL LETTER B WITH LINE BELOW] + case '\u24B7': + // â’· [CIRCLED LATIN CAPITAL LETTER B] + case '\uFF22': // ï¼¢ [FULLWIDTH LATIN CAPITAL LETTER B] + output[outputPos++] = 'B'; + break; + + case '\u0180': + // Æ€ [LATIN SMALL LETTER B WITH STROKE] + case '\u0183': + // ƃ [LATIN SMALL LETTER B WITH TOPBAR] + case '\u0253': + // É“ [LATIN SMALL LETTER B WITH HOOK] + case '\u1D6C': + // ᵬ [LATIN SMALL LETTER B WITH MIDDLE TILDE] + case '\u1D80': + // ᶀ [LATIN SMALL LETTER B WITH PALATAL HOOK] + case '\u1E03': + // ḃ [LATIN SMALL LETTER B WITH DOT ABOVE] + case '\u1E05': + // ḅ [LATIN SMALL LETTER B WITH DOT BELOW] + case '\u1E07': + // ḇ [LATIN SMALL LETTER B WITH LINE BELOW] + case '\u24D1': + // â“‘ [CIRCLED LATIN SMALL LETTER B] + case '\uFF42': // b [FULLWIDTH LATIN SMALL LETTER B] + output[outputPos++] = 'b'; + break; + + case '\u249D': // â’� [PARENTHESIZED LATIN SMALL LETTER B] + output[outputPos++] = '('; + output[outputPos++] = 'b'; + output[outputPos++] = ')'; + break; + + case '\u00C7': + // Ç [LATIN CAPITAL LETTER C WITH CEDILLA] + case '\u0106': + // Ć[LATIN CAPITAL LETTER C WITH ACUTE] + case '\u0108': + // Ĉ [LATIN CAPITAL LETTER C WITH 
CIRCUMFLEX] + case '\u010A': + // ÄŠ[LATIN CAPITAL LETTER C WITH DOT ABOVE] + case '\u010C': + // ÄŒ [LATIN CAPITAL LETTER C WITH CARON] + case '\u0187': + // Ƈ [LATIN CAPITAL LETTER C WITH HOOK] + case '\u023B': + // È» [LATIN CAPITAL LETTER C WITH STROKE] + case '\u0297': + // Ê— [LATIN LETTER STRETCHED C] + case '\u1D04': + // á´„ [LATIN LETTER SMALL CAPITAL C] + case '\u1E08': + // Ḉ [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE] + case '\u24B8': + // â’¸ [CIRCLED LATIN CAPITAL LETTER C] + case '\uFF23': // ï¼£ [FULLWIDTH LATIN CAPITAL LETTER C] + output[outputPos++] = 'C'; + break; + + case '\u00E7': + // ç [LATIN SMALL LETTER C WITH CEDILLA] + case '\u0107': + // ć [LATIN SMALL LETTER C WITH ACUTE] + case '\u0109': + // ĉ [LATIN SMALL LETTER C WITH CIRCUMFLEX] + case '\u010B': + // Ä‹ [LATIN SMALL LETTER C WITH DOT ABOVE] + case '\u010D': + // � [LATIN SMALL LETTER C WITH CARON] + case '\u0188': + // ƈ [LATIN SMALL LETTER C WITH HOOK] + case '\u023C': + // ȼ [LATIN SMALL LETTER C WITH STROKE] + case '\u0255': + // É• [LATIN SMALL LETTER C WITH CURL] + case '\u1E09': + // ḉ [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE] + case '\u2184': + // ↄ [LATIN SMALL LETTER REVERSED C] + case '\u24D2': + // â“’ [CIRCLED LATIN SMALL LETTER C] + case '\uA73E': + // Ꜿ [LATIN CAPITAL LETTER REVERSED C WITH DOT] + case '\uA73F': + // ꜿ [LATIN SMALL LETTER REVERSED C WITH DOT] + case '\uFF43': // c [FULLWIDTH LATIN SMALL LETTER C] + output[outputPos++] = 'c'; + break; + + case '\u249E': // â’ž [PARENTHESIZED LATIN SMALL LETTER C] + output[outputPos++] = '('; + output[outputPos++] = 'c'; + output[outputPos++] = ')'; + break; + + case '\u00D0': + // � [LATIN CAPITAL LETTER ETH] + case '\u010E': + // ÄŽ [LATIN CAPITAL LETTER D WITH CARON] + case '\u0110': + // � [LATIN CAPITAL LETTER D WITH STROKE] + case '\u0189': + // Ɖ [LATIN CAPITAL LETTER AFRICAN D] + case '\u018A': + // ÆŠ[LATIN CAPITAL LETTER D WITH HOOK] + case '\u018B': + // Æ‹ [LATIN CAPITAL LETTER D WITH TOPBAR] + case 
'\u1D05': + // á´… [LATIN LETTER SMALL CAPITAL D] + case '\u1D06': + // á´†[LATIN LETTER SMALL CAPITAL ETH] + case '\u1E0A': + // Ḋ[LATIN CAPITAL LETTER D WITH DOT ABOVE] + case '\u1E0C': + // Ḍ [LATIN CAPITAL LETTER D WITH DOT BELOW] + case '\u1E0E': + // Ḏ [LATIN CAPITAL LETTER D WITH LINE BELOW] + case '\u1E10': + // � [LATIN CAPITAL LETTER D WITH CEDILLA] + case '\u1E12': + // Ḓ [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW] + case '\u24B9': + // â’¹ [CIRCLED LATIN CAPITAL LETTER D] + case '\uA779': + // � [LATIN CAPITAL LETTER INSULAR D] + case '\uFF24': // D [FULLWIDTH LATIN CAPITAL LETTER D] + output[outputPos++] = 'D'; + break; + + case '\u00F0': + // ð [LATIN SMALL LETTER ETH] + case '\u010F': + // � [LATIN SMALL LETTER D WITH CARON] + case '\u0111': + // Ä‘ [LATIN SMALL LETTER D WITH STROKE] + case '\u018C': + // ÆŒ [LATIN SMALL LETTER D WITH TOPBAR] + case '\u0221': + // È¡ [LATIN SMALL LETTER D WITH CURL] + case '\u0256': + // É– [LATIN SMALL LETTER D WITH TAIL] + case '\u0257': + // É— [LATIN SMALL LETTER D WITH HOOK] + case '\u1D6D': + // áµ [LATIN SMALL LETTER D WITH MIDDLE TILDE] + case '\u1D81': + // � [LATIN SMALL LETTER D WITH PALATAL HOOK] + case '\u1D91': + // ᶑ [LATIN SMALL LETTER D WITH HOOK AND TAIL] + case '\u1E0B': + // ḋ [LATIN SMALL LETTER D WITH DOT ABOVE] + case '\u1E0D': + // � [LATIN SMALL LETTER D WITH DOT BELOW] + case '\u1E0F': + // � [LATIN SMALL LETTER D WITH LINE BELOW] + case '\u1E11': + // ḑ [LATIN SMALL LETTER D WITH CEDILLA] + case '\u1E13': + // ḓ [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW] + case '\u24D3': + // â““ [CIRCLED LATIN SMALL LETTER D] + case '\uA77A': + // � [LATIN SMALL LETTER INSULAR D] + case '\uFF44': // d [FULLWIDTH LATIN SMALL LETTER D] + output[outputPos++] = 'd'; + break; + + case '\u01C4': + // Ç„ [LATIN CAPITAL LETTER DZ WITH CARON] + case '\u01F1': // DZ [LATIN CAPITAL LETTER DZ] + output[outputPos++] = 'D'; + output[outputPos++] = 'Z'; + break; + + case '\u01C5': + // Ç… [LATIN CAPITAL LETTER D 
WITH SMALL LETTER Z WITH CARON] + case '\u01F2': // Dz [LATIN CAPITAL LETTER D WITH SMALL LETTER Z] + output[outputPos++] = 'D'; + output[outputPos++] = 'z'; + break; + + case '\u249F': // â’Ÿ [PARENTHESIZED LATIN SMALL LETTER D] + output[outputPos++] = '('; + output[outputPos++] = 'd'; + output[outputPos++] = ')'; + break; + + case '\u0238': // ȸ [LATIN SMALL LETTER DB DIGRAPH] + output[outputPos++] = 'd'; + output[outputPos++] = 'b'; + break; + + case '\u01C6': + // dž[LATIN SMALL LETTER DZ WITH CARON] + case '\u01F3': + // dz [LATIN SMALL LETTER DZ] + case '\u02A3': + // Ê£ [LATIN SMALL LETTER DZ DIGRAPH] + case '\u02A5': // Ê¥ [LATIN SMALL LETTER DZ DIGRAPH WITH CURL] + output[outputPos++] = 'd'; + output[outputPos++] = 'z'; + break; + + case '\u00C8': + // È [LATIN CAPITAL LETTER E WITH GRAVE] + case '\u00C9': + // É [LATIN CAPITAL LETTER E WITH ACUTE] + case '\u00CA': + // Ê[LATIN CAPITAL LETTER E WITH CIRCUMFLEX] + case '\u00CB': + // Ë [LATIN CAPITAL LETTER E WITH DIAERESIS] + case '\u0112': + // Ä’ [LATIN CAPITAL LETTER E WITH MACRON] + case '\u0114': + // �? [LATIN CAPITAL LETTER E WITH BREVE] + case '\u0116': + // Ä– [LATIN CAPITAL LETTER E WITH DOT ABOVE] + case '\u0118': + // Ę [LATIN CAPITAL LETTER E WITH OGONEK] + case '\u011A': + // Äš [LATIN CAPITAL LETTER E WITH CARON] + case '\u018E': + // ÆŽ [LATIN CAPITAL LETTER REVERSED E] + case '\u0190': + // � [LATIN CAPITAL LETTER OPEN E] + case '\u0204': + // È„ [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE] + case '\u0206': + // Ȇ[LATIN CAPITAL LETTER E WITH INVERTED BREVE] + case '\u0228': + // Ȩ [LATIN CAPITAL LETTER E WITH CEDILLA] + case '\u0246': + // Ɇ[LATIN CAPITAL LETTER E WITH STROKE] + case '\u1D07': + // á´‡ [LATIN LETTER SMALL CAPITAL E] + case '\u1E14': + // �? 
[LATIN CAPITAL LETTER E WITH MACRON AND GRAVE] + case '\u1E16': + // Ḗ [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE] + case '\u1E18': + // Ḙ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW] + case '\u1E1A': + // Ḛ [LATIN CAPITAL LETTER E WITH TILDE BELOW] + case '\u1E1C': + // Ḝ [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE] + case '\u1EB8': + // Ẹ [LATIN CAPITAL LETTER E WITH DOT BELOW] + case '\u1EBA': + // Ẻ [LATIN CAPITAL LETTER E WITH HOOK ABOVE] + case '\u1EBC': + // Ẽ [LATIN CAPITAL LETTER E WITH TILDE] + case '\u1EBE': + // Ế [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE] + case '\u1EC0': + // Ề [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE] + case '\u1EC2': + // Ể [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE] + case '\u1EC4': + // Ễ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE] + case '\u1EC6': + // Ệ[LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW] + case '\u24BA': + // â’º [CIRCLED LATIN CAPITAL LETTER E] + case '\u2C7B': + // â±» [LATIN LETTER SMALL CAPITAL TURNED E] + case '\uFF25': // ï¼¥ [FULLWIDTH LATIN CAPITAL LETTER E] + output[outputPos++] = 'E'; + break; + + case '\u00E8': + // è [LATIN SMALL LETTER E WITH GRAVE] + case '\u00E9': + // é [LATIN SMALL LETTER E WITH ACUTE] + case '\u00EA': + // ê [LATIN SMALL LETTER E WITH CIRCUMFLEX] + case '\u00EB': + // ë [LATIN SMALL LETTER E WITH DIAERESIS] + case '\u0113': + // Ä“ [LATIN SMALL LETTER E WITH MACRON] + case '\u0115': + // Ä• [LATIN SMALL LETTER E WITH BREVE] + case '\u0117': + // Ä— [LATIN SMALL LETTER E WITH DOT ABOVE] + case '\u0119': + // Ä™ [LATIN SMALL LETTER E WITH OGONEK] + case '\u011B': + // Ä› [LATIN SMALL LETTER E WITH CARON] + case '\u01DD': + // � [LATIN SMALL LETTER TURNED E] + case '\u0205': + // È… [LATIN SMALL LETTER E WITH DOUBLE GRAVE] + case '\u0207': + // ȇ [LATIN SMALL LETTER E WITH INVERTED BREVE] + case '\u0229': + // È© [LATIN SMALL LETTER E WITH CEDILLA] + case '\u0247': + // ɇ [LATIN SMALL LETTER E WITH STROKE] + case '\u0258': + // ɘ 
[LATIN SMALL LETTER REVERSED E] + case '\u025B': + // É› [LATIN SMALL LETTER OPEN E] + case '\u025C': + // Éœ [LATIN SMALL LETTER REVERSED OPEN E] + case '\u025D': + // � [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK] + case '\u025E': + // Éž [LATIN SMALL LETTER CLOSED REVERSED OPEN E] + case '\u029A': + // Êš [LATIN SMALL LETTER CLOSED OPEN E] + case '\u1D08': + // á´ˆ [LATIN SMALL LETTER TURNED OPEN E] + case '\u1D92': + // ᶒ [LATIN SMALL LETTER E WITH RETROFLEX HOOK] + case '\u1D93': + // ᶓ [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK] + case '\u1D94': + // �? [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK] + case '\u1E15': + // ḕ [LATIN SMALL LETTER E WITH MACRON AND GRAVE] + case '\u1E17': + // ḗ [LATIN SMALL LETTER E WITH MACRON AND ACUTE] + case '\u1E19': + // ḙ [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW] + case '\u1E1B': + // ḛ [LATIN SMALL LETTER E WITH TILDE BELOW] + case '\u1E1D': + // � [LATIN SMALL LETTER E WITH CEDILLA AND BREVE] + case '\u1EB9': + // ẹ [LATIN SMALL LETTER E WITH DOT BELOW] + case '\u1EBB': + // ẻ [LATIN SMALL LETTER E WITH HOOK ABOVE] + case '\u1EBD': + // ẽ [LATIN SMALL LETTER E WITH TILDE] + case '\u1EBF': + // ế [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE] + case '\u1EC1': + // � [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE] + case '\u1EC3': + // ể [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE] + case '\u1EC5': + // á»… [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE] + case '\u1EC7': + // ệ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW] + case '\u2091': + // â‚‘ [LATIN SUBSCRIPT SMALL LETTER E] + case '\u24D4': + // �? 
[CIRCLED LATIN SMALL LETTER E] + case '\u2C78': + // ⱸ [LATIN SMALL LETTER E WITH NOTCH] + case '\uFF45': // ï½… [FULLWIDTH LATIN SMALL LETTER E] + output[outputPos++] = 'e'; + break; + + case '\u24A0': // â’ [PARENTHESIZED LATIN SMALL LETTER E] + output[outputPos++] = '('; + output[outputPos++] = 'e'; + output[outputPos++] = ')'; + break; + + case '\u0191': + // Æ‘ [LATIN CAPITAL LETTER F WITH HOOK] + case '\u1E1E': + // Ḟ [LATIN CAPITAL LETTER F WITH DOT ABOVE] + case '\u24BB': + // â’» [CIRCLED LATIN CAPITAL LETTER F] + case '\uA730': + // ꜰ [LATIN LETTER SMALL CAPITAL F] + case '\uA77B': + // � [LATIN CAPITAL LETTER INSULAR F] + case '\uA7FB': + // ꟻ [LATIN EPIGRAPHIC LETTER REVERSED F] + case '\uFF26': // F [FULLWIDTH LATIN CAPITAL LETTER F] + output[outputPos++] = 'F'; + break; + + case '\u0192': + // Æ’ [LATIN SMALL LETTER F WITH HOOK] + case '\u1D6E': + // áµ® [LATIN SMALL LETTER F WITH MIDDLE TILDE] + case '\u1D82': + // ᶂ [LATIN SMALL LETTER F WITH PALATAL HOOK] + case '\u1E1F': + // ḟ [LATIN SMALL LETTER F WITH DOT ABOVE] + case '\u1E9B': + // ẛ [LATIN SMALL LETTER LONG S WITH DOT ABOVE] + case '\u24D5': + // â“• [CIRCLED LATIN SMALL LETTER F] + case '\uA77C': + // � [LATIN SMALL LETTER INSULAR F] + case '\uFF46': // f[FULLWIDTH LATIN SMALL LETTER F] + output[outputPos++] = 'f'; + break; + + case '\u24A1': // â’¡ [PARENTHESIZED LATIN SMALL LETTER F] + output[outputPos++] = '('; + output[outputPos++] = 'f'; + output[outputPos++] = ')'; + break; + + case '\uFB00': // ff [LATIN SMALL LIGATURE FF] + output[outputPos++] = 'f'; + output[outputPos++] = 'f'; + break; + + case '\uFB03': // ffi [LATIN SMALL LIGATURE FFI] + output[outputPos++] = 'f'; + output[outputPos++] = 'f'; + output[outputPos++] = 'i'; + break; + + case '\uFB04': // ffl [LATIN SMALL LIGATURE FFL] + output[outputPos++] = 'f'; + output[outputPos++] = 'f'; + output[outputPos++] = 'l'; + break; + + case '\uFB01': // � [LATIN SMALL LIGATURE FI] + output[outputPos++] = 'f'; + output[outputPos++] = 
'i'; + break; + + case '\uFB02': // fl [LATIN SMALL LIGATURE FL] + output[outputPos++] = 'f'; + output[outputPos++] = 'l'; + break; + + case '\u011C': + // Äœ [LATIN CAPITAL LETTER G WITH CIRCUMFLEX] + case '\u011E': + // Äž [LATIN CAPITAL LETTER G WITH BREVE] + case '\u0120': + // Ä [LATIN CAPITAL LETTER G WITH DOT ABOVE] + case '\u0122': + // Ä¢ [LATIN CAPITAL LETTER G WITH CEDILLA] + case '\u0193': + // Æ“ [LATIN CAPITAL LETTER G WITH HOOK] + case '\u01E4': + // Ǥ [LATIN CAPITAL LETTER G WITH STROKE] + case '\u01E5': + // Ç¥ [LATIN SMALL LETTER G WITH STROKE] + case '\u01E6': + // Ǧ [LATIN CAPITAL LETTER G WITH CARON] + case '\u01E7': + // ǧ [LATIN SMALL LETTER G WITH CARON] + case '\u01F4': + // Ç´ [LATIN CAPITAL LETTER G WITH ACUTE] + case '\u0262': + // É¢ [LATIN LETTER SMALL CAPITAL G] + case '\u029B': + // Ê› [LATIN LETTER SMALL CAPITAL G WITH HOOK] + case '\u1E20': + // Ḡ[LATIN CAPITAL LETTER G WITH MACRON] + case '\u24BC': + // â’¼ [CIRCLED LATIN CAPITAL LETTER G] + case '\uA77D': + // � [LATIN CAPITAL LETTER INSULAR G] + case '\uA77E': + // � [LATIN CAPITAL LETTER TURNED INSULAR G] + case '\uFF27': // G [FULLWIDTH LATIN CAPITAL LETTER G] + output[outputPos++] = 'G'; + break; + + case '\u011D': + // � [LATIN SMALL LETTER G WITH CIRCUMFLEX] + case '\u011F': + // ÄŸ [LATIN SMALL LETTER G WITH BREVE] + case '\u0121': + // Ä¡ [LATIN SMALL LETTER G WITH DOT ABOVE] + case '\u0123': + // Ä£ [LATIN SMALL LETTER G WITH CEDILLA] + case '\u01F5': + // ǵ [LATIN SMALL LETTER G WITH ACUTE] + case '\u0260': + // É [LATIN SMALL LETTER G WITH HOOK] + case '\u0261': + // É¡ [LATIN SMALL LETTER SCRIPT G] + case '\u1D77': + // áµ· [LATIN SMALL LETTER TURNED G] + case '\u1D79': + // áµ¹ [LATIN SMALL LETTER INSULAR G] + case '\u1D83': + // ᶃ [LATIN SMALL LETTER G WITH PALATAL HOOK] + case '\u1E21': + // ḡ [LATIN SMALL LETTER G WITH MACRON] + case '\u24D6': + // â“– [CIRCLED LATIN SMALL LETTER G] + case '\uA77F': + // � [LATIN SMALL LETTER TURNED INSULAR G] + case '\uFF47': // 
g [FULLWIDTH LATIN SMALL LETTER G] + output[outputPos++] = 'g'; + break; + + case '\u24A2': // â’¢ [PARENTHESIZED LATIN SMALL LETTER G] + output[outputPos++] = '('; + output[outputPos++] = 'g'; + output[outputPos++] = ')'; + break; + + case '\u0124': + // Ĥ [LATIN CAPITAL LETTER H WITH CIRCUMFLEX] + case '\u0126': + // Ħ [LATIN CAPITAL LETTER H WITH STROKE] + case '\u021E': + // Èž [LATIN CAPITAL LETTER H WITH CARON] + case '\u029C': + // Êœ [LATIN LETTER SMALL CAPITAL H] + case '\u1E22': + // Ḣ [LATIN CAPITAL LETTER H WITH DOT ABOVE] + case '\u1E24': + // Ḥ [LATIN CAPITAL LETTER H WITH DOT BELOW] + case '\u1E26': + // Ḧ [LATIN CAPITAL LETTER H WITH DIAERESIS] + case '\u1E28': + // Ḩ [LATIN CAPITAL LETTER H WITH CEDILLA] + case '\u1E2A': + // Ḫ [LATIN CAPITAL LETTER H WITH BREVE BELOW] + case '\u24BD': + // â’½ [CIRCLED LATIN CAPITAL LETTER H] + case '\u2C67': + // Ⱨ [LATIN CAPITAL LETTER H WITH DESCENDER] + case '\u2C75': + // â±µ [LATIN CAPITAL LETTER HALF H] + case '\uFF28': // H [FULLWIDTH LATIN CAPITAL LETTER H] + output[outputPos++] = 'H'; + break; + + case '\u0125': + // Ä¥ [LATIN SMALL LETTER H WITH CIRCUMFLEX] + case '\u0127': + // ħ [LATIN SMALL LETTER H WITH STROKE] + case '\u021F': + // ÈŸ [LATIN SMALL LETTER H WITH CARON] + case '\u0265': + // É¥ [LATIN SMALL LETTER TURNED H] + case '\u0266': + // ɦ [LATIN SMALL LETTER H WITH HOOK] + case '\u02AE': + // Ê® [LATIN SMALL LETTER TURNED H WITH FISHHOOK] + case '\u02AF': + // ʯ [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL] + case '\u1E23': + // ḣ [LATIN SMALL LETTER H WITH DOT ABOVE] + case '\u1E25': + // ḥ [LATIN SMALL LETTER H WITH DOT BELOW] + case '\u1E27': + // ḧ [LATIN SMALL LETTER H WITH DIAERESIS] + case '\u1E29': + // ḩ [LATIN SMALL LETTER H WITH CEDILLA] + case '\u1E2B': + // ḫ [LATIN SMALL LETTER H WITH BREVE BELOW] + case '\u1E96': + // ẖ [LATIN SMALL LETTER H WITH LINE BELOW] + case '\u24D7': + // â“— [CIRCLED LATIN SMALL LETTER H] + case '\u2C68': + // ⱨ [LATIN SMALL LETTER H WITH 
DESCENDER] + case '\u2C76': + // ⱶ [LATIN SMALL LETTER HALF H] + case '\uFF48': // h [FULLWIDTH LATIN SMALL LETTER H] + output[outputPos++] = 'h'; + break; + + case '\u01F6': // Ƕ http://en.wikipedia.org/wiki/Hwair [LATIN CAPITAL LETTER HWAIR] + output[outputPos++] = 'H'; + output[outputPos++] = 'V'; + break; + + case '\u24A3': // â’£ [PARENTHESIZED LATIN SMALL LETTER H] + output[outputPos++] = '('; + output[outputPos++] = 'h'; + output[outputPos++] = ')'; + break; + + case '\u0195': // Æ• [LATIN SMALL LETTER HV] + output[outputPos++] = 'h'; + output[outputPos++] = 'v'; + break; + + case '\u00CC': + // ÃŒ [LATIN CAPITAL LETTER I WITH GRAVE] + case '\u00CD': + // � [LATIN CAPITAL LETTER I WITH ACUTE] + case '\u00CE': + // ÃŽ [LATIN CAPITAL LETTER I WITH CIRCUMFLEX] + case '\u00CF': + // � [LATIN CAPITAL LETTER I WITH DIAERESIS] + case '\u0128': + // Ĩ [LATIN CAPITAL LETTER I WITH TILDE] + case '\u012A': + // Ī [LATIN CAPITAL LETTER I WITH MACRON] + case '\u012C': + // Ĭ [LATIN CAPITAL LETTER I WITH BREVE] + case '\u012E': + // Ä® [LATIN CAPITAL LETTER I WITH OGONEK] + case '\u0130': + // Ä° [LATIN CAPITAL LETTER I WITH DOT ABOVE] + case '\u0196': + // Æ– [LATIN CAPITAL LETTER IOTA] + case '\u0197': + // Æ— [LATIN CAPITAL LETTER I WITH STROKE] + case '\u01CF': + // � [LATIN CAPITAL LETTER I WITH CARON] + case '\u0208': + // Ȉ [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE] + case '\u020A': + // ÈŠ[LATIN CAPITAL LETTER I WITH INVERTED BREVE] + case '\u026A': + // ɪ [LATIN LETTER SMALL CAPITAL I] + case '\u1D7B': + // áµ» [LATIN SMALL CAPITAL LETTER I WITH STROKE] + case '\u1E2C': + // Ḭ [LATIN CAPITAL LETTER I WITH TILDE BELOW] + case '\u1E2E': + // Ḯ [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE] + case '\u1EC8': + // Ỉ [LATIN CAPITAL LETTER I WITH HOOK ABOVE] + case '\u1ECA': + // Ị[LATIN CAPITAL LETTER I WITH DOT BELOW] + case '\u24BE': + // â’¾ [CIRCLED LATIN CAPITAL LETTER I] + case '\uA7FE': + // ꟾ [LATIN EPIGRAPHIC LETTER I LONGA] + case '\uFF29': // I [FULLWIDTH 
LATIN CAPITAL LETTER I] + output[outputPos++] = 'I'; + break; + + case '\u00EC': + // ì [LATIN SMALL LETTER I WITH GRAVE] + case '\u00ED': + // à[LATIN SMALL LETTER I WITH ACUTE] + case '\u00EE': + // î [LATIN SMALL LETTER I WITH CIRCUMFLEX] + case '\u00EF': + // ï [LATIN SMALL LETTER I WITH DIAERESIS] + case '\u0129': + // Ä© [LATIN SMALL LETTER I WITH TILDE] + case '\u012B': + // Ä« [LATIN SMALL LETTER I WITH MACRON] + case '\u012D': + // Ä [LATIN SMALL LETTER I WITH BREVE] + case '\u012F': + // į [LATIN SMALL LETTER I WITH OGONEK] + case '\u0131': + // ı [LATIN SMALL LETTER DOTLESS I] + case '\u01D0': + // � [LATIN SMALL LETTER I WITH CARON] + case '\u0209': + // ȉ [LATIN SMALL LETTER I WITH DOUBLE GRAVE] + case '\u020B': + // È‹ [LATIN SMALL LETTER I WITH INVERTED BREVE] + case '\u0268': + // ɨ [LATIN SMALL LETTER I WITH STROKE] + case '\u1D09': + // á´‰ [LATIN SMALL LETTER TURNED I] + case '\u1D62': + // áµ¢ [LATIN SUBSCRIPT SMALL LETTER I] + case '\u1D7C': + // áµ¼ [LATIN SMALL LETTER IOTA WITH STROKE] + case '\u1D96': + // ᶖ [LATIN SMALL LETTER I WITH RETROFLEX HOOK] + case '\u1E2D': + // Ḡ[LATIN SMALL LETTER I WITH TILDE BELOW] + case '\u1E2F': + // ḯ [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE] + case '\u1EC9': + // ỉ [LATIN SMALL LETTER I WITH HOOK ABOVE] + case '\u1ECB': + // ị [LATIN SMALL LETTER I WITH DOT BELOW] + case '\u2071': + // � [SUPERSCRIPT LATIN SMALL LETTER I] + case '\u24D8': + // ⓘ [CIRCLED LATIN SMALL LETTER I] + case '\uFF49': // i [FULLWIDTH LATIN SMALL LETTER I] + output[outputPos++] = 'i'; + break; + + case '\u0132': // IJ [LATIN CAPITAL LIGATURE IJ] + output[outputPos++] = 'I'; + output[outputPos++] = 'J'; + break; + + case '\u24A4': // â’¤ [PARENTHESIZED LATIN SMALL LETTER I] + output[outputPos++] = '('; + output[outputPos++] = 'i'; + output[outputPos++] = ')'; + break; + + case '\u0133': // ij [LATIN SMALL LIGATURE IJ] + output[outputPos++] = 'i'; + output[outputPos++] = 'j'; + break; + + case '\u0134': + // Ä´ [LATIN CAPITAL 
LETTER J WITH CIRCUMFLEX] + case '\u0248': + // Ɉ [LATIN CAPITAL LETTER J WITH STROKE] + case '\u1D0A': + // á´Š[LATIN LETTER SMALL CAPITAL J] + case '\u24BF': + // â’¿ [CIRCLED LATIN CAPITAL LETTER J] + case '\uFF2A': // J [FULLWIDTH LATIN CAPITAL LETTER J] + output[outputPos++] = 'J'; + break; + + case '\u0135': + // ĵ [LATIN SMALL LETTER J WITH CIRCUMFLEX] + case '\u01F0': + // Ç° [LATIN SMALL LETTER J WITH CARON] + case '\u0237': + // È· [LATIN SMALL LETTER DOTLESS J] + case '\u0249': + // ɉ [LATIN SMALL LETTER J WITH STROKE] + case '\u025F': + // ÉŸ [LATIN SMALL LETTER DOTLESS J WITH STROKE] + case '\u0284': + // Ê„ [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK] + case '\u029D': + // � [LATIN SMALL LETTER J WITH CROSSED-TAIL] + case '\u24D9': + // â“™ [CIRCLED LATIN SMALL LETTER J] + case '\u2C7C': + // â±¼ [LATIN SUBSCRIPT SMALL LETTER J] + case '\uFF4A': // j[FULLWIDTH LATIN SMALL LETTER J] + output[outputPos++] = 'j'; + break; + + case '\u24A5': // â’¥ [PARENTHESIZED LATIN SMALL LETTER J] + output[outputPos++] = '('; + output[outputPos++] = 'j'; + output[outputPos++] = ')'; + break; + + case '\u0136': + // Ķ [LATIN CAPITAL LETTER K WITH CEDILLA] + case '\u0198': + // Ƙ [LATIN CAPITAL LETTER K WITH HOOK] + case '\u01E8': + // Ǩ [LATIN CAPITAL LETTER K WITH CARON] + case '\u1D0B': + // á´‹ [LATIN LETTER SMALL CAPITAL K] + case '\u1E30': + // Ḱ [LATIN CAPITAL LETTER K WITH ACUTE] + case '\u1E32': + // Ḳ [LATIN CAPITAL LETTER K WITH DOT BELOW] + case '\u1E34': + // Ḵ [LATIN CAPITAL LETTER K WITH LINE BELOW] + case '\u24C0': + // â“€ [CIRCLED LATIN CAPITAL LETTER K] + case '\u2C69': + // Ⱪ [LATIN CAPITAL LETTER K WITH DESCENDER] + case '\uA740': + // � [LATIN CAPITAL LETTER K WITH STROKE] + case '\uA742': + // � [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE] + case '\uA744': + // � [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE] + case '\uFF2B': // K [FULLWIDTH LATIN CAPITAL LETTER K] + output[outputPos++] = 'K'; + break; + + case '\u0137': + // Ä· 
[LATIN SMALL LETTER K WITH CEDILLA] + case '\u0199': + // Æ™ [LATIN SMALL LETTER K WITH HOOK] + case '\u01E9': + // Ç© [LATIN SMALL LETTER K WITH CARON] + case '\u029E': + // Êž [LATIN SMALL LETTER TURNED K] + case '\u1D84': + // ᶄ [LATIN SMALL LETTER K WITH PALATAL HOOK] + case '\u1E31': + // ḱ [LATIN SMALL LETTER K WITH ACUTE] + case '\u1E33': + // ḳ [LATIN SMALL LETTER K WITH DOT BELOW] + case '\u1E35': + // ḵ [LATIN SMALL LETTER K WITH LINE BELOW] + case '\u24DA': + // â“š [CIRCLED LATIN SMALL LETTER K] + case '\u2C6A': + // ⱪ [LATIN SMALL LETTER K WITH DESCENDER] + case '\uA741': + // � [LATIN SMALL LETTER K WITH STROKE] + case '\uA743': + // � [LATIN SMALL LETTER K WITH DIAGONAL STROKE] + case '\uA745': + // � [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE] + case '\uFF4B': // k [FULLWIDTH LATIN SMALL LETTER K] + output[outputPos++] = 'k'; + break; + + case '\u24A6': // â’¦ [PARENTHESIZED LATIN SMALL LETTER K] + output[outputPos++] = '('; + output[outputPos++] = 'k'; + output[outputPos++] = ')'; + break; + + case '\u0139': + // Ĺ [LATIN CAPITAL LETTER L WITH ACUTE] + case '\u013B': + // Ä» [LATIN CAPITAL LETTER L WITH CEDILLA] + case '\u013D': + // Ľ [LATIN CAPITAL LETTER L WITH CARON] + case '\u013F': + // Ä¿ [LATIN CAPITAL LETTER L WITH MIDDLE DOT] + case '\u0141': + // � [LATIN CAPITAL LETTER L WITH STROKE] + case '\u023D': + // Ƚ [LATIN CAPITAL LETTER L WITH BAR] + case '\u029F': + // ÊŸ [LATIN LETTER SMALL CAPITAL L] + case '\u1D0C': + // á´Œ [LATIN LETTER SMALL CAPITAL L WITH STROKE] + case '\u1E36': + // Ḷ [LATIN CAPITAL LETTER L WITH DOT BELOW] + case '\u1E38': + // Ḹ [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON] + case '\u1E3A': + // Ḻ [LATIN CAPITAL LETTER L WITH LINE BELOW] + case '\u1E3C': + // Ḽ [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW] + case '\u24C1': + // � [CIRCLED LATIN CAPITAL LETTER L] + case '\u2C60': + // â± [LATIN CAPITAL LETTER L WITH DOUBLE BAR] + case '\u2C62': + // â±¢ [LATIN CAPITAL LETTER L WITH MIDDLE TILDE] + 
case '\uA746': + // �[LATIN CAPITAL LETTER BROKEN L] + case '\uA748': + // � [LATIN CAPITAL LETTER L WITH HIGH STROKE] + case '\uA780': + // Ꞁ [LATIN CAPITAL LETTER TURNED L] + case '\uFF2C': // L [FULLWIDTH LATIN CAPITAL LETTER L] + output[outputPos++] = 'L'; + break; + + case '\u013A': + // ĺ [LATIN SMALL LETTER L WITH ACUTE] + case '\u013C': + // ļ [LATIN SMALL LETTER L WITH CEDILLA] + case '\u013E': + // ľ [LATIN SMALL LETTER L WITH CARON] + case '\u0140': + // Å€ [LATIN SMALL LETTER L WITH MIDDLE DOT] + case '\u0142': + // Å‚ [LATIN SMALL LETTER L WITH STROKE] + case '\u019A': + // Æš [LATIN SMALL LETTER L WITH BAR] + case '\u0234': + // È´ [LATIN SMALL LETTER L WITH CURL] + case '\u026B': + // É« [LATIN SMALL LETTER L WITH MIDDLE TILDE] + case '\u026C': + // ɬ [LATIN SMALL LETTER L WITH BELT] + case '\u026D': + // É [LATIN SMALL LETTER L WITH RETROFLEX HOOK] + case '\u1D85': + // ᶅ [LATIN SMALL LETTER L WITH PALATAL HOOK] + case '\u1E37': + // ḷ [LATIN SMALL LETTER L WITH DOT BELOW] + case '\u1E39': + // ḹ [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON] + case '\u1E3B': + // ḻ [LATIN SMALL LETTER L WITH LINE BELOW] + case '\u1E3D': + // ḽ [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW] + case '\u24DB': + // â“› [CIRCLED LATIN SMALL LETTER L] + case '\u2C61': + // ⱡ [LATIN SMALL LETTER L WITH DOUBLE BAR] + case '\uA747': + // � [LATIN SMALL LETTER BROKEN L] + case '\uA749': + // � [LATIN SMALL LETTER L WITH HIGH STROKE] + case '\uA781': + // � [LATIN SMALL LETTER TURNED L] + case '\uFF4C': // l [FULLWIDTH LATIN SMALL LETTER L] + output[outputPos++] = 'l'; + break; + + case '\u01C7': // LJ [LATIN CAPITAL LETTER LJ] + output[outputPos++] = 'L'; + output[outputPos++] = 'J'; + break; + + case '\u1EFA': // Ỻ [LATIN CAPITAL LETTER MIDDLE-WELSH LL] + output[outputPos++] = 'L'; + output[outputPos++] = 'L'; + break; + + case '\u01C8': // Lj [LATIN CAPITAL LETTER L WITH SMALL LETTER J] + output[outputPos++] = 'L'; + output[outputPos++] = 'j'; + break; + + case '\u24A7': 
// â’§ [PARENTHESIZED LATIN SMALL LETTER L] + output[outputPos++] = '('; + output[outputPos++] = 'l'; + output[outputPos++] = ')'; + break; + + case '\u01C9': // lj [LATIN SMALL LETTER LJ] + output[outputPos++] = 'l'; + output[outputPos++] = 'j'; + break; + + case '\u1EFB': // á»» [LATIN SMALL LETTER MIDDLE-WELSH LL] + output[outputPos++] = 'l'; + output[outputPos++] = 'l'; + break; + + case '\u02AA': // ʪ [LATIN SMALL LETTER LS DIGRAPH] + output[outputPos++] = 'l'; + output[outputPos++] = 's'; + break; + + case '\u02AB': // Ê« [LATIN SMALL LETTER LZ DIGRAPH] + output[outputPos++] = 'l'; + output[outputPos++] = 'z'; + break; + + case '\u019C': + // Æœ [LATIN CAPITAL LETTER TURNED M] + case '\u1D0D': + // á´� [LATIN LETTER SMALL CAPITAL M] + case '\u1E3E': + // Ḿ [LATIN CAPITAL LETTER M WITH ACUTE] + case '\u1E40': + // á¹€ [LATIN CAPITAL LETTER M WITH DOT ABOVE] + case '\u1E42': + // Ṃ [LATIN CAPITAL LETTER M WITH DOT BELOW] + case '\u24C2': + // â“‚ [CIRCLED LATIN CAPITAL LETTER M] + case '\u2C6E': + // â±® [LATIN CAPITAL LETTER M WITH HOOK] + case '\uA7FD': + // ꟽ [LATIN EPIGRAPHIC LETTER INVERTED M] + case '\uA7FF': + // ꟿ [LATIN EPIGRAPHIC LETTER ARCHAIC M] + case '\uFF2D': // ï¼ [FULLWIDTH LATIN CAPITAL LETTER M] + output[outputPos++] = 'M'; + break; + + case '\u026F': + // ɯ [LATIN SMALL LETTER TURNED M] + case '\u0270': + // É° [LATIN SMALL LETTER TURNED M WITH LONG LEG] + case '\u0271': + // ɱ [LATIN SMALL LETTER M WITH HOOK] + case '\u1D6F': + // ᵯ [LATIN SMALL LETTER M WITH MIDDLE TILDE] + case '\u1D86': + // ᶆ[LATIN SMALL LETTER M WITH PALATAL HOOK] + case '\u1E3F': + // ḿ [LATIN SMALL LETTER M WITH ACUTE] + case '\u1E41': + // � [LATIN SMALL LETTER M WITH DOT ABOVE] + case '\u1E43': + // ṃ [LATIN SMALL LETTER M WITH DOT BELOW] + case '\u24DC': + // â“œ [CIRCLED LATIN SMALL LETTER M] + case '\uFF4D': // � [FULLWIDTH LATIN SMALL LETTER M] + output[outputPos++] = 'm'; + break; + + case '\u24A8': // â’¨ [PARENTHESIZED LATIN SMALL LETTER M] + 
output[outputPos++] = '('; + output[outputPos++] = 'm'; + output[outputPos++] = ')'; + break; + + case '\u00D1': + // Ñ [LATIN CAPITAL LETTER N WITH TILDE] + case '\u0143': + // Ã…Æ’ [LATIN CAPITAL LETTER N WITH ACUTE] + case '\u0145': + // Å… [LATIN CAPITAL LETTER N WITH CEDILLA] + case '\u0147': + // Ň [LATIN CAPITAL LETTER N WITH CARON] + case '\u014A': + // Ã…Å http://en.wikipedia.org/wiki/Eng_(letter) [LATIN CAPITAL LETTER ENG] + case '\u019D': + // � [LATIN CAPITAL LETTER N WITH LEFT HOOK] + case '\u01F8': + // Ǹ [LATIN CAPITAL LETTER N WITH GRAVE] + case '\u0220': + // È [LATIN CAPITAL LETTER N WITH LONG RIGHT LEG] + case '\u0274': + // É´ [LATIN LETTER SMALL CAPITAL N] + case '\u1D0E': + // á´Ž [LATIN LETTER SMALL CAPITAL REVERSED N] + case '\u1E44': + // Ṅ [LATIN CAPITAL LETTER N WITH DOT ABOVE] + case '\u1E46': + // Ṇ[LATIN CAPITAL LETTER N WITH DOT BELOW] + case '\u1E48': + // Ṉ [LATIN CAPITAL LETTER N WITH LINE BELOW] + case '\u1E4A': + // Ṋ[LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW] + case '\u24C3': + // Ⓝ [CIRCLED LATIN CAPITAL LETTER N] + case '\uFF2E': // ï¼® [FULLWIDTH LATIN CAPITAL LETTER N] + output[outputPos++] = 'N'; + break; + + case '\u00F1': + // ñ [LATIN SMALL LETTER N WITH TILDE] + case '\u0144': + // Å„ [LATIN SMALL LETTER N WITH ACUTE] + case '\u0146': + // ņ[LATIN SMALL LETTER N WITH CEDILLA] + case '\u0148': + // ň [LATIN SMALL LETTER N WITH CARON] + case '\u0149': + // ʼn [LATIN SMALL LETTER N PRECEDED BY APOSTROPHE] + case '\u014B': + // Å‹ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN SMALL LETTER ENG] + case '\u019E': + // Æž [LATIN SMALL LETTER N WITH LONG RIGHT LEG] + case '\u01F9': + // ǹ [LATIN SMALL LETTER N WITH GRAVE] + case '\u0235': + // ȵ [LATIN SMALL LETTER N WITH CURL] + case '\u0272': + // ɲ [LATIN SMALL LETTER N WITH LEFT HOOK] + case '\u0273': + // ɳ [LATIN SMALL LETTER N WITH RETROFLEX HOOK] + case '\u1D70': + // áµ° [LATIN SMALL LETTER N WITH MIDDLE TILDE] + case '\u1D87': + // ᶇ [LATIN SMALL LETTER N WITH 
PALATAL HOOK] + case '\u1E45': + // á¹… [LATIN SMALL LETTER N WITH DOT ABOVE] + case '\u1E47': + // ṇ [LATIN SMALL LETTER N WITH DOT BELOW] + case '\u1E49': + // ṉ [LATIN SMALL LETTER N WITH LINE BELOW] + case '\u1E4B': + // ṋ [LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW] + case '\u207F': + // � [SUPERSCRIPT LATIN SMALL LETTER N] + case '\u24DD': + // � [CIRCLED LATIN SMALL LETTER N] + case '\uFF4E': // n [FULLWIDTH LATIN SMALL LETTER N] + output[outputPos++] = 'n'; + break; + + case '\u01CA': // ÇŠ[LATIN CAPITAL LETTER NJ] + output[outputPos++] = 'N'; + output[outputPos++] = 'J'; + break; + + case '\u01CB': // Ç‹ [LATIN CAPITAL LETTER N WITH SMALL LETTER J] + output[outputPos++] = 'N'; + output[outputPos++] = 'j'; + break; + + case '\u24A9': // â’© [PARENTHESIZED LATIN SMALL LETTER N] + output[outputPos++] = '('; + output[outputPos++] = 'n'; + output[outputPos++] = ')'; + break; + + case '\u01CC': // ÇŒ [LATIN SMALL LETTER NJ] + output[outputPos++] = 'n'; + output[outputPos++] = 'j'; + break; + + case '\u00D2': + // Ã’ [LATIN CAPITAL LETTER O WITH GRAVE] + case '\u00D3': + // Ó [LATIN CAPITAL LETTER O WITH ACUTE] + case '\u00D4': + // �? 
[LATIN CAPITAL LETTER O WITH CIRCUMFLEX] + case '\u00D5': + // Õ [LATIN CAPITAL LETTER O WITH TILDE] + case '\u00D6': + // Ö [LATIN CAPITAL LETTER O WITH DIAERESIS] + case '\u00D8': + // Ø [LATIN CAPITAL LETTER O WITH STROKE] + case '\u014C': + // Ã…Å’ [LATIN CAPITAL LETTER O WITH MACRON] + case '\u014E': + // ÅŽ [LATIN CAPITAL LETTER O WITH BREVE] + case '\u0150': + // � [LATIN CAPITAL LETTER O WITH DOUBLE ACUTE] + case '\u0186': + // Ɔ[LATIN CAPITAL LETTER OPEN O] + case '\u019F': + // ÆŸ [LATIN CAPITAL LETTER O WITH MIDDLE TILDE] + case '\u01A0': + // Æ [LATIN CAPITAL LETTER O WITH HORN] + case '\u01D1': + // Ç‘ [LATIN CAPITAL LETTER O WITH CARON] + case '\u01EA': + // Ǫ [LATIN CAPITAL LETTER O WITH OGONEK] + case '\u01EC': + // Ǭ [LATIN CAPITAL LETTER O WITH OGONEK AND MACRON] + case '\u01FE': + // Ǿ [LATIN CAPITAL LETTER O WITH STROKE AND ACUTE] + case '\u020C': + // ÈŒ [LATIN CAPITAL LETTER O WITH DOUBLE GRAVE] + case '\u020E': + // ÈŽ [LATIN CAPITAL LETTER O WITH INVERTED BREVE] + case '\u022A': + // Ȫ [LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON] + case '\u022C': + // Ȭ [LATIN CAPITAL LETTER O WITH TILDE AND MACRON] + case '\u022E': + // È® [LATIN CAPITAL LETTER O WITH DOT ABOVE] + case '\u0230': + // È° [LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON] + case '\u1D0F': + // á´� [LATIN LETTER SMALL CAPITAL O] + case '\u1D10': + // á´� [LATIN LETTER SMALL CAPITAL OPEN O] + case '\u1E4C': + // Ṍ [LATIN CAPITAL LETTER O WITH TILDE AND ACUTE] + case '\u1E4E': + // Ṏ [LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS] + case '\u1E50': + // � [LATIN CAPITAL LETTER O WITH MACRON AND GRAVE] + case '\u1E52': + // á¹’ [LATIN CAPITAL LETTER O WITH MACRON AND ACUTE] + case '\u1ECC': + // Ọ [LATIN CAPITAL LETTER O WITH DOT BELOW] + case '\u1ECE': + // Ỏ [LATIN CAPITAL LETTER O WITH HOOK ABOVE] + case '\u1ED0': + // � [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE] + case '\u1ED2': + // á»’ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE] + case '\u1ED4': + // 
�? [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE] + case '\u1ED6': + // á»– [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE] + case '\u1ED8': + // Ộ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW] + case '\u1EDA': + // Ớ [LATIN CAPITAL LETTER O WITH HORN AND ACUTE] + case '\u1EDC': + // Ờ [LATIN CAPITAL LETTER O WITH HORN AND GRAVE] + case '\u1EDE': + // Ở [LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE] + case '\u1EE0': + // á» [LATIN CAPITAL LETTER O WITH HORN AND TILDE] + case '\u1EE2': + // Ợ [LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW] + case '\u24C4': + // â“„ [CIRCLED LATIN CAPITAL LETTER O] + case '\uA74A': + // �[LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY] + case '\uA74C': + // � [LATIN CAPITAL LETTER O WITH LOOP] + case '\uFF2F': // O [FULLWIDTH LATIN CAPITAL LETTER O] + output[outputPos++] = 'O'; + break; + + case '\u00F2': + // ò [LATIN SMALL LETTER O WITH GRAVE] + case '\u00F3': + // ó [LATIN SMALL LETTER O WITH ACUTE] + case '\u00F4': + // ô [LATIN SMALL LETTER O WITH CIRCUMFLEX] + case '\u00F5': + // õ [LATIN SMALL LETTER O WITH TILDE] + case '\u00F6': + // ö [LATIN SMALL LETTER O WITH DIAERESIS] + case '\u00F8': + // ø [LATIN SMALL LETTER O WITH STROKE] + case '\u014D': + // � [LATIN SMALL LETTER O WITH MACRON] + case '\u014F': + // � [LATIN SMALL LETTER O WITH BREVE] + case '\u0151': + // Å‘ [LATIN SMALL LETTER O WITH DOUBLE ACUTE] + case '\u01A1': + // Æ¡ [LATIN SMALL LETTER O WITH HORN] + case '\u01D2': + // Ç’ [LATIN SMALL LETTER O WITH CARON] + case '\u01EB': + // Ç« [LATIN SMALL LETTER O WITH OGONEK] + case '\u01ED': + // Ç [LATIN SMALL LETTER O WITH OGONEK AND MACRON] + case '\u01FF': + // Ç¿ [LATIN SMALL LETTER O WITH STROKE AND ACUTE] + case '\u020D': + // � [LATIN SMALL LETTER O WITH DOUBLE GRAVE] + case '\u020F': + // � [LATIN SMALL LETTER O WITH INVERTED BREVE] + case '\u022B': + // È« [LATIN SMALL LETTER O WITH DIAERESIS AND MACRON] + case '\u022D': + // È [LATIN SMALL LETTER O WITH TILDE AND MACRON] + case 
'\u022F': + // ȯ [LATIN SMALL LETTER O WITH DOT ABOVE] + case '\u0231': + // ȱ [LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON] + case '\u0254': + // �? [LATIN SMALL LETTER OPEN O] + case '\u0275': + // ɵ [LATIN SMALL LETTER BARRED O] + case '\u1D16': + // á´– [LATIN SMALL LETTER TOP HALF O] + case '\u1D17': + // á´— [LATIN SMALL LETTER BOTTOM HALF O] + case '\u1D97': + // ᶗ [LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK] + case '\u1E4D': + // � [LATIN SMALL LETTER O WITH TILDE AND ACUTE] + case '\u1E4F': + // � [LATIN SMALL LETTER O WITH TILDE AND DIAERESIS] + case '\u1E51': + // ṑ [LATIN SMALL LETTER O WITH MACRON AND GRAVE] + case '\u1E53': + // ṓ [LATIN SMALL LETTER O WITH MACRON AND ACUTE] + case '\u1ECD': + // � [LATIN SMALL LETTER O WITH DOT BELOW] + case '\u1ECF': + // � [LATIN SMALL LETTER O WITH HOOK ABOVE] + case '\u1ED1': + // ố [LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE] + case '\u1ED3': + // ồ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE] + case '\u1ED5': + // ổ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE] + case '\u1ED7': + // á»— [LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE] + case '\u1ED9': + // á»™ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW] + case '\u1EDB': + // á»› [LATIN SMALL LETTER O WITH HORN AND ACUTE] + case '\u1EDD': + // � [LATIN SMALL LETTER O WITH HORN AND GRAVE] + case '\u1EDF': + // ở [LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE] + case '\u1EE1': + // ỡ [LATIN SMALL LETTER O WITH HORN AND TILDE] + case '\u1EE3': + // ợ [LATIN SMALL LETTER O WITH HORN AND DOT BELOW] + case '\u2092': + // â‚’ [LATIN SUBSCRIPT SMALL LETTER O] + case '\u24DE': + // â“ž [CIRCLED LATIN SMALL LETTER O] + case '\u2C7A': + // ⱺ [LATIN SMALL LETTER O WITH LOW RING INSIDE] + case '\uA74B': + // � [LATIN SMALL LETTER O WITH LONG STROKE OVERLAY] + case '\uA74D': + // � [LATIN SMALL LETTER O WITH LOOP] + case '\uFF4F': // � [FULLWIDTH LATIN SMALL LETTER O] + output[outputPos++] = 'o'; + break; + + case '\u0152': + // Å’ [LATIN 
CAPITAL LIGATURE OE] + case '\u0276': // ɶ [LATIN LETTER SMALL CAPITAL OE] + output[outputPos++] = 'O'; + output[outputPos++] = 'E'; + break; + + case '\uA74E': // � [LATIN CAPITAL LETTER OO] + output[outputPos++] = 'O'; + output[outputPos++] = 'O'; + break; + + case '\u0222': + // È¢ http://en.wikipedia.org/wiki/OU [LATIN CAPITAL LETTER OU] + case '\u1D15': // á´• [LATIN LETTER SMALL CAPITAL OU] + output[outputPos++] = 'O'; + output[outputPos++] = 'U'; + break; + + case '\u24AA': // â’ª [PARENTHESIZED LATIN SMALL LETTER O] + output[outputPos++] = '('; + output[outputPos++] = 'o'; + output[outputPos++] = ')'; + break; + + case '\u0153': + // Å“ [LATIN SMALL LIGATURE OE] + case '\u1D14': // á´�? [LATIN SMALL LETTER TURNED OE] + output[outputPos++] = 'o'; + output[outputPos++] = 'e'; + break; + + case '\uA74F': // � [LATIN SMALL LETTER OO] + output[outputPos++] = 'o'; + output[outputPos++] = 'o'; + break; + + case '\u0223': // È£ http://en.wikipedia.org/wiki/OU [LATIN SMALL LETTER OU] + output[outputPos++] = 'o'; + output[outputPos++] = 'u'; + break; + + case '\u01A4': + // Ƥ [LATIN CAPITAL LETTER P WITH HOOK] + case '\u1D18': + // á´˜ [LATIN LETTER SMALL CAPITAL P] + case '\u1E54': + // �? [LATIN CAPITAL LETTER P WITH ACUTE] + case '\u1E56': + // á¹– [LATIN CAPITAL LETTER P WITH DOT ABOVE] + case '\u24C5': + // â“… [CIRCLED LATIN CAPITAL LETTER P] + case '\u2C63': + // â±£ [LATIN CAPITAL LETTER P WITH STROKE] + case '\uA750': + // � [LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER] + case '\uA752': + // � [LATIN CAPITAL LETTER P WITH FLOURISH] + case '\uA754': + // �? 
[LATIN CAPITAL LETTER P WITH SQUIRREL TAIL] + case '\uFF30': // ï¼° [FULLWIDTH LATIN CAPITAL LETTER P] + output[outputPos++] = 'P'; + break; + + case '\u01A5': + // Æ¥ [LATIN SMALL LETTER P WITH HOOK] + case '\u1D71': + // áµ± [LATIN SMALL LETTER P WITH MIDDLE TILDE] + case '\u1D7D': + // áµ½ [LATIN SMALL LETTER P WITH STROKE] + case '\u1D88': + // ᶈ [LATIN SMALL LETTER P WITH PALATAL HOOK] + case '\u1E55': + // ṕ [LATIN SMALL LETTER P WITH ACUTE] + case '\u1E57': + // á¹— [LATIN SMALL LETTER P WITH DOT ABOVE] + case '\u24DF': + // â“Ÿ [CIRCLED LATIN SMALL LETTER P] + case '\uA751': + // � [LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER] + case '\uA753': + // � [LATIN SMALL LETTER P WITH FLOURISH] + case '\uA755': + // � [LATIN SMALL LETTER P WITH SQUIRREL TAIL] + case '\uA7FC': + // ꟼ [LATIN EPIGRAPHIC LETTER REVERSED P] + case '\uFF50': // � [FULLWIDTH LATIN SMALL LETTER P] + output[outputPos++] = 'p'; + break; + + case '\u24AB': // â’« [PARENTHESIZED LATIN SMALL LETTER P] + output[outputPos++] = '('; + output[outputPos++] = 'p'; + output[outputPos++] = ')'; + break; + + case '\u024A': + // ÉŠ[LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL] + case '\u24C6': + // Ⓠ[CIRCLED LATIN CAPITAL LETTER Q] + case '\uA756': + // � [LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER] + case '\uA758': + // � [LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE] + case '\uFF31': // ï¼± [FULLWIDTH LATIN CAPITAL LETTER Q] + output[outputPos++] = 'Q'; + break; + + case '\u0138': + // ĸ http://en.wikipedia.org/wiki/Kra_(letter) [LATIN SMALL LETTER KRA] + case '\u024B': + // É‹ [LATIN SMALL LETTER Q WITH HOOK TAIL] + case '\u02A0': + // Ê [LATIN SMALL LETTER Q WITH HOOK] + case '\u24E0': + // â“ [CIRCLED LATIN SMALL LETTER Q] + case '\uA757': + // � [LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER] + case '\uA759': + // � [LATIN SMALL LETTER Q WITH DIAGONAL STROKE] + case '\uFF51': // q [FULLWIDTH LATIN SMALL LETTER Q] + output[outputPos++] = 'q'; + break; + + case '\u24AC': // â’¬ 
[PARENTHESIZED LATIN SMALL LETTER Q] + output[outputPos++] = '('; + output[outputPos++] = 'q'; + output[outputPos++] = ')'; + break; + + case '\u0239': // ȹ [LATIN SMALL LETTER QP DIGRAPH] + output[outputPos++] = 'q'; + output[outputPos++] = 'p'; + break; + + case '\u0154': + // �? [LATIN CAPITAL LETTER R WITH ACUTE] + case '\u0156': + // Å– [LATIN CAPITAL LETTER R WITH CEDILLA] + case '\u0158': + // Ã…Ëœ [LATIN CAPITAL LETTER R WITH CARON] + case '\u0210': + // È’ [LATIN CAPITAL LETTER R WITH DOUBLE GRAVE] + case '\u0212': + // È’ [LATIN CAPITAL LETTER R WITH INVERTED BREVE] + case '\u024C': + // ÉŒ [LATIN CAPITAL LETTER R WITH STROKE] + case '\u0280': + // Ê€ [LATIN LETTER SMALL CAPITAL R] + case '\u0281': + // � [LATIN LETTER SMALL CAPITAL INVERTED R] + case '\u1D19': + // á´™ [LATIN LETTER SMALL CAPITAL REVERSED R] + case '\u1D1A': + // á´š [LATIN LETTER SMALL CAPITAL TURNED R] + case '\u1E58': + // Ṙ [LATIN CAPITAL LETTER R WITH DOT ABOVE] + case '\u1E5A': + // Ṛ [LATIN CAPITAL LETTER R WITH DOT BELOW] + case '\u1E5C': + // Ṝ [LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON] + case '\u1E5E': + // Ṟ [LATIN CAPITAL LETTER R WITH LINE BELOW] + case '\u24C7': + // Ⓡ [CIRCLED LATIN CAPITAL LETTER R] + case '\u2C64': + // Ɽ [LATIN CAPITAL LETTER R WITH TAIL] + case '\uA75A': + // � [LATIN CAPITAL LETTER R ROTUNDA] + case '\uA782': + // êž‚ [LATIN CAPITAL LETTER INSULAR R] + case '\uFF32': // ï¼² [FULLWIDTH LATIN CAPITAL LETTER R] + output[outputPos++] = 'R'; + break; + + case '\u0155': + // Å• [LATIN SMALL LETTER R WITH ACUTE] + case '\u0157': + // Å— [LATIN SMALL LETTER R WITH CEDILLA] + case '\u0159': + // Ã…â„¢ [LATIN SMALL LETTER R WITH CARON] + case '\u0211': + // È‘ [LATIN SMALL LETTER R WITH DOUBLE GRAVE] + case '\u0213': + // È“ [LATIN SMALL LETTER R WITH INVERTED BREVE] + case '\u024D': + // � [LATIN SMALL LETTER R WITH STROKE] + case '\u027C': + // ɼ [LATIN SMALL LETTER R WITH LONG LEG] + case '\u027D': + // ɽ [LATIN SMALL LETTER R WITH TAIL] + case 
'\u027E': + // ɾ [LATIN SMALL LETTER R WITH FISHHOOK] + case '\u027F': + // É¿ [LATIN SMALL LETTER REVERSED R WITH FISHHOOK] + case '\u1D63': + // áµ£ [LATIN SUBSCRIPT SMALL LETTER R] + case '\u1D72': + // áµ² [LATIN SMALL LETTER R WITH MIDDLE TILDE] + case '\u1D73': + // áµ³ [LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE] + case '\u1D89': + // ᶉ [LATIN SMALL LETTER R WITH PALATAL HOOK] + case '\u1E59': + // á¹™ [LATIN SMALL LETTER R WITH DOT ABOVE] + case '\u1E5B': + // á¹› [LATIN SMALL LETTER R WITH DOT BELOW] + case '\u1E5D': + // � [LATIN SMALL LETTER R WITH DOT BELOW AND MACRON] + case '\u1E5F': + // ṟ [LATIN SMALL LETTER R WITH LINE BELOW] + case '\u24E1': + // â“¡ [CIRCLED LATIN SMALL LETTER R] + case '\uA75B': + // � [LATIN SMALL LETTER R ROTUNDA] + case '\uA783': + // ꞃ [LATIN SMALL LETTER INSULAR R] + case '\uFF52': // ï½’ [FULLWIDTH LATIN SMALL LETTER R] + output[outputPos++] = 'r'; + break; + + case '\u24AD': // â’ [PARENTHESIZED LATIN SMALL LETTER R] + output[outputPos++] = '('; + output[outputPos++] = 'r'; + output[outputPos++] = ')'; + break; + + case '\u015A': + // Ã…Å¡ [LATIN CAPITAL LETTER S WITH ACUTE] + case '\u015C': + // Ã…Å“ [LATIN CAPITAL LETTER S WITH CIRCUMFLEX] + case '\u015E': + // Åž [LATIN CAPITAL LETTER S WITH CEDILLA] + case '\u0160': + // Ã…Â [LATIN CAPITAL LETTER S WITH CARON] + case '\u0218': + // Ș [LATIN CAPITAL LETTER S WITH COMMA BELOW] + case '\u1E60': + // á¹ [LATIN CAPITAL LETTER S WITH DOT ABOVE] + case '\u1E62': + // á¹¢ [LATIN CAPITAL LETTER S WITH DOT BELOW] + case '\u1E64': + // Ṥ [LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE] + case '\u1E66': + // Ṧ [LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE] + case '\u1E68': + // Ṩ [LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE] + case '\u24C8': + // Ⓢ [CIRCLED LATIN CAPITAL LETTER S] + case '\uA731': + // ꜱ [LATIN LETTER SMALL CAPITAL S] + case '\uA785': + // êž… [LATIN SMALL LETTER INSULAR S] + case '\uFF33': // ï¼³ [FULLWIDTH LATIN CAPITAL LETTER S] + 
output[outputPos++] = 'S'; + break; + + case '\u015B': + // Å› [LATIN SMALL LETTER S WITH ACUTE] + case '\u015D': + // � [LATIN SMALL LETTER S WITH CIRCUMFLEX] + case '\u015F': + // ÅŸ [LATIN SMALL LETTER S WITH CEDILLA] + case '\u0161': + // Å¡ [LATIN SMALL LETTER S WITH CARON] + case '\u017F': + // Å¿ http://en.wikipedia.org/wiki/Long_S [LATIN SMALL LETTER LONG S] + case '\u0219': + // È™ [LATIN SMALL LETTER S WITH COMMA BELOW] + case '\u023F': + // È¿ [LATIN SMALL LETTER S WITH SWASH TAIL] + case '\u0282': + // Ê‚ [LATIN SMALL LETTER S WITH HOOK] + case '\u1D74': + // áµ´ [LATIN SMALL LETTER S WITH MIDDLE TILDE] + case '\u1D8A': + // ᶊ[LATIN SMALL LETTER S WITH PALATAL HOOK] + case '\u1E61': + // ṡ [LATIN SMALL LETTER S WITH DOT ABOVE] + case '\u1E63': + // á¹£ [LATIN SMALL LETTER S WITH DOT BELOW] + case '\u1E65': + // á¹¥ [LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE] + case '\u1E67': + // ṧ [LATIN SMALL LETTER S WITH CARON AND DOT ABOVE] + case '\u1E69': + // ṩ [LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE] + case '\u1E9C': + // ẜ [LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE] + case '\u1E9D': + // � [LATIN SMALL LETTER LONG S WITH HIGH STROKE] + case '\u24E2': + // â“¢ [CIRCLED LATIN SMALL LETTER S] + case '\uA784': + // êž„ [LATIN CAPITAL LETTER INSULAR S] + case '\uFF53': // s [FULLWIDTH LATIN SMALL LETTER S] + output[outputPos++] = 's'; + break; + + case '\u1E9E': // ẞ [LATIN CAPITAL LETTER SHARP S] + output[outputPos++] = 'S'; + output[outputPos++] = 'S'; + break; + + case '\u24AE': // â’® [PARENTHESIZED LATIN SMALL LETTER S] + output[outputPos++] = '('; + output[outputPos++] = 's'; + output[outputPos++] = ')'; + break; + + case '\u00DF': // ß [LATIN SMALL LETTER SHARP S] + output[outputPos++] = 's'; + output[outputPos++] = 's'; + break; + + case '\uFB06': // st[LATIN SMALL LIGATURE ST] + output[outputPos++] = 's'; + output[outputPos++] = 't'; + break; + + case '\u0162': + // Å¢ [LATIN CAPITAL LETTER T WITH CEDILLA] + case '\u0164': + // Ť [LATIN 
CAPITAL LETTER T WITH CARON] + case '\u0166': + // Ŧ [LATIN CAPITAL LETTER T WITH STROKE] + case '\u01AC': + // Ƭ [LATIN CAPITAL LETTER T WITH HOOK] + case '\u01AE': + // Æ® [LATIN CAPITAL LETTER T WITH RETROFLEX HOOK] + case '\u021A': + // Èš [LATIN CAPITAL LETTER T WITH COMMA BELOW] + case '\u023E': + // Ⱦ [LATIN CAPITAL LETTER T WITH DIAGONAL STROKE] + case '\u1D1B': + // á´› [LATIN LETTER SMALL CAPITAL T] + case '\u1E6A': + // Ṫ [LATIN CAPITAL LETTER T WITH DOT ABOVE] + case '\u1E6C': + // Ṭ [LATIN CAPITAL LETTER T WITH DOT BELOW] + case '\u1E6E': + // á¹® [LATIN CAPITAL LETTER T WITH LINE BELOW] + case '\u1E70': + // á¹° [LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW] + case '\u24C9': + // Ⓣ [CIRCLED LATIN CAPITAL LETTER T] + case '\uA786': + // Ꞇ[LATIN CAPITAL LETTER INSULAR T] + case '\uFF34': // ï¼´ [FULLWIDTH LATIN CAPITAL LETTER T] + output[outputPos++] = 'T'; + break; + + case '\u0163': + // Å£ [LATIN SMALL LETTER T WITH CEDILLA] + case '\u0165': + // Ã…Â¥ [LATIN SMALL LETTER T WITH CARON] + case '\u0167': + // ŧ [LATIN SMALL LETTER T WITH STROKE] + case '\u01AB': + // Æ« [LATIN SMALL LETTER T WITH PALATAL HOOK] + case '\u01AD': + // Æ [LATIN SMALL LETTER T WITH HOOK] + case '\u021B': + // È› [LATIN SMALL LETTER T WITH COMMA BELOW] + case '\u0236': + // ȶ [LATIN SMALL LETTER T WITH CURL] + case '\u0287': + // ʇ [LATIN SMALL LETTER TURNED T] + case '\u0288': + // ʈ [LATIN SMALL LETTER T WITH RETROFLEX HOOK] + case '\u1D75': + // áµµ [LATIN SMALL LETTER T WITH MIDDLE TILDE] + case '\u1E6B': + // ṫ [LATIN SMALL LETTER T WITH DOT ABOVE] + case '\u1E6D': + // á¹ [LATIN SMALL LETTER T WITH DOT BELOW] + case '\u1E6F': + // ṯ [LATIN SMALL LETTER T WITH LINE BELOW] + case '\u1E71': + // á¹± [LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW] + case '\u1E97': + // ẗ [LATIN SMALL LETTER T WITH DIAERESIS] + case '\u24E3': + // â“£ [CIRCLED LATIN SMALL LETTER T] + case '\u2C66': + // ⱦ [LATIN SMALL LETTER T WITH DIAGONAL STROKE] + case '\uFF54': // �? 
[FULLWIDTH LATIN SMALL LETTER T] + output[outputPos++] = 't'; + break; + + case '\u00DE': + // Þ [LATIN CAPITAL LETTER THORN] + case '\uA766': // � [LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER] + output[outputPos++] = 'T'; + output[outputPos++] = 'H'; + break; + + case '\uA728': // Ꜩ [LATIN CAPITAL LETTER TZ] + output[outputPos++] = 'T'; + output[outputPos++] = 'Z'; + break; + + case '\u24AF': // â’¯ [PARENTHESIZED LATIN SMALL LETTER T] + output[outputPos++] = '('; + output[outputPos++] = 't'; + output[outputPos++] = ')'; + break; + + case '\u02A8': // ʨ [LATIN SMALL LETTER TC DIGRAPH WITH CURL] + output[outputPos++] = 't'; + output[outputPos++] = 'c'; + break; + + case '\u00FE': + // þ [LATIN SMALL LETTER THORN] + case '\u1D7A': + // ᵺ [LATIN SMALL LETTER TH WITH STRIKETHROUGH] + case '\uA767': // � [LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER] + output[outputPos++] = 't'; + output[outputPos++] = 'h'; + break; + + case '\u02A6': // ʦ [LATIN SMALL LETTER TS DIGRAPH] + output[outputPos++] = 't'; + output[outputPos++] = 's'; + break; + + case '\uA729': // ꜩ [LATIN SMALL LETTER TZ] + output[outputPos++] = 't'; + output[outputPos++] = 'z'; + break; + + case '\u00D9': + // Ù [LATIN CAPITAL LETTER U WITH GRAVE] + case '\u00DA': + // Ú [LATIN CAPITAL LETTER U WITH ACUTE] + case '\u00DB': + // Û [LATIN CAPITAL LETTER U WITH CIRCUMFLEX] + case '\u00DC': + // Ãœ [LATIN CAPITAL LETTER U WITH DIAERESIS] + case '\u0168': + // Ũ [LATIN CAPITAL LETTER U WITH TILDE] + case '\u016A': + // Ū [LATIN CAPITAL LETTER U WITH MACRON] + case '\u016C': + // Ŭ [LATIN CAPITAL LETTER U WITH BREVE] + case '\u016E': + // Å® [LATIN CAPITAL LETTER U WITH RING ABOVE] + case '\u0170': + // Å° [LATIN CAPITAL LETTER U WITH DOUBLE ACUTE] + case '\u0172': + // Ų [LATIN CAPITAL LETTER U WITH OGONEK] + case '\u01AF': + // Ư [LATIN CAPITAL LETTER U WITH HORN] + case '\u01D3': + // Ç“ [LATIN CAPITAL LETTER U WITH CARON] + case '\u01D5': + // Ç• [LATIN CAPITAL LETTER U WITH DIAERESIS 
AND MACRON] + case '\u01D7': + // Ç— [LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE] + case '\u01D9': + // Ç™ [LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON] + case '\u01DB': + // Ç› [LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE] + case '\u0214': + // �? [LATIN CAPITAL LETTER U WITH DOUBLE GRAVE] + case '\u0216': + // È– [LATIN CAPITAL LETTER U WITH INVERTED BREVE] + case '\u0244': + // É„ [LATIN CAPITAL LETTER U BAR] + case '\u1D1C': + // á´œ [LATIN LETTER SMALL CAPITAL U] + case '\u1D7E': + // áµ¾ [LATIN SMALL CAPITAL LETTER U WITH STROKE] + case '\u1E72': + // á¹² [LATIN CAPITAL LETTER U WITH DIAERESIS BELOW] + case '\u1E74': + // á¹´ [LATIN CAPITAL LETTER U WITH TILDE BELOW] + case '\u1E76': + // Ṷ [LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW] + case '\u1E78': + // Ṹ [LATIN CAPITAL LETTER U WITH TILDE AND ACUTE] + case '\u1E7A': + // Ṻ [LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS] + case '\u1EE4': + // Ụ [LATIN CAPITAL LETTER U WITH DOT BELOW] + case '\u1EE6': + // Ủ [LATIN CAPITAL LETTER U WITH HOOK ABOVE] + case '\u1EE8': + // Ứ [LATIN CAPITAL LETTER U WITH HORN AND ACUTE] + case '\u1EEA': + // Ừ [LATIN CAPITAL LETTER U WITH HORN AND GRAVE] + case '\u1EEC': + // Ử [LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE] + case '\u1EEE': + // á»® [LATIN CAPITAL LETTER U WITH HORN AND TILDE] + case '\u1EF0': + // á»° [LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW] + case '\u24CA': + // â“Š[CIRCLED LATIN CAPITAL LETTER U] + case '\uFF35': // ï¼µ [FULLWIDTH LATIN CAPITAL LETTER U] + output[outputPos++] = 'U'; + break; + + case '\u00F9': + // ù [LATIN SMALL LETTER U WITH GRAVE] + case '\u00FA': + // ú [LATIN SMALL LETTER U WITH ACUTE] + case '\u00FB': + // û [LATIN SMALL LETTER U WITH CIRCUMFLEX] + case '\u00FC': + // ü [LATIN SMALL LETTER U WITH DIAERESIS] + case '\u0169': + // Å© [LATIN SMALL LETTER U WITH TILDE] + case '\u016B': + // Å« [LATIN SMALL LETTER U WITH MACRON] + case '\u016D': + // Ã…Â [LATIN SMALL LETTER U WITH BREVE] + case '\u016F': + // ů 
[LATIN SMALL LETTER U WITH RING ABOVE] + case '\u0171': + // ű [LATIN SMALL LETTER U WITH DOUBLE ACUTE] + case '\u0173': + // ų [LATIN SMALL LETTER U WITH OGONEK] + case '\u01B0': + // Æ° [LATIN SMALL LETTER U WITH HORN] + case '\u01D4': + // �? [LATIN SMALL LETTER U WITH CARON] + case '\u01D6': + // Ç– [LATIN SMALL LETTER U WITH DIAERESIS AND MACRON] + case '\u01D8': + // ǘ [LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE] + case '\u01DA': + // Çš [LATIN SMALL LETTER U WITH DIAERESIS AND CARON] + case '\u01DC': + // Çœ [LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE] + case '\u0215': + // È• [LATIN SMALL LETTER U WITH DOUBLE GRAVE] + case '\u0217': + // È— [LATIN SMALL LETTER U WITH INVERTED BREVE] + case '\u0289': + // ʉ [LATIN SMALL LETTER U BAR] + case '\u1D64': + // ᵤ [LATIN SUBSCRIPT SMALL LETTER U] + case '\u1D99': + // ᶙ [LATIN SMALL LETTER U WITH RETROFLEX HOOK] + case '\u1E73': + // á¹³ [LATIN SMALL LETTER U WITH DIAERESIS BELOW] + case '\u1E75': + // á¹µ [LATIN SMALL LETTER U WITH TILDE BELOW] + case '\u1E77': + // á¹· [LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW] + case '\u1E79': + // á¹¹ [LATIN SMALL LETTER U WITH TILDE AND ACUTE] + case '\u1E7B': + // á¹» [LATIN SMALL LETTER U WITH MACRON AND DIAERESIS] + case '\u1EE5': + // ụ [LATIN SMALL LETTER U WITH DOT BELOW] + case '\u1EE7': + // ủ [LATIN SMALL LETTER U WITH HOOK ABOVE] + case '\u1EE9': + // ứ [LATIN SMALL LETTER U WITH HORN AND ACUTE] + case '\u1EEB': + // ừ [LATIN SMALL LETTER U WITH HORN AND GRAVE] + case '\u1EED': + // á» [LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE] + case '\u1EEF': + // ữ [LATIN SMALL LETTER U WITH HORN AND TILDE] + case '\u1EF1': + // á»± [LATIN SMALL LETTER U WITH HORN AND DOT BELOW] + case '\u24E4': + // ⓤ [CIRCLED LATIN SMALL LETTER U] + case '\uFF55': // u [FULLWIDTH LATIN SMALL LETTER U] + output[outputPos++] = 'u'; + break; + + case '\u24B0': // â’° [PARENTHESIZED LATIN SMALL LETTER U] + output[outputPos++] = '('; + output[outputPos++] = 'u'; + output[outputPos++] = 
')'; + break; + + case '\u1D6B': // ᵫ [LATIN SMALL LETTER UE] + output[outputPos++] = 'u'; + output[outputPos++] = 'e'; + break; + + case '\u01B2': + // Ʋ [LATIN CAPITAL LETTER V WITH HOOK] + case '\u0245': + // É… [LATIN CAPITAL LETTER TURNED V] + case '\u1D20': + // á´ [LATIN LETTER SMALL CAPITAL V] + case '\u1E7C': + // á¹¼ [LATIN CAPITAL LETTER V WITH TILDE] + case '\u1E7E': + // á¹¾ [LATIN CAPITAL LETTER V WITH DOT BELOW] + case '\u1EFC': + // Ỽ [LATIN CAPITAL LETTER MIDDLE-WELSH V] + case '\u24CB': + // â“‹ [CIRCLED LATIN CAPITAL LETTER V] + case '\uA75E': + // � [LATIN CAPITAL LETTER V WITH DIAGONAL STROKE] + case '\uA768': + // � [LATIN CAPITAL LETTER VEND] + case '\uFF36': // V [FULLWIDTH LATIN CAPITAL LETTER V] + output[outputPos++] = 'V'; + break; + + case '\u028B': + // Ê‹ [LATIN SMALL LETTER V WITH HOOK] + case '\u028C': + // ÊŒ [LATIN SMALL LETTER TURNED V] + case '\u1D65': + // áµ¥ [LATIN SUBSCRIPT SMALL LETTER V] + case '\u1D8C': + // ᶌ [LATIN SMALL LETTER V WITH PALATAL HOOK] + case '\u1E7D': + // á¹½ [LATIN SMALL LETTER V WITH TILDE] + case '\u1E7F': + // ṿ [LATIN SMALL LETTER V WITH DOT BELOW] + case '\u24E5': + // â“¥ [CIRCLED LATIN SMALL LETTER V] + case '\u2C71': + // â±± [LATIN SMALL LETTER V WITH RIGHT HOOK] + case '\u2C74': + // â±´ [LATIN SMALL LETTER V WITH CURL] + case '\uA75F': + // � [LATIN SMALL LETTER V WITH DIAGONAL STROKE] + case '\uFF56': // ï½– [FULLWIDTH LATIN SMALL LETTER V] + output[outputPos++] = 'v'; + break; + + case '\uA760': // �[LATIN CAPITAL LETTER VY] + output[outputPos++] = 'V'; + output[outputPos++] = 'Y'; + break; + + case '\u24B1': // â’± [PARENTHESIZED LATIN SMALL LETTER V] + output[outputPos++] = '('; + output[outputPos++] = 'v'; + output[outputPos++] = ')'; + break; + + case '\uA761': // � [LATIN SMALL LETTER VY] + output[outputPos++] = 'v'; + output[outputPos++] = 'y'; + break; + + case '\u0174': + // Å´ [LATIN CAPITAL LETTER W WITH CIRCUMFLEX] + case '\u01F7': + // Ç· http://en.wikipedia.org/wiki/Wynn [LATIN 
CAPITAL LETTER WYNN] + case '\u1D21': + // á´¡ [LATIN LETTER SMALL CAPITAL W] + case '\u1E80': + // Ẁ [LATIN CAPITAL LETTER W WITH GRAVE] + case '\u1E82': + // Ẃ [LATIN CAPITAL LETTER W WITH ACUTE] + case '\u1E84': + // Ẅ [LATIN CAPITAL LETTER W WITH DIAERESIS] + case '\u1E86': + // Ẇ[LATIN CAPITAL LETTER W WITH DOT ABOVE] + case '\u1E88': + // Ẉ [LATIN CAPITAL LETTER W WITH DOT BELOW] + case '\u24CC': + // â“Œ [CIRCLED LATIN CAPITAL LETTER W] + case '\u2C72': + // â±² [LATIN CAPITAL LETTER W WITH HOOK] + case '\uFF37': // ï¼· [FULLWIDTH LATIN CAPITAL LETTER W] + output[outputPos++] = 'W'; + break; + + case '\u0175': + // ŵ [LATIN SMALL LETTER W WITH CIRCUMFLEX] + case '\u01BF': + // Æ¿ http://en.wikipedia.org/wiki/Wynn [LATIN LETTER WYNN] + case '\u028D': + // � [LATIN SMALL LETTER TURNED W] + case '\u1E81': + // � [LATIN SMALL LETTER W WITH GRAVE] + case '\u1E83': + // ẃ [LATIN SMALL LETTER W WITH ACUTE] + case '\u1E85': + // ẅ [LATIN SMALL LETTER W WITH DIAERESIS] + case '\u1E87': + // ẇ [LATIN SMALL LETTER W WITH DOT ABOVE] + case '\u1E89': + // ẉ [LATIN SMALL LETTER W WITH DOT BELOW] + case '\u1E98': + // ẘ [LATIN SMALL LETTER W WITH RING ABOVE] + case '\u24E6': + // ⓦ [CIRCLED LATIN SMALL LETTER W] + case '\u2C73': + // â±³ [LATIN SMALL LETTER W WITH HOOK] + case '\uFF57': // ï½— [FULLWIDTH LATIN SMALL LETTER W] + output[outputPos++] = 'w'; + break; + + case '\u24B2': // â’² [PARENTHESIZED LATIN SMALL LETTER W] + output[outputPos++] = '('; + output[outputPos++] = 'w'; + output[outputPos++] = ')'; + break; + + case '\u1E8A': + // Ẋ[LATIN CAPITAL LETTER X WITH DOT ABOVE] + case '\u1E8C': + // Ẍ [LATIN CAPITAL LETTER X WITH DIAERESIS] + case '\u24CD': + // � [CIRCLED LATIN CAPITAL LETTER X] + case '\uFF38': // X [FULLWIDTH LATIN CAPITAL LETTER X] + output[outputPos++] = 'X'; + break; + + case '\u1D8D': + // � [LATIN SMALL LETTER X WITH PALATAL HOOK] + case '\u1E8B': + // ẋ [LATIN SMALL LETTER X WITH DOT ABOVE] + case '\u1E8D': + // � [LATIN SMALL LETTER X WITH 
DIAERESIS] + case '\u2093': + // â‚“ [LATIN SUBSCRIPT SMALL LETTER X] + case '\u24E7': + // ⓧ [CIRCLED LATIN SMALL LETTER X] + case '\uFF58': // x [FULLWIDTH LATIN SMALL LETTER X] + output[outputPos++] = 'x'; + break; + + case '\u24B3': // â’³ [PARENTHESIZED LATIN SMALL LETTER X] + output[outputPos++] = '('; + output[outputPos++] = 'x'; + output[outputPos++] = ')'; + break; + + case '\u00DD': + // � [LATIN CAPITAL LETTER Y WITH ACUTE] + case '\u0176': + // Ŷ [LATIN CAPITAL LETTER Y WITH CIRCUMFLEX] + case '\u0178': + // Ÿ [LATIN CAPITAL LETTER Y WITH DIAERESIS] + case '\u01B3': + // Ƴ [LATIN CAPITAL LETTER Y WITH HOOK] + case '\u0232': + // Ȳ [LATIN CAPITAL LETTER Y WITH MACRON] + case '\u024E': + // ÉŽ [LATIN CAPITAL LETTER Y WITH STROKE] + case '\u028F': + // � [LATIN LETTER SMALL CAPITAL Y] + case '\u1E8E': + // Ẏ [LATIN CAPITAL LETTER Y WITH DOT ABOVE] + case '\u1EF2': + // Ỳ [LATIN CAPITAL LETTER Y WITH GRAVE] + case '\u1EF4': + // á»´ [LATIN CAPITAL LETTER Y WITH DOT BELOW] + case '\u1EF6': + // Ỷ [LATIN CAPITAL LETTER Y WITH HOOK ABOVE] + case '\u1EF8': + // Ỹ [LATIN CAPITAL LETTER Y WITH TILDE] + case '\u1EFE': + // Ỿ [LATIN CAPITAL LETTER Y WITH LOOP] + case '\u24CE': + // â“Ž [CIRCLED LATIN CAPITAL LETTER Y] + case '\uFF39': // ï¼¹ [FULLWIDTH LATIN CAPITAL LETTER Y] + output[outputPos++] = 'Y'; + break; + + case '\u00FD': + // ý [LATIN SMALL LETTER Y WITH ACUTE] + case '\u00FF': + // ÿ [LATIN SMALL LETTER Y WITH DIAERESIS] + case '\u0177': + // Å· [LATIN SMALL LETTER Y WITH CIRCUMFLEX] + case '\u01B4': + // Æ´ [LATIN SMALL LETTER Y WITH HOOK] + case '\u0233': + // ȳ [LATIN SMALL LETTER Y WITH MACRON] + case '\u024F': + // � [LATIN SMALL LETTER Y WITH STROKE] + case '\u028E': + // ÊŽ [LATIN SMALL LETTER TURNED Y] + case '\u1E8F': + // � [LATIN SMALL LETTER Y WITH DOT ABOVE] + case '\u1E99': + // ẙ [LATIN SMALL LETTER Y WITH RING ABOVE] + case '\u1EF3': + // ỳ [LATIN SMALL LETTER Y WITH GRAVE] + case '\u1EF5': + // ỵ [LATIN SMALL LETTER Y WITH DOT BELOW] + 
case '\u1EF7': + // á»· [LATIN SMALL LETTER Y WITH HOOK ABOVE] + case '\u1EF9': + // ỹ [LATIN SMALL LETTER Y WITH TILDE] + case '\u1EFF': + // ỿ [LATIN SMALL LETTER Y WITH LOOP] + case '\u24E8': + // ⓨ [CIRCLED LATIN SMALL LETTER Y] + case '\uFF59': // ï½™ [FULLWIDTH LATIN SMALL LETTER Y] + output[outputPos++] = 'y'; + break; + + case '\u24B4': // â’´ [PARENTHESIZED LATIN SMALL LETTER Y] + output[outputPos++] = '('; + output[outputPos++] = 'y'; + output[outputPos++] = ')'; + break; + + case '\u0179': + // Ź [LATIN CAPITAL LETTER Z WITH ACUTE] + case '\u017B': + // Å» [LATIN CAPITAL LETTER Z WITH DOT ABOVE] + case '\u017D': + // Ž [LATIN CAPITAL LETTER Z WITH CARON] + case '\u01B5': + // Ƶ [LATIN CAPITAL LETTER Z WITH STROKE] + case '\u021C': + // Èœ http://en.wikipedia.org/wiki/Yogh [LATIN CAPITAL LETTER YOGH] + case '\u0224': + // Ȥ [LATIN CAPITAL LETTER Z WITH HOOK] + case '\u1D22': + // á´¢ [LATIN LETTER SMALL CAPITAL Z] + case '\u1E90': + // � [LATIN CAPITAL LETTER Z WITH CIRCUMFLEX] + case '\u1E92': + // Ẓ [LATIN CAPITAL LETTER Z WITH DOT BELOW] + case '\u1E94': + // �? 
[LATIN CAPITAL LETTER Z WITH LINE BELOW] + case '\u24CF': + // � [CIRCLED LATIN CAPITAL LETTER Z] + case '\u2C6B': + // Ⱬ [LATIN CAPITAL LETTER Z WITH DESCENDER] + case '\uA762': + // � [LATIN CAPITAL LETTER VISIGOTHIC Z] + case '\uFF3A': // Z [FULLWIDTH LATIN CAPITAL LETTER Z] + output[outputPos++] = 'Z'; + break; + + case '\u017A': + // ź [LATIN SMALL LETTER Z WITH ACUTE] + case '\u017C': + // ż [LATIN SMALL LETTER Z WITH DOT ABOVE] + case '\u017E': + // ž [LATIN SMALL LETTER Z WITH CARON] + case '\u01B6': + // ƶ [LATIN SMALL LETTER Z WITH STROKE] + case '\u021D': + // � http://en.wikipedia.org/wiki/Yogh [LATIN SMALL LETTER YOGH] + case '\u0225': + // È¥ [LATIN SMALL LETTER Z WITH HOOK] + case '\u0240': + // É€ [LATIN SMALL LETTER Z WITH SWASH TAIL] + case '\u0290': + // � [LATIN SMALL LETTER Z WITH RETROFLEX HOOK] + case '\u0291': + // Ê‘ [LATIN SMALL LETTER Z WITH CURL] + case '\u1D76': + // ᵶ [LATIN SMALL LETTER Z WITH MIDDLE TILDE] + case '\u1D8E': + // ᶎ [LATIN SMALL LETTER Z WITH PALATAL HOOK] + case '\u1E91': + // ẑ [LATIN SMALL LETTER Z WITH CIRCUMFLEX] + case '\u1E93': + // ẓ [LATIN SMALL LETTER Z WITH DOT BELOW] + case '\u1E95': + // ẕ [LATIN SMALL LETTER Z WITH LINE BELOW] + case '\u24E9': + // â“© [CIRCLED LATIN SMALL LETTER Z] + case '\u2C6C': + // ⱬ [LATIN SMALL LETTER Z WITH DESCENDER] + case '\uA763': + // � [LATIN SMALL LETTER VISIGOTHIC Z] + case '\uFF5A': // z [FULLWIDTH LATIN SMALL LETTER Z] + output[outputPos++] = 'z'; + break; + + case '\u24B5': // â’µ [PARENTHESIZED LATIN SMALL LETTER Z] + output[outputPos++] = '('; + output[outputPos++] = 'z'; + output[outputPos++] = ')'; + break; + + case '\u2070': + // � [SUPERSCRIPT ZERO] + case '\u2080': + // â‚€ [SUBSCRIPT ZERO] + case '\u24EA': + // ⓪ [CIRCLED DIGIT ZERO] + case '\u24FF': + // â“¿ [NEGATIVE CIRCLED DIGIT ZERO] + case '\uFF10': // � [FULLWIDTH DIGIT ZERO] + output[outputPos++] = '0'; + break; + + case '\u00B9': + // ¹ [SUPERSCRIPT ONE] + case '\u2081': + // � [SUBSCRIPT ONE] + case 
'\u2460': + // â‘ [CIRCLED DIGIT ONE] + case '\u24F5': + // ⓵ [DOUBLE CIRCLED DIGIT ONE] + case '\u2776': + // � [DINGBAT NEGATIVE CIRCLED DIGIT ONE] + case '\u2780': + // ➀ [DINGBAT CIRCLED SANS-SERIF DIGIT ONE] + case '\u278A': + // ➊[DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE] + case '\uFF11': // 1 [FULLWIDTH DIGIT ONE] + output[outputPos++] = '1'; + break; + + case '\u2488': // â’ˆ [DIGIT ONE FULL STOP] + output[outputPos++] = '1'; + output[outputPos++] = '.'; + break; + + case '\u2474': // â‘´ [PARENTHESIZED DIGIT ONE] + output[outputPos++] = '('; + output[outputPos++] = '1'; + output[outputPos++] = ')'; + break; + + case '\u00B2': + // ² [SUPERSCRIPT TWO] + case '\u2082': + // â‚‚ [SUBSCRIPT TWO] + case '\u2461': + // â‘¡ [CIRCLED DIGIT TWO] + case '\u24F6': + // ⓶ [DOUBLE CIRCLED DIGIT TWO] + case '\u2777': + // � [DINGBAT NEGATIVE CIRCLED DIGIT TWO] + case '\u2781': + // � [DINGBAT CIRCLED SANS-SERIF DIGIT TWO] + case '\u278B': + // âž‹ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO] + case '\uFF12': // ï¼’ [FULLWIDTH DIGIT TWO] + output[outputPos++] = '2'; + break; + + case '\u2489': // â’‰ [DIGIT TWO FULL STOP] + output[outputPos++] = '2'; + output[outputPos++] = '.'; + break; + + case '\u2475': // ⑵ [PARENTHESIZED DIGIT TWO] + output[outputPos++] = '('; + output[outputPos++] = '2'; + output[outputPos++] = ')'; + break; + + case '\u00B3': + // ³ [SUPERSCRIPT THREE] + case '\u2083': + // ₃ [SUBSCRIPT THREE] + case '\u2462': + // â‘¢ [CIRCLED DIGIT THREE] + case '\u24F7': + // â“· [DOUBLE CIRCLED DIGIT THREE] + case '\u2778': + // � [DINGBAT NEGATIVE CIRCLED DIGIT THREE] + case '\u2782': + // âž‚ [DINGBAT CIRCLED SANS-SERIF DIGIT THREE] + case '\u278C': + // ➌ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE] + case '\uFF13': // 3 [FULLWIDTH DIGIT THREE] + output[outputPos++] = '3'; + break; + + case '\u248A': // â’Š[DIGIT THREE FULL STOP] + output[outputPos++] = '3'; + output[outputPos++] = '.'; + break; + + case '\u2476': // ⑶ [PARENTHESIZED DIGIT 
THREE] + output[outputPos++] = '('; + output[outputPos++] = '3'; + output[outputPos++] = ')'; + break; + + case '\u2074': + // � [SUPERSCRIPT FOUR] + case '\u2084': + // â‚„ [SUBSCRIPT FOUR] + case '\u2463': + // â‘£ [CIRCLED DIGIT FOUR] + case '\u24F8': + // ⓸ [DOUBLE CIRCLED DIGIT FOUR] + case '\u2779': + // � [DINGBAT NEGATIVE CIRCLED DIGIT FOUR] + case '\u2783': + // ➃ [DINGBAT CIRCLED SANS-SERIF DIGIT FOUR] + case '\u278D': + // � [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR] + case '\uFF14': // �? [FULLWIDTH DIGIT FOUR] + output[outputPos++] = '4'; + break; + + case '\u248B': // â’‹ [DIGIT FOUR FULL STOP] + output[outputPos++] = '4'; + output[outputPos++] = '.'; + break; + + case '\u2477': // â‘· [PARENTHESIZED DIGIT FOUR] + output[outputPos++] = '('; + output[outputPos++] = '4'; + output[outputPos++] = ')'; + break; + + case '\u2075': + // � [SUPERSCRIPT FIVE] + case '\u2085': + // â‚… [SUBSCRIPT FIVE] + case '\u2464': + // ⑤ [CIRCLED DIGIT FIVE] + case '\u24F9': + // ⓹ [DOUBLE CIRCLED DIGIT FIVE] + case '\u277A': + // � [DINGBAT NEGATIVE CIRCLED DIGIT FIVE] + case '\u2784': + // âž„ [DINGBAT CIRCLED SANS-SERIF DIGIT FIVE] + case '\u278E': + // ➎ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE] + case '\uFF15': // 5 [FULLWIDTH DIGIT FIVE] + output[outputPos++] = '5'; + break; + + case '\u248C': // â’Œ [DIGIT FIVE FULL STOP] + output[outputPos++] = '5'; + output[outputPos++] = '.'; + break; + + case '\u2478': // ⑸ [PARENTHESIZED DIGIT FIVE] + output[outputPos++] = '('; + output[outputPos++] = '5'; + output[outputPos++] = ')'; + break; + + case '\u2076': + // � [SUPERSCRIPT SIX] + case '\u2086': + // ₆[SUBSCRIPT SIX] + case '\u2465': + // â‘¥ [CIRCLED DIGIT SIX] + case '\u24FA': + // ⓺ [DOUBLE CIRCLED DIGIT SIX] + case '\u277B': + // � [DINGBAT NEGATIVE CIRCLED DIGIT SIX] + case '\u2785': + // âž… [DINGBAT CIRCLED SANS-SERIF DIGIT SIX] + case '\u278F': + // � [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX] + case '\uFF16': // ï¼– [FULLWIDTH DIGIT SIX] + 
output[outputPos++] = '6'; + break; + + case '\u248D': // â’� [DIGIT SIX FULL STOP] + output[outputPos++] = '6'; + output[outputPos++] = '.'; + break; + + case '\u2479': // ⑹ [PARENTHESIZED DIGIT SIX] + output[outputPos++] = '('; + output[outputPos++] = '6'; + output[outputPos++] = ')'; + break; + + case '\u2077': + // � [SUPERSCRIPT SEVEN] + case '\u2087': + // ₇ [SUBSCRIPT SEVEN] + case '\u2466': + // ⑦ [CIRCLED DIGIT SEVEN] + case '\u24FB': + // â“» [DOUBLE CIRCLED DIGIT SEVEN] + case '\u277C': + // � [DINGBAT NEGATIVE CIRCLED DIGIT SEVEN] + case '\u2786': + // ➆[DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN] + case '\u2790': + // � [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN] + case '\uFF17': // ï¼— [FULLWIDTH DIGIT SEVEN] + output[outputPos++] = '7'; + break; + + case '\u248E': // â’Ž [DIGIT SEVEN FULL STOP] + output[outputPos++] = '7'; + output[outputPos++] = '.'; + break; + + case '\u247A': // ⑺ [PARENTHESIZED DIGIT SEVEN] + output[outputPos++] = '('; + output[outputPos++] = '7'; + output[outputPos++] = ')'; + break; + + case '\u2078': + // � [SUPERSCRIPT EIGHT] + case '\u2088': + // ₈ [SUBSCRIPT EIGHT] + case '\u2467': + // ⑧ [CIRCLED DIGIT EIGHT] + case '\u24FC': + // ⓼ [DOUBLE CIRCLED DIGIT EIGHT] + case '\u277D': + // � [DINGBAT NEGATIVE CIRCLED DIGIT EIGHT] + case '\u2787': + // ➇ [DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT] + case '\u2791': + // âž‘ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT] + case '\uFF18': // 8 [FULLWIDTH DIGIT EIGHT] + output[outputPos++] = '8'; + break; + + case '\u248F': // â’� [DIGIT EIGHT FULL STOP] + output[outputPos++] = '8'; + output[outputPos++] = '.'; + break; + + case '\u247B': // â‘» [PARENTHESIZED DIGIT EIGHT] + output[outputPos++] = '('; + output[outputPos++] = '8'; + output[outputPos++] = ')'; + break; + + case '\u2079': + // � [SUPERSCRIPT NINE] + case '\u2089': + // ₉ [SUBSCRIPT NINE] + case '\u2468': + // ⑨ [CIRCLED DIGIT NINE] + case '\u24FD': + // ⓽ [DOUBLE CIRCLED DIGIT NINE] + case '\u277E': + // � [DINGBAT 
NEGATIVE CIRCLED DIGIT NINE] + case '\u2788': + // ➈ [DINGBAT CIRCLED SANS-SERIF DIGIT NINE] + case '\u2792': + // âž’ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE] + case '\uFF19': // ï¼™ [FULLWIDTH DIGIT NINE] + output[outputPos++] = '9'; + break; + + case '\u2490': // â’� [DIGIT NINE FULL STOP] + output[outputPos++] = '9'; + output[outputPos++] = '.'; + break; + + case '\u247C': // ⑼ [PARENTHESIZED DIGIT NINE] + output[outputPos++] = '('; + output[outputPos++] = '9'; + output[outputPos++] = ')'; + break; + + case '\u2469': + // â‘© [CIRCLED NUMBER TEN] + case '\u24FE': + // ⓾ [DOUBLE CIRCLED NUMBER TEN] + case '\u277F': + // � [DINGBAT NEGATIVE CIRCLED NUMBER TEN] + case '\u2789': + // ➉ [DINGBAT CIRCLED SANS-SERIF NUMBER TEN] + case '\u2793': // âž“ [DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN] + output[outputPos++] = '1'; + output[outputPos++] = '0'; + break; + + case '\u2491': // â’‘ [NUMBER TEN FULL STOP] + output[outputPos++] = '1'; + output[outputPos++] = '0'; + output[outputPos++] = '.'; + break; + + case '\u247D': // ⑽ [PARENTHESIZED NUMBER TEN] + output[outputPos++] = '('; + output[outputPos++] = '1'; + output[outputPos++] = '0'; + output[outputPos++] = ')'; + break; + + case '\u246A': + // ⑪ [CIRCLED NUMBER ELEVEN] + case '\u24EB': // â“« [NEGATIVE CIRCLED NUMBER ELEVEN] + output[outputPos++] = '1'; + output[outputPos++] = '1'; + break; + + case '\u2492': // â’’ [NUMBER ELEVEN FULL STOP] + output[outputPos++] = '1'; + output[outputPos++] = '1'; + output[outputPos++] = '.'; + break; + + case '\u247E': // ⑾ [PARENTHESIZED NUMBER ELEVEN] + output[outputPos++] = '('; + output[outputPos++] = '1'; + output[outputPos++] = '1'; + output[outputPos++] = ')'; + break; + + case '\u246B': + // â‘« [CIRCLED NUMBER TWELVE] + case '\u24EC': // ⓬ [NEGATIVE CIRCLED NUMBER TWELVE] + output[outputPos++] = '1'; + output[outputPos++] = '2'; + break; + + case '\u2493': // â’“ [NUMBER TWELVE FULL STOP] + output[outputPos++] = '1'; + output[outputPos++] = '2'; + 
output[outputPos++] = '.'; + break; + + case '\u247F': // â‘¿ [PARENTHESIZED NUMBER TWELVE] + output[outputPos++] = '('; + output[outputPos++] = '1'; + output[outputPos++] = '2'; + output[outputPos++] = ')'; + break; + + case '\u246C': + // ⑬ [CIRCLED NUMBER THIRTEEN] + case '\u24ED': // â“ [NEGATIVE CIRCLED NUMBER THIRTEEN] + output[outputPos++] = '1'; + output[outputPos++] = '3'; + break; + + case '\u2494': // â’�? [NUMBER THIRTEEN FULL STOP] + output[outputPos++] = '1'; + output[outputPos++] = '3'; + output[outputPos++] = '.'; + break; + + case '\u2480': // â’€ [PARENTHESIZED NUMBER THIRTEEN] + output[outputPos++] = '('; + output[outputPos++] = '1'; + output[outputPos++] = '3'; + output[outputPos++] = ')'; + break; + + case '\u246D': + // â‘ [CIRCLED NUMBER FOURTEEN] + case '\u24EE': // â“® [NEGATIVE CIRCLED NUMBER FOURTEEN] + output[outputPos++] = '1'; + output[outputPos++] = '4'; + break; + + case '\u2495': // â’• [NUMBER FOURTEEN FULL STOP] + output[outputPos++] = '1'; + output[outputPos++] = '4'; + output[outputPos++] = '.'; + break; + + case '\u2481': // â’� [PARENTHESIZED NUMBER FOURTEEN] + output[outputPos++] = '('; + output[outputPos++] = '1'; + output[outputPos++] = '4'; + output[outputPos++] = ')'; + break; + + case '\u246E': + // â‘® [CIRCLED NUMBER FIFTEEN] + case '\u24EF': // ⓯ [NEGATIVE CIRCLED NUMBER FIFTEEN] + output[outputPos++] = '1'; + output[outputPos++] = '5'; + break; + + case '\u2496': // â’– [NUMBER FIFTEEN FULL STOP] + output[outputPos++] = '1'; + output[outputPos++] = '5'; + output[outputPos++] = '.'; + break; + + case '\u2482': // â’‚ [PARENTHESIZED NUMBER FIFTEEN] + output[outputPos++] = '('; + output[outputPos++] = '1'; + output[outputPos++] = '5'; + output[outputPos++] = ')'; + break; + + case '\u246F': + // ⑯ [CIRCLED NUMBER SIXTEEN] + case '\u24F0': // â“° [NEGATIVE CIRCLED NUMBER SIXTEEN] + output[outputPos++] = '1'; + output[outputPos++] = '6'; + break; + + case '\u2497': // â’— [NUMBER SIXTEEN FULL STOP] + output[outputPos++] = 
'1'; + output[outputPos++] = '6'; + output[outputPos++] = '.'; + break; + + case '\u2483': // â’ƒ [PARENTHESIZED NUMBER SIXTEEN] + output[outputPos++] = '('; + output[outputPos++] = '1'; + output[outputPos++] = '6'; + output[outputPos++] = ')'; + break; + + case '\u2470': + // â‘° [CIRCLED NUMBER SEVENTEEN] + case '\u24F1': // ⓱ [NEGATIVE CIRCLED NUMBER SEVENTEEN] + output[outputPos++] = '1'; + output[outputPos++] = '7'; + break; + + case '\u2498': // â’˜ [NUMBER SEVENTEEN FULL STOP] + output[outputPos++] = '1'; + output[outputPos++] = '7'; + output[outputPos++] = '.'; + break; + + case '\u2484': // â’„ [PARENTHESIZED NUMBER SEVENTEEN] + output[outputPos++] = '('; + output[outputPos++] = '1'; + output[outputPos++] = '7'; + output[outputPos++] = ')'; + break; + + case '\u2471': + // ⑱ [CIRCLED NUMBER EIGHTEEN] + case '\u24F2': // ⓲ [NEGATIVE CIRCLED NUMBER EIGHTEEN] + output[outputPos++] = '1'; + output[outputPos++] = '8'; + break; + + case '\u2499': // â’™ [NUMBER EIGHTEEN FULL STOP] + output[outputPos++] = '1'; + output[outputPos++] = '8'; + output[outputPos++] = '.'; + break; + + case '\u2485': // â’… [PARENTHESIZED NUMBER EIGHTEEN] + output[outputPos++] = '('; + output[outputPos++] = '1'; + output[outputPos++] = '8'; + output[outputPos++] = ')'; + break; + + case '\u2472': + // ⑲ [CIRCLED NUMBER NINETEEN] + case '\u24F3': // ⓳ [NEGATIVE CIRCLED NUMBER NINETEEN] + output[outputPos++] = '1'; + output[outputPos++] = '9'; + break; + + case '\u249A': // â’š [NUMBER NINETEEN FULL STOP] + output[outputPos++] = '1'; + output[outputPos++] = '9'; + output[outputPos++] = '.'; + break; + + case '\u2486': // â’†[PARENTHESIZED NUMBER NINETEEN] + output[outputPos++] = '('; + output[outputPos++] = '1'; + output[outputPos++] = '9'; + output[outputPos++] = ')'; + break; + + case '\u2473': + // ⑳ [CIRCLED NUMBER TWENTY] + case '\u24F4': // â“´ [NEGATIVE CIRCLED NUMBER TWENTY] + output[outputPos++] = '2'; + output[outputPos++] = '0'; + break; + + case '\u249B': // â’› [NUMBER 
TWENTY FULL STOP] + output[outputPos++] = '2'; + output[outputPos++] = '0'; + output[outputPos++] = '.'; + break; + + case '\u2487': // â’‡ [PARENTHESIZED NUMBER TWENTY] + output[outputPos++] = '('; + output[outputPos++] = '2'; + output[outputPos++] = '0'; + output[outputPos++] = ')'; + break; + + case '\u00AB': + // « [LEFT-POINTING DOUBLE ANGLE QUOTATION MARK] + case '\u00BB': + // » [RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK] + case '\u201C': + // “ [LEFT DOUBLE QUOTATION MARK] + case '\u201D': + // � [RIGHT DOUBLE QUOTATION MARK] + case '\u201E': + // „ [DOUBLE LOW-9 QUOTATION MARK] + case '\u2033': + // ″ [DOUBLE PRIME] + case '\u2036': + // ‶ [REVERSED DOUBLE PRIME] + case '\u275D': + // � [HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT] + case '\u275E': + // � [HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT] + case '\u276E': + // � [HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT] + case '\u276F': + // � [HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT] + case '\uFF02': // " [FULLWIDTH QUOTATION MARK] + output[outputPos++] = '"'; + break; + + case '\u2018': + // ‘ [LEFT SINGLE QUOTATION MARK] + case '\u2019': + // ’ [RIGHT SINGLE QUOTATION MARK] + case '\u201A': + // ‚ [SINGLE LOW-9 QUOTATION MARK] + case '\u201B': + // ‛ [SINGLE HIGH-REVERSED-9 QUOTATION MARK] + case '\u2032': + // ′ [PRIME] + case '\u2035': + // ‵ [REVERSED PRIME] + case '\u2039': + // ‹ [SINGLE LEFT-POINTING ANGLE QUOTATION MARK] + case '\u203A': + // › [SINGLE RIGHT-POINTING ANGLE QUOTATION MARK] + case '\u275B': + // � [HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT] + case '\u275C': + // � [HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT] + case '\uFF07': // ' [FULLWIDTH APOSTROPHE] + output[outputPos++] = '\''; + break; + + case '\u2010': + // � [HYPHEN] + case '\u2011': + // ‑ [NON-BREAKING HYPHEN] + case '\u2012': + // ‒ [FIGURE DASH] + case '\u2013': + // – [EN DASH] + case '\u2014': + // �? 
[EM DASH] + case '\u207B': + // � [SUPERSCRIPT MINUS] + case '\u208B': + // â‚‹ [SUBSCRIPT MINUS] + case '\uFF0D': // � [FULLWIDTH HYPHEN-MINUS] + output[outputPos++] = '-'; + break; + + case '\u2045': + // � [LEFT SQUARE BRACKET WITH QUILL] + case '\u2772': + // � [LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT] + case '\uFF3B': // ï¼» [FULLWIDTH LEFT SQUARE BRACKET] + output[outputPos++] = '['; + break; + + case '\u2046': + // �[RIGHT SQUARE BRACKET WITH QUILL] + case '\u2773': + // � [LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT] + case '\uFF3D': // ï¼½ [FULLWIDTH RIGHT SQUARE BRACKET] + output[outputPos++] = ']'; + break; + + case '\u207D': + // � [SUPERSCRIPT LEFT PARENTHESIS] + case '\u208D': + // � [SUBSCRIPT LEFT PARENTHESIS] + case '\u2768': + // � [MEDIUM LEFT PARENTHESIS ORNAMENT] + case '\u276A': + // � [MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT] + case '\uFF08': // ( [FULLWIDTH LEFT PARENTHESIS] + output[outputPos++] = '('; + break; + + case '\u2E28': // ⸨ [LEFT DOUBLE PARENTHESIS] + output[outputPos++] = '('; + output[outputPos++] = '('; + break; + + case '\u207E': + // � [SUPERSCRIPT RIGHT PARENTHESIS] + case '\u208E': + // â‚Ž [SUBSCRIPT RIGHT PARENTHESIS] + case '\u2769': + // � [MEDIUM RIGHT PARENTHESIS ORNAMENT] + case '\u276B': + // � [MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT] + case '\uFF09': // ) [FULLWIDTH RIGHT PARENTHESIS] + output[outputPos++] = ')'; + break; + + case '\u2E29': // ⸩ [RIGHT DOUBLE PARENTHESIS] + output[outputPos++] = ')'; + output[outputPos++] = ')'; + break; + + case '\u276C': + // � [MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT] + case '\u2770': + // � [HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT] + case '\uFF1C': // < [FULLWIDTH LESS-THAN SIGN] + output[outputPos++] = '<'; + break; + + case '\u276D': + // �[MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT] + case '\u2771': + // � [HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT] + case '\uFF1E': // > [FULLWIDTH GREATER-THAN SIGN] + output[outputPos++] = '>'; + break; + + case 
'\u2774': + // � [MEDIUM LEFT CURLY BRACKET ORNAMENT] + case '\uFF5B': // ï½› [FULLWIDTH LEFT CURLY BRACKET] + output[outputPos++] = '{'; + break; + + case '\u2775': + // � [MEDIUM RIGHT CURLY BRACKET ORNAMENT] + case '\uFF5D': // � [FULLWIDTH RIGHT CURLY BRACKET] + output[outputPos++] = '}'; + break; + + case '\u207A': + // � [SUPERSCRIPT PLUS SIGN] + case '\u208A': + // â‚Š[SUBSCRIPT PLUS SIGN] + case '\uFF0B': // + [FULLWIDTH PLUS SIGN] + output[outputPos++] = '+'; + break; + + case '\u207C': + // � [SUPERSCRIPT EQUALS SIGN] + case '\u208C': + // â‚Œ [SUBSCRIPT EQUALS SIGN] + case '\uFF1D': // � [FULLWIDTH EQUALS SIGN] + output[outputPos++] = '='; + break; + + case '\uFF01': // � [FULLWIDTH EXCLAMATION MARK] + output[outputPos++] = '!'; + break; + + case '\u203C': // ‼ [DOUBLE EXCLAMATION MARK] + output[outputPos++] = '!'; + output[outputPos++] = '!'; + break; + + case '\u2049': // � [EXCLAMATION QUESTION MARK] + output[outputPos++] = '!'; + output[outputPos++] = '?'; + break; + + case '\uFF03': // # [FULLWIDTH NUMBER SIGN] + output[outputPos++] = '#'; + break; + + case '\uFF04': // $ [FULLWIDTH DOLLAR SIGN] + output[outputPos++] = '$'; + break; + + case '\u2052': + // � [COMMERCIAL MINUS SIGN] + case '\uFF05': // ï¼… [FULLWIDTH PERCENT SIGN] + output[outputPos++] = '%'; + break; + + case '\uFF06': // &[FULLWIDTH AMPERSAND] + output[outputPos++] = '&'; + break; + + case '\u204E': + // � [LOW ASTERISK] + case '\uFF0A': // *[FULLWIDTH ASTERISK] + output[outputPos++] = '*'; + break; + + case '\uFF0C': // , [FULLWIDTH COMMA] + output[outputPos++] = ','; + break; + + case '\uFF0E': // . 
[FULLWIDTH FULL STOP] + output[outputPos++] = '.'; + break; + + case '\u2044': + // � [FRACTION SLASH] + case '\uFF0F': // � [FULLWIDTH SOLIDUS] + output[outputPos++] = '/'; + break; + + case '\uFF1A': // : [FULLWIDTH COLON] + output[outputPos++] = ':'; + break; + + case '\u204F': + // � [REVERSED SEMICOLON] + case '\uFF1B': // ï¼› [FULLWIDTH SEMICOLON] + output[outputPos++] = ';'; + break; + + case '\uFF1F': // ? [FULLWIDTH QUESTION MARK] + output[outputPos++] = '?'; + break; + + case '\u2047': // � [DOUBLE QUESTION MARK] + output[outputPos++] = '?'; + output[outputPos++] = '?'; + break; + + case '\u2048': // � [QUESTION EXCLAMATION MARK] + output[outputPos++] = '?'; + output[outputPos++] = '!'; + break; + + case '\uFF20': // ï¼ [FULLWIDTH COMMERCIAL AT] + output[outputPos++] = '@'; + break; + + case '\uFF3C': // ï¼¼ [FULLWIDTH REVERSE SOLIDUS] + output[outputPos++] = '\\'; + break; + + case '\u2038': + // ‸ [CARET] + case '\uFF3E': // ï¼¾ [FULLWIDTH CIRCUMFLEX ACCENT] + output[outputPos++] = '^'; + break; + + case '\uFF3F': // _ [FULLWIDTH LOW LINE] + output[outputPos++] = '_'; + break; + + case '\u2053': + // � [SWUNG DASH] + case '\uFF5E': // ~ [FULLWIDTH TILDE] + output[outputPos++] = '~'; + break; + + default: + output[outputPos++] = c; + break; + + } + } + } + } + } +}
/// <summary>An Analyzer builds TokenStreams, which analyze text. It thus represents a
/// policy for extracting index terms from text.
/// <p/>
/// Typical implementations first build a Tokenizer, which breaks the stream of
/// characters from the Reader into raw Tokens. One or more TokenFilters may
/// then be applied to the output of the Tokenizer.
/// </summary>
public abstract class Analyzer : IDisposable
{
    /// <summary>Creates a TokenStream which tokenizes all the text in the provided
    /// Reader. Must be able to handle null field name for
    /// backward compatibility.
    /// </summary>
    public abstract TokenStream TokenStream(String fieldName, System.IO.TextReader reader);

    /// <summary>Creates a TokenStream that is allowed to be re-used
    /// from the previous time that the same thread called
    /// this method. Callers that do not need to use more
    /// than one TokenStream at the same time from this
    /// analyzer should use this method for better
    /// performance. The default implementation simply delegates to
    /// <see cref="TokenStream(String,System.IO.TextReader)"/>.
    /// </summary>
    public virtual TokenStream ReusableTokenStream(String fieldName, System.IO.TextReader reader)
    {
        return TokenStream(fieldName, reader);
    }

    // Per-thread storage for a previously built TokenStream; set to null once
    // the Analyzer is closed, which is how PreviousTokenStream detects misuse.
    private CloseableThreadLocal<Object> tokenStreams = new CloseableThreadLocal<Object>();
    private bool isDisposed;

    /// <summary>Used by Analyzers that implement reusableTokenStream
    /// to retrieve previously saved TokenStreams for re-use
    /// by the same thread.
    /// </summary>
    /// <exception cref="AlreadyClosedException">if this Analyzer has been closed</exception>
    protected internal virtual object PreviousTokenStream
    {
        get
        {
            if (tokenStreams == null)
            {
                throw new AlreadyClosedException("this Analyzer is closed");
            }
            return tokenStreams.Get();
        }
        set
        {
            if (tokenStreams == null)
            {
                throw new AlreadyClosedException("this Analyzer is closed");
            }
            tokenStreams.Set(value);
        }
    }

    [Obsolete()]
    protected internal bool overridesTokenStreamMethod = false;

    /// <deprecated> This is only present to preserve
    /// back-compat of classes that subclass a core analyzer
    /// and override tokenStream but not reusableTokenStream
    /// </deprecated>
    /// <summary>
    /// Java uses Class&lt;? extends Analyzer&gt; to constrain <typeparamref name="TClass"/> to
    /// only Types that inherit from Analyzer. C# does not have a generic type class,
    /// ie Type&lt;t&gt;. The method signature stays the same, and an exception may
    /// still be thrown, if the method doesn't exist.
    /// </summary>
    [Obsolete("This is only present to preserve back-compat of classes that subclass a core analyzer and override tokenStream but not reusableTokenStream ")]
    protected internal virtual void SetOverridesTokenStreamMethod<TClass>()
        where TClass : Analyzer
    {
        try
        {
            System.Reflection.MethodInfo m = this.GetType().GetMethod("TokenStream", new[] { typeof(string), typeof(System.IO.TextReader) });
            // BUGFIX: guard against a null MethodInfo so a failed lookup is
            // treated as "not overridden" instead of throwing
            // NullReferenceException on m.DeclaringType.
            overridesTokenStreamMethod = m != null && m.DeclaringType != typeof(TClass);
        }
        catch (MethodAccessException)
        {
            // can't happen, as TClass is constrained to subclasses of Analyzer
            overridesTokenStreamMethod = false;
        }
    }

    /// <summary> Invoked before indexing a Fieldable instance if
    /// terms have already been added to that field. This allows custom
    /// analyzers to place an automatic position increment gap between
    /// Fieldable instances using the same field name. The default value
    /// position increment gap is 0. With a 0 position increment gap and
    /// the typical default token position increment of 1, all terms in a field,
    /// including across Fieldable instances, are in successive positions, allowing
    /// exact PhraseQuery matches, for instance, across Fieldable instance boundaries.
    /// </summary>
    /// <param name="fieldName">Fieldable name being indexed.</param>
    /// <returns> position increment gap, added to the next token emitted from <see cref="TokenStream(String,System.IO.TextReader)" /></returns>
    public virtual int GetPositionIncrementGap(String fieldName)
    {
        return 0;
    }

    /// <summary> Just like <see cref="GetPositionIncrementGap" />, except for
    /// Token offsets instead. By default this returns 1 for
    /// tokenized fields, as if the fields were joined
    /// with an extra space character, and 0 for un-tokenized
    /// fields. This method is only called if the field
    /// produced at least one token for indexing.
    /// </summary>
    /// <param name="field">the field just indexed</param>
    /// <returns> offset gap, added to the next token emitted from <see cref="TokenStream(String,System.IO.TextReader)" /></returns>
    public virtual int GetOffsetGap(IFieldable field)
    {
        return field.IsTokenized ? 1 : 0;
    }

    /// <summary>Frees persistent resources used by this Analyzer </summary>
    public void Close()
    {
        Dispose();
    }

    public virtual void Dispose()
    {
        Dispose(true);
        // Standard dispose pattern (CA1816): suppress finalization so any
        // subclass that adds a finalizer is not finalized after disposal.
        GC.SuppressFinalize(this);
    }

    protected virtual void Dispose(bool disposing)
    {
        if (isDisposed) return;

        if (disposing)
        {
            if (tokenStreams != null)
            {
                tokenStreams.Close();
                tokenStreams = null; // also flips PreviousTokenStream into its "closed" state
            }
        }
        isDisposed = true;
    }
}
/// <summary>
/// Base utility class for implementing a <see cref="CharFilter" />.
/// You subclass this, and then record mappings by calling
/// <see cref="AddOffCorrectMap" />, and then invoke the correct
/// method to correct an offset.
/// </summary>
public abstract class BaseCharFilter : CharFilter
{
    // Parallel arrays: offsets[i] is the offset from which the cumulative
    // difference diffs[i] applies. Both are null until the first mapping is
    // recorded, then grown on demand; 'size' is the number of valid entries.
    private int[] offsets;
    private int[] diffs;
    private int size = 0;

    protected BaseCharFilter(CharStream @in) : base(@in)
    {
    }

    /* Retrieve the corrected offset: maps an offset in the filtered output
     * back to the corresponding offset in the original input by adding the
     * cumulative diff of the last mapping at or before currentOff. */
    //@Override
    protected internal override int Correct(int currentOff)
    {
        // No mappings recorded, or offset precedes the first mapping: unchanged.
        if (offsets == null || currentOff < offsets[0])
        {
            return currentOff;
        }

        // At or past the last mapping: the final cumulative diff applies.
        int hi = size - 1;
        if (currentOff >= offsets[hi])
            return currentOff + diffs[hi];

        // Binary search for an exact offset match.
        int lo = 0;
        int mid = -1;

        while (hi >= lo)
        {
            // URShift gives an overflow-safe unsigned midpoint.
            mid = Number.URShift(lo + hi, 1);
            if (currentOff < offsets[mid])
                hi = mid - 1;
            else if (currentOff > offsets[mid])
                lo = mid + 1;
            else
                return currentOff + diffs[mid];
        }

        // No exact match: 'mid' is the last probe. If currentOff falls before
        // offsets[mid], the previous entry's diff applies (or none at all when
        // mid == 0); otherwise entry 'mid' is the last one at or before it.
        if (currentOff < offsets[mid])
            return mid == 0 ? currentOff : currentOff + diffs[mid - 1];
        return currentOff + diffs[mid];
    }

    /// <summary>The most recently recorded cumulative diff, or 0 when no
    /// mapping has been added yet.</summary>
    protected int LastCumulativeDiff
    {
        get
        {
            return offsets == null ? 0 : diffs[size - 1];
        }
    }

    [Obsolete("Use LastCumulativeDiff property instead")]
    protected int GetLastCumulativeDiff()
    {
        return LastCumulativeDiff;
    }

    /// <summary>Records that from offset <paramref name="off"/> on, corrected
    /// offsets differ from output offsets by <paramref name="cumulativeDiff"/>.
    /// NOTE(review): entries appear to be assumed in increasing offset order —
    /// the binary search in <see cref="Correct"/> relies on it; confirm callers
    /// never add mappings out of order.</summary>
    protected void AddOffCorrectMap(int off, int cumulativeDiff)
    {
        if (offsets == null)
        {
            offsets = new int[64];
            diffs = new int[64];
        }
        else if (size == offsets.Length)
        {
            offsets = ArrayUtil.Grow(offsets);
            diffs = ArrayUtil.Grow(diffs);
        }

        offsets[size] = off;
        diffs[size++] = cumulativeDiff;
    }
}
/// <summary> This class can be used if the token attributes of a TokenStream
/// are intended to be consumed more than once. It caches
/// all token attribute states locally in a list.
///
/// <p/>CachingTokenFilter implements the optional method
/// <see cref="TokenStream.Reset()" />, which repositions the
/// stream to the first Token.
/// </summary>
public sealed class CachingTokenFilter : TokenFilter
{
    // Lazily-built snapshots of every token's attribute state; null until the
    // wrapped stream is first consumed.
    private System.Collections.Generic.List<State> cachedStates = null;
    // Index of the next cached state to replay.
    private int replayPos = 0;
    // State captured after the wrapped stream's End() call.
    private State finalState;

    public CachingTokenFilter(TokenStream input) : base(input)
    {
    }

    public override bool IncrementToken()
    {
        if (cachedStates == null)
        {
            // First consumption: drain the wrapped stream into the cache.
            cachedStates = new System.Collections.Generic.List<State>();
            while (input.IncrementToken())
            {
                cachedStates.Add(CaptureState());
            }
            // capture final state
            input.End();
            finalState = CaptureState();
            replayPos = 0;
        }

        if (replayPos >= cachedStates.Count)
        {
            // the cache is exhausted, return false
            return false;
        }

        // Replay the next immutable snapshot, so the filter can be Reset()
        // and consumed again without re-reading the wrapped stream.
        RestoreState(cachedStates[replayPos]);
        replayPos++;
        return true;
    }

    public override void End()
    {
        if (finalState != null)
        {
            RestoreState(finalState);
        }
    }

    public override void Reset()
    {
        if (cachedStates != null)
        {
            replayPos = 0;
        }
    }
}
/// <summary> A simple class that stores Strings as char[]'s in a
/// hash table. Note that this is not a general purpose
/// class. For example, it cannot remove items from the
/// set, nor does it resize its hash table to be smaller,
/// etc. It is designed to be quick to test if a char[]
/// is in the set without the necessity of converting it
/// to a String first.
/// <p/>
/// <em>Please note:</em> This class implements <see cref="System.Collections.Generic.ISet{T}"/> but
/// does not behave like it should in all cases. The generic type is
/// <see cref="System.Collections.Generic.ICollection{T}"/>, because you can add any object to it,
/// that has a string representation. The add methods will use
/// <see cref="object.ToString()"/> and store the result using a <see cref="char"/>
/// buffer. The same behaviour have the <see cref="Contains(object)"/> methods.
/// The <see cref="GetEnumerator"/> method returns an <see cref="string"/> IEnumerable.
/// For type safety also <see cref="StringEnumerator"/> is provided.
/// </summary>
// TODO: java uses wildcards, .net doesn't have this, easiest way is to
// make the entire class generic. Ultimately, though, since this
// works with strings, I can't think of a reason not to just declare
// this as an ISet<string>.
public class CharArraySet : ISet<string>
{
    bool _ReadOnly = false;
    const int INIT_SIZE = 8;
    // Open-addressed hash table; a null slot means "empty".
    char[][] _Entries;
    int _Count;
    bool _IgnoreCase;
    // BUGFIX: readonly so the shared empty-set singleton cannot be reassigned
    // by external code.
    public static readonly CharArraySet EMPTY_SET = UnmodifiableSet(new CharArraySet(0, false));

    // Sizes the table so the load factor stays below 0.8 for startSize entries.
    private void Init(int startSize, bool ignoreCase)
    {
        this._IgnoreCase = ignoreCase;
        int size = INIT_SIZE;
        while (startSize + (startSize >> 2) > size)
            size <<= 1;
        _Entries = new char[size][];
    }

    /// <summary>Create set with enough capacity to hold startSize terms</summary>
    public CharArraySet(int startSize, bool ignoreCase)
    {
        Init(startSize, ignoreCase);
    }

    public CharArraySet(IEnumerable<string> c, bool ignoreCase)
    {
        Init(c.Count(), ignoreCase);
        AddItems(c);
    }

    /// <summary>Create set from a Collection of char[] or String </summary>
    public CharArraySet(IEnumerable<object> c, bool ignoreCase)
    {
        Init(c.Count(), ignoreCase);
        AddItems(c);
    }

    // Adds each item via its string representation (see class remarks).
    private void AddItems<T>(IEnumerable<T> items)
    {
        foreach (var item in items)
        {
            Add(item.ToString());
        }
    }

    /// <summary>Create set from entries </summary>
    private CharArraySet(char[][] entries, bool ignoreCase, int count)
    {
        this._Entries = entries;
        this._IgnoreCase = ignoreCase;
        this._Count = count;
    }

    /// <summary>true if the <c>len</c> chars of <c>text</c> starting at <c>off</c>
    /// are in the set
    /// </summary>
    public virtual bool Contains(char[] text, int off, int len)
    {
        return _Entries[GetSlot(text, off, len)] != null;
    }

    public virtual bool Contains(string text)
    {
        return _Entries[GetSlot(text)] != null;
    }

    // Finds the slot holding 'text', or the empty slot where it would go,
    // using double hashing for collision resolution.
    private int GetSlot(char[] text, int off, int len)
    {
        int code = GetHashCode(text, off, len);
        int pos = code & (_Entries.Length - 1);
        char[] text2 = _Entries[pos];
        if (text2 != null && !Equals(text, off, len, text2))
        {
            // Secondary probe increment; forced odd so it is coprime with the
            // power-of-two table size and visits every slot.
            int inc = ((code >> 8) + code) | 1;
            do
            {
                code += inc;
                pos = code & (_Entries.Length - 1);
                text2 = _Entries[pos];
            }
            while (text2 != null && !Equals(text, off, len, text2));
        }
        return pos;
    }

    /// <summary>Returns the slot for the String (same probing as the char[] overload)</summary>
    private int GetSlot(string text)
    {
        int code = GetHashCode(text);
        int pos = code & (_Entries.Length - 1);
        char[] text2 = _Entries[pos];
        if (text2 != null && !Equals(text, text2))
        {
            int inc = ((code >> 8) + code) | 1;
            do
            {
                code += inc;
                pos = code & (_Entries.Length - 1);
                text2 = _Entries[pos];
            }
            while (text2 != null && !Equals(text, text2));
        }
        return pos;
    }

    /// <summary>Adds the string; returns false if it was already present.</summary>
    public bool Add(string text)
    {
        if (_ReadOnly) throw new NotSupportedException();
        return Add(text.ToCharArray());
    }

    /// <summary>Add this char[] directly to the set.
    /// If ignoreCase is true for this Set, the text array will be directly modified.
    /// The user should never modify this text array after calling this method.
    /// </summary>
    public bool Add(char[] text)
    {
        if (_ReadOnly) throw new NotSupportedException();

        if (_IgnoreCase)
            for (int i = 0; i < text.Length; i++)
                text[i] = Char.ToLower(text[i]);
        int slot = GetSlot(text, 0, text.Length);
        if (_Entries[slot] != null)
            return false;
        _Entries[slot] = text;
        _Count++;

        // Keep load factor below 0.8.
        if (_Count + (_Count >> 2) > _Entries.Length)
        {
            Rehash();
        }

        return true;
    }

    // Compares a char[] range against a stored entry, honoring _IgnoreCase.
    private bool Equals(char[] text1, int off, int len, char[] text2)
    {
        if (len != text2.Length)
            return false;
        if (_IgnoreCase)
        {
            for (int i = 0; i < len; i++)
            {
                if (char.ToLower(text1[off + i]) != text2[i])
                    return false;
            }
        }
        else
        {
            for (int i = 0; i < len; i++)
            {
                if (text1[off + i] != text2[i])
                    return false;
            }
        }
        return true;
    }

    // Compares a string against a stored entry, honoring _IgnoreCase.
    private bool Equals(string text1, char[] text2)
    {
        int len = text1.Length;
        if (len != text2.Length)
            return false;
        if (_IgnoreCase)
        {
            for (int i = 0; i < len; i++)
            {
                if (char.ToLower(text1[i]) != text2[i])
                    return false;
            }
        }
        else
        {
            for (int i = 0; i < len; i++)
            {
                if (text1[i] != text2[i])
                    return false;
            }
        }
        return true;
    }

    // Doubles the table and reinserts every entry.
    private void Rehash()
    {
        int newSize = 2 * _Entries.Length;
        char[][] oldEntries = _Entries;
        _Entries = new char[newSize][];

        for (int i = 0; i < oldEntries.Length; i++)
        {
            char[] text = oldEntries[i];
            if (text != null)
            {
                // todo: could be faster... no need to compare strings on collision
                _Entries[GetSlot(text, 0, text.Length)] = text;
            }
        }
    }

    // Java-style 31-multiplier hash over the char[] range, honoring _IgnoreCase.
    private int GetHashCode(char[] text, int offset, int len)
    {
        int code = 0;
        int stop = offset + len;
        if (_IgnoreCase)
        {
            for (int i = offset; i < stop; i++)
            {
                code = code * 31 + char.ToLower(text[i]);
            }
        }
        else
        {
            for (int i = offset; i < stop; i++)
            {
                code = code * 31 + text[i];
            }
        }
        return code;
    }

    // Same hash as the char[] overload, computed over a string.
    private int GetHashCode(string text)
    {
        int code = 0;
        int len = text.Length;
        if (_IgnoreCase)
        {
            for (int i = 0; i < len; i++)
            {
                code = code * 31 + char.ToLower(text[i]);
            }
        }
        else
        {
            for (int i = 0; i < len; i++)
            {
                code = code * 31 + text[i];
            }
        }
        return code;
    }

    /// <summary>Number of terms in the set.</summary>
    public int Count
    {
        get { return _Count; }
    }

    public bool IsEmpty
    {
        get { return _Count == 0; }
    }

    /// <summary>Membership test for an arbitrary object: char[] is checked
    /// directly, anything else via its ToString() representation.</summary>
    public bool Contains(object item)
    {
        var text = item as char[];
        return text != null ? Contains(text, 0, text.Length) : Contains(item.ToString());
    }

    public bool Add(object item)
    {
        return Add(item.ToString());
    }

    void ICollection<string>.Add(string item)
    {
        this.Add(item);
    }

    /// <summary>
    /// Returns an unmodifiable <see cref="CharArraySet"/>. This allows to provide
    /// unmodifiable views of internal sets for "read-only" use
    /// </summary>
    /// <param name="set">A Set for which the unmodifiable set it returns.</param>
    /// <returns>A new unmodifiable <see cref="CharArraySet"/></returns>
    /// <throws>ArgumentNullException if the given set is <c>null</c></throws>
    public static CharArraySet UnmodifiableSet(CharArraySet set)
    {
        // BUGFIX: the single-string ArgumentNullException constructor takes the
        // parameter name, not a message; the old code passed the message there.
        if (set == null)
            throw new ArgumentNullException("set", "Given set is null!");
        if (set == EMPTY_SET)
            return EMPTY_SET;
        if (set._ReadOnly)
            return set;

        // Shares the backing table with the source set; only the flag differs.
        var newSet = new CharArraySet(set._Entries, set._IgnoreCase, set.Count) { IsReadOnly = true };
        return newSet;
    }

    /// <summary>
    /// returns a copy of the given set as a <see cref="CharArraySet"/>. If the given set
    /// is a <see cref="CharArraySet"/> the ignoreCase property will be preserved.
    /// </summary>
    /// <param name="set">A set to copy</param>
    /// <returns>a copy of the given set as a <see cref="CharArraySet"/>. If the given set
    /// is a <see cref="CharArraySet"/> the ignoreCase property will be preserved.</returns>
    public static CharArraySet Copy<T>(ISet<T> set)
    {
        if (set == null)
            throw new ArgumentNullException("set", "Given set is null!");
        if (set == EMPTY_SET)
            return EMPTY_SET;
        bool ignoreCase = set is CharArraySet && ((CharArraySet)set)._IgnoreCase;
        var arrSet = new CharArraySet(set.Count, ignoreCase);
        arrSet.AddItems(set);
        return arrSet;
    }

    /// <summary>Not supported: entries cannot be removed from this set.</summary>
    public void Clear()
    {
        // BUGFIX: message used to say "Remove not supported!" for Clear.
        throw new NotSupportedException("Clear is not supported!");
    }

    public bool IsReadOnly
    {
        get { return _ReadOnly; }
        private set { _ReadOnly = value; }
    }

    /// <summary>Adds all of the elements in the specified collection to this collection </summary>
    public void UnionWith(IEnumerable<string> other)
    {
        if (_ReadOnly) throw new NotSupportedException();

        foreach (string s in other)
        {
            Add(s.ToCharArray());
        }
    }

    /// <summary>Wrapper that calls UnionWith</summary>
    public void AddAll(IEnumerable<string> coll)
    {
        UnionWith(coll);
    }

    #region Unneeded methods
    public void RemoveAll(ICollection<string> c)
    {
        throw new NotSupportedException();
    }

    public void RetainAll(ICollection<string> c)
    {
        throw new NotSupportedException();
    }

    void ICollection<string>.CopyTo(string[] array, int arrayIndex)
    {
        throw new NotSupportedException();
    }

    void ISet<string>.IntersectWith(IEnumerable<string> other)
    {
        throw new NotSupportedException();
    }

    void ISet<string>.ExceptWith(IEnumerable<string> other)
    {
        throw new NotSupportedException();
    }

    void ISet<string>.SymmetricExceptWith(IEnumerable<string> other)
    {
        throw new NotSupportedException();
    }

    bool ISet<string>.IsSubsetOf(IEnumerable<string> other)
    {
        throw new NotSupportedException();
    }

    bool ISet<string>.IsSupersetOf(IEnumerable<string> other)
    {
        throw new NotSupportedException();
    }

    bool ISet<string>.IsProperSupersetOf(IEnumerable<string> other)
    {
        throw new NotSupportedException();
    }

    bool ISet<string>.IsProperSubsetOf(IEnumerable<string> other)
    {
        throw new NotSupportedException();
    }

    bool ISet<string>.Overlaps(IEnumerable<string> other)
    {
        throw new NotSupportedException();
    }

    bool ISet<string>.SetEquals(IEnumerable<string> other)
    {
        throw new NotSupportedException();
    }

    bool ICollection<string>.Remove(string item)
    {
        throw new NotSupportedException();
    }
    #endregion

    /// <summary>
    /// The IEnumerator&lt;String&gt; for this set. Strings are constructed on the fly,
    /// so use <c>NextCharArray</c> for more efficient access
    /// </summary>
    public class CharArraySetEnumerator : IEnumerator<string>
    {
        readonly CharArraySet _Creator;
        int pos = -1;
        char[] cur;

        protected internal CharArraySetEnumerator(CharArraySet creator)
        {
            _Creator = creator;
        }

        public bool MoveNext()
        {
            cur = null;
            pos++;
            // Skip empty slots in the open-addressed table.
            while (pos < _Creator._Entries.Length && (cur = _Creator._Entries[pos]) == null)
                pos++;
            return cur != null;
        }

        /// <summary>do not modify the returned char[] </summary>
        public char[] NextCharArray()
        {
            return cur;
        }

        public string Current
        {
            get { return new string(NextCharArray()); }
        }

        public void Dispose()
        {
        }

        object IEnumerator.Current
        {
            get { return new string(NextCharArray()); }
        }

        public void Reset()
        {
            throw new NotImplementedException();
        }
    }

    public IEnumerator<string> StringEnumerator()
    {
        return new CharArraySetEnumerator(this);
    }

    public IEnumerator<string> GetEnumerator()
    {
        return new CharArraySetEnumerator(this);
    }

    IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }
}
/// <summary> Subclasses of CharFilter can be chained to filter CharStream.
/// They can be used as <see cref="System.IO.TextReader" /> with additional offset
/// correction. <see cref="Tokenizer" />s will automatically use <see cref="CorrectOffset" />
/// if a CharFilter/CharStream subclass is used.
/// </summary>
/// <version> $Id$
/// </version>
public abstract class CharFilter : CharStream
{
    // Stream position recorded by Mark(); -1 means Mark was never called.
    private long currentPosition = -1;
    private bool isDisposed;
    protected internal CharStream input;

    protected internal CharFilter(CharStream in_Renamed) : base(in_Renamed)
    {
        input = in_Renamed;
    }

    /// <summary>Subclass may want to override to correct the current offset.</summary>
    /// <param name="currentOff">current offset</param>
    /// <returns>corrected offset</returns>
    protected internal virtual int Correct(int currentOff)
    {
        return currentOff;
    }

    /// <summary> Chains the corrected offset through the input
    /// CharFilter.
    /// </summary>
    public override int CorrectOffset(int currentOff)
    {
        return input.CorrectOffset(Correct(currentOff));
    }

    protected override void Dispose(bool disposing)
    {
        if (isDisposed) return;

        if (disposing)
        {
            if (input != null)
            {
                input.Close();
            }
        }

        input = null;
        isDisposed = true;
        base.Dispose(disposing);
    }

    public override int Read(System.Char[] cbuf, int off, int len)
    {
        return input.Read(cbuf, off, len);
    }

    /// <summary>True if Mark/Reset can be used, i.e. the underlying stream is seekable.</summary>
    public bool MarkSupported()
    {
        return input.BaseStream.CanSeek;
    }

    /// <summary>Remembers the current stream position so a later <see cref="Reset"/>
    /// returns to it. Mirrors java.io.Reader.mark: <paramref name="readAheadLimit"/>
    /// only bounds how far may be read before the mark expires, which a seekable
    /// stream does not need.
    /// BUGFIX: the previous implementation also seeked the stream to
    /// <paramref name="readAheadLimit"/>, corrupting the read position;
    /// mark must never move the stream.</summary>
    public void Mark(int readAheadLimit)
    {
        currentPosition = input.BaseStream.Position;
    }

    /// <summary>Repositions the stream to the position recorded by the last
    /// <see cref="Mark"/>. As before, calling Reset without a prior Mark seeks
    /// to -1 and fails.</summary>
    public void Reset()
    {
        input.BaseStream.Position = currentPosition;
    }
}
/// <summary> CharReader is a Reader wrapper. It reads chars from
/// Reader and outputs <see cref="CharStream" />, defining an
/// identify function <see cref="CorrectOffset" /> method that
/// simply returns the provided offset.
/// </summary>
public sealed class CharReader : CharStream
{
    // Stream position recorded by Mark(); -1 means Mark was never called.
    private long currentPosition = -1;

    private bool isDisposed;

    internal System.IO.StreamReader input;

    /// <summary>Wraps the given reader as a <see cref="CharStream"/>; if it already
    /// is one it is returned unchanged.</summary>
    public static CharStream Get(System.IO.TextReader input)
    {
        var charStream = input as CharStream;
        if (charStream != null)
            return charStream;

        // {{Aroush-2.9}} isn't there a better (faster) way to do this?
        // NOTE: this buffers the entire remaining input in memory via a UTF-8
        // round trip before wrapping it.
        var theString = new System.IO.MemoryStream(System.Text.Encoding.UTF8.GetBytes(input.ReadToEnd()));
        return new CharReader(new System.IO.StreamReader(theString));
        //return input is CharStream?(CharStream) input:new CharReader(input);
    }

    private CharReader(System.IO.StreamReader in_Renamed) : base(in_Renamed)
    {
        input = in_Renamed;
    }

    /// <summary>Identity: CharReader performs no offset correction.</summary>
    public override int CorrectOffset(int currentOff)
    {
        return currentOff;
    }

    protected override void Dispose(bool disposing)
    {
        if (isDisposed) return;

        if (disposing)
        {
            if (input != null)
            {
                input.Close();
            }
        }

        input = null;
        isDisposed = true;
        base.Dispose(disposing);
    }

    public override int Read(System.Char[] cbuf, int off, int len)
    {
        return input.Read(cbuf, off, len);
    }

    /// <summary>True if Mark/Reset can be used, i.e. the underlying stream is seekable.</summary>
    public bool MarkSupported()
    {
        return input.BaseStream.CanSeek;
    }

    /// <summary>Remembers the current stream position so a later <see cref="Reset"/>
    /// returns to it. Mirrors java.io.Reader.mark: <paramref name="readAheadLimit"/>
    /// is not needed on a seekable stream.
    /// BUGFIX: the previous implementation also seeked the stream to
    /// <paramref name="readAheadLimit"/>, corrupting the read position;
    /// mark must never move the stream.</summary>
    public void Mark(int readAheadLimit)
    {
        currentPosition = input.BaseStream.Position;
    }

    /// <summary>Repositions the stream to the position recorded by the last
    /// <see cref="Mark"/>. As before, calling Reset without a prior Mark seeks
    /// to -1 and fails.</summary>
    public void Reset()
    {
        input.BaseStream.Position = currentPosition;
    }
}
/// <summary> CharStream adds <see cref="CorrectOffset" />
/// functionality over <see cref="System.IO.TextReader" />. All Tokenizers accept a
/// CharStream instead of <see cref="System.IO.TextReader" /> as input, which enables
/// arbitrary character based filtering before tokenization.
/// The <see cref="CorrectOffset" /> method fixes offsets to account for
/// removal or insertion of characters, so that the offsets
/// reported in the tokens match the character offsets of the
/// original Reader.
/// </summary>
public abstract class CharStream : System.IO.StreamReader
{
    // NOTE(review): constructing from reader.BaseStream bypasses the wrapped
    // StreamReader's own encoding detection and discards any characters it has
    // already buffered — confirm callers always pass a freshly opened reader.
    protected CharStream(System.IO.StreamReader reader) : base(reader.BaseStream)
    {
    }

    /// <summary> Called by CharFilter(s) and Tokenizer to correct token offset.
    /// </summary>
    /// <param name="currentOff">offset as seen in the output
    /// </param>
    /// <returns> corrected offset based on the input
    /// </returns>
    public abstract int CorrectOffset(int currentOff);
}
/// <summary>An abstract base class for simple, character-oriented tokenizers.
/// Subclasses define token boundaries via <see cref="IsTokenChar"/> and may
/// transform characters via <see cref="Normalize"/>.</summary>
public abstract class CharTokenizer : Tokenizer
{
    protected CharTokenizer(System.IO.TextReader input) : base(input)
    {
        offsetAtt = AddAttribute<IOffsetAttribute>();
        termAtt = AddAttribute<ITermAttribute>();
    }

    protected CharTokenizer(AttributeSource source, System.IO.TextReader input) : base(source, input)
    {
        offsetAtt = AddAttribute<IOffsetAttribute>();
        termAtt = AddAttribute<ITermAttribute>();
    }

    protected CharTokenizer(AttributeFactory factory, System.IO.TextReader input) : base(factory, input)
    {
        offsetAtt = AddAttribute<IOffsetAttribute>();
        termAtt = AddAttribute<ITermAttribute>();
    }

    // offset: chars consumed from the reader before the current ioBuffer;
    // bufferIndex: read cursor within ioBuffer; dataLen: valid chars in ioBuffer.
    private int offset = 0, bufferIndex = 0, dataLen = 0;
    // Tokens are broken at this length; the remainder starts a new token.
    private const int MAX_WORD_LEN = 255;
    private const int IO_BUFFER_SIZE = 4096;
    private readonly char[] ioBuffer = new char[IO_BUFFER_SIZE];

    private readonly ITermAttribute termAtt;
    private readonly IOffsetAttribute offsetAtt;

    /// <summary>Returns true iff a character should be included in a token. This
    /// tokenizer generates as tokens adjacent sequences of characters which
    /// satisfy this predicate. Characters for which this is false are used to
    /// define token boundaries and are not included in tokens.
    /// </summary>
    protected internal abstract bool IsTokenChar(char c);

    /// <summary>Called on each token character to normalize it before it is added to the
    /// token. The default implementation does nothing. Subclasses may use this
    /// to, e.g., lowercase tokens.
    /// </summary>
    protected internal virtual char Normalize(char c)
    {
        return c;
    }

    public override bool IncrementToken()
    {
        ClearAttributes();
        int length = 0;
        int start = bufferIndex;
        char[] buffer = termAtt.TermBuffer();
        while (true)
        {
            // Refill ioBuffer when exhausted; on end-of-input, emit any
            // partially collected token, otherwise signal end of stream.
            if (bufferIndex >= dataLen)
            {
                offset += dataLen;
                dataLen = input.Read(ioBuffer, 0, ioBuffer.Length);
                if (dataLen <= 0)
                {
                    dataLen = 0; // so next offset += dataLen won't decrement offset
                    if (length > 0)
                        break;
                    return false;
                }
                bufferIndex = 0;
            }

            char c = ioBuffer[bufferIndex++];

            if (IsTokenChar(c))
            {
                // if it's a token char

                if (length == 0)
                    // start of token: absolute offset of this character
                    start = offset + bufferIndex - 1;
                else if (length == buffer.Length)
                    buffer = termAtt.ResizeTermBuffer(1 + length);

                buffer[length++] = Normalize(c); // buffer it, normalized

                if (length == MAX_WORD_LEN)
                    // buffer overflow!
                    break;
            }
            else if (length > 0)
                // at non-Letter w/ chars
                break; // return 'em
        }

        termAtt.SetTermLength(length);
        // Offsets are corrected so they map back to the original input even
        // when a CharFilter inserted or removed characters.
        offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(start + length));
        return true;
    }

    public override void End()
    {
        // set final offset
        int finalOffset = CorrectOffset(offset);
        offsetAtt.SetOffset(finalOffset, finalOffset);
    }

    public override void Reset(System.IO.TextReader input)
    {
        // Reset buffer bookkeeping so the tokenizer can be reused on new input.
        base.Reset(input);
        bufferIndex = 0;
        offset = 0;
        dataLen = 0;
    }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using Lucene.Net.Analysis.Tokenattributes;

namespace Lucene.Net.Analysis
{

    /// <summary> A filter that replaces accented characters in the ISO Latin 1 character set
    /// (ISO-8859-1) by their unaccented equivalent. The case will not be altered.
    /// <p/>
    /// For instance, 'À' will be replaced by 'a'.
    /// <p/>
    /// </summary>
    /// <deprecated> If you build a new index, use <see cref="ASCIIFoldingFilter"/>
    /// which covers a superset of Latin 1.
    /// This class is included for use with existing indexes and will be removed
    /// in a future release (possible Lucene 4.0)
    /// </deprecated>
    [Obsolete("If you build a new index, use ASCIIFoldingFilter which covers a superset of Latin 1. This class is included for use with existing indexes and will be removed in a future release (possible Lucene 4.0).")]
    public class ISOLatin1AccentFilter : TokenFilter
    {
        public ISOLatin1AccentFilter(TokenStream input) : base(input)
        {
            termAtt = AddAttribute<ITermAttribute>();
        }

        // Scratch output buffer reused across tokens; grown (doubling) on demand.
        private char[] output = new char[256];
        // Number of valid chars in 'output' after the last RemoveAccents call.
        private int outputPos;
        private readonly ITermAttribute termAtt;

        /// <summary>
        /// Advances the wrapped stream; if the term contains any char in the folding
        /// range (U+00C0..U+FB06), rewrites the term with accents removed.
        /// </summary>
        public override bool IncrementToken()
        {
            if (input.IncrementToken())
            {
                char[] buffer = termAtt.TermBuffer();
                int length = termAtt.TermLength();
                // If no characters actually require rewriting then we
                // just return token as-is:
                for (int i = 0; i < length; i++)
                {
                    char c = buffer[i];
                    if (c >= '\u00c0' && c <= '\uFB06')
                    {
                        RemoveAccents(buffer, length);
                        termAtt.SetTermBuffer(output, 0, outputPos);
                        break;
                    }
                }
                return true;
            }
            return false;
        }

        /// <summary> To replace accented characters in a String by unaccented equivalents.
        /// Result is written into the shared 'output' buffer; 'outputPos' holds its length.
        /// </summary>
        public void RemoveAccents(char[] input, int length)
        {

            // Worst-case length required: every char may expand to two (e.g. Æ -> AE).
            int maxSizeNeeded = 2 * length;

            int size = output.Length;
            while (size < maxSizeNeeded)
                size *= 2;

            if (size != output.Length)
                output = new char[size];

            outputPos = 0;

            int pos = 0;

            for (int i = 0; i < length; i++, pos++)
            {
                char c = input[pos];

                // Quick test: if it's not in range then just keep
                // current character
                if (c < '\u00c0' || c > '\uFB06')
                    output[outputPos++] = c;
                else
                {
                    switch (c)
                    {

                        case '\u00C0':
                        // A-grave
                        case '\u00C1':
                        // A-acute
                        case '\u00C2':
                        // A-circumflex
                        case '\u00C3':
                        // A-tilde
                        case '\u00C4':
                        // A-diaeresis
                        case '\u00C5': // A-ring
                            output[outputPos++] = 'A';
                            break;

                        case '\u00C6': // AE ligature
                            output[outputPos++] = 'A';
                            output[outputPos++] = 'E';
                            break;

                        case '\u00C7': // C-cedilla
                            output[outputPos++] = 'C';
                            break;

                        case '\u00C8':
                        // E-grave
                        case '\u00C9':
                        // E-acute
                        case '\u00CA':
                        // E-circumflex
                        case '\u00CB': // E-diaeresis
                            output[outputPos++] = 'E';
                            break;

                        case '\u00CC':
                        // I-grave
                        case '\u00CD':
                        // I-acute
                        case '\u00CE':
                        // I-circumflex
                        case '\u00CF': // I-diaeresis
                            output[outputPos++] = 'I';
                            break;

                        case '\u0132': // IJ ligature
                            output[outputPos++] = 'I';
                            output[outputPos++] = 'J';
                            break;

                        case '\u00D0': // Eth
                            output[outputPos++] = 'D';
                            break;

                        case '\u00D1': // N-tilde
                            output[outputPos++] = 'N';
                            break;

                        case '\u00D2':
                        // O-grave
                        case '\u00D3':
                        // O-acute
                        case '\u00D4':
                        // O-circumflex
                        case '\u00D5':
                        // O-tilde
                        case '\u00D6':
                        // O-diaeresis
                        case '\u00D8': // O-slash
                            output[outputPos++] = 'O';
                            break;

                        case '\u0152': // OE ligature
                            output[outputPos++] = 'O';
                            output[outputPos++] = 'E';
                            break;

                        case '\u00DE': // Thorn
                            output[outputPos++] = 'T';
                            output[outputPos++] = 'H';
                            break;

                        case '\u00D9':
                        // U-grave
                        case '\u00DA':
                        // U-acute
                        case '\u00DB':
                        // U-circumflex
                        case '\u00DC': // U-diaeresis
                            output[outputPos++] = 'U';
                            break;

                        case '\u00DD':
                        // Y-acute
                        case '\u0178': // Y-diaeresis
                            output[outputPos++] = 'Y';
                            break;

                        case '\u00E0':
                        // a-grave
                        case '\u00E1':
                        // a-acute
                        case '\u00E2':
                        // a-circumflex
                        case '\u00E3':
                        // a-tilde
                        case '\u00E4':
                        // a-diaeresis
                        case '\u00E5': // a-ring
                            output[outputPos++] = 'a';
                            break;

                        case '\u00E6': // ae ligature
                            output[outputPos++] = 'a';
                            output[outputPos++] = 'e';
                            break;

                        case '\u00E7': // c-cedilla
                            output[outputPos++] = 'c';
                            break;

                        case '\u00E8':
                        // e-grave
                        case '\u00E9':
                        // e-acute
                        case '\u00EA':
                        // e-circumflex
                        case '\u00EB': // e-diaeresis
                            output[outputPos++] = 'e';
                            break;

                        case '\u00EC':
                        // i-grave
                        case '\u00ED':
                        // i-acute
                        case '\u00EE':
                        // i-circumflex
                        case '\u00EF': // i-diaeresis
                            output[outputPos++] = 'i';
                            break;

                        case '\u0133': // ij ligature
                            output[outputPos++] = 'i';
                            output[outputPos++] = 'j';
                            break;

                        case '\u00F0': // eth
                            output[outputPos++] = 'd';
                            break;

                        case '\u00F1': // n-tilde
                            output[outputPos++] = 'n';
                            break;

                        case '\u00F2':
                        // o-grave
                        case '\u00F3':
                        // o-acute
                        case '\u00F4':
                        // o-circumflex
                        case '\u00F5':
                        // o-tilde
                        case '\u00F6':
                        // o-diaeresis
                        case '\u00F8': // o-slash
                            output[outputPos++] = 'o';
                            break;

                        case '\u0153': // oe ligature
                            output[outputPos++] = 'o';
                            output[outputPos++] = 'e';
                            break;

                        case '\u00DF': // sharp s
                            output[outputPos++] = 's';
                            output[outputPos++] = 's';
                            break;

                        case '\u00FE': // thorn
                            output[outputPos++] = 't';
                            output[outputPos++] = 'h';
                            break;

                        case '\u00F9':
                        // u-grave
                        case '\u00FA':
                        // u-acute
                        case '\u00FB':
                        // u-circumflex
                        case '\u00FC': // u-diaeresis
                            output[outputPos++] = 'u';
                            break;

                        case '\u00FD':
                        // y-acute
                        case '\u00FF': // y-diaeresis
                            output[outputPos++] = 'y';
                            break;

                        case '\uFB00': // ff ligature
                            output[outputPos++] = 'f';
                            output[outputPos++] = 'f';
                            break;

                        case '\uFB01': // fi ligature
                            output[outputPos++] = 'f';
                            output[outputPos++] = 'i';
                            break;

                        case '\uFB02': // fl ligature
                            output[outputPos++] = 'f';
                            output[outputPos++] = 'l';
                            break;
                        // following 2 are commented as they can break the maxSizeNeeded (and doing *3 could be expensive)
                        // case '\uFB03': // ffi ligature
                        //    output[outputPos++] = 'f';
                        //    output[outputPos++] = 'f';
                        //    output[outputPos++] = 'i';
                        //    break;
                        // case '\uFB04': // ffl ligature
                        //    output[outputPos++] = 'f';
                        //    output[outputPos++] = 'f';
                        //    output[outputPos++] = 'l';
                        //    break;

                        case '\uFB05': // long-s t ligature
                            output[outputPos++] = 'f';
                            output[outputPos++] = 't';
                            break;

                        case '\uFB06': // st ligature
                            output[outputPos++] = 's';
                            output[outputPos++] = 't';
                            break;

                        default:
                            output[outputPos++] = c;
                            break;

                    }
                }
            }
        }
    }
}
\ No newline at end of file diff --git a/src/core/Analysis/KeywordAnalyzer.cs b/src/core/Analysis/KeywordAnalyzer.cs new file mode 100644 index 0000000..116babb --- /dev/null +++ b/src/core/Analysis/KeywordAnalyzer.cs @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +namespace Lucene.Net.Analysis +{ + + /// <summary> "Tokenizes" the entire stream as a single token. This is useful + /// for data like zip codes, ids, and some product names. 
+ /// </summary> + public class KeywordAnalyzer:Analyzer + { + public KeywordAnalyzer() + { + SetOverridesTokenStreamMethod<KeywordAnalyzer>(); + } + public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader) + { + return new KeywordTokenizer(reader); + } + public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader) + { + if (overridesTokenStreamMethod) + { + // LUCENE-1678: force fallback to tokenStream() if we + // have been subclassed and that subclass overrides + // tokenStream but not reusableTokenStream + return TokenStream(fieldName, reader); + } + var tokenizer = (Tokenizer) PreviousTokenStream; + if (tokenizer == null) + { + tokenizer = new KeywordTokenizer(reader); + PreviousTokenStream = tokenizer; + } + else + tokenizer.Reset(reader); + return tokenizer; + } + } +}
\ No newline at end of file diff --git a/src/core/Analysis/KeywordTokenizer.cs b/src/core/Analysis/KeywordTokenizer.cs new file mode 100644 index 0000000..f97ff95 --- /dev/null +++ b/src/core/Analysis/KeywordTokenizer.cs @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using Lucene.Net.Analysis.Tokenattributes; +using AttributeSource = Lucene.Net.Util.AttributeSource; + +namespace Lucene.Net.Analysis +{ + + /// <summary> Emits the entire input as a single token.</summary> + public sealed class KeywordTokenizer:Tokenizer + { + + private const int DEFAULT_BUFFER_SIZE = 256; + + private bool done; + private int finalOffset; + private ITermAttribute termAtt; + private IOffsetAttribute offsetAtt; + + public KeywordTokenizer(System.IO.TextReader input):this(input, DEFAULT_BUFFER_SIZE) + { + } + + public KeywordTokenizer(System.IO.TextReader input, int bufferSize):base(input) + { + Init(bufferSize); + } + + public KeywordTokenizer(AttributeSource source, System.IO.TextReader input, int bufferSize):base(source, input) + { + Init(bufferSize); + } + + public KeywordTokenizer(AttributeFactory factory, System.IO.TextReader input, int bufferSize):base(factory, input) + { + Init(bufferSize); + } + + private void Init(int bufferSize) + { + this.done = false; + termAtt = AddAttribute<ITermAttribute>(); + offsetAtt = AddAttribute<IOffsetAttribute>(); + termAtt.ResizeTermBuffer(bufferSize); + } + + public override bool IncrementToken() + { + if (!done) + { + ClearAttributes(); + done = true; + int upto = 0; + char[] buffer = termAtt.TermBuffer(); + while (true) + { + int length = input.Read(buffer, upto, buffer.Length - upto); + if (length == 0) + break; + upto += length; + if (upto == buffer.Length) + buffer = termAtt.ResizeTermBuffer(1 + buffer.Length); + } + termAtt.SetTermLength(upto); + finalOffset = CorrectOffset(upto); + offsetAtt.SetOffset(CorrectOffset(0), finalOffset); + return true; + } + return false; + } + + public override void End() + { + // set final offset + offsetAtt.SetOffset(finalOffset, finalOffset); + } + + public override void Reset(System.IO.TextReader input) + { + base.Reset(input); + this.done = false; + } + } +}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using Lucene.Net.Analysis.Tokenattributes;

namespace Lucene.Net.Analysis
{

    /// <summary>Removes words that are too long or too short from the stream.</summary>
    public sealed class LengthFilter : TokenFilter
    {

        internal int min;
        internal int max;

        private readonly ITermAttribute termAtt;

        /// <summary> Build a filter that removes words that are too long or too
        /// short from the text: only terms whose length is within
        /// [<paramref name="min"/>, <paramref name="max"/>] (inclusive) are kept.
        /// </summary>
        public LengthFilter(TokenStream in_Renamed, int min, int max)
            : base(in_Renamed)
        {
            this.min = min;
            this.max = max;
            termAtt = AddAttribute<ITermAttribute>();
        }

        /// <summary>Advances to the next token whose term length is within bounds.</summary>
        public override bool IncrementToken()
        {
            while (input.IncrementToken())
            {
                int len = termAtt.TermLength();
                if (len < min || len > max)
                {
                    // Out of range: silently drop this token and keep scanning.
                    continue;
                }
                return true;
            }
            // Reached end of the wrapped stream.
            return false;
        }
    }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using AttributeSource = Lucene.Net.Util.AttributeSource;

namespace Lucene.Net.Analysis
{

    /// <summary>A LetterTokenizer is a tokenizer that divides text at non-letters; that is,
    /// it defines tokens as maximal strings of adjacent letters (per
    /// <see cref="char.IsLetter(char)"/>).
    /// Note: this does a decent job for most European languages, but a poor one for
    /// some Asian languages, where words are not separated by spaces.
    /// </summary>
    public class LetterTokenizer : CharTokenizer
    {
        /// <summary>Construct a new LetterTokenizer.</summary>
        public LetterTokenizer(System.IO.TextReader @in)
            : base(@in)
        {
        }

        /// <summary>Construct a new LetterTokenizer using a given <see cref="AttributeSource" />.</summary>
        public LetterTokenizer(AttributeSource source, System.IO.TextReader @in)
            : base(source, @in)
        {
        }

        /// <summary>Construct a new LetterTokenizer using a given <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory" />.</summary>
        public LetterTokenizer(AttributeFactory factory, System.IO.TextReader @in)
            : base(factory, @in)
        {
        }

        /// <summary>Accepts only characters for which <see cref="char.IsLetter(char)"/> is true.</summary>
        protected internal override bool IsTokenChar(char c)
        {
            return char.IsLetter(c);
        }
    }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using Lucene.Net.Analysis.Tokenattributes;

namespace Lucene.Net.Analysis
{

    /// <summary>Normalizes token text to lower case.</summary>
    public sealed class LowerCaseFilter : TokenFilter
    {
        private readonly ITermAttribute termAtt;

        public LowerCaseFilter(TokenStream @in)
            : base(@in)
        {
            termAtt = AddAttribute<ITermAttribute>();
        }

        /// <summary>Lowercases the current term in place, then passes the token through.</summary>
        public override bool IncrementToken()
        {
            if (!input.IncrementToken())
            {
                return false;
            }

            char[] chars = termAtt.TermBuffer();
            int len = termAtt.TermLength();
            // In-place lowercase. Char.ToLower is culture-sensitive, matching the
            // original behavior of this filter.
            for (int idx = 0; idx < len; ++idx)
            {
                chars[idx] = System.Char.ToLower(chars[idx]);
            }
            return true;
        }
    }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using AttributeSource = Lucene.Net.Util.AttributeSource;

namespace Lucene.Net.Analysis
{

    /// <summary> LowerCaseTokenizer performs the function of LetterTokenizer
    /// and LowerCaseFilter together: it divides text at non-letters and converts
    /// the letters to lower case. Functionally equivalent to LetterTokenizer +
    /// LowerCaseFilter, but doing both in one pass is faster, hence this
    /// (redundant) implementation.
    /// <p/>
    /// Note: this does a decent job for most European languages, but a poor one
    /// for some Asian languages, where words are not separated by spaces.
    /// </summary>
    public sealed class LowerCaseTokenizer : LetterTokenizer
    {
        /// <summary>Construct a new LowerCaseTokenizer.</summary>
        public LowerCaseTokenizer(System.IO.TextReader @in)
            : base(@in)
        {
        }

        /// <summary>Construct a new LowerCaseTokenizer using a given <see cref="AttributeSource" />.</summary>
        public LowerCaseTokenizer(AttributeSource source, System.IO.TextReader @in)
            : base(source, @in)
        {
        }

        /// <summary>Construct a new LowerCaseTokenizer using a given <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory" />.</summary>
        public LowerCaseTokenizer(AttributeFactory factory, System.IO.TextReader @in)
            : base(factory, @in)
        {
        }

        /// <summary>Lowercases each accepted character via <see cref="char.ToLower(char)"/>.</summary>
        protected internal override char Normalize(char c)
        {
            return char.ToLower(c);
        }
    }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System.Collections.Generic;

namespace Lucene.Net.Analysis
{

    /// <summary> Simplistic <see cref="CharFilter" /> that applies the mappings
    /// contained in a <see cref="NormalizeCharMap" /> to the character
    /// stream, and correcting the resulting changes to the
    /// offsets.
    /// </summary>
    public class MappingCharFilter : BaseCharFilter
    {
        // Trie of input-string -> replacement-string mappings.
        private readonly NormalizeCharMap normMap;
        // Push-back buffer for chars read ahead during a failed trie match.
        private LinkedList<char> buffer;
        // Replacement currently being emitted, or null.
        private System.String replacement;
        // Next char of 'replacement' to emit.
        private int charPointer;
        // Count of chars pulled via NextChar() (input position, 1-based after a read).
        private int nextCharCounter;

        /// Default constructor that takes a <see cref="CharStream" />.
        public MappingCharFilter(NormalizeCharMap normMap, CharStream @in)
            : base(@in)
        {
            this.normMap = normMap;
        }

        /// Easy-use constructor that takes a <see cref="System.IO.TextReader" />.
        public MappingCharFilter(NormalizeCharMap normMap, System.IO.TextReader @in)
            : base(CharReader.Get(@in))
        {
            this.normMap = normMap;
        }

        /// <summary>
        /// Reads one mapped character: drains any pending replacement first, then
        /// attempts the longest trie match starting at the next input char and
        /// records the offset delta when a mapping changes the text length.
        /// Returns -1 at end of input.
        /// </summary>
        public override int Read()
        {
            while (true)
            {
                if (replacement != null && charPointer < replacement.Length)
                {
                    return replacement[charPointer++];
                }

                int firstChar = NextChar();
                if (firstChar == - 1)
                    return - 1;
                // NOTE(review): this indexer appears to rely on a null-returning
                // lookup for missing keys (project HashMap semantics) — confirm.
                NormalizeCharMap nm = normMap.submap != null
                    ? normMap.submap[(char) firstChar]
                    : null;
                if (nm == null)
                    return firstChar;
                NormalizeCharMap result = Match(nm);
                if (result == null)
                    return firstChar;
                replacement = result.normStr;
                charPointer = 0;
                if (result.diff != 0)
                {
                    // diff = matched-input-length - replacement-length; register
                    // offset corrections so CorrectOffset maps output positions back
                    // to input positions.
                    int prevCumulativeDiff = LastCumulativeDiff;
                    if (result.diff < 0)
                    {
                        // Replacement is longer than the matched input: one
                        // correction entry per extra output char.
                        for (int i = 0; i < - result.diff; i++)
                            AddOffCorrectMap(nextCharCounter + i - prevCumulativeDiff, prevCumulativeDiff - 1 - i);
                    }
                    else
                    {
                        // Replacement is shorter: a single correction entry.
                        AddOffCorrectMap(nextCharCounter - result.diff - prevCumulativeDiff, prevCumulativeDiff + result.diff);
                    }
                }
            }
        }

        // Returns the next input char, preferring pushed-back chars; -1 at EOF.
        private int NextChar()
        {
            nextCharCounter++;
            if (buffer != null && buffer.Count != 0)
            {
                char tempObject = buffer.First.Value;
                buffer.RemoveFirst();
                return (tempObject);
            }
            return input.Read();
        }

        // Pushes a char back so the next NextChar() returns it again.
        private void PushChar(int c)
        {
            nextCharCounter--;
            if (buffer == null)
            {
                buffer = new LinkedList<char>();
            }
            buffer.AddFirst((char)c);
        }

        // Appends a char to the end of the push-back buffer (used by the block
        // Read overload to pre-feed chars).
        private void PushLastChar(int c)
        {
            if (buffer == null)
            {
                buffer = new LinkedList<char>();
            }
            buffer.AddLast((char)c);
        }

        // Recursively follows the trie for the longest match; chars consumed past
        // the match point are pushed back. Returns the deepest node with a
        // replacement, or null if no mapping matches.
        private NormalizeCharMap Match(NormalizeCharMap map)
        {
            NormalizeCharMap result = null;
            if (map.submap != null)
            {
                int chr = NextChar();
                if (chr != - 1)
                {
                    NormalizeCharMap subMap = map.submap[(char)chr];
                    if (subMap != null)
                    {
                        result = Match(subMap);
                    }
                    if (result == null)
                    {
                        PushChar(chr);
                    }
                }
            }
            if (result == null && map.normStr != null)
            {
                result = map;
            }
            return result;
        }

        /// <summary>
        /// Block read: pulls up to <c>len</c> raw chars into the push-back buffer,
        /// then emits mapped chars via <see cref="Read()"/>. Returns -1 when no
        /// chars could be produced.
        /// </summary>
        public override int Read(System.Char[] cbuf, int off, int len)
        {
            var tmp = new char[len];
            int l = input.Read(tmp, 0, len);
            if (l != 0)
            {
                for (int i = 0; i < l; i++)
                    PushLastChar(tmp[i]);
            }
            l = 0;
            for (int i = off; i < off + len; i++)
            {
                int c = Read();
                if (c == - 1)
                    break;
                cbuf[i] = (char) c;
                l++;
            }
            return l == 0?- 1:l;
        }
    }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using Lucene.Net.Support;

namespace Lucene.Net.Analysis
{

    /// <summary> Holds a map of String input to String output, to be used
    /// with <see cref="MappingCharFilter" />. Internally a trie: each node maps a
    /// single character to the next node, and a node carries a replacement string
    /// when a complete input string ends there.
    /// </summary>
    public class NormalizeCharMap
    {
        internal System.Collections.Generic.IDictionary<char, NormalizeCharMap> submap;
        internal System.String normStr;
        internal int diff;

        /// <summary>Records a replacement to be applied to the input
        /// stream. Whenever <c>singleMatch</c> occurs in
        /// the input, it will be replaced with <c>replacement</c>.
        /// </summary>
        /// <param name="singleMatch">input String to be replaced</param>
        /// <param name="replacement">output String</param>
        public virtual void Add(System.String singleMatch, System.String replacement)
        {
            // Walk (and build) the trie, one character of the match string at a time.
            var node = this;
            foreach (char c in singleMatch)
            {
                if (node.submap == null)
                {
                    // Lazily allocated: most nodes have very few children.
                    node.submap = new HashMap<char, NormalizeCharMap>(1);
                }
                var child = node.submap[c]; // HashMap yields null for a missing key
                if (child == null)
                {
                    child = new NormalizeCharMap();
                    node.submap[c] = child;
                }
                node = child;
            }
            if (node.normStr != null)
            {
                throw new System.SystemException("MappingCharFilter: there is already a mapping for " + singleMatch);
            }
            node.normStr = replacement;
            // Length delta of this mapping, used by MappingCharFilter for offset correction.
            node.diff = singleMatch.Length - replacement.Length;
        }
    }
}
\ No newline at end of file diff --git a/src/core/Analysis/NumericTokenStream.cs b/src/core/Analysis/NumericTokenStream.cs new file mode 100644 index 0000000..90b6e72 --- /dev/null +++ b/src/core/Analysis/NumericTokenStream.cs @@ -0,0 +1,270 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using Lucene.Net.Analysis.Tokenattributes; +using Lucene.Net.Search; +using AttributeSource = Lucene.Net.Util.AttributeSource; +using NumericUtils = Lucene.Net.Util.NumericUtils; +using NumericField = Lucene.Net.Documents.NumericField; +// javadocs + +namespace Lucene.Net.Analysis +{ + + /// <summary> <b>Expert:</b> This class provides a <see cref="TokenStream" /> + /// for indexing numeric values that can be used by <see cref="NumericRangeQuery{T}" /> + /// or <see cref="NumericRangeFilter{T}" />. + /// + /// <p/>Note that for simple usage, <see cref="NumericField" /> is + /// recommended. <see cref="NumericField" /> disables norms and + /// term freqs, as they are not usually needed during + /// searching. If you need to change these settings, you + /// should use this class. 
+ /// + /// <p/>See <see cref="NumericField" /> for capabilities of fields + /// indexed numerically.<p/> + /// + /// <p/>Here's an example usage, for an <c>int</c> field: + /// + /// <code> + /// Field field = new Field(name, new NumericTokenStream(precisionStep).setIntValue(value)); + /// field.setOmitNorms(true); + /// field.setOmitTermFreqAndPositions(true); + /// document.add(field); + /// </code> + /// + /// <p/>For optimal performance, re-use the TokenStream and Field instance + /// for more than one document: + /// + /// <code> + /// NumericTokenStream stream = new NumericTokenStream(precisionStep); + /// Field field = new Field(name, stream); + /// field.setOmitNorms(true); + /// field.setOmitTermFreqAndPositions(true); + /// Document document = new Document(); + /// document.add(field); + /// + /// for(all documents) { + /// stream.setIntValue(value) + /// writer.addDocument(document); + /// } + /// </code> + /// + /// <p/>This stream is not intended to be used in analyzers; + /// it's more for iterating the different precisions during + /// indexing a specific numeric value.<p/> + /// + /// <p/><b>NOTE</b>: as token streams are only consumed once + /// the document is added to the index, if you index more + /// than one numeric field, use a separate <c>NumericTokenStream</c> + /// instance for each.<p/> + /// + /// <p/>See <see cref="NumericRangeQuery{T}" /> for more details on the + /// <a href="../search/NumericRangeQuery.html#precisionStepDesc"><c>precisionStep</c></a> + /// parameter as well as how numeric fields work under the hood.<p/> + /// + /// <p/><font color="red"><b>NOTE:</b> This API is experimental and + /// might change in incompatible ways in the next release.</font> + /// Since 2.9 + /// </summary> + public sealed class NumericTokenStream : TokenStream + { + private void InitBlock() + { + termAtt = AddAttribute<ITermAttribute>(); + typeAtt = AddAttribute<ITypeAttribute>(); + posIncrAtt = AddAttribute<IPositionIncrementAttribute>(); + } + 
+ /// <summary>The full precision token gets this token type assigned. </summary> + public const System.String TOKEN_TYPE_FULL_PREC = "fullPrecNumeric"; + + /// <summary>The lower precision tokens gets this token type assigned. </summary> + public const System.String TOKEN_TYPE_LOWER_PREC = "lowerPrecNumeric"; + + /// <summary> Creates a token stream for numeric values using the default <c>precisionStep</c> + /// <see cref="NumericUtils.PRECISION_STEP_DEFAULT" /> (4). The stream is not yet initialized, + /// before using set a value using the various set<em>???</em>Value() methods. + /// </summary> + public NumericTokenStream():this(NumericUtils.PRECISION_STEP_DEFAULT) + { + } + + /// <summary> Creates a token stream for numeric values with the specified + /// <c>precisionStep</c>. The stream is not yet initialized, + /// before using set a value using the various set<em>???</em>Value() methods. + /// </summary> + public NumericTokenStream(int precisionStep):base() + { + InitBlock(); + this.precisionStep = precisionStep; + if (precisionStep < 1) + throw new System.ArgumentException("precisionStep must be >=1"); + } + + /// <summary> Expert: Creates a token stream for numeric values with the specified + /// <c>precisionStep</c> using the given <see cref="AttributeSource" />. + /// The stream is not yet initialized, + /// before using set a value using the various set<em>???</em>Value() methods. + /// </summary> + public NumericTokenStream(AttributeSource source, int precisionStep):base(source) + { + InitBlock(); + this.precisionStep = precisionStep; + if (precisionStep < 1) + throw new System.ArgumentException("precisionStep must be >=1"); + } + + /// <summary> Expert: Creates a token stream for numeric values with the specified + /// <c>precisionStep</c> using the given + /// <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory" />. + /// The stream is not yet initialized, + /// before using set a value using the various set<em>???</em>Value() methods. 
+ /// </summary> + public NumericTokenStream(AttributeFactory factory, int precisionStep):base(factory) + { + InitBlock(); + this.precisionStep = precisionStep; + if (precisionStep < 1) + throw new System.ArgumentException("precisionStep must be >=1"); + } + + /// <summary> Initializes the token stream with the supplied <c>long</c> value.</summary> + /// <param name="value_Renamed">the value, for which this TokenStream should enumerate tokens. + /// </param> + /// <returns> this instance, because of this you can use it the following way: + /// <c>new Field(name, new NumericTokenStream(precisionStep).SetLongValue(value))</c> + /// </returns> + public NumericTokenStream SetLongValue(long value_Renamed) + { + this.value_Renamed = value_Renamed; + valSize = 64; + shift = 0; + return this; + } + + /// <summary> Initializes the token stream with the supplied <c>int</c> value.</summary> + /// <param name="value_Renamed">the value, for which this TokenStream should enumerate tokens. + /// </param> + /// <returns> this instance, because of this you can use it the following way: + /// <c>new Field(name, new NumericTokenStream(precisionStep).SetIntValue(value))</c> + /// </returns> + public NumericTokenStream SetIntValue(int value_Renamed) + { + this.value_Renamed = (long) value_Renamed; + valSize = 32; + shift = 0; + return this; + } + + /// <summary> Initializes the token stream with the supplied <c>double</c> value.</summary> + /// <param name="value_Renamed">the value, for which this TokenStream should enumerate tokens. 
+ /// </param> + /// <returns> this instance, because of this you can use it the following way: + /// <c>new Field(name, new NumericTokenStream(precisionStep).SetDoubleValue(value))</c> + /// </returns> + public NumericTokenStream SetDoubleValue(double value_Renamed) + { + this.value_Renamed = NumericUtils.DoubleToSortableLong(value_Renamed); + valSize = 64; + shift = 0; + return this; + } + + /// <summary> Initializes the token stream with the supplied <c>float</c> value.</summary> + /// <param name="value_Renamed">the value, for which this TokenStream should enumerate tokens. + /// </param> + /// <returns> this instance, because of this you can use it the following way: + /// <c>new Field(name, new NumericTokenStream(precisionStep).SetFloatValue(value))</c> + /// </returns> + public NumericTokenStream SetFloatValue(float value_Renamed) + { + this.value_Renamed = (long) NumericUtils.FloatToSortableInt(value_Renamed); + valSize = 32; + shift = 0; + return this; + } + + // @Override + public override void Reset() + { + if (valSize == 0) + throw new System.SystemException("call set???Value() before usage"); + shift = 0; + } + + protected override void Dispose(bool disposing) + { + // Do nothing. 
+ } + + // @Override + public override bool IncrementToken() + { + if (valSize == 0) + throw new System.SystemException("call set???Value() before usage"); + if (shift >= valSize) + return false; + + ClearAttributes(); + char[] buffer; + switch (valSize) + { + + case 64: + buffer = termAtt.ResizeTermBuffer(NumericUtils.BUF_SIZE_LONG); + termAtt.SetTermLength(NumericUtils.LongToPrefixCoded(value_Renamed, shift, buffer)); + break; + + + case 32: + buffer = termAtt.ResizeTermBuffer(NumericUtils.BUF_SIZE_INT); + termAtt.SetTermLength(NumericUtils.IntToPrefixCoded((int) value_Renamed, shift, buffer)); + break; + + + default: + // should not happen + throw new System.ArgumentException("valSize must be 32 or 64"); + + } + + typeAtt.Type = (shift == 0)?TOKEN_TYPE_FULL_PREC:TOKEN_TYPE_LOWER_PREC; + posIncrAtt.PositionIncrement = (shift == 0)?1:0; + shift += precisionStep; + return true; + } + + // @Override + public override System.String ToString() + { + System.Text.StringBuilder sb = new System.Text.StringBuilder("(numeric,valSize=").Append(valSize); + sb.Append(",precisionStep=").Append(precisionStep).Append(')'); + return sb.ToString(); + } + + // members + private ITermAttribute termAtt; + private ITypeAttribute typeAtt; + private IPositionIncrementAttribute posIncrAtt; + + private int shift = 0, valSize = 0; // valSize==0 means not initialized + private readonly int precisionStep; + + private long value_Renamed = 0L; + } +}
\ No newline at end of file diff --git a/src/core/Analysis/PerFieldAnalyzerWrapper.cs b/src/core/Analysis/PerFieldAnalyzerWrapper.cs new file mode 100644 index 0000000..b1c43aa --- /dev/null +++ b/src/core/Analysis/PerFieldAnalyzerWrapper.cs @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System.Collections.Generic; +using Lucene.Net.Support; + +namespace Lucene.Net.Analysis +{ + + /// <summary> This analyzer is used to facilitate scenarios where different + /// fields require different analysis techniques. Use <see cref="AddAnalyzer" /> + /// to add a non-default analyzer on a field name basis. + /// + /// <p/>Example usage: + /// + /// <code> + /// PerFieldAnalyzerWrapper aWrapper = + /// new PerFieldAnalyzerWrapper(new StandardAnalyzer()); + /// aWrapper.addAnalyzer("firstname", new KeywordAnalyzer()); + /// aWrapper.addAnalyzer("lastname", new KeywordAnalyzer()); + /// </code> + /// + /// <p/>In this example, StandardAnalyzer will be used for all fields except "firstname" + /// and "lastname", for which KeywordAnalyzer will be used. + /// + /// <p/>A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing + /// and query parsing. 
+ /// </summary> + public class PerFieldAnalyzerWrapper:Analyzer + { + private readonly Analyzer defaultAnalyzer; + private readonly IDictionary<string, Analyzer> analyzerMap = new HashMap<string, Analyzer>(); + + + /// <summary> Constructs with default analyzer. + /// + /// </summary> + /// <param name="defaultAnalyzer">Any fields not specifically + /// defined to use a different analyzer will use the one provided here. + /// </param> + public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer) + : this(defaultAnalyzer, null) + { + } + + /// <summary> Constructs with default analyzer and a map of analyzers to use for + /// specific fields. + /// + /// </summary> + /// <param name="defaultAnalyzer">Any fields not specifically + /// defined to use a different analyzer will use the one provided here. + /// </param> + /// <param name="fieldAnalyzers">a Map (String field name to the Analyzer) to be + /// used for those fields + /// </param> + public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer, IEnumerable<KeyValuePair<string, Analyzer>> fieldAnalyzers) + { + this.defaultAnalyzer = defaultAnalyzer; + if (fieldAnalyzers != null) + { + foreach(var entry in fieldAnalyzers) + analyzerMap[entry.Key] = entry.Value; + } + SetOverridesTokenStreamMethod<PerFieldAnalyzerWrapper>(); + } + + + /// <summary> Defines an analyzer to use for the specified field. + /// + /// </summary> + /// <param name="fieldName">field name requiring a non-default analyzer + /// </param> + /// <param name="analyzer">non-default analyzer to use for field + /// </param> + public virtual void AddAnalyzer(System.String fieldName, Analyzer analyzer) + { + analyzerMap[fieldName] = analyzer; + } + + public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader) + { + var analyzer = analyzerMap[fieldName] ?? 
defaultAnalyzer; + + return analyzer.TokenStream(fieldName, reader); + } + + public override TokenStream ReusableTokenStream(string fieldName, System.IO.TextReader reader) + { + if (overridesTokenStreamMethod) + { + // LUCENE-1678: force fallback to tokenStream() if we + // have been subclassed and that subclass overrides + // tokenStream but not reusableTokenStream + return TokenStream(fieldName, reader); + } + var analyzer = analyzerMap[fieldName] ?? defaultAnalyzer; + + return analyzer.ReusableTokenStream(fieldName, reader); + } + + /// <summary>Return the positionIncrementGap from the analyzer assigned to fieldName </summary> + public override int GetPositionIncrementGap(string fieldName) + { + var analyzer = analyzerMap[fieldName] ?? defaultAnalyzer; + return analyzer.GetPositionIncrementGap(fieldName); + } + + /// <summary> Return the offsetGap from the analyzer assigned to field </summary> + public override int GetOffsetGap(Documents.IFieldable field) + { + Analyzer analyzer = analyzerMap[field.Name] ?? defaultAnalyzer; + return analyzer.GetOffsetGap(field); + } + + public override System.String ToString() + { + // {{Aroush-2.9}} will 'analyzerMap.ToString()' work in the same way as Java's java.util.HashMap.toString()? + return "PerFieldAnalyzerWrapper(" + analyzerMap + ", default=" + defaultAnalyzer + ")"; + } + } +}
\ No newline at end of file diff --git a/src/core/Analysis/PorterStemFilter.cs b/src/core/Analysis/PorterStemFilter.cs new file mode 100644 index 0000000..b7f1dbf --- /dev/null +++ b/src/core/Analysis/PorterStemFilter.cs @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using Lucene.Net.Analysis.Tokenattributes; + +namespace Lucene.Net.Analysis +{ + + /// <summary>Transforms the token stream as per the Porter stemming algorithm. + /// Note: the input to the stemming filter must already be in lower case, + /// so you will need to use LowerCaseFilter or LowerCaseTokenizer farther + /// down the Tokenizer chain in order for this to work properly! + /// <p/> + /// To use this filter with other analyzers, you'll want to write an + /// Analyzer class that sets up the TokenStream chain as you want it. 
+ /// To use this with LowerCaseTokenizer, for example, you'd write an + /// analyzer like this: + /// <p/> + /// <code> + /// class MyAnalyzer extends Analyzer { + /// public final TokenStream tokenStream(String fieldName, Reader reader) { + /// return new PorterStemFilter(new LowerCaseTokenizer(reader)); + /// } + /// } + /// </code> + /// </summary> + public sealed class PorterStemFilter:TokenFilter + { + private readonly PorterStemmer stemmer; + private readonly ITermAttribute termAtt; + + public PorterStemFilter(TokenStream in_Renamed):base(in_Renamed) + { + stemmer = new PorterStemmer(); + termAtt = AddAttribute<ITermAttribute>(); + } + + public override bool IncrementToken() + { + if (!input.IncrementToken()) + return false; + + if (stemmer.Stem(termAtt.TermBuffer(), 0, termAtt.TermLength())) + termAtt.SetTermBuffer(stemmer.ResultBuffer, 0, stemmer.ResultLength); + return true; + } + } +}
\ No newline at end of file diff --git a/src/core/Analysis/PorterStemmer.cs b/src/core/Analysis/PorterStemmer.cs new file mode 100644 index 0000000..f47c5a7 --- /dev/null +++ b/src/core/Analysis/PorterStemmer.cs @@ -0,0 +1,746 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements.  See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.  You may obtain a copy of the License at + * + *     http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + +Porter stemmer in Java. The original paper is in + +Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14, +no. 3, pp 130-137, + +See also http://www.tartarus.org/~martin/PorterStemmer/index.html + +Bug 1 (reported by Gonzalo Parra 16/10/99) fixed as marked below. +The words 'aed', 'eed', 'oed' leave k at 'a' for step 3, and b[k-1] +is then outside the bounds of b. + +Similarly, + +Bug 2 (reported by Steve Dyrdahl 22/2/00) fixed as marked below. +'ion' by itself leaves j = -1 in the test for 'ion' in step 5, and +b[j] is then outside the bounds of b. + +Release 3. + +[ This version is derived from Release 3, modified by Brian Goetz to +optimize for fewer object creations. ] +*/ +using System; +namespace Lucene.Net.Analysis +{ + + /// <summary> + /// Stemmer, implementing the Porter Stemming Algorithm + /// + /// The Stemmer class transforms a word into its root form. 
The input + /// word can be provided a character at time (by calling add()), or at once + /// by calling one of the various stem(something) methods. + /// </summary> + + class PorterStemmer + { + private char[] b; + private int i, j, k, k0; + private bool dirty = false; + private const int INC = 50; /* unit of size whereby b is increased */ + private const int EXTRA = 1; + + public PorterStemmer() + { + b = new char[INC]; + i = 0; + } + + /// <summary> reset() resets the stemmer so it can stem another word. If you invoke + /// the stemmer by calling add(char) and then stem(), you must call reset() + /// before starting another word. + /// </summary> + public virtual void Reset() + { + i = 0; dirty = false; + } + + /// <summary> Add a character to the word being stemmed. When you are finished + /// adding characters, you can call stem(void) to process the word. + /// </summary> + public virtual void Add(char ch) + { + if (b.Length <= i + EXTRA) + { + var new_b = new char[b.Length + INC]; + Array.Copy(b, 0, new_b, 0, b.Length); + b = new_b; + } + b[i++] = ch; + } + + /// <summary> After a word has been stemmed, it can be retrieved by toString(), + /// or a reference to the internal buffer can be retrieved by getResultBuffer + /// and getResultLength (which is generally more efficient.) + /// </summary> + public override System.String ToString() + { + return new System.String(b, 0, i); + } + + /// <summary> Returns the length of the word resulting from the stemming process.</summary> + public virtual int ResultLength + { + get { return i; } + } + + /// <summary> Returns a reference to a character buffer containing the results of + /// the stemming process. You also need to consult getResultLength() + /// to determine the length of the result. + /// </summary> + public virtual char[] ResultBuffer + { + get { return b; } + } + + /* cons(i) is true <=> b[i] is a consonant. 
*/ + + private bool Cons(int i) + { + switch (b[i]) + { + + case 'a': + case 'e': + case 'i': + case 'o': + case 'u': + return false; + + case 'y': + return (i == k0)?true:!Cons(i - 1); + + default: + return true; + + } + } + + /* m() measures the number of consonant sequences between k0 and j. if c is + a consonant sequence and v a vowel sequence, and <..> indicates arbitrary + presence, + + <c><v> gives 0 + <c>vc<v> gives 1 + <c>vcvc<v> gives 2 + <c>vcvcvc<v> gives 3 + .... + */ + + private int M() + { + int n = 0; + int i = k0; + while (true) + { + if (i > j) + return n; + if (!Cons(i)) + break; + i++; + } + i++; + while (true) + { + while (true) + { + if (i > j) + return n; + if (Cons(i)) + break; + i++; + } + i++; + n++; + while (true) + { + if (i > j) + return n; + if (!Cons(i)) + break; + i++; + } + i++; + } + } + + /* vowelinstem() is true <=> k0,...j contains a vowel */ + + private bool Vowelinstem() + { + int i; + for (i = k0; i <= j; i++) + if (!Cons(i)) + return true; + return false; + } + + /* doublec(j) is true <=> j,(j-1) contain a double consonant. */ + + private bool Doublec(int j) + { + if (j < k0 + 1) + return false; + if (b[j] != b[j - 1]) + return false; + return Cons(j); + } + + /* cvc(i) is true <=> i-2,i-1,i has the form consonant - vowel - consonant + and also if the second c is not w,x or y. this is used when trying to + restore an e at the end of a short word. e.g. + + cav(e), lov(e), hop(e), crim(e), but + snow, box, tray. 
+ + */ + + private bool Cvc(int i) + { + if (i < k0 + 2 || !Cons(i) || Cons(i - 1) || !Cons(i - 2)) + return false; + else + { + int ch = b[i]; + if (ch == 'w' || ch == 'x' || ch == 'y') + return false; + } + return true; + } + + private bool Ends(System.String s) + { + int l = s.Length; + int o = k - l + 1; + if (o < k0) + return false; + for (int i = 0; i < l; i++) + if (b[o + i] != s[i]) + return false; + j = k - l; + return true; + } + + /* setto(s) sets (j+1),...k to the characters in the string s, readjusting + k. */ + + internal virtual void Setto(System.String s) + { + int l = s.Length; + int o = j + 1; + for (int i = 0; i < l; i++) + b[o + i] = s[i]; + k = j + l; + dirty = true; + } + + /* r(s) is used further down. */ + + internal virtual void R(System.String s) + { + if (M() > 0) + Setto(s); + } + + /* step1() gets rid of plurals and -ed or -ing. e.g. + + caresses -> caress + ponies -> poni + ties -> ti + caress -> caress + cats -> cat + + feed -> feed + agreed -> agree + disabled -> disable + + matting -> mat + mating -> mate + meeting -> meet + milling -> mill + messing -> mess + + meetings -> meet + + */ + + private void Step1() + { + if (b[k] == 's') + { + if (Ends("sses")) + k -= 2; + else if (Ends("ies")) + Setto("i"); + else if (b[k - 1] != 's') + k--; + } + if (Ends("eed")) + { + if (M() > 0) + k--; + } + else if ((Ends("ed") || Ends("ing")) && Vowelinstem()) + { + k = j; + if (Ends("at")) + Setto("ate"); + else if (Ends("bl")) + Setto("ble"); + else if (Ends("iz")) + Setto("ize"); + else if (Doublec(k)) + { + int ch = b[k--]; + if (ch == 'l' || ch == 's' || ch == 'z') + k++; + } + else if (M() == 1 && Cvc(k)) + Setto("e"); + } + } + + /* step2() turns terminal y to i when there is another vowel in the stem. */ + + private void Step2() + { + if (Ends("y") && Vowelinstem()) + { + b[k] = 'i'; + dirty = true; + } + } + + /* step3() maps double suffices to single ones. so -ization ( = -ize plus + -ation) maps to -ize etc. 
note that the string before the suffix must give + m() > 0. */ + + private void Step3() + { + if (k == k0) + return ; /* For Bug 1 */ + switch (b[k - 1]) + { + + case 'a': + if (Ends("ational")) + { + R("ate"); break; + } + if (Ends("tional")) + { + R("tion"); break; + } + break; + + case 'c': + if (Ends("enci")) + { + R("ence"); break; + } + if (Ends("anci")) + { + R("ance"); break; + } + break; + + case 'e': + if (Ends("izer")) + { + R("ize"); break; + } + break; + + case 'l': + if (Ends("bli")) + { + R("ble"); break; + } + if (Ends("alli")) + { + R("al"); break; + } + if (Ends("entli")) + { + R("ent"); break; + } + if (Ends("eli")) + { + R("e"); break; + } + if (Ends("ousli")) + { + R("ous"); break; + } + break; + + case 'o': + if (Ends("ization")) + { + R("ize"); break; + } + if (Ends("ation")) + { + R("ate"); break; + } + if (Ends("ator")) + { + R("ate"); break; + } + break; + + case 's': + if (Ends("alism")) + { + R("al"); break; + } + if (Ends("iveness")) + { + R("ive"); break; + } + if (Ends("fulness")) + { + R("ful"); break; + } + if (Ends("ousness")) + { + R("ous"); break; + } + break; + + case 't': + if (Ends("aliti")) + { + R("al"); break; + } + if (Ends("iviti")) + { + R("ive"); break; + } + if (Ends("biliti")) + { + R("ble"); break; + } + break; + + case 'g': + if (Ends("logi")) + { + R("log"); break; + } + break; + } + } + + /* step4() deals with -ic-, -full, -ness etc. similar strategy to step3. */ + + private void Step4() + { + switch (b[k]) + { + + case 'e': + if (Ends("icate")) + { + R("ic"); break; + } + if (Ends("ative")) + { + R(""); break; + } + if (Ends("alize")) + { + R("al"); break; + } + break; + + case 'i': + if (Ends("iciti")) + { + R("ic"); break; + } + break; + + case 'l': + if (Ends("ical")) + { + R("ic"); break; + } + if (Ends("ful")) + { + R(""); break; + } + break; + + case 's': + if (Ends("ness")) + { + R(""); break; + } + break; + } + } + + /* step5() takes off -ant, -ence etc., in context <c>vcvc<v>. 
*/ + + private void Step5() + { + if (k == k0) + return ; /* for Bug 1 */ + switch (b[k - 1]) + { + + case 'a': + if (Ends("al")) + break; + return ; + + case 'c': + if (Ends("ance")) + break; + if (Ends("ence")) + break; + return ; + + case 'e': + if (Ends("er")) + break; return ; + + case 'i': + if (Ends("ic")) + break; return ; + + case 'l': + if (Ends("able")) + break; + if (Ends("ible")) + break; return ; + + case 'n': + if (Ends("ant")) + break; + if (Ends("ement")) + break; + if (Ends("ment")) + break; + /* element etc. not stripped before the m */ + if (Ends("ent")) + break; + return ; + + case 'o': + if (Ends("ion") && j >= 0 && (b[j] == 's' || b[j] == 't')) + break; + /* j >= 0 fixes Bug 2 */ + if (Ends("ou")) + break; + return ; + /* takes care of -ous */ + + case 's': + if (Ends("ism")) + break; + return ; + + case 't': + if (Ends("ate")) + break; + if (Ends("iti")) + break; + return ; + + case 'u': + if (Ends("ous")) + break; + return ; + + case 'v': + if (Ends("ive")) + break; + return ; + + case 'z': + if (Ends("ize")) + break; + return ; + + default: + return ; + + } + if (M() > 1) + k = j; + } + + /* step6() removes a final -e if m() > 1. */ + + private void Step6() + { + j = k; + if (b[k] == 'e') + { + int a = M(); + if (a > 1 || a == 1 && !Cvc(k - 1)) + k--; + } + if (b[k] == 'l' && Doublec(k) && M() > 1) + k--; + } + + + /// <summary> Stem a word provided as a String. Returns the result as a String.</summary> + public virtual System.String Stem(System.String s) + { + if (Stem(s.ToCharArray(), s.Length)) + { + return ToString(); + } + else + return s; + } + + /// <summary>Stem a word contained in a char[]. Returns true if the stemming process + /// resulted in a word different from the input. You can retrieve the + /// result with getResultLength()/getResultBuffer() or toString(). 
+ /// </summary> + public virtual bool Stem(char[] word) + { + return Stem(word, word.Length); + } + + /// <summary>Stem a word contained in a portion of a char[] array. Returns + /// true if the stemming process resulted in a word different from + /// the input. You can retrieve the result with + /// getResultLength()/getResultBuffer() or toString(). + /// </summary> + public virtual bool Stem(char[] wordBuffer, int offset, int wordLen) + { + Reset(); + if (b.Length < wordLen) + { + var new_b = new char[wordLen + EXTRA]; + b = new_b; + } + Array.Copy(wordBuffer, offset, b, 0, wordLen); + i = wordLen; + return Stem(0); + } + + /// <summary>Stem a word contained in a leading portion of a char[] array. + /// Returns true if the stemming process resulted in a word different + /// from the input. You can retrieve the result with + /// getResultLength()/getResultBuffer() or toString(). + /// </summary> + public virtual bool Stem(char[] word, int wordLen) + { + return Stem(word, 0, wordLen); + } + + /// <summary>Stem the word placed into the Stemmer buffer through calls to add(). + /// Returns true if the stemming process resulted in a word different + /// from the input. You can retrieve the result with + /// getResultLength()/getResultBuffer() or toString(). + /// </summary> + public virtual bool Stem() + { + return Stem(0); + } + + public virtual bool Stem(int i0) + { + k = i - 1; + k0 = i0; + if (k > k0 + 1) + { + Step1(); Step2(); Step3(); Step4(); Step5(); Step6(); + } + // Also, a word is considered dirty if we lopped off letters + // Thanks to Ifigenia Vairelles for pointing this out. + if (i != k + 1) + dirty = true; + i = k + 1; + return dirty; + } + + /// <summary>Test program for demonstrating the Stemmer. It reads a file and + /// stems each word, writing the result to standard out. 
+ /// Usage: Stemmer file-name + /// </summary> + [STAThread] + public static void Main(System.String[] args) + { + var s = new PorterStemmer(); + + for (int i = 0; i < args.Length; i++) + { + try + { + System.IO.Stream in_Renamed = new System.IO.FileStream(args[i], System.IO.FileMode.Open, System.IO.FileAccess.Read); + var buffer = new byte[1024]; + + int bufferLen = in_Renamed.Read(buffer, 0, buffer.Length); + int offset = 0; + s.Reset(); + + while (true) + { + int ch; + if (offset < bufferLen) + ch = buffer[offset++]; + else + { + bufferLen = in_Renamed.Read(buffer, 0, buffer.Length); + offset = 0; + if (bufferLen < 0) + ch = - 1; + else + ch = buffer[offset++]; + } + + if (Char.IsLetter((char) ch)) + { + s.Add(Char.ToLower((char) ch)); + } + else + { + s.Stem(); + Console.Out.Write(s.ToString()); + s.Reset(); + if (ch < 0) + break; + else + { + System.Console.Out.Write((char) ch); + } + } + } + + in_Renamed.Close(); + } + catch (System.IO.IOException) + { + Console.Out.WriteLine("error reading " + args[i]); + } + } + } + } +}
\ No newline at end of file diff --git a/src/core/Analysis/SimpleAnalyzer.cs b/src/core/Analysis/SimpleAnalyzer.cs new file mode 100644 index 0000000..b84f470 --- /dev/null +++ b/src/core/Analysis/SimpleAnalyzer.cs @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +namespace Lucene.Net.Analysis +{ + + /// <summary>An <see cref="Analyzer" /> that filters <see cref="LetterTokenizer" /> + /// with <see cref="LowerCaseFilter" /> + /// </summary> + + public sealed class SimpleAnalyzer : Analyzer + { + public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader) + { + return new LowerCaseTokenizer(reader); + } + + public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader) + { + var tokenizer = (Tokenizer) PreviousTokenStream; + if (tokenizer == null) + { + tokenizer = new LowerCaseTokenizer(reader); + PreviousTokenStream = tokenizer; + } + else + tokenizer.Reset(reader); + return tokenizer; + } + } +}
\ No newline at end of file diff --git a/src/core/Analysis/Standard/StandardAnalyzer.cs b/src/core/Analysis/Standard/StandardAnalyzer.cs new file mode 100644 index 0000000..347d026 --- /dev/null +++ b/src/core/Analysis/Standard/StandardAnalyzer.cs @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections; +using System.Collections.Generic; +using Lucene.Net.Analysis; +using Lucene.Net.Util; +using Version = Lucene.Net.Util.Version; + +namespace Lucene.Net.Analysis.Standard +{ + + /// <summary> Filters <see cref="StandardTokenizer" /> with <see cref="StandardFilter" />, + /// <see cref="LowerCaseFilter" /> and <see cref="StopFilter" />, using a list of English stop + /// words. 
+ /// + /// <a name="version"/> + /// <p/> + /// You must specify the required <see cref="Version" /> compatibility when creating + /// StandardAnalyzer: + /// <list type="bullet"> + /// <item>As of 2.9, StopFilter preserves position increments</item> + /// <item>As of 2.4, Tokens incorrectly identified as acronyms are corrected (see + /// <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)</item> + /// </list> + /// </summary> + public class StandardAnalyzer : Analyzer + { + private ISet<string> stopSet; + + /// <summary> Specifies whether deprecated acronyms should be replaced with HOST type. + /// See <a href="https://issues.apache.org/jira/browse/LUCENE-1068">https://issues.apache.org/jira/browse/LUCENE-1068</a> + /// </summary> + private bool replaceInvalidAcronym, enableStopPositionIncrements; + + /// <summary>An unmodifiable set containing some common English words that are usually not + /// useful for searching. + /// </summary> + public static readonly ISet<string> STOP_WORDS_SET; + private Version matchVersion; + + /// <summary>Builds an analyzer with the default stop words (<see cref="STOP_WORDS_SET" />). 
+ /// </summary> + /// <param name="matchVersion">Lucene version to match see <see cref="Version">above</see></param> + public StandardAnalyzer(Version matchVersion) + : this(matchVersion, STOP_WORDS_SET) + { } + + /// <summary>Builds an analyzer with the given stop words.</summary> + /// <param name="matchVersion">Lucene version to match See <see cref="Version">above</see> /> + /// + /// </param> + /// <param name="stopWords">stop words + /// </param> + public StandardAnalyzer(Version matchVersion, ISet<string> stopWords) + { + stopSet = stopWords; + SetOverridesTokenStreamMethod<StandardAnalyzer>(); + enableStopPositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion); + replaceInvalidAcronym = matchVersion.OnOrAfter(Version.LUCENE_24); + this.matchVersion = matchVersion; + } + + /// <summary>Builds an analyzer with the stop words from the given file.</summary> + /// <seealso cref="WordlistLoader.GetWordSet(System.IO.FileInfo)"> + /// </seealso> + /// <param name="matchVersion">Lucene version to match See <see cref="Version">above</see> /> + /// + /// </param> + /// <param name="stopwords">File to read stop words from + /// </param> + public StandardAnalyzer(Version matchVersion, System.IO.FileInfo stopwords) + : this (matchVersion, WordlistLoader.GetWordSet(stopwords)) + { + } + + /// <summary>Builds an analyzer with the stop words from the given reader.</summary> + /// <seealso cref="WordlistLoader.GetWordSet(System.IO.TextReader)"> + /// </seealso> + /// <param name="matchVersion">Lucene version to match See <see cref="Version">above</see> /> + /// + /// </param> + /// <param name="stopwords">Reader to read stop words from + /// </param> + public StandardAnalyzer(Version matchVersion, System.IO.TextReader stopwords) + : this(matchVersion, WordlistLoader.GetWordSet(stopwords)) + { } + + /// <summary>Constructs a <see cref="StandardTokenizer" /> filtered by a <see cref="StandardFilter" /> + ///, a <see cref="LowerCaseFilter" /> and 
a <see cref="StopFilter" />. + /// </summary> + public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader) + { + StandardTokenizer tokenStream = new StandardTokenizer(matchVersion, reader); + tokenStream.MaxTokenLength = maxTokenLength; + TokenStream result = new StandardFilter(tokenStream); + result = new LowerCaseFilter(result); + result = new StopFilter(enableStopPositionIncrements, result, stopSet); + return result; + } + + private sealed class SavedStreams + { + internal StandardTokenizer tokenStream; + internal TokenStream filteredTokenStream; + } + + /// <summary>Default maximum allowed token length </summary> + public const int DEFAULT_MAX_TOKEN_LENGTH = 255; + + private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH; + + /// <summary> Set maximum allowed token length. If a token is seen + /// that exceeds this length then it is discarded. This + /// setting only takes effect the next time tokenStream or + /// reusableTokenStream is called. + /// </summary> + public virtual int MaxTokenLength + { + get { return maxTokenLength; } + set { maxTokenLength = value; } + } + + public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader) + { + if (overridesTokenStreamMethod) + { + // LUCENE-1678: force fallback to tokenStream() if we + // have been subclassed and that subclass overrides + // tokenStream but not reusableTokenStream + return TokenStream(fieldName, reader); + } + SavedStreams streams = (SavedStreams) PreviousTokenStream; + if (streams == null) + { + streams = new SavedStreams(); + PreviousTokenStream = streams; + streams.tokenStream = new StandardTokenizer(matchVersion, reader); + streams.filteredTokenStream = new StandardFilter(streams.tokenStream); + streams.filteredTokenStream = new LowerCaseFilter(streams.filteredTokenStream); + streams.filteredTokenStream = new StopFilter(enableStopPositionIncrements, + streams.filteredTokenStream, stopSet); + } + else + { + 
streams.tokenStream.Reset(reader); + } + streams.tokenStream.MaxTokenLength = maxTokenLength; + + streams.tokenStream.SetReplaceInvalidAcronym(replaceInvalidAcronym); + + return streams.filteredTokenStream; + } + static StandardAnalyzer() + { + STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET; + } + } +}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using Lucene.Net.Analysis.Tokenattributes;
using Token = Lucene.Net.Analysis.Token;
using TokenFilter = Lucene.Net.Analysis.TokenFilter;
using TokenStream = Lucene.Net.Analysis.TokenStream;

namespace Lucene.Net.Analysis.Standard
{

    /// <summary>Normalizes tokens extracted with <see cref="StandardTokenizer" />. </summary>
    public sealed class StandardFilter : TokenFilter
    {
        // Both type strings come from StandardTokenizerImpl.TOKEN_TYPES, and the
        // type attribute is populated from that same array, so comparing by
        // reference (below) is sufficient and matches the original Java port.
        private static readonly System.String APOSTROPHE_TYPE;
        private static readonly System.String ACRONYM_TYPE;

        // This filter inspects the term text and the token type of each token.
        private ITypeAttribute typeAtt;
        private ITermAttribute termAtt;

        static StandardFilter()
        {
            APOSTROPHE_TYPE = StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.APOSTROPHE];
            ACRONYM_TYPE = StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.ACRONYM];
        }

        /// <summary>Construct filtering <i>in</i>. </summary>
        public StandardFilter(TokenStream in_Renamed) : base(in_Renamed)
        {
            termAtt = AddAttribute<ITermAttribute>();
            typeAtt = AddAttribute<ITypeAttribute>();
        }

        /// <summary>Returns the next token in the stream, or null at EOS.
        /// <p/>Removes <tt>'s</tt> from the end of words.
        /// <p/>Removes dots from acronyms.
        /// </summary>
        public override bool IncrementToken()
        {
            if (!input.IncrementToken())
                return false;

            char[] buffer = termAtt.TermBuffer();
            int length = termAtt.TermLength();
            System.String tokenType = typeAtt.Type;

            bool endsWithPossessive = length >= 2
                && buffer[length - 2] == '\''
                && (buffer[length - 1] == 's' || buffer[length - 1] == 'S');

            if (ReferenceEquals(tokenType, APOSTROPHE_TYPE) && endsWithPossessive)
            {
                // Drop the trailing 's / 'S.
                termAtt.SetTermLength(length - 2);
            }
            else if (ReferenceEquals(tokenType, ACRONYM_TYPE))
            {
                // Compact the buffer in place, skipping every dot.
                int dst = 0;
                for (int src = 0; src < length; src++)
                {
                    if (buffer[src] != '.')
                        buffer[dst++] = buffer[src];
                }
                termAtt.SetTermLength(dst);
            }

            return true;
        }
    }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using Lucene.Net.Analysis.Tokenattributes;
using Lucene.Net.Util;
using CharReader = Lucene.Net.Analysis.CharReader;
using Token = Lucene.Net.Analysis.Token;
using Tokenizer = Lucene.Net.Analysis.Tokenizer;
using AttributeSource = Lucene.Net.Util.AttributeSource;
using Version = Lucene.Net.Util.Version;

namespace Lucene.Net.Analysis.Standard
{

    /// <summary>A grammar-based tokenizer constructed with JFlex
    ///
    /// <p/> This should be a good tokenizer for most European-language documents:
    ///
    /// <list type="bullet">
    /// <item>Splits words at punctuation characters, removing punctuation. However, a
    /// dot that's not followed by whitespace is considered part of a token.</item>
    /// <item>Splits words at hyphens, unless there's a number in the token, in which case
    /// the whole token is interpreted as a product number and is not split.</item>
    /// <item>Recognizes email addresses and internet hostnames as one token.</item>
    /// </list>
    ///
    /// <p/>Many applications have specific tokenizer needs. If this tokenizer does
    /// not suit your application, please consider copying this source code
    /// directory to your project and maintaining your own grammar-based tokenizer.
    ///
    /// <a name="version"/>
    /// <p/>
    /// You must specify the required <see cref="Version" /> compatibility when creating
    /// StandardAnalyzer:
    /// <list type="bullet">
    /// <item>As of 2.4, Tokens incorrectly identified as acronyms are corrected (see
    /// <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)</item>
    /// </list>
    /// </summary>
    public sealed class StandardTokenizer : Tokenizer
    {
        /// <summary>A private instance of the JFlex-constructed scanner </summary>
        private readonly StandardTokenizerImpl scanner;

        public const int ALPHANUM = 0;
        public const int APOSTROPHE = 1;
        public const int ACRONYM = 2;
        public const int COMPANY = 3;
        public const int EMAIL = 4;
        public const int HOST = 5;
        public const int NUM = 6;
        public const int CJ = 7;

        /// <deprecated> this solves a bug where HOSTs that end with '.' are identified
        /// as ACRONYMs.
        /// </deprecated>
        [Obsolete("this solves a bug where HOSTs that end with '.' are identified as ACRONYMs.")]
        public const int ACRONYM_DEP = 8;

        /// <summary>String token types that correspond to token type int constants </summary>
        public static readonly System.String[] TOKEN_TYPES = new System.String[]{"<ALPHANUM>", "<APOSTROPHE>", "<ACRONYM>", "<COMPANY>", "<EMAIL>", "<HOST>", "<NUM>", "<CJ>", "<ACRONYM_DEP>"};

        private bool replaceInvalidAcronym;

        // Field initializer replaces the former InitBlock() indirection.
        private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;

        // This tokenizer generates four attributes: term, offset,
        // positionIncrement and type.
        private ITermAttribute termAtt;
        private IOffsetAttribute offsetAtt;
        private IPositionIncrementAttribute posIncrAtt;
        private ITypeAttribute typeAtt;

        /// <summary>Set the max allowed token length.  Any token longer
        /// than this is skipped.
        /// </summary>
        public int MaxTokenLength
        {
            get { return maxTokenLength; }
            set { this.maxTokenLength = value; }
        }

        /// <summary> Creates a new instance of the
        /// <see cref="Lucene.Net.Analysis.Standard.StandardTokenizer" />. Attaches
        /// the <c>input</c> to the newly created JFlex scanner.
        /// </summary>
        /// <param name="matchVersion">Lucene version to match; see <a href="#version">above</a></param>
        /// <param name="input">The input reader
        ///
        /// See http://issues.apache.org/jira/browse/LUCENE-1068
        /// </param>
        public StandardTokenizer(Version matchVersion, System.IO.TextReader input) : base()
        {
            this.scanner = new StandardTokenizerImpl(input);
            Init(input, matchVersion);
        }

        /// <summary> Creates a new StandardTokenizer with a given <see cref="AttributeSource" />.</summary>
        public StandardTokenizer(Version matchVersion, AttributeSource source, System.IO.TextReader input) : base(source)
        {
            this.scanner = new StandardTokenizerImpl(input);
            Init(input, matchVersion);
        }

        /// <summary> Creates a new StandardTokenizer with a given
        /// <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory" />
        /// </summary>
        public StandardTokenizer(Version matchVersion, AttributeFactory factory, System.IO.TextReader input) : base(factory)
        {
            this.scanner = new StandardTokenizerImpl(input);
            Init(input, matchVersion);
        }

        /// <summary>Shared constructor tail: wires the input and registers attributes.</summary>
        private void Init(System.IO.TextReader input, Version matchVersion)
        {
            // As of 2.4, tokens mischaracterized as acronyms are corrected (LUCENE-1068).
            replaceInvalidAcronym = matchVersion.OnOrAfter(Version.LUCENE_24);
            this.input = input;
            termAtt = AddAttribute<ITermAttribute>();
            offsetAtt = AddAttribute<IOffsetAttribute>();
            posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
            typeAtt = AddAttribute<ITypeAttribute>();
        }

        ///<summary>
        /// (non-Javadoc)
        /// <see cref="Lucene.Net.Analysis.TokenStream.IncrementToken()" />
        ///</summary>
        public override bool IncrementToken()
        {
            ClearAttributes();
            int posIncr = 1;

            while (true)
            {
                int tokenType = scanner.GetNextToken();

                if (tokenType == StandardTokenizerImpl.YYEOF)
                {
                    return false;
                }

                if (scanner.Yylength() <= maxTokenLength)
                {
                    posIncrAtt.PositionIncrement = posIncr;
                    scanner.GetText(termAtt);
                    int start = scanner.Yychar();
                    offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(start + termAtt.TermLength()));
                    // This 'if' should be removed in the next release. For now, it converts
                    // invalid acronyms to HOST. When removed, only the 'else' part should
                    // remain.
                    if (tokenType == StandardTokenizerImpl.ACRONYM_DEP)
                    {
                        if (replaceInvalidAcronym)
                        {
                            typeAtt.Type = StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.HOST];
                            termAtt.SetTermLength(termAtt.TermLength() - 1); // remove extra '.'
                        }
                        else
                        {
                            typeAtt.Type = StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.ACRONYM];
                        }
                    }
                    else
                    {
                        typeAtt.Type = StandardTokenizerImpl.TOKEN_TYPES[tokenType];
                    }
                    return true;
                }
                // When we skip a too-long term, we still increment the
                // position increment
                else
                    posIncr++;
            }
        }

        public override void End()
        {
            // set final offset
            int finalOffset = CorrectOffset(scanner.Yychar() + scanner.Yylength());
            offsetAtt.SetOffset(finalOffset, finalOffset);
        }

        public override void Reset(System.IO.TextReader reader)
        {
            base.Reset(reader);
            scanner.Reset(reader);
        }

        /// <summary>
        /// Remove in 3.X and make true the only valid value
        /// See https://issues.apache.org/jira/browse/LUCENE-1068
        /// </summary>
        /// <param name="replaceInvalidAcronym">Set to true to replace mischaracterized acronyms as HOST.
        /// </param>
        [Obsolete("Remove in 3.X and make true the only valid value. See https://issues.apache.org/jira/browse/LUCENE-1068")]
        public void SetReplaceInvalidAcronym(bool replaceInvalidAcronym)
        {
            this.replaceInvalidAcronym = replaceInvalidAcronym;
        }
    }
}
\ No newline at end of file diff --git a/src/core/Analysis/Standard/StandardTokenizerImpl.cs b/src/core/Analysis/Standard/StandardTokenizerImpl.cs new file mode 100644 index 0000000..cb4bf5f --- /dev/null +++ b/src/core/Analysis/Standard/StandardTokenizerImpl.cs @@ -0,0 +1,707 @@ +/* The following code was generated by JFlex 1.4.1 on 9/4/08 6:49 PM */ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/* + NOTE: if you change StandardTokenizerImpl.jflex and need to regenerate the tokenizer, + the tokenizer, only use Java 1.4 !!! + This grammar currently uses constructs (eg :digit:, :letter:) whose + meaning can vary according to the JRE used to run jflex. See + https://issues.apache.org/jira/browse/LUCENE-1126 for details. + For current backwards compatibility it is needed to support + only Java 1.4 - this will change in Lucene 3.1. 
+*/ + +using System; +using Lucene.Net.Analysis.Tokenattributes; +using Token = Lucene.Net.Analysis.Token; + +namespace Lucene.Net.Analysis.Standard +{ + + + /// <summary> This class is a scanner generated by + /// <a href="http://www.jflex.de/">JFlex</a> 1.4.1 + /// on 9/4/08 6:49 PM from the specification file + /// <tt>/tango/mike/src/lucene.standarddigit/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex</tt> + /// </summary> + class StandardTokenizerImpl + { + + /// <summary>This character denotes the end of file </summary> + public const int YYEOF = - 1; + + /// <summary>initial size of the lookahead buffer </summary> + private const int ZZ_BUFFERSIZE = 16384; + + /// <summary>lexical states </summary> + public const int YYINITIAL = 0; + + /// <summary> Translates characters to character classes</summary> + private const System.String ZZ_CMAP_PACKED = "\x0009\x0000\x0001\x0000\x0001\x000D\x0001\x0000\x0001\x0000\x0001\x000C\x0012\x0000\x0001\x0000\x0005\x0000\x0001\x0005" + "\x0001\x0003\x0004\x0000\x0001\x0009\x0001\x0007\x0001\x0004\x0001\x0009\x000A\x0002\x0006\x0000\x0001\x0006\x001A\x000A" + "\x0004\x0000\x0001\x0008\x0001\x0000\x001A\x000A\x002F\x0000\x0001\x000A\x000A\x0000\x0001\x000A\x0004\x0000\x0001\x000A" + "\x0005\x0000\x0017\x000A\x0001\x0000\x001F\x000A\x0001\x0000\u0128\x000A\x0002\x0000\x0012\x000A\x001C\x0000\x005E\x000A" + "\x0002\x0000\x0009\x000A\x0002\x0000\x0007\x000A\x000E\x0000\x0002\x000A\x000E\x0000\x0005\x000A\x0009\x0000\x0001\x000A" + "\x008B\x0000\x0001\x000A\x000B\x0000\x0001\x000A\x0001\x0000\x0003\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0014\x000A" + "\x0001\x0000\x002C\x000A\x0001\x0000\x0008\x000A\x0002\x0000\x001A\x000A\x000C\x0000\x0082\x000A\x000A\x0000\x0039\x000A" + "\x0002\x0000\x0002\x000A\x0002\x0000\x0002\x000A\x0003\x0000\x0026\x000A\x0002\x0000\x0002\x000A\x0037\x0000\x0026\x000A" + 
"\x0002\x0000\x0001\x000A\x0007\x0000\x0027\x000A\x0048\x0000\x001B\x000A\x0005\x0000\x0003\x000A\x002E\x0000\x001A\x000A" + "\x0005\x0000\x000B\x000A\x0015\x0000\x000A\x0002\x0007\x0000\x0063\x000A\x0001\x0000\x0001\x000A\x000F\x0000\x0002\x000A" + "\x0009\x0000\x000A\x0002\x0003\x000A\x0013\x0000\x0001\x000A\x0001\x0000\x001B\x000A\x0053\x0000\x0026\x000A\u015f\x0000" + "\x0035\x000A\x0003\x0000\x0001\x000A\x0012\x0000\x0001\x000A\x0007\x0000\x000A\x000A\x0004\x0000\x000A\x0002\x0015\x0000" + "\x0008\x000A\x0002\x0000\x0002\x000A\x0002\x0000\x0016\x000A\x0001\x0000\x0007\x000A\x0001\x0000\x0001\x000A\x0003\x0000" + "\x0004\x000A\x0022\x0000\x0002\x000A\x0001\x0000\x0003\x000A\x0004\x0000\x000A\x0002\x0002\x000A\x0013\x0000\x0006\x000A" + "\x0004\x0000\x0002\x000A\x0002\x0000\x0016\x000A\x0001\x0000\x0007\x000A\x0001\x0000\x0002\x000A\x0001\x0000\x0002\x000A" + + "\x0001\x0000\x0002\x000A\x001F\x0000\x0004\x000A\x0001\x0000\x0001\x000A\x0007\x0000\x000A\x0002\x0002\x0000\x0003\x000A" + "\x0010\x0000\x0007\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0003\x000A\x0001\x0000\x0016\x000A\x0001\x0000\x0007\x000A" + "\x0001\x0000\x0002\x000A\x0001\x0000\x0005\x000A\x0003\x0000\x0001\x000A\x0012\x0000\x0001\x000A\x000F\x0000\x0001\x000A" + "\x0005\x0000\x000A\x0002\x0015\x0000\x0008\x000A\x0002\x0000\x0002\x000A\x0002\x0000\x0016\x000A\x0001\x0000\x0007\x000A" + "\x0001\x0000\x0002\x000A\x0002\x0000\x0004\x000A\x0003\x0000\x0001\x000A\x001E\x0000\x0002\x000A\x0001\x0000\x0003\x000A" + "\x0004\x0000\x000A\x0002\x0015\x0000\x0006\x000A\x0003\x0000\x0003\x000A\x0001\x0000\x0004\x000A\x0003\x0000\x0002\x000A" + "\x0001\x0000\x0001\x000A\x0001\x0000\x0002\x000A\x0003\x0000\x0002\x000A\x0003\x0000\x0003\x000A\x0003\x0000\x0008\x000A" + "\x0001\x0000\x0003\x000A\x002D\x0000\x0009\x0002\x0015\x0000\x0008\x000A\x0001\x0000\x0003\x000A\x0001\x0000\x0017\x000A" + "\x0001\x0000\x000A\x000A\x0001\x0000\x0005\x000A\x0026\x0000\x0002\x000A\x0004\x0000\x000A\x0002\x0015\x0000\x0008\x000A" 
+ "\x0001\x0000\x0003\x000A\x0001\x0000\x0017\x000A\x0001\x0000\x000A\x000A\x0001\x0000\x0005\x000A\x0024\x0000\x0001\x000A" + "\x0001\x0000\x0002\x000A\x0004\x0000\x000A\x0002\x0015\x0000\x0008\x000A\x0001\x0000\x0003\x000A\x0001\x0000\x0017\x000A" + "\x0001\x0000\x0010\x000A\x0026\x0000\x0002\x000A\x0004\x0000\x000A\x0002\x0015\x0000\x0012\x000A\x0003\x0000\x0018\x000A" + "\x0001\x0000\x0009\x000A\x0001\x0000\x0001\x000A\x0002\x0000\x0007\x000A\x0039\x0000\x0001\x0001\x0030\x000A\x0001\x0001" + "\x0002\x000A\x000C\x0001\x0007\x000A\x0009\x0001\x000A\x0002\x0027\x0000\x0002\x000A\x0001\x0000\x0001\x000A\x0002\x0000" + "\x0002\x000A\x0001\x0000\x0001\x000A\x0002\x0000\x0001\x000A\x0006\x0000\x0004\x000A\x0001\x0000\x0007\x000A\x0001\x0000" + "\x0003\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0001\x000A\x0002\x0000\x0002\x000A\x0001\x0000\x0004\x000A\x0001\x0000" + + "\x0002\x000A\x0009\x0000\x0001\x000A\x0002\x0000\x0005\x000A\x0001\x0000\x0001\x000A\x0009\x0000\x000A\x0002\x0002\x0000" + "\x0002\x000A\x0022\x0000\x0001\x000A\x001F\x0000\x000A\x0002\x0016\x0000\x0008\x000A\x0001\x0000\x0022\x000A\x001D\x0000" + "\x0004\x000A\x0074\x0000\x0022\x000A\x0001\x0000\x0005\x000A\x0001\x0000\x0002\x000A\x0015\x0000\x000A\x0002\x0006\x0000" + "\x0006\x000A\x004A\x0000\x0026\x000A\x000A\x0000\x0027\x000A\x0009\x0000\x005A\x000A\x0005\x0000\x0044\x000A\x0005\x0000" + "\x0052\x000A\x0006\x0000\x0007\x000A\x0001\x0000\x003F\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0004\x000A\x0002\x0000" + "\x0007\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0004\x000A\x0002\x0000\x0027\x000A\x0001\x0000\x0001\x000A\x0001\x0000" + "\x0004\x000A\x0002\x0000\x001F\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0004\x000A\x0002\x0000\x0007\x000A\x0001\x0000" + "\x0001\x000A\x0001\x0000\x0004\x000A\x0002\x0000\x0007\x000A\x0001\x0000\x0007\x000A\x0001\x0000\x0017\x000A\x0001\x0000" + 
"\x001F\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0004\x000A\x0002\x0000\x0007\x000A\x0001\x0000\x0027\x000A\x0001\x0000" + "\x0013\x000A\x000E\x0000\x0009\x0002\x002E\x0000\x0055\x000A\x000C\x0000\u026c\x000A\x0002\x0000\x0008\x000A\x000A\x0000" + "\x001A\x000A\x0005\x0000\x004B\x000A\x0095\x0000\x0034\x000A\x002C\x0000\x000A\x0002\x0026\x0000\x000A\x0002\x0006\x0000" + "\x0058\x000A\x0008\x0000\x0029\x000A\u0557\x0000\x009C\x000A\x0004\x0000\x005A\x000A\x0006\x0000\x0016\x000A\x0002\x0000" + "\x0006\x000A\x0002\x0000\x0026\x000A\x0002\x0000\x0006\x000A\x0002\x0000\x0008\x000A\x0001\x0000\x0001\x000A\x0001\x0000" + "\x0001\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x001F\x000A\x0002\x0000\x0035\x000A\x0001\x0000\x0007\x000A\x0001\x0000" + "\x0001\x000A\x0003\x0000\x0003\x000A\x0001\x0000\x0007\x000A\x0003\x0000\x0004\x000A\x0002\x0000\x0006\x000A\x0004\x0000" + "\x000D\x000A\x0005\x0000\x0003\x000A\x0001\x0000\x0007\x000A\x0082\x0000\x0001\x000A\x0082\x0000\x0001\x000A\x0004\x0000" + + "\x0001\x000A\x0002\x0000\x000A\x000A\x0001\x0000\x0001\x000A\x0003\x0000\x0005\x000A\x0006\x0000\x0001\x000A\x0001\x0000" + "\x0001\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0004\x000A\x0001\x0000\x0003\x000A\x0001\x0000\x0007\x000A\u0ecb\x0000" + "\x0002\x000A\x002A\x0000\x0005\x000A\x000A\x0000\x0001\x000B\x0054\x000B\x0008\x000B\x0002\x000B\x0002\x000B\x005A\x000B" + "\x0001\x000B\x0003\x000B\x0006\x000B\x0028\x000B\x0003\x000B\x0001\x0000\x005E\x000A\x0011\x0000\x0018\x000A\x0038\x0000" + "\x0010\x000B\u0100\x0000\x0080\x000B\x0080\x0000\u19b6\x000B\x000A\x000B\x0040\x0000\u51a6\x000B\x005A\x000B\u048d\x000A" + "\u0773\x0000\u2ba4\x000A\u215c\x0000\u012e\x000B\x00D2\x000B\x0007\x000A\x000C\x0000\x0005\x000A\x0005\x0000\x0001\x000A" + "\x0001\x0000\x000A\x000A\x0001\x0000\x000D\x000A\x0001\x0000\x0005\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0002\x000A" + "\x0001\x0000\x0002\x000A\x0001\x0000\x006C\x000A\x0021\x0000\u016b\x000A\x0012\x0000\x0040\x000A\x0002\x0000\x0036\x000A" 
+ "\x0028\x0000\x000C\x000A\x0074\x0000\x0003\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0087\x000A\x0013\x0000\x000A\x0002" + "\x0007\x0000\x001A\x000A\x0006\x0000\x001A\x000A\x000A\x0000\x0001\x000B\x003A\x000B\x001F\x000A\x0003\x0000\x0006\x000A" + "\x0002\x0000\x0006\x000A\x0002\x0000\x0006\x000A\x0002\x0000\x0003\x000A\x0023\x0000"; + + /// <summary> Translates characters to character classes</summary> + private static readonly char[] ZZ_CMAP = ZzUnpackCMap(ZZ_CMAP_PACKED); + + /// <summary> Translates DFA states to action switch labels.</summary> + private static readonly int[] ZZ_ACTION = ZzUnpackAction(); + + private const System.String ZZ_ACTION_PACKED_0 = "\x0001\x0000\x0001\x0001\x0003\x0002\x0001\x0003\x0001\x0001\x000B\x0000\x0001\x0002\x0003\x0004" + "\x0002\x0000\x0001\x0005\x0001\x0000\x0001\x0005\x0003\x0004\x0006\x0005\x0001\x0006\x0001\x0004" + "\x0002\x0007\x0001\x0008\x0001\x0000\x0001\x0008\x0003\x0000\x0002\x0008\x0001\x0009\x0001\x000A" + "\x0001\x0004"; + + private static int[] ZzUnpackAction() + { + int[] result = new int[51]; + int offset = 0; + offset = ZzUnpackAction(ZZ_ACTION_PACKED_0, offset, result); + return result; + } + + private static int ZzUnpackAction(System.String packed, int offset, int[] result) + { + int i = 0; /* index in packed string */ + int j = offset; /* index in unpacked array */ + int l = packed.Length; + while (i < l) + { + int count = packed[i++]; + int value_Renamed = packed[i++]; + do + result[j++] = value_Renamed; + while (--count > 0); + } + return j; + } + + + /// <summary> Translates a state to a row index in the transition table</summary> + private static readonly int[] ZZ_ROWMAP = ZzUnpackRowMap(); + + private const System.String ZZ_ROWMAP_PACKED_0 = "\x0000\x0000\x0000\x000E\x0000\x001C\x0000\x002A\x0000\x0038\x0000\x000E\x0000\x0046\x0000\x0054" + "\x0000\x0062\x0000\x0070\x0000\x007E\x0000\x008C\x0000\x009A\x0000\x00A8\x0000\x00B6\x0000\x00C4" + 
"\x0000\x00D2\x0000\x00E0\x0000\x00EE\x0000\x00FC\x0000\u010a\x0000\u0118\x0000\u0126\x0000\u0134" + "\x0000\u0142\x0000\u0150\x0000\u015e\x0000\u016c\x0000\u017a\x0000\u0188\x0000\u0196\x0000\u01a4" + "\x0000\u01b2\x0000\u01c0\x0000\u01ce\x0000\u01dc\x0000\u01ea\x0000\u01f8\x0000\x00D2\x0000\u0206" + "\x0000\u0214\x0000\u0222\x0000\u0230\x0000\u023e\x0000\u024c\x0000\u025a\x0000\x0054\x0000\x008C" + "\x0000\u0268\x0000\u0276\x0000\u0284"; + + private static int[] ZzUnpackRowMap() + { + int[] result = new int[51]; + int offset = 0; + offset = ZzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result); + return result; + } + + private static int ZzUnpackRowMap(System.String packed, int offset, int[] result) + { + int i = 0; /* index in packed string */ + int j = offset; /* index in unpacked array */ + int l = packed.Length; + while (i < l) + { + int high = packed[i++] << 16; + result[j++] = high | packed[i++]; + } + return j; + } + + /// <summary> The transition table of the DFA</summary> + private static readonly int[] ZZ_TRANS = ZzUnpackTrans(); + + private const System.String ZZ_TRANS_PACKED_0 = "\x0001\x0002\x0001\x0003\x0001\x0004\x0007\x0002\x0001\x0005\x0001\x0006\x0001\x0007\x0001\x0002" + "\x000F\x0000\x0002\x0003\x0001\x0000\x0001\x0008\x0001\x0000\x0001\x0009\x0002\x000A\x0001\x000B" + "\x0001\x0003\x0004\x0000\x0001\x0003\x0001\x0004\x0001\x0000\x0001\x000C\x0001\x0000\x0001\x0009" + "\x0002\x000D\x0001\x000E\x0001\x0004\x0004\x0000\x0001\x0003\x0001\x0004\x0001\x000F\x0001\x0010" + "\x0001\x0011\x0001\x0012\x0002\x000A\x0001\x000B\x0001\x0013\x0010\x0000\x0001\x0002\x0001\x0000" + "\x0001\x0014\x0001\x0015\x0007\x0000\x0001\x0016\x0004\x0000\x0002\x0017\x0007\x0000\x0001\x0017" + "\x0004\x0000\x0001\x0018\x0001\x0019\x0007\x0000\x0001\x001A\x0005\x0000\x0001\x001B\x0007\x0000" + "\x0001\x000B\x0004\x0000\x0001\x001C\x0001\x001D\x0007\x0000\x0001\x001E\x0004\x0000\x0001\x001F" + 
"\x0001\x0020\x0007\x0000\x0001\x0021\x0004\x0000\x0001\x0022\x0001\x0023\x0007\x0000\x0001\x0024" + "\x000D\x0000\x0001\x0025\x0004\x0000\x0001\x0014\x0001\x0015\x0007\x0000\x0001\x0026\x000D\x0000" + "\x0001\x0027\x0004\x0000\x0002\x0017\x0007\x0000\x0001\x0028\x0004\x0000\x0001\x0003\x0001\x0004" + "\x0001\x000F\x0001\x0008\x0001\x0011\x0001\x0012\x0002\x000A\x0001\x000B\x0001\x0013\x0004\x0000" + "\x0002\x0014\x0001\x0000\x0001\x0029\x0001\x0000\x0001\x0009\x0002\x002A\x0001\x0000\x0001\x0014" + "\x0004\x0000\x0001\x0014\x0001\x0015\x0001\x0000\x0001\x002B\x0001\x0000\x0001\x0009\x0002\x002C" + "\x0001\x002D\x0001\x0015\x0004\x0000\x0001\x0014\x0001\x0015\x0001\x0000\x0001\x0029\x0001\x0000" + "\x0001\x0009\x0002\x002A\x0001\x0000\x0001\x0016\x0004\x0000\x0002\x0017\x0001\x0000\x0001\x002E" + "\x0002\x0000\x0001\x002E\x0002\x0000\x0001\x0017\x0004\x0000\x0002\x0018\x0001\x0000\x0001\x002A" + "\x0001\x0000\x0001\x0009\x0002\x002A\x0001\x0000\x0001\x0018\x0004\x0000\x0001\x0018\x0001\x0019" + "\x0001\x0000\x0001\x002C\x0001\x0000\x0001\x0009\x0002\x002C\x0001\x002D\x0001\x0019\x0004\x0000" + + "\x0001\x0018\x0001\x0019\x0001\x0000\x0001\x002A\x0001\x0000\x0001\x0009\x0002\x002A\x0001\x0000" + "\x0001\x001A\x0005\x0000\x0001\x001B\x0001\x0000\x0001\x002D\x0002\x0000\x0003\x002D\x0001\x001B" + "\x0004\x0000\x0002\x001C\x0001\x0000\x0001\x002F\x0001\x0000\x0001\x0009\x0002\x000A\x0001\x000B" + "\x0001\x001C\x0004\x0000\x0001\x001C\x0001\x001D\x0001\x0000\x0001\x0030\x0001\x0000\x0001\x0009" + "\x0002\x000D\x0001\x000E\x0001\x001D\x0004\x0000\x0001\x001C\x0001\x001D\x0001\x0000\x0001\x002F" + "\x0001\x0000\x0001\x0009\x0002\x000A\x0001\x000B\x0001\x001E\x0004\x0000\x0002\x001F\x0001\x0000" + "\x0001\x000A\x0001\x0000\x0001\x0009\x0002\x000A\x0001\x000B\x0001\x001F\x0004\x0000\x0001\x001F" + "\x0001\x0020\x0001\x0000\x0001\x000D\x0001\x0000\x0001\x0009\x0002\x000D\x0001\x000E\x0001\x0020" + 
"\x0004\x0000\x0001\x001F\x0001\x0020\x0001\x0000\x0001\x000A\x0001\x0000\x0001\x0009\x0002\x000A" + "\x0001\x000B\x0001\x0021\x0004\x0000\x0002\x0022\x0001\x0000\x0001\x000B\x0002\x0000\x0003\x000B" + "\x0001\x0022\x0004\x0000\x0001\x0022\x0001\x0023\x0001\x0000\x0001\x000E\x0002\x0000\x0003\x000E" + "\x0001\x0023\x0004\x0000\x0001\x0022\x0001\x0023\x0001\x0000\x0001\x000B\x0002\x0000\x0003\x000B" + "\x0001\x0024\x0006\x0000\x0001\x000F\x0006\x0000\x0001\x0025\x0004\x0000\x0001\x0014\x0001\x0015" + "\x0001\x0000\x0001\x0031\x0001\x0000\x0001\x0009\x0002\x002A\x0001\x0000\x0001\x0016\x0004\x0000" + "\x0002\x0017\x0001\x0000\x0001\x002E\x0002\x0000\x0001\x002E\x0002\x0000\x0001\x0028\x0004\x0000" + "\x0002\x0014\x0007\x0000\x0001\x0014\x0004\x0000\x0002\x0018\x0007\x0000\x0001\x0018\x0004\x0000" + "\x0002\x001C\x0007\x0000\x0001\x001C\x0004\x0000\x0002\x001F\x0007\x0000\x0001\x001F\x0004\x0000" + "\x0002\x0022\x0007\x0000\x0001\x0022\x0004\x0000\x0002\x0032\x0007\x0000\x0001\x0032\x0004\x0000" + "\x0002\x0014\x0007\x0000\x0001\x0033\x0004\x0000\x0002\x0032\x0001\x0000\x0001\x002E\x0002\x0000" + "\x0001\x002E\x0002\x0000\x0001\x0032\x0004\x0000\x0002\x0014\x0001\x0000\x0001\x0031\x0001\x0000" + + "\x0001\x0009\x0002\x002A\x0001\x0000\x0001\x0014\x0003\x0000"; + + private static int[] ZzUnpackTrans() + { + int[] result = new int[658]; + int offset = 0; + offset = ZzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result); + return result; + } + + private static int ZzUnpackTrans(System.String packed, int offset, int[] result) + { + int i = 0; /* index in packed string */ + int j = offset; /* index in unpacked array */ + int l = packed.Length; + while (i < l) + { + int count = packed[i++]; + int value_Renamed = packed[i++]; + value_Renamed--; + do + result[j++] = value_Renamed; + while (--count > 0); + } + return j; + } + + + /* error codes */ + private const int ZZ_UNKNOWN_ERROR = 0; + private const int ZZ_NO_MATCH = 1; + private const int ZZ_PUSHBACK_2BIG = 2; + + /* error 
messages for the codes above */ + private static readonly System.String[] ZZ_ERROR_MSG = new System.String[]{"Unkown internal scanner error", "Error: could not match input", "Error: pushback value was too large"}; + + /// <summary> ZZ_ATTRIBUTE[aState] contains the attributes of state <c>aState</c></summary> + private static readonly int[] ZZ_ATTRIBUTE = ZzUnpackAttribute(); + + private const System.String ZZ_ATTRIBUTE_PACKED_0 = "\x0001\x0000\x0001\x0009\x0003\x0001\x0001\x0009\x0001\x0001\x000B\x0000\x0004\x0001\x0002\x0000" + "\x0001\x0001\x0001\x0000\x000F\x0001\x0001\x0000\x0001\x0001\x0003\x0000\x0005\x0001"; + + private static int[] ZzUnpackAttribute() + { + int[] result = new int[51]; + int offset = 0; + offset = ZzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result); + return result; + } + + private static int ZzUnpackAttribute(System.String packed, int offset, int[] result) + { + int i = 0; /* index in packed string */ + int j = offset; /* index in unpacked array */ + int l = packed.Length; + while (i < l) + { + int count = packed[i++]; + int value_Renamed = packed[i++]; + do + result[j++] = value_Renamed; + while (--count > 0); + } + return j; + } + + /// <summary>the input device </summary> + private System.IO.TextReader zzReader; + + /// <summary>the current state of the DFA </summary> + private int zzState; + + /// <summary>the current lexical state </summary> + private int zzLexicalState = YYINITIAL; + + /// <summary>this buffer contains the current text to be matched and is + /// the source of the yytext() string + /// </summary> + private char[] zzBuffer = new char[ZZ_BUFFERSIZE]; + + /// <summary>the textposition at the last accepting state </summary> + private int zzMarkedPos; + + /// <summary>the textposition at the last state to be included in yytext </summary> + private int zzPushbackPos; + + /// <summary>the current text position in the buffer </summary> + private int zzCurrentPos; + + /// <summary>startRead marks the beginning of the 
yytext() string in the buffer </summary> + private int zzStartRead; + + /// <summary>endRead marks the last character in the buffer, that has been read + /// from input + /// </summary> + private int zzEndRead; + + /// <summary>number of newlines encountered up to the start of the matched text </summary> + private int yyline; + + /// <summary>the number of characters up to the start of the matched text </summary> + private int yychar; + + /// <summary> the number of characters from the last newline up to the start of the + /// matched text + /// </summary> + private int yycolumn; + + /// <summary> zzAtBOL == true <=> the scanner is currently at the beginning of a line</summary> + private bool zzAtBOL = true; + + /// <summary>zzAtEOF == true <=> the scanner is at the EOF </summary> + private bool zzAtEOF; + + /* user code: */ + + public static readonly int ALPHANUM; + public static readonly int APOSTROPHE; + public static readonly int ACRONYM; + public static readonly int COMPANY; + public static readonly int EMAIL; + public static readonly int HOST; + public static readonly int NUM; + public static readonly int CJ; + /// <deprecated> this solves a bug where HOSTs that end with '.' are identified + /// as ACRONYMs. + /// </deprecated> + [Obsolete("this solves a bug where HOSTs that end with '.' are identified as ACRONYMs")] + public static readonly int ACRONYM_DEP; + + public static readonly System.String[] TOKEN_TYPES; + + public int Yychar() + { + return yychar; + } + + /* + * Resets the Tokenizer to a new Reader. 
+ */ + internal void Reset(System.IO.TextReader r) + { + // reset to default buffer size, if buffer has grown + if (zzBuffer.Length > ZZ_BUFFERSIZE) + { + zzBuffer = new char[ZZ_BUFFERSIZE]; + } + Yyreset(r); + } + + /// <summary> Fills Lucene token with the current token text.</summary> + internal void GetText(Token t) + { + t.SetTermBuffer(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead); + } + + /// <summary> Fills TermAttribute with the current token text.</summary> + internal void GetText(ITermAttribute t) + { + t.SetTermBuffer(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead); + } + + + /// <summary> Creates a new scanner + /// There is also a java.io.InputStream version of this constructor. + /// + /// </summary> + /// <param name="in_Renamed"> the java.io.Reader to read input from. + /// </param> + internal StandardTokenizerImpl(System.IO.TextReader in_Renamed) + { + this.zzReader = in_Renamed; + } + + /// <summary> Creates a new scanner. + /// There is also java.io.Reader version of this constructor. + /// + /// </summary> + /// <param name="in_Renamed"> the java.io.Inputstream to read input from. + /// </param> + internal StandardTokenizerImpl(System.IO.Stream in_Renamed):this(new System.IO.StreamReader(in_Renamed, System.Text.Encoding.Default)) + { + } + + /// <summary> Unpacks the compressed character translation table. + /// + /// </summary> + /// <param name="packed"> the packed character translation table + /// </param> + /// <returns> the unpacked character translation table + /// </returns> + private static char[] ZzUnpackCMap(System.String packed) + { + char[] map = new char[0x10000]; + int i = 0; /* index in packed string */ + int j = 0; /* index in unpacked array */ + while (i < 1154) + { + int count = packed[i++]; + char value_Renamed = packed[i++]; + do + map[j++] = value_Renamed; + while (--count > 0); + } + return map; + } + + + /// <summary> Refills the input buffer. + /// </summary> + /// <returns><c>false</c>, iff there was new input. 
+ /// + /// </returns> + /// <exception cref="System.IO.IOException"> if any I/O-Error occurs + /// </exception> + private bool ZzRefill() + { + + /* first: make room (if you can) */ + if (zzStartRead > 0) + { + Array.Copy(zzBuffer, zzStartRead, zzBuffer, 0, zzEndRead - zzStartRead); + + /* translate stored positions */ + zzEndRead -= zzStartRead; + zzCurrentPos -= zzStartRead; + zzMarkedPos -= zzStartRead; + zzPushbackPos -= zzStartRead; + zzStartRead = 0; + } + + /* is the buffer big enough? */ + if (zzCurrentPos >= zzBuffer.Length) + { + /* if not: blow it up */ + char[] newBuffer = new char[zzCurrentPos * 2]; + Array.Copy(zzBuffer, 0, newBuffer, 0, zzBuffer.Length); + zzBuffer = newBuffer; + } + + /* finally: fill the buffer with new input */ + int numRead = zzReader.Read(zzBuffer, zzEndRead, zzBuffer.Length - zzEndRead); + + if (numRead <= 0) + { + return true; + } + else + { + zzEndRead += numRead; + return false; + } + } + + + /// <summary> Closes the input stream.</summary> + public void Yyclose() + { + zzAtEOF = true; /* indicate end of file */ + zzEndRead = zzStartRead; /* invalidate buffer */ + + if (zzReader != null) + zzReader.Close(); + } + + + /// <summary> Resets the scanner to read from a new input stream. + /// Does not close the old reader. + /// + /// All internal variables are reset, the old input stream + /// <b>cannot</b> be reused (internal buffer is discarded and lost). + /// Lexical state is set to <tt>ZZ_INITIAL</tt>. 
+ /// + /// </summary> + /// <param name="reader"> the new input stream + /// </param> + public void Yyreset(System.IO.TextReader reader) + { + zzReader = reader; + zzAtBOL = true; + zzAtEOF = false; + zzEndRead = zzStartRead = 0; + zzCurrentPos = zzMarkedPos = zzPushbackPos = 0; + yyline = yychar = yycolumn = 0; + zzLexicalState = YYINITIAL; + } + + + /// <summary> Returns the current lexical state.</summary> + public int Yystate() + { + return zzLexicalState; + } + + + /// <summary> Enters a new lexical state + /// + /// </summary> + /// <param name="newState">the new lexical state + /// </param> + public void Yybegin(int newState) + { + zzLexicalState = newState; + } + + + /// <summary> Returns the text matched by the current regular expression.</summary> + public System.String Yytext() + { + return new System.String(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead); + } + + + /// <summary> Returns the character at position <tt>pos</tt> from the + /// matched text. + /// + /// It is equivalent to yytext().charAt(pos), but faster + /// + /// </summary> + /// <param name="pos">the position of the character to fetch. + /// A value from 0 to yylength()-1. + /// + /// </param> + /// <returns> the character at position pos + /// </returns> + public char Yycharat(int pos) + { + return zzBuffer[zzStartRead + pos]; + } + + + /// <summary> Returns the length of the matched text region.</summary> + public int Yylength() + { + return zzMarkedPos - zzStartRead; + } + + + /// <summary> Reports an error that occured while scanning. + /// + /// In a wellformed scanner (no or only correct usage of + /// yypushback(int) and a match-all fallback rule) this method + /// will only be called with things that "Can't Possibly Happen". + /// If this method is called, something is seriously wrong + /// (e.g. a JFlex bug producing a faulty scanner etc.). + /// + /// Usual syntax/scanner level error handling should be done + /// in error fallback rules. 
+ /// + /// </summary> + /// <param name="errorCode"> the code of the errormessage to display + /// </param> + private void ZzScanError(int errorCode) + { + System.String message; + try + { + message = ZZ_ERROR_MSG[errorCode]; + } + catch (System.IndexOutOfRangeException) + { + message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR]; + } + + throw new System.ApplicationException(message); + } + + + /// <summary> Pushes the specified amount of characters back into the input stream. + /// + /// They will be read again by then next call of the scanning method + /// + /// </summary> + /// <param name="number"> the number of characters to be read again. + /// This number must not be greater than yylength()! + /// </param> + public virtual void Yypushback(int number) + { + if (number > Yylength()) + ZzScanError(ZZ_PUSHBACK_2BIG); + + zzMarkedPos -= number; + } + + + /// <summary> Resumes scanning until the next regular expression is matched, + /// the end of input is encountered or an I/O-Error occurs. + /// + /// </summary> + /// <returns> the next token + /// </returns> + /// <exception cref="System.IO.IOException"> if any I/O-Error occurs + /// </exception> + public virtual int GetNextToken() + { + int zzInput; + int zzAction; + + // cached fields: + int zzCurrentPosL; + int zzMarkedPosL; + int zzEndReadL = zzEndRead; + char[] zzBufferL = zzBuffer; + char[] zzCMapL = ZZ_CMAP; + + int[] zzTransL = ZZ_TRANS; + int[] zzRowMapL = ZZ_ROWMAP; + int[] zzAttrL = ZZ_ATTRIBUTE; + + while (true) + { + zzMarkedPosL = zzMarkedPos; + + yychar += zzMarkedPosL - zzStartRead; + + zzAction = - 1; + + zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL; + + zzState = zzLexicalState; + + + { + while (true) + { + + if (zzCurrentPosL < zzEndReadL) + zzInput = zzBufferL[zzCurrentPosL++]; + else if (zzAtEOF) + { + zzInput = YYEOF; + goto zzForAction_brk; // {{Aroush-2.9}} this 'goto' maybe in the wrong place + } + else + { + // store back cached positions + zzCurrentPos = zzCurrentPosL; + zzMarkedPos 
= zzMarkedPosL; + bool eof = ZzRefill(); + // get translated positions and possibly new buffer + zzCurrentPosL = zzCurrentPos; + zzMarkedPosL = zzMarkedPos; + zzBufferL = zzBuffer; + zzEndReadL = zzEndRead; + if (eof) + { + zzInput = YYEOF; + goto zzForAction_brk; // {{Aroush-2.9}} this 'goto' maybe in the wrong place + } + else + { + zzInput = zzBufferL[zzCurrentPosL++]; + } + } + int zzNext = zzTransL[zzRowMapL[zzState] + zzCMapL[zzInput]]; + if (zzNext == - 1) + { + goto zzForAction_brk; // {{Aroush-2.9}} this 'goto' maybe in the wrong place + } + zzState = zzNext; + + int zzAttributes = zzAttrL[zzState]; + if ((zzAttributes & 1) == 1) + { + zzAction = zzState; + zzMarkedPosL = zzCurrentPosL; + if ((zzAttributes & 8) == 8) + { + goto zzForAction_brk; // {{Aroush-2.9}} this 'goto' maybe in the wrong place + } + } + } + } + +zzForAction_brk: ; // {{Aroush-2.9}} this 'lable' maybe in the wrong place + + + // store back cached position + zzMarkedPos = zzMarkedPosL; + + switch (zzAction < 0?zzAction:ZZ_ACTION[zzAction]) + { + + case 4: + { + return HOST; + } + + case 11: break; + + case 9: + { + return ACRONYM; + } + + case 12: break; + + case 8: + { + return ACRONYM_DEP; + } + + case 13: break; + + case 1: + { + /* ignore */ + } + goto case 14; + + case 14: break; + + case 5: + { + return NUM; + } + + case 15: break; + + case 3: + { + return CJ; + } + + case 16: break; + + case 2: + { + return ALPHANUM; + } + + case 17: break; + + case 7: + { + return COMPANY; + } + + case 18: break; + + case 6: + { + return APOSTROPHE; + } + + case 19: break; + + case 10: + { + return EMAIL; + } + + case 20: break; + + default: + if (zzInput == YYEOF && zzStartRead == zzCurrentPos) + { + zzAtEOF = true; + return YYEOF; + } + else + { + ZzScanError(ZZ_NO_MATCH); + } + break; + + } + } + } + static StandardTokenizerImpl() + { + ALPHANUM = StandardTokenizer.ALPHANUM; + APOSTROPHE = StandardTokenizer.APOSTROPHE; + ACRONYM = StandardTokenizer.ACRONYM; + COMPANY = 
StandardTokenizer.COMPANY; + EMAIL = StandardTokenizer.EMAIL; + HOST = StandardTokenizer.HOST; + NUM = StandardTokenizer.NUM; + CJ = StandardTokenizer.CJ; + ACRONYM_DEP = StandardTokenizer.ACRONYM_DEP; + TOKEN_TYPES = StandardTokenizer.TOKEN_TYPES; + } + } +}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System.Collections.Generic;
using Version = Lucene.Net.Util.Version;

namespace Lucene.Net.Analysis
{
    /// <summary> Filters <see cref="LetterTokenizer" /> with <see cref="LowerCaseFilter" /> and
    /// <see cref="StopFilter" />.
    ///
    /// <a name="version"/>
    /// <p/>
    /// You must specify the required <see cref="Version" /> compatibility when creating
    /// StopAnalyzer:
    /// <list type="bullet">
    /// <item>As of 2.9, position increments are preserved</item>
    /// </list>
    /// </summary>
    public sealed class StopAnalyzer : Analyzer
    {
        // Stop set used by every TokenStream this analyzer produces.
        private readonly ISet<string> stopWords;
        // Snapshot of the version-dependent default taken at construction time.
        private readonly bool enablePositionIncrements;

        /// <summary>An unmodifiable set containing some common English words that are not usually useful
        /// for searching.
        /// </summary>
        // readonly: the field is assigned exactly once, in the static constructor below;
        // without readonly any caller could silently replace the shared set.
        public static readonly ISet<string> ENGLISH_STOP_WORDS_SET;

        /// <summary> Builds an analyzer which removes words in <see cref="ENGLISH_STOP_WORDS_SET"/>.</summary>
        /// <param name="matchVersion">See <a href="#version">above</a></param>
        public StopAnalyzer(Version matchVersion)
        {
            stopWords = ENGLISH_STOP_WORDS_SET;
            enablePositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion);
        }

        /// <summary>Builds an analyzer with the stop words from the given set.</summary>
        /// <param name="matchVersion">See <a href="#version">above</a></param>
        /// <param name="stopWords">Set of stop words to remove</param>
        public StopAnalyzer(Version matchVersion, ISet<string> stopWords)
        {
            this.stopWords = stopWords;
            enablePositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion);
        }

        /// <summary> Builds an analyzer with the stop words from the given file.</summary>
        /// <seealso cref="WordlistLoader.GetWordSet(System.IO.FileInfo)"/>
        /// <param name="matchVersion">See <a href="#version">above</a></param>
        /// <param name="stopwordsFile">File to load stop words from</param>
        public StopAnalyzer(Version matchVersion, System.IO.FileInfo stopwordsFile)
        {
            stopWords = WordlistLoader.GetWordSet(stopwordsFile);
            enablePositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion);
        }

        /// <summary>Builds an analyzer with the stop words from the given reader.</summary>
        /// <seealso cref="WordlistLoader.GetWordSet(System.IO.TextReader)"/>
        /// <param name="matchVersion">See <a href="#version">above</a></param>
        /// <param name="stopwords">Reader to load stop words from</param>
        public StopAnalyzer(Version matchVersion, System.IO.TextReader stopwords)
        {
            stopWords = WordlistLoader.GetWordSet(stopwords);
            enablePositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion);
        }

        /// <summary>Filters LowerCaseTokenizer with StopFilter.</summary>
        public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
        {
            return new StopFilter(enablePositionIncrements, new LowerCaseTokenizer(reader), stopWords);
        }

        /// <summary>Per-thread cached tokenizer/filter pair reused by <see cref="ReusableTokenStream"/>.</summary>
        // Plain data holder; the enclosing-instance back-reference of the original
        // port was never read and has been removed.
        private class SavedStreams
        {
            internal Tokenizer source;
            internal TokenStream result;
        }

        public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
        {
            var streams = (SavedStreams) PreviousTokenStream;
            if (streams == null)
            {
                // First use on this thread: build the chain once and cache it.
                streams = new SavedStreams { source = new LowerCaseTokenizer(reader) };
                streams.result = new StopFilter(enablePositionIncrements, streams.source, stopWords);
                PreviousTokenStream = streams;
            }
            else
            {
                // Subsequent use: just point the cached tokenizer at the new reader.
                streams.source.Reset(reader);
            }
            return streams.result;
        }

        static StopAnalyzer()
        {
            var stopWords = new System.String[]
            {
                "a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if",
                "in", "into", "is", "it", "no", "not", "of", "on", "or", "such",
                "that", "the", "their", "then", "there", "these", "they", "this",
                "to", "was", "will", "with"
            };
            var stopSet = new CharArraySet(stopWords.Length, false);
            stopSet.AddAll(stopWords);
            ENGLISH_STOP_WORDS_SET = CharArraySet.UnmodifiableSet(stopSet);
        }
    }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using System.Collections.Generic;
using Lucene.Net.Analysis.Tokenattributes;
using Lucene.Net.Util;
using QueryParser = Lucene.Net.QueryParsers.QueryParser;
using Version = Lucene.Net.Util.Version;

namespace Lucene.Net.Analysis
{
    /// <summary> Removes stop words from a token stream.</summary>
    public sealed class StopFilter : TokenFilter
    {
        // The stop set consulted for every token; always a CharArraySet so lookups
        // can run against the raw term buffer without allocating strings.
        private readonly CharArraySet stopWords;
        private bool enablePositionIncrements = false;

        private readonly ITermAttribute termAtt;
        private readonly IPositionIncrementAttribute posIncrAtt;

        /// <summary> Construct a token stream filtering the given input.
        /// If <c>stopWords</c> is an instance of <see cref="CharArraySet" /> (true if
        /// <c>makeStopSet()</c> was used to construct the set) it will be directly used
        /// and <c>ignoreCase</c> will be ignored since <c>CharArraySet</c>
        /// directly controls case sensitivity.
        /// <p/>
        /// If <c>stopWords</c> is not an instance of <see cref="CharArraySet" />,
        /// a new CharArraySet will be constructed and <c>ignoreCase</c> will be
        /// used to specify the case sensitivity of that set.
        /// </summary>
        /// <param name="enablePositionIncrements">true if token positions should record the removed stop words</param>
        /// <param name="input">Input TokenStream</param>
        /// <param name="stopWords">A Set of strings or strings or char[] or any other ToString()-able set representing the stopwords</param>
        /// <param name="ignoreCase">if true, all words are lower cased first</param>
        public StopFilter(bool enablePositionIncrements, TokenStream input, ISet<string> stopWords, bool ignoreCase)
            : base(input)
        {
            var asCharArraySet = stopWords as CharArraySet;
            if (asCharArraySet != null)
            {
                // Already the right representation; use it directly (ignoreCase is
                // ignored because the set carries its own case behaviour).
                this.stopWords = asCharArraySet;
            }
            else
            {
                var copied = new CharArraySet(stopWords.Count, ignoreCase);
                copied.AddAll(stopWords);
                this.stopWords = copied;
            }
            this.enablePositionIncrements = enablePositionIncrements;
            termAtt = AddAttribute<ITermAttribute>();
            posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
        }

        /// <summary> Constructs a filter which removes words from the input
        /// TokenStream that are named in the Set.
        /// </summary>
        /// <param name="enablePositionIncrements">true if token positions should record the removed stop words</param>
        /// <param name="in">Input stream</param>
        /// <param name="stopWords">A Set of strings or char[] or any other ToString()-able set representing the stopwords</param>
        /// <seealso cref="MakeStopSet(String[])"/>
        public StopFilter(bool enablePositionIncrements, TokenStream @in, ISet<string> stopWords)
            : this(enablePositionIncrements, @in, stopWords, false)
        {
        }

        /// <summary> Builds a Set from an array of stop words,
        /// appropriate for passing into the StopFilter constructor.
        /// This permits this stopWords construction to be cached once when
        /// an Analyzer is constructed.
        /// </summary>
        /// <seealso cref="MakeStopSet(String[], bool)">passing false to ignoreCase</seealso>
        public static ISet<string> MakeStopSet(params string[] stopWords)
        {
            return MakeStopSet(stopWords, false);
        }

        /// <summary> Builds a Set from an array of stop words,
        /// appropriate for passing into the StopFilter constructor.
        /// This permits this stopWords construction to be cached once when
        /// an Analyzer is constructed.
        /// </summary>
        /// <param name="stopWords">A list of strings or char[] or any other ToString()-able list representing the stop words</param>
        /// <seealso cref="MakeStopSet(String[], bool)">passing false to ignoreCase</seealso>
        public static ISet<string> MakeStopSet(IList<object> stopWords)
        {
            return MakeStopSet(stopWords, false);
        }

        /// <summary>Builds a case-aware stop set from an array of words.</summary>
        /// <param name="stopWords">An array of stopwords</param>
        /// <param name="ignoreCase">If true, all words are lower cased first.</param>
        /// <returns> a Set containing the words</returns>
        public static ISet<string> MakeStopSet(string[] stopWords, bool ignoreCase)
        {
            var result = new CharArraySet(stopWords.Length, ignoreCase);
            result.AddAll(stopWords);
            return result;
        }

        /// <summary>Builds a case-aware stop set from a list of ToString()-able items.</summary>
        /// <param name="stopWords">A List of Strings or char[] or any other toString()-able list representing the stopwords </param>
        /// <param name="ignoreCase">if true, all words are lower cased first</param>
        /// <returns>A Set (<see cref="CharArraySet"/>)containing the words</returns>
        public static ISet<string> MakeStopSet(IList<object> stopWords, bool ignoreCase)
        {
            var result = new CharArraySet(stopWords.Count, ignoreCase);
            foreach (var entry in stopWords)
            {
                result.Add(entry.ToString());
            }
            return result;
        }

        /// <summary> Returns the next input Token whose term() is not a stop word.</summary>
        public override bool IncrementToken()
        {
            // Position increments of every dropped stop word accumulate here and,
            // when enabled, are folded into the next surviving token.
            int pendingIncrement = 0;
            while (input.IncrementToken())
            {
                bool isStopWord = stopWords.Contains(termAtt.TermBuffer(), 0, termAtt.TermLength());
                if (!isStopWord)
                {
                    if (enablePositionIncrements)
                    {
                        posIncrAtt.PositionIncrement = posIncrAtt.PositionIncrement + pendingIncrement;
                    }
                    return true;
                }
                pendingIncrement += posIncrAtt.PositionIncrement;
            }
            // Input exhausted without finding a surviving token.
            return false;
        }

        /// <summary> Returns version-dependent default for enablePositionIncrements. Analyzers
        /// that embed StopFilter use this method when creating the StopFilter. Prior
        /// to 2.9, this returns false. On 2.9 or later, it returns true.
        /// </summary>
        public static bool GetEnablePositionIncrementsVersionDefault(Version matchVersion)
        {
            return matchVersion.OnOrAfter(Version.LUCENE_29);
        }

        /// <summary> If <c>true</c>, this StopFilter will preserve
        /// positions of the incoming tokens (ie, accumulate and
        /// set position increments of the removed stop tokens).
        /// Generally, <c>true</c> is best as it does not
        /// lose information (positions of the original tokens)
        /// during indexing.
        ///
        /// <p/> When set, when a token is stopped
        /// (omitted), the position increment of the following
        /// token is incremented.
        ///
        /// <p/> <b>NOTE</b>: be sure to also
        /// set <see cref="QueryParser.EnablePositionIncrements" /> if
        /// you use QueryParser to create queries.
        /// </summary>
        public bool EnablePositionIncrements
        {
            get { return enablePositionIncrements; }
            set { enablePositionIncrements = value; }
        }
    }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using System.Collections.Generic;
using Attribute = Lucene.Net.Util.Attribute;
using AttributeSource = Lucene.Net.Util.AttributeSource;

namespace Lucene.Net.Analysis
{
    /// <summary> This TokenFilter provides the ability to set aside attribute states
    /// that have already been analyzed. This is useful in situations where multiple fields share
    /// many common analysis steps and then go their separate ways.
    /// <p/>
    /// It is also useful for doing things like entity extraction or proper noun analysis as
    /// part of the analysis workflow and saving off those tokens for use in another field.
    /// <p/>
    /// It is important that tees are consumed before sinks; if you are not sure which
    /// stream is consumed first you can pass all tokens to the sinks at once using
    /// <see cref="ConsumeAllTokens" />, after which this TokenFilter is exhausted.
    /// Sinks are held through weak references, so an unreferenced sink simply stops
    /// receiving states once it has been garbage collected.
    /// </summary>
    public sealed class TeeSinkTokenFilter : TokenFilter
    {
        /// <summary>Filter that accepts every state; backs <see cref="NewSinkTokenStream()"/>.</summary>
        public class AnonymousClassSinkFilter : SinkFilter
        {
            public override bool Accept(AttributeSource source)
            {
                return true;
            }
        }

        // Weak references let abandoned sinks be collected; dead entries are pruned
        // lazily while iterating (see IncrementToken/End) so the list cannot grow
        // without bound when sinks are created and discarded repeatedly.
        private readonly LinkedList<WeakReference> sinks = new LinkedList<WeakReference>();

        /// <summary> Instantiates a new TeeSinkTokenFilter.</summary>
        public TeeSinkTokenFilter(TokenStream input) : base(input)
        {
        }

        /// <summary> Returns a new <see cref="SinkTokenStream" /> that receives all tokens consumed by this stream.</summary>
        public SinkTokenStream NewSinkTokenStream()
        {
            return NewSinkTokenStream(ACCEPT_ALL_FILTER);
        }

        /// <summary> Returns a new <see cref="SinkTokenStream" /> that receives all tokens consumed by this stream
        /// that pass the supplied filter.
        /// </summary>
        /// <seealso cref="SinkFilter"/>
        public SinkTokenStream NewSinkTokenStream(SinkFilter filter)
        {
            var sink = new SinkTokenStream(this.CloneAttributes(), filter);
            sinks.AddLast(new WeakReference(sink));
            return sink;
        }

        /// <summary> Adds a <see cref="SinkTokenStream" /> created by another <c>TeeSinkTokenFilter</c>
        /// to this one. The supplied stream will also receive all consumed tokens.
        /// This method can be used to pass tokens from two different tees to one sink.
        /// </summary>
        /// <exception cref="System.ArgumentException">if the sink was built with an incompatible attribute factory</exception>
        public void AddSinkTokenStream(SinkTokenStream sink)
        {
            // check that sink has correct factory
            if (!this.Factory.Equals(sink.Factory))
            {
                throw new System.ArgumentException("The supplied sink is not compatible to this tee");
            }
            // add eventually missing attribute impls to the existing sink
            foreach (var impl in this.CloneAttributes().GetAttributeImplsIterator())
            {
                sink.AddAttributeImpl(impl);
            }
            sinks.AddLast(new WeakReference(sink));
        }

        /// <summary> <c>TeeSinkTokenFilter</c> passes all tokens to the added sinks
        /// when itself is consumed. To be sure that all tokens from the input
        /// stream are passed to the sinks, you can call this method.
        /// This instance is exhausted after this, but all sinks are instantly available.
        /// </summary>
        public void ConsumeAllTokens()
        {
            while (IncrementToken())
            {
            }
        }

        public override bool IncrementToken()
        {
            if (!input.IncrementToken())
            {
                return false;
            }

            // capture state lazily - maybe no SinkFilter accepts this state
            State state = null;
            var node = sinks.First;
            while (node != null)
            {
                var next = node.Next; // remember before a potential Remove
                var sink = (SinkTokenStream) node.Value.Target;
                if (sink == null)
                {
                    // Sink was garbage collected: drop its node so the list stays small.
                    sinks.Remove(node);
                }
                else if (sink.Accept(this))
                {
                    if (state == null)
                    {
                        state = this.CaptureState();
                    }
                    sink.AddState(state);
                }
                node = next;
            }
            return true;
        }

        public override void End()
        {
            base.End();
            State finalState = CaptureState();
            var node = sinks.First;
            while (node != null)
            {
                var next = node.Next; // remember before a potential Remove
                var sink = (SinkTokenStream) node.Value.Target;
                if (sink == null)
                {
                    // Prune collected sinks here too.
                    sinks.Remove(node);
                }
                else
                {
                    sink.SetFinalState(finalState);
                }
                node = next;
            }
        }

        /// <summary> A filter that decides which <see cref="AttributeSource" /> states to store in the sink.</summary>
        public abstract class SinkFilter
        {
            /// <summary> Returns true, iff the current state of the passed-in <see cref="AttributeSource" /> shall be stored
            /// in the sink.
            /// </summary>
            public abstract bool Accept(AttributeSource source);

            /// <summary> Called by <see cref="SinkTokenStream.Reset()" />. This method does nothing by default
            /// and can optionally be overridden.
            /// </summary>
            public virtual void Reset()
            {
                // nothing to do; can be overridden
            }
        }

        /// <summary>A TokenStream that replays the attribute states a tee stored into it.</summary>
        public sealed class SinkTokenStream : TokenStream
        {
            private readonly LinkedList<State> cachedStates = new LinkedList<State>();
            private State finalState;
            // Non-null once consumption has started; guards against late AddState calls.
            private IEnumerator<AttributeSource.State> it = null;
            private readonly SinkFilter filter;

            internal SinkTokenStream(AttributeSource source, SinkFilter filter)
                : base(source)
            {
                this.filter = filter;
            }

            internal /*private*/ bool Accept(AttributeSource source)
            {
                return filter.Accept(source);
            }

            internal /*private*/ void AddState(AttributeSource.State state)
            {
                if (it != null)
                {
                    throw new System.SystemException("The tee must be consumed before sinks are consumed.");
                }
                cachedStates.AddLast(state);
            }

            internal /*private*/ void SetFinalState(AttributeSource.State finalState)
            {
                this.finalState = finalState;
            }

            public override bool IncrementToken()
            {
                // lazy init the iterator
                if (it == null)
                {
                    it = cachedStates.GetEnumerator();
                }

                if (!it.MoveNext())
                {
                    return false;
                }

                State state = it.Current;
                RestoreState(state);
                return true;
            }

            public override void End()
            {
                if (finalState != null)
                {
                    RestoreState(finalState);
                }
            }

            public override void Reset()
            {
                it = cachedStates.GetEnumerator();
            }

            protected override void Dispose(bool disposing)
            {
                // Do nothing: this stream owns no unmanaged resources.
            }
        }

        // Shared accept-everything filter; safe to share because it is stateless.
        private static readonly SinkFilter ACCEPT_ALL_FILTER = new AnonymousClassSinkFilter();
    }
}
\ No newline at end of file diff --git a/src/core/Analysis/Token.cs b/src/core/Analysis/Token.cs new file mode 100644 index 0000000..3357f34 --- /dev/null +++ b/src/core/Analysis/Token.cs @@ -0,0 +1,852 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Analysis.Tokenattributes; +using Lucene.Net.Support; +using Lucene.Net.Util; +using Payload = Lucene.Net.Index.Payload; +using TermPositions = Lucene.Net.Index.TermPositions; +using ArrayUtil = Lucene.Net.Util.ArrayUtil; +using Attribute = Lucene.Net.Util.Attribute; + +namespace Lucene.Net.Analysis +{ + + /// <summary>A Token is an occurrence of a term from the text of a field. It consists of + /// a term's text, the start and end offset of the term in the text of the field, + /// and a type string. + /// <p/> + /// The start and end offsets permit applications to re-associate a token with + /// its source text, e.g., to display highlighted query terms in a document + /// browser, or to show matching text fragments in a <abbr + /// title="KeyWord In Context">KWIC</abbr> display, etc. + /// <p/> + /// The type is a string, assigned by a lexical analyzer + /// (a.k.a. 
tokenizer), naming the lexical or syntactic class that the token + /// belongs to. For example an end of sentence marker token might be implemented + /// with type "eos". The default token type is "word". + /// <p/> + /// A Token can optionally have metadata (a.k.a. Payload) in the form of a variable + /// length byte array. Use <see cref="TermPositions.PayloadLength" /> and + /// <see cref="TermPositions.GetPayload(byte[], int)" /> to retrieve the payloads from the index. + /// </summary> + /// <summary><br/><br/> + /// </summary> + /// <summary><p/><b>NOTE:</b> As of 2.9, Token implements all <see cref="IAttribute" /> interfaces + /// that are part of core Lucene and can be found in the <see cref="Lucene.Net.Analysis.Tokenattributes"/> namespace. + /// Even though it is not necessary to use Token anymore, with the new TokenStream API it can + /// be used as convenience class that implements all <see cref="IAttribute" />s, which is especially useful + /// to easily switch from the old to the new TokenStream API. + /// <br/><br/> + /// <p/>Tokenizers and TokenFilters should try to re-use a Token instance when + /// possible for best performance, by implementing the + /// <see cref="TokenStream.IncrementToken()" /> API. + /// Failing that, to create a new Token you should first use + /// one of the constructors that starts with null text. To load + /// the token from a char[] use <see cref="SetTermBuffer(char[], int, int)" />. + /// To load from a String use <see cref="SetTermBuffer(String)" /> or <see cref="SetTermBuffer(String, int, int)" />. + /// Alternatively you can get the Token's termBuffer by calling either <see cref="TermBuffer()" />, + /// if you know that your text is shorter than the capacity of the termBuffer + /// or <see cref="ResizeTermBuffer(int)" />, if there is any possibility + /// that you may need to grow the buffer. 
Fill in the characters of your term into this + /// buffer, with <see cref="string.ToCharArray(int, int)" /> if loading from a string, + /// or with <see cref="Array.Copy(Array, long, Array, long, long)" />, and finally call <see cref="SetTermLength(int)" /> to + /// set the length of the term text. See <a target="_top" + /// href="https://issues.apache.org/jira/browse/LUCENE-969">LUCENE-969</a> + /// for details.<p/> + /// <p/>Typical Token reuse patterns: + /// <list type="bullet"> + /// <item> Copying text from a string (type is reset to <see cref="DEFAULT_TYPE" /> if not + /// specified):<br/> + /// <code> + /// return reusableToken.reinit(string, startOffset, endOffset[, type]); + /// </code> + /// </item> + /// <item> Copying some text from a string (type is reset to <see cref="DEFAULT_TYPE" /> + /// if not specified):<br/> + /// <code> + /// return reusableToken.reinit(string, 0, string.length(), startOffset, endOffset[, type]); + /// </code> + /// </item> + /// <item> Copying text from char[] buffer (type is reset to <see cref="DEFAULT_TYPE" /> + /// if not specified):<br/> + /// <code> + /// return reusableToken.reinit(buffer, 0, buffer.length, startOffset, endOffset[, type]); + /// </code> + /// </item> + /// <item> Copying some text from a char[] buffer (type is reset to + /// <see cref="DEFAULT_TYPE" /> if not specified):<br/> + /// <code> + /// return reusableToken.reinit(buffer, start, end - start, startOffset, endOffset[, type]); + /// </code> + /// </item> + /// <item> Copying from one one Token to another (type is reset to + /// <see cref="DEFAULT_TYPE" /> if not specified):<br/> + /// <code> + /// return reusableToken.reinit(source.termBuffer(), 0, source.termLength(), source.startOffset(), source.endOffset()[, source.type()]); + /// </code> + /// </item> + /// </list> + /// A few things to note: + /// <list type="bullet"> + /// <item>clear() initializes all of the fields to default values. 
This was changed in contrast to Lucene 2.4, but should affect no one.</item> + /// <item>Because <c>TokenStreams</c> can be chained, one cannot assume that the <c>Token's</c> current type is correct.</item> + /// <item>The startOffset and endOffset represent the start and offset in the + /// source text, so be careful in adjusting them.</item> + /// <item>When caching a reusable token, clone it. When injecting a cached token into a stream that can be reset, clone it again.</item> + /// </list> + /// <p/> + /// </summary> + /// <seealso cref="Lucene.Net.Index.Payload"> + /// </seealso> + [Serializable] + public class Token : Attribute, ITermAttribute, ITypeAttribute, IPositionIncrementAttribute, IFlagsAttribute, IOffsetAttribute, IPayloadAttribute + { + public const String DEFAULT_TYPE = "word"; + + private const int MIN_BUFFER_SIZE = 10; + + private char[] termBuffer; + private int termLength; + private int startOffset, endOffset; + private string type = DEFAULT_TYPE; + private int flags; + private Payload payload; + private int positionIncrement = 1; + + /// <summary>Constructs a Token will null text. </summary> + public Token() + { + } + + /// <summary>Constructs a Token with null text and start & end + /// offsets. + /// </summary> + /// <param name="start">start offset in the source text</param> + /// <param name="end">end offset in the source text</param> + public Token(int start, int end) + { + startOffset = start; + endOffset = end; + } + + /// <summary>Constructs a Token with null text and start & end + /// offsets plus the Token type. + /// </summary> + /// <param name="start">start offset in the source text</param> + /// <param name="end">end offset in the source text</param> + /// <param name="typ">the lexical type of this Token</param> + public Token(int start, int end, String typ) + { + startOffset = start; + endOffset = end; + type = typ; + } + + /// <summary> Constructs a Token with null text and start & end + /// offsets plus flags. 
NOTE: flags is EXPERIMENTAL. + /// </summary> + /// <param name="start">start offset in the source text</param> + /// <param name="end">end offset in the source text</param> + /// <param name="flags">The bits to set for this token</param> + public Token(int start, int end, int flags) + { + startOffset = start; + endOffset = end; + this.flags = flags; + } + + /// <summary>Constructs a Token with the given term text, and start + /// & end offsets. The type defaults to "word." + /// <b>NOTE:</b> for better indexing speed you should + /// instead use the char[] termBuffer methods to set the + /// term text. + /// </summary> + /// <param name="text">term text</param> + /// <param name="start">start offset</param> + /// <param name="end">end offset</param> + public Token(String text, int start, int end) + { + SetTermBuffer(text); + startOffset = start; + endOffset = end; + } + + /// <summary>Constructs a Token with the given text, start and end + /// offsets, & type. <b>NOTE:</b> for better indexing + /// speed you should instead use the char[] termBuffer + /// methods to set the term text. + /// </summary> + /// <param name="text">term text</param> + /// <param name="start">start offset</param> + /// <param name="end">end offset</param> + /// <param name="typ">token type</param> + public Token(System.String text, int start, int end, System.String typ) + { + SetTermBuffer(text); + startOffset = start; + endOffset = end; + type = typ; + } + + /// <summary> Constructs a Token with the given text, start and end + /// offsets, & type. <b>NOTE:</b> for better indexing + /// speed you should instead use the char[] termBuffer + /// methods to set the term text. 
+ /// </summary> + /// <param name="text"></param> + /// <param name="start"></param> + /// <param name="end"></param> + /// <param name="flags">token type bits</param> + public Token(System.String text, int start, int end, int flags) + { + SetTermBuffer(text); + startOffset = start; + endOffset = end; + this.flags = flags; + } + + /// <summary> Constructs a Token with the given term buffer (offset + /// & length), start and end + /// offsets + /// </summary> + /// <param name="startTermBuffer"></param> + /// <param name="termBufferOffset"></param> + /// <param name="termBufferLength"></param> + /// <param name="start"></param> + /// <param name="end"></param> + public Token(char[] startTermBuffer, int termBufferOffset, int termBufferLength, int start, int end) + { + SetTermBuffer(startTermBuffer, termBufferOffset, termBufferLength); + startOffset = start; + endOffset = end; + } + + /// <summary>Set the position increment. This determines the position of this token + /// relative to the previous Token in a <see cref="TokenStream" />, used in phrase + /// searching. + /// + /// <p/>The default value is one. + /// + /// <p/>Some common uses for this are:<list> + /// + /// <item>Set it to zero to put multiple terms in the same position. This is + /// useful if, e.g., a word has multiple stems. Searches for phrases + /// including either stem will match. In this case, all but the first stem's + /// increment should be set to zero: the increment of the first instance + /// should be one. Repeating a token with an increment of zero can also be + /// used to boost the scores of matches on that token.</item> + /// + /// <item>Set it to values greater than one to inhibit exact phrase matches. + /// If, for example, one does not want phrases to match across removed stop + /// words, then one could build a stop word filter that removes stop words and + /// also sets the increment to the number of stop words removed before each + /// non-stop word. 
Then exact phrase queries will only match when the terms + /// occur with no intervening stop words.</item> + /// + /// </list> + /// </summary> + /// <value> the distance from the prior term </value> + /// <seealso cref="Lucene.Net.Index.TermPositions"> + /// </seealso> + public virtual int PositionIncrement + { + set + { + if (value < 0) + throw new System.ArgumentException("Increment must be zero or greater: " + value); + this.positionIncrement = value; + } + get { return positionIncrement; } + } + + /// <summary>Returns the Token's term text. + /// + /// This method has a performance penalty + /// because the text is stored internally in a char[]. If + /// possible, use <see cref="TermBuffer()" /> and <see cref="TermLength()"/> + /// directly instead. If you really need a + /// String, use this method, which is nothing more than + /// a convenience call to <b>new String(token.termBuffer(), 0, token.termLength())</b> + /// </summary> + public string Term + { + get + { + InitTermBuffer(); + return new System.String(termBuffer, 0, termLength); + } + } + + /// <summary>Copies the contents of buffer, starting at offset for + /// length characters, into the termBuffer array. 
+ /// </summary> + /// <param name="buffer">the buffer to copy</param> + /// <param name="offset">the index in the buffer of the first character to copy</param> + /// <param name="length">the number of characters to copy</param> + public void SetTermBuffer(char[] buffer, int offset, int length) + { + GrowTermBuffer(length); + Array.Copy(buffer, offset, termBuffer, 0, length); + termLength = length; + } + + /// <summary>Copies the contents of buffer into the termBuffer array.</summary> + /// <param name="buffer">the buffer to copy + /// </param> + public void SetTermBuffer(System.String buffer) + { + int length = buffer.Length; + GrowTermBuffer(length); + TextSupport.GetCharsFromString(buffer, 0, length, termBuffer, 0); + termLength = length; + } + + /// <summary>Copies the contents of buffer, starting at offset and continuing + /// for length characters, into the termBuffer array. + /// </summary> + /// <param name="buffer">the buffer to copy + /// </param> + /// <param name="offset">the index in the buffer of the first character to copy + /// </param> + /// <param name="length">the number of characters to copy + /// </param> + public void SetTermBuffer(System.String buffer, int offset, int length) + { + System.Diagnostics.Debug.Assert(offset <= buffer.Length); + System.Diagnostics.Debug.Assert(offset + length <= buffer.Length); + GrowTermBuffer(length); + TextSupport.GetCharsFromString(buffer, offset, offset + length, termBuffer, 0); + termLength = length; + } + + /// <summary>Returns the internal termBuffer character array which + /// you can then directly alter. If the array is too + /// small for your token, use <see cref="ResizeTermBuffer(int)" /> + /// to increase it. After + /// altering the buffer be sure to call <see cref="SetTermLength" /> + /// to record the number of valid + /// characters that were placed into the termBuffer. 
+ /// </summary> + public char[] TermBuffer() + { + InitTermBuffer(); + return termBuffer; + } + + /// <summary>Grows the termBuffer to at least size newSize, preserving the + /// existing content. Note: If the next operation is to change + /// the contents of the term buffer use + /// <see cref="SetTermBuffer(char[], int, int)" />, + /// <see cref="SetTermBuffer(String)" />, or + /// <see cref="SetTermBuffer(String, int, int)" /> + /// to optimally combine the resize with the setting of the termBuffer. + /// </summary> + /// <param name="newSize">minimum size of the new termBuffer + /// </param> + /// <returns> newly created termBuffer with length >= newSize + /// </returns> + public virtual char[] ResizeTermBuffer(int newSize) + { + if (termBuffer == null) + { + termBuffer = new char[ArrayUtil.GetNextSize(newSize < MIN_BUFFER_SIZE ? MIN_BUFFER_SIZE : newSize)]; + } + else + { + if (termBuffer.Length < newSize) + { + // Not big enough; create a new array with slight + // over allocation and preserve content + var newCharBuffer = new char[ArrayUtil.GetNextSize(newSize)]; + Array.Copy(termBuffer, 0, newCharBuffer, 0, termBuffer.Length); + termBuffer = newCharBuffer; + } + } + return termBuffer; + } + + /// <summary>Allocates a buffer char[] of at least newSize, without preserving the existing content. 
+ /// its always used in places that set the content + /// </summary> + /// <param name="newSize">minimum size of the buffer + /// </param> + private void GrowTermBuffer(int newSize) + { + if (termBuffer == null) + { + // The buffer is always at least MIN_BUFFER_SIZE + termBuffer = new char[ArrayUtil.GetNextSize(newSize < MIN_BUFFER_SIZE?MIN_BUFFER_SIZE:newSize)]; + } + else + { + if (termBuffer.Length < newSize) + { + // Not big enough; create a new array with slight + // over allocation: + termBuffer = new char[ArrayUtil.GetNextSize(newSize)]; + } + } + } + + private void InitTermBuffer() + { + if (termBuffer == null) + { + termBuffer = new char[ArrayUtil.GetNextSize(MIN_BUFFER_SIZE)]; + termLength = 0; + } + } + + /// <summary>Return number of valid characters (length of the term) + /// in the termBuffer array. + /// </summary> + public int TermLength() + { + InitTermBuffer(); + return termLength; + } + + /// <summary>Set number of valid characters (length of the term) in + /// the termBuffer array. Use this to truncate the termBuffer + /// or to synchronize with external manipulation of the termBuffer. + /// Note: to grow the size of the array, + /// use <see cref="ResizeTermBuffer(int)" /> first. + /// </summary> + /// <param name="length">the truncated length + /// </param> + public void SetTermLength(int length) + { + InitTermBuffer(); + if (length > termBuffer.Length) + throw new System.ArgumentException("length " + length + " exceeds the size of the termBuffer (" + termBuffer.Length + ")"); + termLength = length; + } + + /// <summary>Gets or sets this Token's starting offset, the position of the first character + /// corresponding to this token in the source text. + /// Note that the difference between endOffset() and startOffset() may not be + /// equal to <see cref="TermLength"/>, as the term text may have been altered by a + /// stemmer or some other filter. 
+ /// </summary> + public virtual int StartOffset + { + get { return startOffset; } + set { this.startOffset = value; } + } + + /// <summary>Gets or sets this Token's ending offset, one greater than the position of the + /// last character corresponding to this token in the source text. The length + /// of the token in the source text is (endOffset - startOffset). + /// </summary> + public virtual int EndOffset + { + get { return endOffset; } + set { this.endOffset = value; } + } + + /// <summary>Set the starting and ending offset. + /// See StartOffset() and EndOffset() + /// </summary> + public virtual void SetOffset(int startOffset, int endOffset) + { + this.startOffset = startOffset; + this.endOffset = endOffset; + } + + /// <summary>Returns this Token's lexical type. Defaults to "word". </summary> + public string Type + { + get { return type; } + set { this.type = value; } + } + + /// <summary> EXPERIMENTAL: While we think this is here to stay, we may want to change it to be a long. + /// <p/> + /// + /// Get the bitset for any bits that have been set. This is completely distinct from <see cref="Type()" />, although they do share similar purposes. + /// The flags can be used to encode information about the token for use by other <see cref="TokenFilter"/>s. 
+ /// + /// + /// </summary> + /// <value> The bits </value> + public virtual int Flags + { + get { return flags; } + set { flags = value; } + } + + /// <summary> Returns this Token's payload.</summary> + public virtual Payload Payload + { + get { return payload; } + set { payload = value; } + } + + public override String ToString() + { + var sb = new System.Text.StringBuilder(); + sb.Append('('); + InitTermBuffer(); + if (termBuffer == null) + sb.Append("null"); + else + sb.Append(termBuffer, 0, termLength); + sb.Append(',').Append(startOffset).Append(',').Append(endOffset); + if (!type.Equals("word")) + sb.Append(",type=").Append(type); + if (positionIncrement != 1) + sb.Append(",posIncr=").Append(positionIncrement); + sb.Append(')'); + return sb.ToString(); + } + + /// <summary>Resets the term text, payload, flags, and positionIncrement, + /// startOffset, endOffset and token type to default. + /// </summary> + public override void Clear() + { + payload = null; + // Leave termBuffer to allow re-use + termLength = 0; + positionIncrement = 1; + flags = 0; + startOffset = endOffset = 0; + type = DEFAULT_TYPE; + } + + public override System.Object Clone() + { + var t = (Token) base.Clone(); + // Do a deep clone + if (termBuffer != null) + { + t.termBuffer = new char[termBuffer.Length]; + termBuffer.CopyTo(t.termBuffer, 0); + } + if (payload != null) + { + t.payload = (Payload) payload.Clone(); + } + return t; + } + + /// <summary>Makes a clone, but replaces the term buffer & + /// start/end offset in the process. This is more + /// efficient than doing a full clone (and then calling + /// setTermBuffer) because it saves a wasted copy of the old + /// termBuffer. 
+ /// </summary> + public virtual Token Clone(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) + { + var t = new Token(newTermBuffer, newTermOffset, newTermLength, newStartOffset, newEndOffset) + {positionIncrement = positionIncrement, flags = flags, type = type}; + if (payload != null) + t.payload = (Payload) payload.Clone(); + return t; + } + + public override bool Equals(Object obj) + { + if (obj == this) + return true; + + var other = obj as Token; + if (other == null) + return false; + + InitTermBuffer(); + other.InitTermBuffer(); + + if (termLength == other.termLength && startOffset == other.startOffset && endOffset == other.endOffset && + flags == other.flags && positionIncrement == other.positionIncrement && SubEqual(type, other.type) && + SubEqual(payload, other.payload)) + { + for (int i = 0; i < termLength; i++) + if (termBuffer[i] != other.termBuffer[i]) + return false; + return true; + } + return false; + } + + private bool SubEqual(System.Object o1, System.Object o2) + { + if (o1 == null) + return o2 == null; + return o1.Equals(o2); + } + + public override int GetHashCode() + { + InitTermBuffer(); + int code = termLength; + code = code * 31 + startOffset; + code = code * 31 + endOffset; + code = code * 31 + flags; + code = code * 31 + positionIncrement; + code = code * 31 + type.GetHashCode(); + code = (payload == null?code:code * 31 + payload.GetHashCode()); + code = code * 31 + ArrayUtil.HashCode(termBuffer, 0, termLength); + return code; + } + + // like clear() but doesn't clear termBuffer/text + private void ClearNoTermBuffer() + { + payload = null; + positionIncrement = 1; + flags = 0; + startOffset = endOffset = 0; + type = DEFAULT_TYPE; + } + + /// <summary>Shorthand for calling <see cref="Clear" />, + /// <see cref="SetTermBuffer(char[], int, int)" />, + /// <see cref="StartOffset" />, + /// <see cref="EndOffset" />, + /// <see cref="Type" /> + /// </summary> + /// <returns> this Token instance 
+ /// </returns> + public virtual Token Reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, System.String newType) + { + ClearNoTermBuffer(); + payload = null; + positionIncrement = 1; + SetTermBuffer(newTermBuffer, newTermOffset, newTermLength); + startOffset = newStartOffset; + endOffset = newEndOffset; + type = newType; + return this; + } + + /// <summary>Shorthand for calling <see cref="Clear" />, + /// <see cref="SetTermBuffer(char[], int, int)" />, + /// <see cref="StartOffset" />, + /// <see cref="EndOffset" /> + /// <see cref="Type" /> on Token.DEFAULT_TYPE + /// </summary> + /// <returns> this Token instance + /// </returns> + public virtual Token Reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) + { + ClearNoTermBuffer(); + SetTermBuffer(newTermBuffer, newTermOffset, newTermLength); + startOffset = newStartOffset; + endOffset = newEndOffset; + type = DEFAULT_TYPE; + return this; + } + + /// <summary>Shorthand for calling <see cref="Clear" />, + /// <see cref="SetTermBuffer(String)" />, + /// <see cref="StartOffset" />, + /// <see cref="EndOffset" /> + /// <see cref="Type" /> + /// </summary> + /// <returns> this Token instance + /// </returns> + public virtual Token Reinit(System.String newTerm, int newStartOffset, int newEndOffset, System.String newType) + { + ClearNoTermBuffer(); + SetTermBuffer(newTerm); + startOffset = newStartOffset; + endOffset = newEndOffset; + type = newType; + return this; + } + + /// <summary>Shorthand for calling <see cref="Clear" />, + /// <see cref="SetTermBuffer(String, int, int)" />, + /// <see cref="StartOffset" />, + /// <see cref="EndOffset" /> + /// <see cref="Type" /> + /// </summary> + /// <returns> this Token instance + /// </returns> + public virtual Token Reinit(System.String newTerm, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, System.String newType) + { + ClearNoTermBuffer(); 
+ SetTermBuffer(newTerm, newTermOffset, newTermLength); + startOffset = newStartOffset; + endOffset = newEndOffset; + type = newType; + return this; + } + + /// <summary>Shorthand for calling <see cref="Clear" />, + /// <see cref="SetTermBuffer(String)" />, + /// <see cref="StartOffset" />, + /// <see cref="EndOffset" /> + /// <see cref="Type" /> on Token.DEFAULT_TYPE + /// </summary> + /// <returns> this Token instance + /// </returns> + public virtual Token Reinit(System.String newTerm, int newStartOffset, int newEndOffset) + { + ClearNoTermBuffer(); + SetTermBuffer(newTerm); + startOffset = newStartOffset; + endOffset = newEndOffset; + type = DEFAULT_TYPE; + return this; + } + + /// <summary>Shorthand for calling <see cref="Clear" />, + /// <see cref="SetTermBuffer(String, int, int)" />, + /// <see cref="StartOffset" />, + /// <see cref="EndOffset" /> + /// <see cref="Type" /> on Token.DEFAULT_TYPE + /// </summary> + /// <returns> this Token instance + /// </returns> + public virtual Token Reinit(System.String newTerm, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) + { + ClearNoTermBuffer(); + SetTermBuffer(newTerm, newTermOffset, newTermLength); + startOffset = newStartOffset; + endOffset = newEndOffset; + type = DEFAULT_TYPE; + return this; + } + + /// <summary> Copy the prototype token's fields into this one. Note: Payloads are shared.</summary> + /// <param name="prototype"> + /// </param> + public virtual void Reinit(Token prototype) + { + prototype.InitTermBuffer(); + SetTermBuffer(prototype.termBuffer, 0, prototype.termLength); + positionIncrement = prototype.positionIncrement; + flags = prototype.flags; + startOffset = prototype.startOffset; + endOffset = prototype.endOffset; + type = prototype.type; + payload = prototype.payload; + } + + /// <summary> Copy the prototype token's fields into this one, with a different term. 
Note: Payloads are shared.</summary> + /// <param name="prototype"> + /// </param> + /// <param name="newTerm"> + /// </param> + public virtual void Reinit(Token prototype, System.String newTerm) + { + SetTermBuffer(newTerm); + positionIncrement = prototype.positionIncrement; + flags = prototype.flags; + startOffset = prototype.startOffset; + endOffset = prototype.endOffset; + type = prototype.type; + payload = prototype.payload; + } + + /// <summary> Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared.</summary> + /// <param name="prototype"> + /// </param> + /// <param name="newTermBuffer"> + /// </param> + /// <param name="offset"> + /// </param> + /// <param name="length"> + /// </param> + public virtual void Reinit(Token prototype, char[] newTermBuffer, int offset, int length) + { + SetTermBuffer(newTermBuffer, offset, length); + positionIncrement = prototype.positionIncrement; + flags = prototype.flags; + startOffset = prototype.startOffset; + endOffset = prototype.endOffset; + type = prototype.type; + payload = prototype.payload; + } + + public override void CopyTo(Attribute target) + { + if (target is Token) + { + var to = (Token) target; + to.Reinit(this); + // reinit shares the payload, so clone it: + if (payload != null) + { + to.payload = (Payload) payload.Clone(); + } + } + else + { + InitTermBuffer(); + ((ITermAttribute) target).SetTermBuffer(termBuffer, 0, termLength); + ((IOffsetAttribute) target).SetOffset(startOffset, endOffset); + ((IPositionIncrementAttribute) target).PositionIncrement = positionIncrement; + ((IPayloadAttribute) target).Payload = (payload == null)?null:(Payload) payload.Clone(); + ((IFlagsAttribute) target).Flags = flags; + ((ITypeAttribute) target).Type = type; + } + } + + ///<summary> + /// Convenience factory that returns <c>Token</c> as implementation for the basic + /// attributes and return the default impl (with "Impl" appended) for all other + /// attributes. 
+ /// @since 3.0 + /// </summary> + public static AttributeSource.AttributeFactory TOKEN_ATTRIBUTE_FACTORY = + new TokenAttributeFactory(AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY); + + /// <summary> + /// <b>Expert</b>: Creates an AttributeFactory returning {@link Token} as instance for the basic attributes + /// and for all other attributes calls the given delegate factory. + /// </summary> + public class TokenAttributeFactory : AttributeSource.AttributeFactory + { + + private readonly AttributeSource.AttributeFactory _delegateFactory; + + /// <summary> + /// <b>Expert</b>: Creates an AttributeFactory returning {@link Token} as instance for the basic attributes + /// and for all other attributes calls the given delegate factory. + /// </summary> + public TokenAttributeFactory(AttributeSource.AttributeFactory delegateFactory) + { + this._delegateFactory = delegateFactory; + } + + public override Attribute CreateAttributeInstance<T>() + { + return typeof(T).IsAssignableFrom(typeof(Token)) + ? new Token() + : _delegateFactory.CreateAttributeInstance<T>(); + } + + public override bool Equals(Object other) + { + if (this == other) return true; + + var af = other as TokenAttributeFactory; + return af != null && _delegateFactory.Equals(af._delegateFactory); + } + + public override int GetHashCode() + { + return _delegateFactory.GetHashCode() ^ 0x0a45aa31; + } + } + } +}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

namespace Lucene.Net.Analysis
{
    /// <summary> A TokenFilter is a TokenStream whose input is another TokenStream.
    /// <p/>
    /// This is an abstract class; subclasses must override <see cref="TokenStream.IncrementToken()" />.
    /// </summary>
    /// <seealso cref="TokenStream">
    /// </seealso>
    public abstract class TokenFilter : TokenStream
    {
        /// <summary>The source of tokens for this filter. </summary>
        protected internal TokenStream input;

        // Guards against double-dispose.
        private bool disposed;

        /// <summary>Construct a token stream filtering the given input. </summary>
        protected internal TokenFilter(TokenStream input)
            : base(input)
        {
            this.input = input;
        }

        /// <summary>Performs end-of-stream operations, if any, and then calls <c>End()</c> on the
        /// input TokenStream.<p/>
        /// <b>NOTE:</b> Be sure to call <c>base.End()</c> first when overriding this method.
        /// </summary>
        public override void End()
        {
            input.End();
        }

        protected override void Dispose(bool disposing)
        {
            if (disposed)
            {
                return;
            }

            // Only touch managed state when disposing deterministically.
            if (disposing && input != null)
            {
                input.Close();
            }

            //input = null;
            disposed = true;
        }

        /// <summary>Reset the filter as well as the input TokenStream. </summary>
        public override void Reset()
        {
            input.Reset();
        }
    }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using Lucene.Net.Util;
using Document = Lucene.Net.Documents.Document;
using Field = Lucene.Net.Documents.Field;
using IndexWriter = Lucene.Net.Index.IndexWriter;
using AttributeSource = Lucene.Net.Util.AttributeSource;

namespace Lucene.Net.Analysis
{

    /// <summary> A <c>TokenStream</c> enumerates the sequence of tokens, either from
    /// <see cref="Field" />s of a <see cref="Document" /> or from query text.
    /// <p/>
    /// This is an abstract class. Concrete subclasses are:
    /// <list type="bullet">
    /// <item><see cref="Tokenizer" />, a <c>TokenStream</c> whose input is a Reader; and</item>
    /// <item><see cref="TokenFilter" />, a <c>TokenStream</c> whose input is another
    /// <c>TokenStream</c>.</item>
    /// </list>
    /// <c>TokenStream</c> extends <see cref="AttributeSource" />, which provides
    /// access to all of the token <see cref="IAttribute" />s for the <c>TokenStream</c>.
    /// Note that only one instance per <see cref="Util.Attribute" /> is created and reused
    /// for every token.
    /// <p/>
    /// <b>The workflow of the <c>TokenStream</c> API is as follows:</b>
    /// <list type="bullet">
    /// <item>Instantiation of <c>TokenStream</c>/<see cref="TokenFilter" />s which add/get
    /// attributes to/from the <see cref="AttributeSource" />.</item>
    /// <item>The consumer calls <see cref="TokenStream.Reset()" />.</item>
    /// <item>The consumer retrieves attributes from the stream and stores local
    /// references to all attributes it wants to access.</item>
    /// <item>The consumer calls <see cref="IncrementToken()" /> until it returns false and
    /// consumes the attributes after each call.</item>
    /// <item>The consumer calls <see cref="End()" /> so that any end-of-stream operations
    /// can be performed.</item>
    /// <item>The consumer calls <see cref="Close()" /> to release any resource when finished
    /// using the <c>TokenStream</c>.</item>
    /// </list>
    /// To make sure that filters and consumers know which attributes are available,
    /// the attributes must be added during instantiation. Filters and consumers are
    /// not required to check for availability of attributes in
    /// <see cref="IncrementToken()" />.
    /// <p/>
    /// To capture a current state of a <c>TokenStream</c> for buffering purposes
    /// (see <see cref="CachingTokenFilter" />, <see cref="TeeSinkTokenFilter" />),
    /// <see cref="AttributeSource.CaptureState" /> and <see cref="AttributeSource.RestoreState" />
    /// can be used.
    /// </summary>
    public abstract class TokenStream : AttributeSource, IDisposable
    {
        /// <summary> A TokenStream using the default attribute factory.</summary>
        protected internal TokenStream()
        { }

        /// <summary> A TokenStream that uses the same attributes as the supplied one.</summary>
        protected internal TokenStream(AttributeSource input)
            : base(input)
        { }

        /// <summary> A TokenStream using the supplied AttributeFactory for creating new <see cref="IAttribute" /> instances.</summary>
        protected internal TokenStream(AttributeFactory factory)
            : base(factory)
        { }

        /// <summary> Consumers (i.e., <see cref="IndexWriter" />) use this method to advance the stream to
        /// the next token. Implementing classes must implement this method and update
        /// the appropriate <see cref="Util.Attribute" />s with the attributes of the next
        /// token.
        ///
        /// The producer must make no assumptions about the attributes after the
        /// method has been returned: the caller may arbitrarily change it. If the
        /// producer needs to preserve the state for subsequent calls, it can use
        /// <see cref="AttributeSource.CaptureState" /> to create a copy of the current attribute state.
        ///
        /// This method is called for every token of a document, so an efficient
        /// implementation is crucial for good performance. To avoid calls to
        /// <see cref="AttributeSource.AddAttribute{T}()" /> and <see cref="AttributeSource.GetAttribute{T}()" />,
        /// references to all <see cref="Util.Attribute" />s that this stream uses should be
        /// retrieved during instantiation.
        /// </summary>
        /// <returns> false for end of stream; true otherwise</returns>
        public abstract bool IncrementToken();

        /// <summary> This method is called by the consumer after the last token has been
        /// consumed, after <see cref="IncrementToken" /> returned <c>false</c>.
        /// <p/>
        /// This method can be used to perform any end-of-stream operations, such as
        /// setting the final offset of a stream. The final offset of a stream might
        /// differ from the offset of the last token, e.g. in case one or more whitespaces
        /// followed after the last token, but a <see cref="WhitespaceTokenizer" /> was used.
        /// </summary>
        /// <throws> IOException </throws>
        public virtual void End()
        {
            // do nothing by default
        }

        /// <summary> Resets this stream to the beginning. This is an optional operation, so
        /// subclasses may or may not implement this method. <see cref="Reset()" /> is not needed for
        /// the standard indexing process. However, if the tokens of a
        /// <c>TokenStream</c> are intended to be consumed more than once, it is
        /// necessary to implement <see cref="Reset()" />. Note that if your TokenStream
        /// caches tokens and feeds them back again after a reset, it is imperative
        /// that you clone the tokens when you store them away (on the first pass) as
        /// well as when you return them (on future passes after <see cref="Reset()" />).
        /// </summary>
        public virtual void Reset()
        {
        }

        /// <summary>Releases resources associated with this stream. </summary>
        [Obsolete("Use Dispose() instead")]
        public void Close()
        {
            Dispose();
        }

        public void Dispose()
        {
            Dispose(true);
            // Canonical dispose pattern (CA1816): tell the GC no finalization is needed
            // once the stream has been disposed deterministically.
            GC.SuppressFinalize(this);
        }

        protected abstract void Dispose(bool disposing);
    }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using Attribute = Lucene.Net.Util.Attribute;

namespace Lucene.Net.Analysis.Tokenattributes
{

    /// <summary> This attribute can be used to pass different flags down the tokenizer chain,
    /// e.g. from one TokenFilter to another one.
    /// </summary>
    [Serializable]
    public class FlagsAttribute : Util.Attribute, IFlagsAttribute, System.ICloneable
    {
        private int flags = 0;

        /// <summary> EXPERIMENTAL: While we think this is here to stay, we may want to change it to be a long.
        /// <p/>
        ///
        /// Gets or sets the bitset for any bits that have been set. This is completely distinct from
        /// <see cref="ITypeAttribute.Type()" />, although they do share similar purposes.
        /// The flags can be used to encode information about the token for use by other
        /// <see cref="Lucene.Net.Analysis.TokenFilter" />s.
        /// </summary>
        /// <value> The bits </value>
        public virtual int Flags
        {
            get { return flags; }
            set { this.flags = value; }
        }

        /// <summary>Resets the flags to their default (no bits set).</summary>
        public override void Clear()
        {
            flags = 0;
        }

        public override bool Equals(System.Object other)
        {
            if (ReferenceEquals(this, other))
            {
                return true;
            }

            // Equal iff the other object is also a FlagsAttribute with the same bits.
            var that = other as FlagsAttribute;
            return that != null && that.flags == flags;
        }

        public override int GetHashCode()
        {
            return flags;
        }

        /// <summary>Copies the flag bits into the target attribute.</summary>
        public override void CopyTo(Attribute target)
        {
            var t = (IFlagsAttribute) target;
            t.Flags = flags;
        }

        public override System.Object Clone()
        {
            var copy = new FlagsAttribute();
            copy.flags = this.flags;
            return copy;
        }
    }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using Lucene.Net.Util;
using Tokenizer = Lucene.Net.Analysis.Tokenizer;

namespace Lucene.Net.Analysis.Tokenattributes
{

    /// <summary> This attribute can be used to pass different flags down the <see cref="Tokenizer" /> chain,
    /// e.g. from one TokenFilter to another one.
    /// </summary>
    public interface IFlagsAttribute:IAttribute
    {
        /// <summary> EXPERIMENTAL: While we think this is here to stay, we may want to change it to be a long.
        /// <p/>
        ///
        /// Gets or sets the bitset for any bits that have been set. This is completely distinct from
        /// <see cref="ITypeAttribute.Type()" />, although they do share similar purposes.
        /// The flags can be used to encode information about the token for use by other
        /// <see cref="Lucene.Net.Analysis.TokenFilter" />s.
        /// </summary>
        /// <value> The bits </value>
        int Flags { get; set; }
    }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using Lucene.Net.Util;

namespace Lucene.Net.Analysis.Tokenattributes
{

    /// <summary> The start and end character offset of a Token. </summary>
    public interface IOffsetAttribute : IAttribute
    {
        /// <summary>Returns this Token's starting offset, the position of the first character
        /// corresponding to this token in the source text.
        /// Note that the difference between <see cref="EndOffset" /> and <see cref="StartOffset" /> may not be
        /// equal to the term's length, as the term text may have been altered by a
        /// stemmer or some other filter.
        /// </summary>
        int StartOffset { get; }


        /// <summary>Sets the starting and ending offset in one call.
        /// See <see cref="StartOffset" /> and <see cref="EndOffset" />.
        /// </summary>
        /// <param name="startOffset">position of the token's first character in the source text</param>
        /// <param name="endOffset">one greater than the position of the token's last character</param>
        void SetOffset(int startOffset, int endOffset);


        /// <summary>Returns this Token's ending offset, one greater than the position of the
        /// last character corresponding to this token in the source text. The length
        /// of the token in the source text is (endOffset - startOffset).
        /// </summary>
        int EndOffset { get; }
    }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using Lucene.Net.Util;
using Payload = Lucene.Net.Index.Payload;

namespace Lucene.Net.Analysis.Tokenattributes
{

    /// <summary> The payload of a Token. See also <see cref="Payload" />.</summary>
    public interface IPayloadAttribute:IAttribute
    {
        /// <summary> Gets or sets this Token's payload. May be null when the token carries no payload.</summary>
        Payload Payload { get; set; }
    }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using Lucene.Net.Util;

namespace Lucene.Net.Analysis.Tokenattributes
{

    /// <summary>The positionIncrement determines the position of this token
    /// relative to the previous Token in a TokenStream, used in phrase
    /// searching.
    ///
    /// <p/>The default value is one.
    ///
    /// <p/>Some common uses for this are:<list>
    ///
    /// <item>Set it to zero to put multiple terms in the same position. This is
    /// useful if, e.g., a word has multiple stems. Searches for phrases
    /// including either stem will match. In this case, all but the first stem's
    /// increment should be set to zero: the increment of the first instance
    /// should be one. Repeating a token with an increment of zero can also be
    /// used to boost the scores of matches on that token.</item>
    ///
    /// <item>Set it to values greater than one to inhibit exact phrase matches.
    /// If, for example, one does not want phrases to match across removed stop
    /// words, then one could build a stop word filter that removes stop words and
    /// also sets the increment to the number of stop words removed before each
    /// non-stop word. Then exact phrase queries will only match when the terms
    /// occur with no intervening stop words.</item>
    ///
    /// </list>
    ///
    /// </summary>
    /// <seealso cref="Lucene.Net.Index.TermPositions">
    /// </seealso>
    public interface IPositionIncrementAttribute:IAttribute
    {
        /// <summary>Gets or sets the position increment. The default value is one.
        ///
        /// </summary>
        /// <value> the distance from the prior term </value>
        int PositionIncrement { set; get; }
    }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using Lucene.Net.Util;

namespace Lucene.Net.Analysis.Tokenattributes
{

    /// <summary> The term text of a Token.</summary>
    public interface ITermAttribute:IAttribute
    {
        /// <summary>Returns the Token's term text.
        ///
        /// This method has a performance penalty
        /// because the text is stored internally in a char[]. If
        /// possible, use <see cref="TermBuffer()" /> and <see cref="TermLength()" />
        /// directly instead. If you really need a
        /// String, use this method, which is nothing more than
        /// a convenience call to <b>new String(token.termBuffer(), 0, token.termLength())</b>
        /// </summary>
        string Term { get; }

        /// <summary>Copies the contents of buffer, starting at offset for
        /// length characters, into the termBuffer array.
        /// </summary>
        /// <param name="buffer">the buffer to copy
        /// </param>
        /// <param name="offset">the index in the buffer of the first character to copy
        /// </param>
        /// <param name="length">the number of characters to copy
        /// </param>
        void SetTermBuffer(char[] buffer, int offset, int length);

        /// <summary>Copies the contents of buffer into the termBuffer array.</summary>
        /// <param name="buffer">the buffer to copy
        /// </param>
        void SetTermBuffer(System.String buffer);

        /// <summary>Copies the contents of buffer, starting at offset and continuing
        /// for length characters, into the termBuffer array.
        /// </summary>
        /// <param name="buffer">the buffer to copy
        /// </param>
        /// <param name="offset">the index in the buffer of the first character to copy
        /// </param>
        /// <param name="length">the number of characters to copy
        /// </param>
        void SetTermBuffer(System.String buffer, int offset, int length);

        /// <summary>Returns the internal termBuffer character array which
        /// you can then directly alter. If the array is too
        /// small for your token, use <see cref="ResizeTermBuffer(int)" />
        /// to increase it. After
        /// altering the buffer be sure to call <see cref="SetTermLength" />
        /// to record the number of valid
        /// characters that were placed into the termBuffer.
        /// </summary>
        char[] TermBuffer();

        /// <summary>Grows the termBuffer to at least size newSize, preserving the
        /// existing content. Note: If the next operation is to change
        /// the contents of the term buffer use
        /// <see cref="SetTermBuffer(char[], int, int)" />,
        /// <see cref="SetTermBuffer(String)" />, or
        /// <see cref="SetTermBuffer(String, int, int)" />
        /// to optimally combine the resize with the setting of the termBuffer.
        /// </summary>
        /// <param name="newSize">minimum size of the new termBuffer
        /// </param>
        /// <returns> newly created termBuffer with length >= newSize
        /// </returns>
        char[] ResizeTermBuffer(int newSize);

        /// <summary>Return number of valid characters (length of the term)
        /// in the termBuffer array.
        /// </summary>
        int TermLength();

        /// <summary>Set number of valid characters (length of the term) in
        /// the termBuffer array. Use this to truncate the termBuffer
        /// or to synchronize with external manipulation of the termBuffer.
        /// Note: to grow the size of the array,
        /// use <see cref="ResizeTermBuffer(int)" /> first.
        /// </summary>
        /// <param name="length">the truncated length
        /// </param>
        void SetTermLength(int length);
    }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using Lucene.Net.Util;

namespace Lucene.Net.Analysis.Tokenattributes
{

    /// <summary> A Token's lexical type. The Default value is "word". </summary>
    public interface ITypeAttribute:IAttribute
    {
        /// <summary>Gets or sets this Token's lexical type. Defaults to "word". </summary>
        string Type { get; set; }
    }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using Attribute = Lucene.Net.Util.Attribute;

namespace Lucene.Net.Analysis.Tokenattributes
{

    /// <summary> The start and end character offset of a Token. </summary>
    [Serializable]
    public class OffsetAttribute : Attribute, IOffsetAttribute, System.ICloneable
    {
        private int startOffset;
        private int endOffset;

        /// <summary>Returns this Token's starting offset, the position of the first character
        /// corresponding to this token in the source text.
        /// Note that the difference between EndOffset and StartOffset may not be
        /// equal to the term's length, as the term text may have been altered by a
        /// stemmer or some other filter.
        /// </summary>
        public virtual int StartOffset
        {
            get { return startOffset; }
        }


        /// <summary>Sets the starting and ending offset in one call.
        /// See <see cref="StartOffset" /> and <see cref="EndOffset" />.
        /// </summary>
        public virtual void SetOffset(int startOffset, int endOffset)
        {
            this.startOffset = startOffset;
            this.endOffset = endOffset;
        }


        /// <summary>Returns this Token's ending offset, one greater than the position of the
        /// last character corresponding to this token in the source text. The length
        /// of the token in the source text is (endOffset - startOffset).
        /// </summary>
        public virtual int EndOffset
        {
            get { return endOffset; }
        }


        /// <summary>Resets both offsets to zero.</summary>
        public override void Clear()
        {
            startOffset = 0;
            endOffset = 0;
        }

        public override bool Equals(System.Object other)
        {
            if (ReferenceEquals(other, this))
            {
                return true;
            }

            // Equal iff the other object is an OffsetAttribute with identical offsets.
            var that = other as OffsetAttribute;
            return that != null
                && that.startOffset == startOffset
                && that.endOffset == endOffset;
        }

        public override int GetHashCode()
        {
            // 31 * start + end, matching the usual two-field hash combination.
            return startOffset * 31 + endOffset;
        }

        /// <summary>Copies both offsets into the target attribute.</summary>
        public override void CopyTo(Attribute target)
        {
            var t = (IOffsetAttribute) target;
            t.SetOffset(startOffset, endOffset);
        }

        public override System.Object Clone()
        {
            var copy = new OffsetAttribute();
            copy.startOffset = startOffset;
            copy.endOffset = endOffset;
            return copy;
        }
    }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using Attribute = Lucene.Net.Util.Attribute;
using Payload = Lucene.Net.Index.Payload;

namespace Lucene.Net.Analysis.Tokenattributes
{

    /// <summary> The payload of a Token. See also <see cref="Payload" />.</summary>
    [Serializable]
    public class PayloadAttribute : Attribute, IPayloadAttribute, System.ICloneable
    {
        private Payload payload;

        /// <summary> Initialize this attribute with no payload.</summary>
        public PayloadAttribute()
        {
        }

        /// <summary> Initialize this attribute with the given payload. </summary>
        public PayloadAttribute(Payload payload)
        {
            this.payload = payload;
        }

        /// <summary> Gets or sets this Token's payload. May be null.</summary>
        public virtual Payload Payload
        {
            get { return this.payload; }
            set { this.payload = value; }
        }

        public override void Clear()
        {
            payload = null;
        }

        /// <summary>Deep clone: the payload itself is cloned too, so the copy is independent.</summary>
        public override System.Object Clone()
        {
            var clone = (PayloadAttribute) base.Clone();
            if (payload != null)
            {
                clone.payload = (Payload) payload.Clone();
            }
            return clone;
        }

        public override bool Equals(System.Object other)
        {
            if (other == this)
            {
                return true;
            }

            // FIX: test the concrete type before casting. The previous code tested
            // "other is IPayloadAttribute" but cast to PayloadAttribute, which threw
            // InvalidCastException for any other IPayloadAttribute implementation.
            if (other is PayloadAttribute)
            {
                PayloadAttribute o = (PayloadAttribute) other;
                if (o.payload == null || payload == null)
                {
                    // Equal only when both payloads are absent.
                    return o.payload == null && payload == null;
                }

                return o.payload.Equals(payload);
            }

            return false;
        }

        public override int GetHashCode()
        {
            return (payload == null) ? 0 : payload.GetHashCode();
        }

        /// <summary>Copies a clone of the payload (or null) into the target attribute.</summary>
        public override void CopyTo(Attribute target)
        {
            IPayloadAttribute t = (IPayloadAttribute) target;
            t.Payload = (payload == null) ? null : (Payload) payload.Clone();
        }
    }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using Attribute = Lucene.Net.Util.Attribute;
using TokenStream = Lucene.Net.Analysis.TokenStream;

namespace Lucene.Net.Analysis.Tokenattributes
{

    /// <summary>The positionIncrement determines the position of this token
    /// relative to the previous Token in a <see cref="TokenStream" />, used in phrase
    /// searching.
    ///
    /// <p/>The default value is one.
    ///
    /// <p/>Some common uses for this are:<list>
    ///
    /// <item>Set it to zero to put multiple terms in the same position. This is
    /// useful if, e.g., a word has multiple stems. Searches for phrases
    /// including either stem will match. In this case, all but the first stem's
    /// increment should be set to zero: the increment of the first instance
    /// should be one. Repeating a token with an increment of zero can also be
    /// used to boost the scores of matches on that token.</item>
    ///
    /// <item>Set it to values greater than one to inhibit exact phrase matches.
    /// If, for example, one does not want phrases to match across removed stop
    /// words, then one could build a stop word filter that removes stop words and
    /// also sets the increment to the number of stop words removed before each
    /// non-stop word. Then exact phrase queries will only match when the terms
    /// occur with no intervening stop words.</item>
    ///
    /// </list>
    /// </summary>
    [Serializable]
    public class PositionIncrementAttribute : Attribute, IPositionIncrementAttribute, System.ICloneable
    {
        private int positionIncrement = 1;

        /// <summary>Gets or sets the position increment. The default value is one.
        ///
        /// </summary>
        /// <value> the distance from the prior term </value>
        /// <exception cref="System.ArgumentException">when a negative value is assigned</exception>
        public virtual int PositionIncrement
        {
            get { return positionIncrement; }
            set
            {
                // Negative increments are meaningless; reject them up front.
                if (value < 0)
                    throw new System.ArgumentException("Increment must be zero or greater: " + value);
                this.positionIncrement = value;
            }
        }

        /// <summary>Restores the default increment of one.</summary>
        public override void Clear()
        {
            this.positionIncrement = 1;
        }

        public override bool Equals(System.Object other)
        {
            if (ReferenceEquals(other, this))
            {
                return true;
            }

            var that = other as PositionIncrementAttribute;
            return that != null && that.positionIncrement == positionIncrement;
        }

        public override int GetHashCode()
        {
            return positionIncrement;
        }

        /// <summary>Copies the increment into the target attribute.</summary>
        public override void CopyTo(Attribute target)
        {
            var t = (IPositionIncrementAttribute) target;
            t.PositionIncrement = positionIncrement;
        }

        public override System.Object Clone()
        {
            var copy = new PositionIncrementAttribute();
            copy.positionIncrement = positionIncrement;
            return copy;
        }
    }
}
\ No newline at end of file diff --git a/src/core/Analysis/Tokenattributes/TermAttribute.cs b/src/core/Analysis/Tokenattributes/TermAttribute.cs new file mode 100644 index 0000000..f95402c --- /dev/null +++ b/src/core/Analysis/Tokenattributes/TermAttribute.cs @@ -0,0 +1,268 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Support; +using ArrayUtil = Lucene.Net.Util.ArrayUtil; +using Attribute = Lucene.Net.Util.Attribute; + +namespace Lucene.Net.Analysis.Tokenattributes +{ + + /// <summary> The term text of a Token.</summary> + [Serializable] + public class TermAttribute:Attribute, ITermAttribute, System.ICloneable + { + private static int MIN_BUFFER_SIZE = 10; + + private char[] termBuffer; + private int termLength; + + /// <summary>Returns the Token's term text. + /// + /// This method has a performance penalty + /// because the text is stored internally in a char[]. If + /// possible, use <see cref="TermBuffer()" /> and + /// <see cref="TermLength()" /> directly instead. 
If you + /// really need a String, use this method, which is nothing more than + /// a convenience call to <b>new String(token.termBuffer(), 0, token.termLength())</b> + /// </summary> + public virtual string Term + { + get + { + InitTermBuffer(); + return new System.String(termBuffer, 0, termLength); + } + } + + /// <summary>Copies the contents of buffer, starting at offset for + /// length characters, into the termBuffer array. + /// </summary> + /// <param name="buffer">the buffer to copy + /// </param> + /// <param name="offset">the index in the buffer of the first character to copy + /// </param> + /// <param name="length">the number of characters to copy + /// </param> + public virtual void SetTermBuffer(char[] buffer, int offset, int length) + { + GrowTermBuffer(length); + Array.Copy(buffer, offset, termBuffer, 0, length); + termLength = length; + } + + /// <summary>Copies the contents of buffer into the termBuffer array.</summary> + /// <param name="buffer">the buffer to copy + /// </param> + public virtual void SetTermBuffer(System.String buffer) + { + int length = buffer.Length; + GrowTermBuffer(length); + TextSupport.GetCharsFromString(buffer, 0, length, termBuffer, 0); + termLength = length; + } + + /// <summary>Copies the contents of buffer, starting at offset and continuing + /// for length characters, into the termBuffer array. 
+ /// </summary> + /// <param name="buffer">the buffer to copy + /// </param> + /// <param name="offset">the index in the buffer of the first character to copy + /// </param> + /// <param name="length">the number of characters to copy + /// </param> + public virtual void SetTermBuffer(System.String buffer, int offset, int length) + { + System.Diagnostics.Debug.Assert(offset <= buffer.Length); + System.Diagnostics.Debug.Assert(offset + length <= buffer.Length); + GrowTermBuffer(length); + TextSupport.GetCharsFromString(buffer, offset, offset + length, termBuffer, 0); + termLength = length; + } + + /// <summary>Returns the internal termBuffer character array which + /// you can then directly alter. If the array is too + /// small for your token, use <see cref="ResizeTermBuffer(int)" /> + /// to increase it. After + /// altering the buffer be sure to call <see cref="SetTermLength" /> + /// to record the number of valid + /// characters that were placed into the termBuffer. + /// </summary> + public virtual char[] TermBuffer() + { + InitTermBuffer(); + return termBuffer; + } + + /// <summary>Grows the termBuffer to at least size newSize, preserving the + /// existing content. Note: If the next operation is to change + /// the contents of the term buffer use + /// <see cref="SetTermBuffer(char[], int, int)" />, + /// <see cref="SetTermBuffer(String)" />, or + /// <see cref="SetTermBuffer(String, int, int)" /> + /// to optimally combine the resize with the setting of the termBuffer. 
+ /// </summary> + /// <param name="newSize">minimum size of the new termBuffer + /// </param> + /// <returns> newly created termBuffer with length >= newSize + /// </returns> + public virtual char[] ResizeTermBuffer(int newSize) + { + if (termBuffer == null) + { + // The buffer is always at least MIN_BUFFER_SIZE + termBuffer = new char[ArrayUtil.GetNextSize(newSize < MIN_BUFFER_SIZE?MIN_BUFFER_SIZE:newSize)]; + } + else + { + if (termBuffer.Length < newSize) + { + // Not big enough; create a new array with slight + // over allocation and preserve content + char[] newCharBuffer = new char[ArrayUtil.GetNextSize(newSize)]; + Array.Copy(termBuffer, 0, newCharBuffer, 0, termBuffer.Length); + termBuffer = newCharBuffer; + } + } + return termBuffer; + } + + + /// <summary>Allocates a buffer char[] of at least newSize, without preserving the existing content. + /// its always used in places that set the content + /// </summary> + /// <param name="newSize">minimum size of the buffer + /// </param> + private void GrowTermBuffer(int newSize) + { + if (termBuffer == null) + { + // The buffer is always at least MIN_BUFFER_SIZE + termBuffer = new char[ArrayUtil.GetNextSize(newSize < MIN_BUFFER_SIZE?MIN_BUFFER_SIZE:newSize)]; + } + else + { + if (termBuffer.Length < newSize) + { + // Not big enough; create a new array with slight + // over allocation: + termBuffer = new char[ArrayUtil.GetNextSize(newSize)]; + } + } + } + + private void InitTermBuffer() + { + if (termBuffer == null) + { + termBuffer = new char[ArrayUtil.GetNextSize(MIN_BUFFER_SIZE)]; + termLength = 0; + } + } + + /// <summary>Return number of valid characters (length of the term) + /// in the termBuffer array. + /// </summary> + public virtual int TermLength() + { + return termLength; + } + + /// <summary>Set number of valid characters (length of the term) in + /// the termBuffer array. Use this to truncate the termBuffer + /// or to synchronize with external manipulation of the termBuffer. 
+ /// Note: to grow the size of the array, + /// use <see cref="ResizeTermBuffer(int)" /> first. + /// </summary> + /// <param name="length">the truncated length + /// </param> + public virtual void SetTermLength(int length) + { + InitTermBuffer(); + if (length > termBuffer.Length) + throw new System.ArgumentException("length " + length + " exceeds the size of the termBuffer (" + termBuffer.Length + ")"); + termLength = length; + } + + public override int GetHashCode() + { + InitTermBuffer(); + int code = termLength; + code = code * 31 + ArrayUtil.HashCode(termBuffer, 0, termLength); + return code; + } + + public override void Clear() + { + termLength = 0; + } + + public override System.Object Clone() + { + TermAttribute t = (TermAttribute) base.Clone(); + // Do a deep clone + if (termBuffer != null) + { + t.termBuffer = new char[termBuffer.Length]; + termBuffer.CopyTo(t.termBuffer, 0); + } + return t; + } + + public override bool Equals(System.Object other) + { + if (other == this) + { + return true; + } + + if (other is ITermAttribute) + { + InitTermBuffer(); + TermAttribute o = ((TermAttribute) other); + o.InitTermBuffer(); + + if (termLength != o.termLength) + return false; + for (int i = 0; i < termLength; i++) + { + if (termBuffer[i] != o.termBuffer[i]) + { + return false; + } + } + return true; + } + + return false; + } + + public override System.String ToString() + { + InitTermBuffer(); + return "term=" + new System.String(termBuffer, 0, termLength); + } + + public override void CopyTo(Attribute target) + { + InitTermBuffer(); + ITermAttribute t = (ITermAttribute) target; + t.SetTermBuffer(termBuffer, 0, termLength); + } + } +}
\ No newline at end of file diff --git a/src/core/Analysis/Tokenattributes/TypeAttribute.cs b/src/core/Analysis/Tokenattributes/TypeAttribute.cs new file mode 100644 index 0000000..1da1c50 --- /dev/null +++ b/src/core/Analysis/Tokenattributes/TypeAttribute.cs @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Attribute = Lucene.Net.Util.Attribute; + +namespace Lucene.Net.Analysis.Tokenattributes +{ + + /// <summary> A Token's lexical type. The Default value is "word". </summary> + [Serializable] + public class TypeAttribute:Attribute, ITypeAttribute, System.ICloneable + { + private System.String type; + public const System.String DEFAULT_TYPE = "word"; + + public TypeAttribute():this(DEFAULT_TYPE) + { + } + + public TypeAttribute(System.String type) + { + this.type = type; + } + + /// <summary>Returns this Token's lexical type. Defaults to "word". 
</summary> + public virtual string Type + { + get { return type; } + set { this.type = value; } + } + + public override void Clear() + { + type = DEFAULT_TYPE; + } + + public override bool Equals(System.Object other) + { + if (other == this) + { + return true; + } + + if (other is TypeAttribute) + { + return type.Equals(((TypeAttribute) other).type); + } + + return false; + } + + public override int GetHashCode() + { + return type.GetHashCode(); + } + + public override void CopyTo(Attribute target) + { + ITypeAttribute t = (ITypeAttribute) target; + t.Type = type; + } + + override public System.Object Clone() + { + TypeAttribute impl = new TypeAttribute(); + impl.type = type; + return impl; + } + } +}
\ No newline at end of file diff --git a/src/core/Analysis/Tokenizer.cs b/src/core/Analysis/Tokenizer.cs new file mode 100644 index 0000000..5ab741e --- /dev/null +++ b/src/core/Analysis/Tokenizer.cs @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using AttributeSource = Lucene.Net.Util.AttributeSource; + +namespace Lucene.Net.Analysis +{ + + /// <summary> A Tokenizer is a TokenStream whose input is a Reader. + /// <p/> + /// This is an abstract class; subclasses must override <see cref="TokenStream.IncrementToken()" /> + /// <p/> + /// NOTE: Subclasses overriding <see cref="TokenStream.IncrementToken()" /> must call + /// <see cref="AttributeSource.ClearAttributes()" /> before setting attributes. + /// </summary> + + public abstract class Tokenizer:TokenStream + { + /// <summary>The text source for this Tokenizer. </summary> + protected internal System.IO.TextReader input; + + private bool isDisposed; + + /// <summary>Construct a tokenizer with null input. </summary> + protected internal Tokenizer() + { + } + + /// <summary>Construct a token stream processing the given input. 
</summary> + protected internal Tokenizer(System.IO.TextReader input) + { + this.input = CharReader.Get(input); + } + + /// <summary>Construct a tokenizer with null input using the given AttributeFactory. </summary> + protected internal Tokenizer(AttributeFactory factory):base(factory) + { + } + + /// <summary>Construct a token stream processing the given input using the given AttributeFactory. </summary> + protected internal Tokenizer(AttributeFactory factory, System.IO.TextReader input):base(factory) + { + this.input = CharReader.Get(input); + } + + /// <summary>Construct a token stream processing the given input using the given AttributeSource. </summary> + protected internal Tokenizer(AttributeSource source):base(source) + { + } + + /// <summary>Construct a token stream processing the given input using the given AttributeSource. </summary> + protected internal Tokenizer(AttributeSource source, System.IO.TextReader input):base(source) + { + this.input = CharReader.Get(input); + } + + protected override void Dispose(bool disposing) + { + if (isDisposed) return; + + if (disposing) + { + if (input != null) + { + input.Close(); + } + } + + // LUCENE-2387: don't hold onto Reader after close, so + // GC can reclaim + input = null; + isDisposed = true; + } + + /// <summary>Return the corrected offset. If <see cref="input" /> is a <see cref="CharStream" /> subclass + /// this method calls <see cref="CharStream.CorrectOffset" />, else returns <c>currentOff</c>. + /// </summary> + /// <param name="currentOff">offset as seen in the output + /// </param> + /// <returns> corrected offset based on the input + /// </returns> + /// <seealso cref="CharStream.CorrectOffset"> + /// </seealso> + protected internal int CorrectOffset(int currentOff) + { + return (input is CharStream)?((CharStream) input).CorrectOffset(currentOff):currentOff; + } + + /// <summary>Expert: Reset the tokenizer to a new reader. 
Typically, an + /// analyzer (in its reusableTokenStream method) will use + /// this to re-use a previously created tokenizer. + /// </summary> + public virtual void Reset(System.IO.TextReader input) + { + this.input = input; + } + } +}
\ No newline at end of file diff --git a/src/core/Analysis/WhitespaceAnalyzer.cs b/src/core/Analysis/WhitespaceAnalyzer.cs new file mode 100644 index 0000000..77dbaa3 --- /dev/null +++ b/src/core/Analysis/WhitespaceAnalyzer.cs @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +namespace Lucene.Net.Analysis +{ + + /// <summary>An Analyzer that uses <see cref="WhitespaceTokenizer" />. </summary> + + public sealed class WhitespaceAnalyzer:Analyzer + { + public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader) + { + return new WhitespaceTokenizer(reader); + } + + public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader) + { + var tokenizer = (Tokenizer) PreviousTokenStream; + if (tokenizer == null) + { + tokenizer = new WhitespaceTokenizer(reader); + PreviousTokenStream = tokenizer; + } + else + tokenizer.Reset(reader); + return tokenizer; + } + } +}
\ No newline at end of file diff --git a/src/core/Analysis/WhitespaceTokenizer.cs b/src/core/Analysis/WhitespaceTokenizer.cs new file mode 100644 index 0000000..c96ad50 --- /dev/null +++ b/src/core/Analysis/WhitespaceTokenizer.cs @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using AttributeSource = Lucene.Net.Util.AttributeSource; + +namespace Lucene.Net.Analysis +{ + + /// <summary>A WhitespaceTokenizer is a tokenizer that divides text at whitespace. + /// Adjacent sequences of non-Whitespace characters form tokens. + /// </summary> + + public class WhitespaceTokenizer:CharTokenizer + { + /// <summary>Construct a new WhitespaceTokenizer. </summary> + public WhitespaceTokenizer(System.IO.TextReader @in) + : base(@in) + { + } + + /// <summary>Construct a new WhitespaceTokenizer using a given <see cref="AttributeSource" />. </summary> + public WhitespaceTokenizer(AttributeSource source, System.IO.TextReader @in) + : base(source, @in) + { + } + + /// <summary>Construct a new WhitespaceTokenizer using a given <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory" />. 
</summary> + public WhitespaceTokenizer(AttributeFactory factory, System.IO.TextReader @in) + : base(factory, @in) + { + } + + /// <summary>Collects only characters which do not satisfy + /// <see cref="char.IsWhiteSpace(char)" />. + /// </summary> + protected internal override bool IsTokenChar(char c) + { + return !System.Char.IsWhiteSpace(c); + } + } +}
\ No newline at end of file diff --git a/src/core/Analysis/WordlistLoader.cs b/src/core/Analysis/WordlistLoader.cs new file mode 100644 index 0000000..bfd1b07 --- /dev/null +++ b/src/core/Analysis/WordlistLoader.cs @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System.Collections.Generic; + +namespace Lucene.Net.Analysis +{ + + /// <summary> Loader for text files that represent a list of stopwords.</summary> + public class WordlistLoader + { + + /// <summary> Loads a text file and adds every line as an entry to a HashSet (omitting + /// leading and trailing whitespace). Every line of the file should contain only + /// one word. The words need to be in lowercase if you make use of an + /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer). 
+ /// </summary> + /// <param name="wordfile">File containing the wordlist</param> + /// <returns> A HashSet with the file's words</returns> + public static ISet<string> GetWordSet(System.IO.FileInfo wordfile) + { + using (var reader = new System.IO.StreamReader(wordfile.FullName, System.Text.Encoding.Default)) + { + return GetWordSet(reader); + } + } + + /// <summary> Loads a text file and adds every non-comment line as an entry to a HashSet (omitting + /// leading and trailing whitespace). Every line of the file should contain only + /// one word. The words need to be in lowercase if you make use of an + /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer). + /// </summary> + /// <param name="wordfile">File containing the wordlist</param> + /// <param name="comment">The comment string to ignore</param> + /// <returns> A HashSet with the file's words</returns> + public static ISet<string> GetWordSet(System.IO.FileInfo wordfile, System.String comment) + { + using (var reader = new System.IO.StreamReader(wordfile.FullName, System.Text.Encoding.Default)) + { + return GetWordSet(reader, comment); + } + } + + + /// <summary> Reads lines from a Reader and adds every line as an entry to a HashSet (omitting + /// leading and trailing whitespace). Every line of the Reader should contain only + /// one word. The words need to be in lowercase if you make use of an + /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer). 
+ /// </summary> + /// <param name="reader">Reader containing the wordlist</param> + /// <returns>A HashSet with the reader's words</returns> + public static ISet<string> GetWordSet(System.IO.TextReader reader) + { + var result = Support.Compatibility.SetFactory.CreateHashSet<string>(); + + System.String word; + while ((word = reader.ReadLine()) != null) + { + result.Add(word.Trim()); + } + + return result; + } + + /// <summary> Reads lines from a Reader and adds every non-comment line as an entry to a HashSet (omitting + /// leading and trailing whitespace). Every line of the Reader should contain only + /// one word. The words need to be in lowercase if you make use of an + /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer). + /// + /// </summary> + /// <param name="reader">Reader containing the wordlist + /// </param> + /// <param name="comment">The string representing a comment. + /// </param> + /// <returns> A HashSet with the reader's words + /// </returns> + public static ISet<string> GetWordSet(System.IO.TextReader reader, System.String comment) + { + var result = Support.Compatibility.SetFactory.CreateHashSet<string>(); + + System.String word = null; + while ((word = reader.ReadLine()) != null) + { + if (word.StartsWith(comment) == false) + { + result.Add(word.Trim()); + } + } + + return result; + } + + + + /// <summary> Reads a stem dictionary. Each line contains: + /// <c>word<b>\t</b>stem</c> + /// (i.e. 
two tab seperated words) + /// + /// </summary> + /// <returns> stem dictionary that overrules the stemming algorithm + /// </returns> + /// <throws> IOException </throws> + public static Dictionary<string, string> GetStemDict(System.IO.FileInfo wordstemfile) + { + if (wordstemfile == null) + throw new System.NullReferenceException("wordstemfile may not be null"); + var result = new Dictionary<string, string>(); + System.IO.StreamReader br = null; + System.IO.StreamReader fr = null; + try + { + fr = new System.IO.StreamReader(wordstemfile.FullName, System.Text.Encoding.Default); + br = new System.IO.StreamReader(fr.BaseStream, fr.CurrentEncoding); + System.String line; + char[] tab = {'\t'}; + while ((line = br.ReadLine()) != null) + { + System.String[] wordstem = line.Split(tab, 2); + result[wordstem[0]] = wordstem[1]; + } + } + finally + { + if (fr != null) + fr.Close(); + if (br != null) + br.Close(); + } + return result; + } + } +}
\ No newline at end of file |