Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mono/Lucene.Net.Light.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/Analysis/KeywordTokenizer.cs')
-rw-r--r-- src/core/Analysis/KeywordTokenizer.cs 99
1 files changed, 99 insertions, 0 deletions
diff --git a/src/core/Analysis/KeywordTokenizer.cs b/src/core/Analysis/KeywordTokenizer.cs
new file mode 100644
index 0000000..f97ff95
--- /dev/null
+++ b/src/core/Analysis/KeywordTokenizer.cs
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Lucene.Net.Analysis.Tokenattributes;
+using AttributeSource = Lucene.Net.Util.AttributeSource;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// <summary>
+ /// Tokenizer that emits the entire input as a single token.
+ /// </summary>
+ /// <remarks>
+ /// The first <see cref="IncrementToken"/> call after construction (or after
+ /// <see cref="Reset(System.IO.TextReader)"/>) reads the reader to its end and
+ /// publishes everything read as one term; later calls return <c>false</c>.
+ /// </remarks>
+ public sealed class KeywordTokenizer : Tokenizer
+ {
+
+     /// <summary>Term buffer size requested when the caller does not supply one.</summary>
+     private const int DEFAULT_BUFFER_SIZE = 256;
+
+     // True once the single token has been produced for the current input.
+     private bool done;
+
+     // Corrected end offset of the emitted token; reported again by End().
+     private int finalOffset;
+
+     // Attribute that holds the character buffer of the emitted term.
+     private ITermAttribute termAtt;
+
+     // Attribute that receives the start/end offsets of the emitted term.
+     private IOffsetAttribute offsetAtt;
+
+     /// <summary>Creates a tokenizer over <paramref name="input"/> using the default buffer size.</summary>
+     /// <param name="input">Reader supplying the text to emit as one token.</param>
+     public KeywordTokenizer(System.IO.TextReader input)
+         : this(input, DEFAULT_BUFFER_SIZE)
+     {
+     }
+
+     /// <summary>Creates a tokenizer over <paramref name="input"/>.</summary>
+     /// <param name="input">Reader supplying the text to emit as one token.</param>
+     /// <param name="bufferSize">Size passed to the term attribute's <c>ResizeTermBuffer</c> up front.</param>
+     public KeywordTokenizer(System.IO.TextReader input, int bufferSize)
+         : base(input)
+     {
+         Init(bufferSize);
+     }
+
+     /// <summary>Creates a tokenizer over <paramref name="input"/>, forwarding <paramref name="source"/> to the base constructor.</summary>
+     /// <param name="source">Attribute source handed to the base <c>Tokenizer</c>.</param>
+     /// <param name="input">Reader supplying the text to emit as one token.</param>
+     /// <param name="bufferSize">Size passed to the term attribute's <c>ResizeTermBuffer</c> up front.</param>
+     public KeywordTokenizer(AttributeSource source, System.IO.TextReader input, int bufferSize)
+         : base(source, input)
+     {
+         Init(bufferSize);
+     }
+
+     /// <summary>Creates a tokenizer over <paramref name="input"/>, forwarding <paramref name="factory"/> to the base constructor.</summary>
+     /// <param name="factory">Attribute factory handed to the base <c>Tokenizer</c>.</param>
+     /// <param name="input">Reader supplying the text to emit as one token.</param>
+     /// <param name="bufferSize">Size passed to the term attribute's <c>ResizeTermBuffer</c> up front.</param>
+     public KeywordTokenizer(AttributeFactory factory, System.IO.TextReader input, int bufferSize)
+         : base(factory, input)
+     {
+         Init(bufferSize);
+     }
+
+     /// <summary>
+     /// Shared constructor tail: registers the term and offset attributes and
+     /// asks the term attribute to size its buffer to <paramref name="bufferSize"/>.
+     /// </summary>
+     private void Init(int bufferSize)
+     {
+         done = false;
+         termAtt = AddAttribute<ITermAttribute>();
+         offsetAtt = AddAttribute<IOffsetAttribute>();
+         termAtt.ResizeTermBuffer(bufferSize);
+     }
+
+     /// <summary>
+     /// Reads all remaining input into the term buffer and publishes it as one token.
+     /// </summary>
+     /// <returns>
+     /// <c>true</c> on the first call for the current input (even when the input
+     /// is empty, in which case the term length is 0); <c>false</c> on every
+     /// later call until <see cref="Reset(System.IO.TextReader)"/> is invoked.
+     /// </returns>
+     public override bool IncrementToken()
+     {
+         if (done)
+         {
+             return false;
+         }
+
+         ClearAttributes();
+         // Marked before reading, so a failed read is not retried by a later call.
+         done = true;
+
+         int upto = 0;
+         char[] buffer = termAtt.TermBuffer();
+         while (true)
+         {
+             int length = input.Read(buffer, upto, buffer.Length - upto);
+             // TextReader.Read reports 0 at end of input. A negative count is
+             // treated as end of input too, so a reader using a -1 sentinel
+             // cannot push 'upto' backwards and spin this loop forever.
+             if (length <= 0)
+             {
+                 break;
+             }
+
+             upto += length;
+             if (upto == buffer.Length)
+             {
+                 // Buffer is full: request room for at least one more char and
+                 // continue with the buffer the attribute hands back.
+                 buffer = termAtt.ResizeTermBuffer(1 + buffer.Length);
+             }
+         }
+
+         termAtt.SetTermLength(upto);
+         finalOffset = CorrectOffset(upto);
+         offsetAtt.SetOffset(CorrectOffset(0), finalOffset);
+         return true;
+     }
+
+     /// <summary>
+     /// Sets both the start and end offset to the final offset recorded by
+     /// <see cref="IncrementToken"/>.
+     /// </summary>
+     public override void End()
+     {
+         // set final offset
+         offsetAtt.SetOffset(finalOffset, finalOffset);
+     }
+
+     /// <summary>
+     /// Switches to a new reader and re-arms the tokenizer so the next
+     /// <see cref="IncrementToken"/> call emits a token again.
+     /// </summary>
+     /// <param name="input">The new reader, forwarded to the base <c>Reset</c>.</param>
+     public override void Reset(System.IO.TextReader input)
+     {
+         base.Reset(input);
+         done = false;
+     }
+ }
+}
\ No newline at end of file