diff options
Diffstat (limited to 'src/core/Analysis/Tokenizer.cs')
-rw-r--r-- | src/core/Analysis/Tokenizer.cs | 112 |
1 files changed, 112 insertions, 0 deletions
diff --git a/src/core/Analysis/Tokenizer.cs b/src/core/Analysis/Tokenizer.cs new file mode 100644 index 0000000..5ab741e --- /dev/null +++ b/src/core/Analysis/Tokenizer.cs @@ -0,0 +1,112 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using AttributeSource = Lucene.Net.Util.AttributeSource;

namespace Lucene.Net.Analysis
{

    /// <summary> A Tokenizer is a TokenStream whose input is a Reader.
    /// <p/>
    /// This is an abstract class; subclasses must override <see cref="TokenStream.IncrementToken()" />
    /// <p/>
    /// NOTE: Subclasses overriding <see cref="TokenStream.IncrementToken()" /> must call
    /// <see cref="AttributeSource.ClearAttributes()" /> before setting attributes.
    /// </summary>
    public abstract class Tokenizer : TokenStream
    {
        /// <summary>The text source for this Tokenizer. Set to <c>null</c> once the tokenizer has been disposed. </summary>
        protected internal System.IO.TextReader input;

        // True once Dispose(bool) has run for the current reader; cleared again by Reset(TextReader)
        // so that a re-used tokenizer also releases the reader it was re-armed with.
        private bool isDisposed;

        /// <summary>Construct a tokenizer with null input. </summary>
        protected internal Tokenizer()
        {
        }

        /// <summary>Construct a token stream processing the given input. The reader is
        /// passed through <c>CharReader.Get</c> before it is stored.
        /// </summary>
        /// <param name="input">the reader to tokenize
        /// </param>
        protected internal Tokenizer(System.IO.TextReader input)
        {
            this.input = CharReader.Get(input);
        }

        /// <summary>Construct a tokenizer with null input using the given AttributeFactory. </summary>
        /// <param name="factory">forwarded to the base <see cref="TokenStream" /> constructor
        /// </param>
        protected internal Tokenizer(AttributeFactory factory) : base(factory)
        {
        }

        /// <summary>Construct a token stream processing the given input using the given AttributeFactory.
        /// The reader is passed through <c>CharReader.Get</c> before it is stored.
        /// </summary>
        /// <param name="factory">forwarded to the base <see cref="TokenStream" /> constructor
        /// </param>
        /// <param name="input">the reader to tokenize
        /// </param>
        protected internal Tokenizer(AttributeFactory factory, System.IO.TextReader input) : base(factory)
        {
            this.input = CharReader.Get(input);
        }

        /// <summary>Construct a tokenizer with null input using the given AttributeSource. </summary>
        /// <param name="source">forwarded to the base <see cref="TokenStream" /> constructor
        /// </param>
        protected internal Tokenizer(AttributeSource source) : base(source)
        {
        }

        /// <summary>Construct a token stream processing the given input using the given AttributeSource.
        /// The reader is passed through <c>CharReader.Get</c> before it is stored.
        /// </summary>
        /// <param name="source">forwarded to the base <see cref="TokenStream" /> constructor
        /// </param>
        /// <param name="input">the reader to tokenize
        /// </param>
        protected internal Tokenizer(AttributeSource source, System.IO.TextReader input) : base(source)
        {
            this.input = CharReader.Get(input);
        }

        /// <summary>Releases the current reader. Calling this again without an intervening
        /// <see cref="Reset(System.IO.TextReader)" /> is a no-op.
        /// </summary>
        /// <param name="disposing"><c>true</c> to also close the reader; when <c>false</c> the
        /// reference is only dropped
        /// </param>
        protected override void Dispose(bool disposing)
        {
            if (isDisposed) return;

            if (disposing)
            {
                if (input != null)
                {
                    input.Close();
                }
            }

            // LUCENE-2387: don't hold onto Reader after close, so
            // GC can reclaim
            input = null;
            isDisposed = true;
        }

        /// <summary>Return the corrected offset. If <see cref="input" /> is a <see cref="CharStream" /> subclass
        /// this method calls <see cref="CharStream.CorrectOffset" />, else returns <c>currentOff</c>.
        /// </summary>
        /// <param name="currentOff">offset as seen in the output
        /// </param>
        /// <returns> corrected offset based on the input
        /// </returns>
        /// <seealso cref="CharStream.CorrectOffset">
        /// </seealso>
        protected internal int CorrectOffset(int currentOff)
        {
            // A single 'as' conversion replaces the former type test followed by a cast.
            // A null input (never set, or already disposed) falls through to the unchanged offset.
            CharStream charStream = input as CharStream;
            return (charStream != null) ? charStream.CorrectOffset(currentOff) : currentOff;
        }

        /// <summary>Expert: Reset the tokenizer to a new reader. Typically, an
        /// analyzer (in its reusableTokenStream method) will use
        /// this to re-use a previously created tokenizer.
        /// <p/>
        /// The reader is stored as given; unlike the constructors, it is not passed
        /// through <c>CharReader.Get</c>.
        /// </summary>
        /// <param name="input">the new reader to tokenize
        /// </param>
        public virtual void Reset(System.IO.TextReader input)
        {
            this.input = input;

            // Re-arm disposal: without this, a tokenizer that was disposed once and then
            // re-used would skip Dispose(bool) forever, so the new reader would never be
            // closed and would stay referenced.
            isDisposed = false;
        }
    }
}
\ No newline at end of file |