Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mono/Lucene.Net.Light.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/Analysis/CharArraySet.cs')
-rw-r--r--src/core/Analysis/CharArraySet.cs517
1 files changed, 517 insertions, 0 deletions
diff --git a/src/core/Analysis/CharArraySet.cs b/src/core/Analysis/CharArraySet.cs
new file mode 100644
index 0000000..e7df0ba
--- /dev/null
+++ b/src/core/Analysis/CharArraySet.cs
@@ -0,0 +1,517 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections;
+using System.Linq;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis
+{
+ /// <summary> A simple class that stores Strings as char[]'s in a
+ /// hash table. Note that this is not a general purpose
+ /// class. For example, it cannot remove items from the
+ /// set, nor does it resize its hash table to be smaller,
+ /// etc. It is designed to be quick to test if a char[]
+ /// is in the set without the necessity of converting it
+ /// to a String first.
+ /// <p/>
+ /// <em>Please note:</em> This class implements <see cref="System.Collections.Generic.ISet{T}"/> but
+ /// does not behave like it should in all cases. The generic type is
+ /// <see cref="System.Collections.Generic.ICollection{T}"/>, because you can add any object to it,
+ /// that has a string representation. The add methods will use
+ /// <see cref="object.ToString()"/> and store the result using a <see cref="char"/>
+ /// buffer. The same behaviour have the <see cref="Contains(object)"/> methods.
+ /// The <see cref="GetEnumerator"/> method returns an <see cref="string"/> IEnumerable.
+ /// For type safety also {@link #stringIterator()} is provided.
+ /// </summary>
+ // TODO: java uses wildcards, .net doesn't have this, easiest way is to
+ // make the entire class generic. Ultimately, though, since this
+ // works with strings, I can't think of a reason not to just declare
+ // this as an ISet<string>.
+ public class CharArraySet : ISet<string>
+ {
+ bool _ReadOnly = false;
+ const int INIT_SIZE = 8;
+ char[][] _Entries;
+ int _Count;
+ bool _IgnoreCase;
+ public static CharArraySet EMPTY_SET = UnmodifiableSet(new CharArraySet(0, false));
+
+ private void Init(int startSize, bool ignoreCase)
+ {
+ this._IgnoreCase = ignoreCase;
+ int size = INIT_SIZE;
+ while (startSize + (startSize >> 2) > size)
+ size <<= 1;
+ _Entries = new char[size][];
+ }
+
+ /// <summary>Create set with enough capacity to hold startSize
+ /// terms
+ /// </summary>
+ public CharArraySet(int startSize, bool ignoreCase)
+ {
+ Init(startSize, ignoreCase);
+ }
+
+ public CharArraySet(IEnumerable<string> c, bool ignoreCase)
+ {
+ Init(c.Count(), ignoreCase);
+ AddItems(c);
+ }
+
+ /// <summary>Create set from a Collection of char[] or String </summary>
+ public CharArraySet(IEnumerable<object> c, bool ignoreCase)
+ {
+ Init(c.Count(), ignoreCase);
+ AddItems(c);
+ }
+
+ private void AddItems<T>(IEnumerable<T> items)
+ {
+ foreach(var item in items)
+ {
+ Add(item.ToString());
+ }
+ }
+
+ /// <summary>Create set from entries </summary>
+ private CharArraySet(char[][] entries, bool ignoreCase, int count)
+ {
+ this._Entries = entries;
+ this._IgnoreCase = ignoreCase;
+ this._Count = count;
+ }
+
+ /// <summary>true if the <c>len</c> chars of <c>text</c> starting at <c>off</c>
+ /// are in the set
+ /// </summary>
+ public virtual bool Contains(char[] text, int off, int len)
+ {
+ return _Entries[GetSlot(text, off, len)] != null;
+ }
+
+ public virtual bool Contains(string text)
+ {
+ return _Entries[GetSlot(text)] != null;
+ }
+
+
+ private int GetSlot(char[] text, int off, int len)
+ {
+ int code = GetHashCode(text, off, len);
+ int pos = code & (_Entries.Length - 1);
+ char[] text2 = _Entries[pos];
+ if (text2 != null && !Equals(text, off, len, text2))
+ {
+ int inc = ((code >> 8) + code) | 1;
+ do
+ {
+ code += inc;
+ pos = code & (_Entries.Length - 1);
+ text2 = _Entries[pos];
+ }
+ while (text2 != null && !Equals(text, off, len, text2));
+ }
+ return pos;
+ }
+
+ /// <summary>Returns true if the String is in the set </summary>
+ private int GetSlot(string text)
+ {
+ int code = GetHashCode(text);
+ int pos = code & (_Entries.Length - 1);
+ char[] text2 = _Entries[pos];
+ if (text2 != null && !Equals(text, text2))
+ {
+ int inc = ((code >> 8) + code) | 1;
+ do
+ {
+ code += inc;
+ pos = code & (_Entries.Length - 1);
+ text2 = _Entries[pos];
+ }
+ while (text2 != null && !Equals(text, text2));
+ }
+ return pos;
+ }
+
+ public bool Add(string text)
+ {
+ if (_ReadOnly) throw new NotSupportedException();
+ return Add(text.ToCharArray());
+ }
+
+ /// <summary>Add this char[] directly to the set.
+ /// If ignoreCase is true for this Set, the text array will be directly modified.
+ /// The user should never modify this text array after calling this method.
+ /// </summary>
+ public bool Add(char[] text)
+ {
+ if (_ReadOnly) throw new NotSupportedException();
+
+ if (_IgnoreCase)
+ for (int i = 0; i < text.Length; i++)
+ text[i] = Char.ToLower(text[i]);
+ int slot = GetSlot(text, 0, text.Length);
+ if (_Entries[slot] != null)
+ return false;
+ _Entries[slot] = text;
+ _Count++;
+
+ if (_Count + (_Count >> 2) > _Entries.Length)
+ {
+ Rehash();
+ }
+
+ return true;
+ }
+
+ private bool Equals(char[] text1, int off, int len, char[] text2)
+ {
+ if (len != text2.Length)
+ return false;
+ if (_IgnoreCase)
+ {
+ for (int i = 0; i < len; i++)
+ {
+ if (char.ToLower(text1[off + i]) != text2[i])
+ return false;
+ }
+ }
+ else
+ {
+ for (int i = 0; i < len; i++)
+ {
+ if (text1[off + i] != text2[i])
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private bool Equals(string text1, char[] text2)
+ {
+ int len = text1.Length;
+ if (len != text2.Length)
+ return false;
+ if (_IgnoreCase)
+ {
+ for (int i = 0; i < len; i++)
+ {
+ if (char.ToLower(text1[i]) != text2[i])
+ return false;
+ }
+ }
+ else
+ {
+ for (int i = 0; i < len; i++)
+ {
+ if (text1[i] != text2[i])
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private void Rehash()
+ {
+ int newSize = 2 * _Entries.Length;
+ char[][] oldEntries = _Entries;
+ _Entries = new char[newSize][];
+
+ for (int i = 0; i < oldEntries.Length; i++)
+ {
+ char[] text = oldEntries[i];
+ if (text != null)
+ {
+ // todo: could be faster... no need to compare strings on collision
+ _Entries[GetSlot(text, 0, text.Length)] = text;
+ }
+ }
+ }
+
+ private int GetHashCode(char[] text, int offset, int len)
+ {
+ int code = 0;
+ int stop = offset + len;
+ if (_IgnoreCase)
+ {
+ for (int i = offset; i < stop; i++)
+ {
+ code = code * 31 + char.ToLower(text[i]);
+ }
+ }
+ else
+ {
+ for (int i = offset; i < stop; i++)
+ {
+ code = code * 31 + text[i];
+ }
+ }
+ return code;
+ }
+
+ private int GetHashCode(string text)
+ {
+ int code = 0;
+ int len = text.Length;
+ if (_IgnoreCase)
+ {
+ for (int i = 0; i < len; i++)
+ {
+ code = code * 31 + char.ToLower(text[i]);
+ }
+ }
+ else
+ {
+ for (int i = 0; i < len; i++)
+ {
+ code = code * 31 + text[i];
+ }
+ }
+ return code;
+ }
+
+ public int Count
+ {
+ get { return _Count; }
+ }
+
+ public bool IsEmpty
+ {
+ get { return _Count == 0; }
+ }
+
+ public bool Contains(object item)
+ {
+ var text = item as char[];
+ return text != null ? Contains(text, 0, text.Length) : Contains(item.ToString());
+ }
+
+ public bool Add(object item)
+ {
+ return Add(item.ToString());
+ }
+
+ void ICollection<string>.Add(string item)
+ {
+ this.Add(item);
+ }
+
+ /// <summary>
+ /// Returns an unmodifiable <see cref="CharArraySet"/>. This allows to provide
+ /// unmodifiable views of internal sets for "read-only" use
+ /// </summary>
+ /// <param name="set">A Set for which the unmodifiable set it returns.</param>
+ /// <returns>A new unmodifiable <see cref="CharArraySet"/></returns>
+ /// <throws>ArgumentNullException of the given set is <c>null</c></throws>
+ public static CharArraySet UnmodifiableSet(CharArraySet set)
+ {
+ if(set == null)
+ throw new ArgumentNullException("Given set is null");
+ if (set == EMPTY_SET)
+ return EMPTY_SET;
+ if (set._ReadOnly)
+ return set;
+
+ var newSet = new CharArraySet(set._Entries, set._IgnoreCase, set.Count) {IsReadOnly = true};
+ return newSet;
+ }
+
+ /// <summary>
+ /// returns a copy of the given set as a <see cref="CharArraySet"/>. If the given set
+ /// is a <see cref="CharArraySet"/> the ignoreCase property will be preserved.
+ /// </summary>
+ /// <param name="set">A set to copy</param>
+ /// <returns>a copy of the given set as a <see cref="CharArraySet"/>. If the given set
+ /// is a <see cref="CharArraySet"/> the ignoreCase property will be preserved.</returns>
+ public static CharArraySet Copy<T>(ISet<T> set)
+ {
+ if (set == null)
+ throw new ArgumentNullException("set", "Given set is null!");
+ if (set == EMPTY_SET)
+ return EMPTY_SET;
+ bool ignoreCase = set is CharArraySet && ((CharArraySet)set)._IgnoreCase;
+ var arrSet = new CharArraySet(set.Count, ignoreCase);
+ arrSet.AddItems(set);
+ return arrSet;
+ }
+
+ public void Clear()
+ {
+ throw new NotSupportedException("Remove not supported!");
+ }
+
+ public bool IsReadOnly
+ {
+ get { return _ReadOnly; }
+ private set { _ReadOnly = value; }
+ }
+
+ /// <summary>Adds all of the elements in the specified collection to this collection </summary>
+ public void UnionWith(IEnumerable<string> other)
+ {
+ if (_ReadOnly) throw new NotSupportedException();
+
+ foreach (string s in other)
+ {
+ Add(s.ToCharArray());
+ }
+ }
+
+ /// <summary>Wrapper that calls UnionWith</summary>
+ public void AddAll(IEnumerable<string> coll)
+ {
+ UnionWith(coll);
+ }
+
+ #region Unneeded methods
+ public void RemoveAll(ICollection<string> c)
+ {
+ throw new NotSupportedException();
+ }
+
+ public void RetainAll(ICollection<string> c)
+ {
+ throw new NotSupportedException();
+ }
+
+ void ICollection<string>.CopyTo(string[] array, int arrayIndex)
+ {
+ throw new NotSupportedException();
+ }
+
+ void ISet<string>.IntersectWith(IEnumerable<string> other)
+ {
+ throw new NotSupportedException();
+ }
+
+ void ISet<string>.ExceptWith(IEnumerable<string> other)
+ {
+ throw new NotSupportedException();
+ }
+
+ void ISet<string>.SymmetricExceptWith(IEnumerable<string> other)
+ {
+ throw new NotSupportedException();
+ }
+
+ bool ISet<string>.IsSubsetOf(IEnumerable<string> other)
+ {
+ throw new NotSupportedException();
+ }
+
+ bool ISet<string>.IsSupersetOf(IEnumerable<string> other)
+ {
+ throw new NotSupportedException();
+ }
+
+ bool ISet<string>.IsProperSupersetOf(IEnumerable<string> other)
+ {
+ throw new NotSupportedException();
+ }
+
+ bool ISet<string>.IsProperSubsetOf(IEnumerable<string> other)
+ {
+ throw new NotSupportedException();
+ }
+
+ bool ISet<string>.Overlaps(IEnumerable<string> other)
+ {
+ throw new NotSupportedException();
+ }
+
+ bool ISet<string>.SetEquals(IEnumerable<string> other)
+ {
+ throw new NotSupportedException();
+ }
+
+ bool ICollection<string>.Remove(string item)
+ {
+ throw new NotSupportedException();
+ }
+ #endregion
+
+ /// <summary>
+ /// The IEnumerator&lt;String&gt; for this set. Strings are constructed on the fly,
+ /// so use <c>nextCharArray</c> for more efficient access
+ /// </summary>
+ public class CharArraySetEnumerator : IEnumerator<string>
+ {
+ readonly CharArraySet _Creator;
+ int pos = -1;
+ char[] cur;
+
+ protected internal CharArraySetEnumerator(CharArraySet creator)
+ {
+ _Creator = creator;
+ }
+
+ public bool MoveNext()
+ {
+ cur = null;
+ pos++;
+ while (pos < _Creator._Entries.Length && (cur = _Creator._Entries[pos]) == null)
+ pos++;
+ return cur != null;
+ }
+
+ /// <summary>do not modify the returned char[] </summary>
+ public char[] NextCharArray()
+ {
+ return cur;
+ }
+
+ public string Current
+ {
+ get { return new string(NextCharArray()); }
+ }
+
+ public void Dispose()
+ {
+ }
+
+ object IEnumerator.Current
+ {
+ get { return new string(NextCharArray()); }
+ }
+
+ public void Reset()
+ {
+ throw new NotImplementedException();
+ }
+ }
+
+ public IEnumerator<string> StringEnumerator()
+ {
+ return new CharArraySetEnumerator(this);
+ }
+
+ public IEnumerator<string> GetEnumerator()
+ {
+ return new CharArraySetEnumerator(this);
+ }
+
+ IEnumerator IEnumerable.GetEnumerator()
+ {
+ return GetEnumerator();
+ }
+ }
+
+} \ No newline at end of file