Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mono/Lucene.Net.Light.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/Util/BitVector.cs')
-rw-r--r--src/core/Util/BitVector.cs315
1 files changed, 315 insertions, 0 deletions
diff --git a/src/core/Util/BitVector.cs b/src/core/Util/BitVector.cs
new file mode 100644
index 0000000..17b1212
--- /dev/null
+++ b/src/core/Util/BitVector.cs
@@ -0,0 +1,315 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Support;
+using Directory = Lucene.Net.Store.Directory;
+using IndexInput = Lucene.Net.Store.IndexInput;
+using IndexOutput = Lucene.Net.Store.IndexOutput;
+
+namespace Lucene.Net.Util
+{
+
+ /// <summary>Optimized implementation of a vector of bits. This is more-or-less like
+ /// java.util.BitSet, but also includes the following:
+ /// <list type="bullet">
+ /// <item>a count() method, which efficiently computes the number of one bits;</item>
+ /// <item>optimized read from and write to disk;</item>
+ /// <item>inlinable get() method;</item>
+ /// <item>store and load, as bit set or d-gaps, depending on sparseness;</item>
+ /// </list>
+ /// </summary>
+ public sealed class BitVector : ICloneable
+ {
+
+ private byte[] bits;
+ private int size;
+ private int count;
+
+ /// <summary>Constructs a vector capable of holding <c>n</c> bits. </summary>
+ public BitVector(int n)
+ {
+ size = n;
+ bits = new byte[(size >> 3) + 1];
+ count = 0;
+ }
+
+ internal BitVector(byte[] bits, int size)
+ {
+ this.bits = bits;
+ this.size = size;
+ count = -1;
+ }
+
+ public System.Object Clone()
+ {
+ byte[] copyBits = new byte[bits.Length];
+ Array.Copy(bits, 0, copyBits, 0, bits.Length);
+ BitVector clone = new BitVector(copyBits, size);
+ clone.count = count;
+ return clone;
+ }
+
+ /// <summary>Sets the value of <c>bit</c> to one. </summary>
+ public void Set(int bit)
+ {
+ if (bit >= size)
+ {
+ throw new System. IndexOutOfRangeException("Index of bound " + bit);
+ }
+ bits[bit >> 3] |= (byte) (1 << (bit & 7));
+ count = - 1;
+ }
+
+ /// <summary>Sets the value of <c>bit</c> to true, and
+ /// returns true if bit was already set
+ /// </summary>
+ public bool GetAndSet(int bit)
+ {
+ if (bit >= size)
+ {
+ throw new System. IndexOutOfRangeException("Index of bound " + bit);
+ }
+ int pos = bit >> 3;
+ int v = bits[pos];
+ int flag = 1 << (bit & 7);
+ if ((flag & v) != 0)
+ return true;
+ else
+ {
+ bits[pos] = (byte) (v | flag);
+ if (count != - 1)
+ count++;
+ return false;
+ }
+ }
+
+ /// <summary>Sets the value of <c>bit</c> to zero. </summary>
+ public void Clear(int bit)
+ {
+ if (bit >= size)
+ {
+ throw new System.IndexOutOfRangeException("Index of bound " + bit);
+ }
+ bits[bit >> 3] &= (byte) (~ (1 << (bit & 7)));
+ count = - 1;
+ }
+
+ /// <summary>Returns <c>true</c> if <c>bit</c> is one and
+ /// <c>false</c> if it is zero.
+ /// </summary>
+ public bool Get(int bit)
+ {
+ System.Diagnostics.Debug.Assert(bit >= 0 && bit < size, "bit " + bit + " is out of bounds 0.." +(size - 1));
+ return (bits[bit >> 3] & (1 << (bit & 7))) != 0;
+ }
+
+ /// <summary>Returns the number of bits in this vector. This is also one greater than
+ /// the number of the largest valid bit number.
+ /// </summary>
+ public int Size()
+ {
+ return size;
+ }
+
+ /// <summary>Returns the total number of one bits in this vector. This is efficiently
+ /// computed and cached, so that, if the vector is not changed, no
+ /// recomputation is done for repeated calls.
+ /// </summary>
+ public int Count()
+ {
+ // if the vector has been modified
+ if (count == - 1)
+ {
+ int c = 0;
+ int end = bits.Length;
+ for (int i = 0; i < end; i++)
+ c += BYTE_COUNTS[bits[i] & 0xFF]; // sum bits per byte
+ count = c;
+ }
+ return count;
+ }
+
+ /// <summary>
+ /// For testing
+ /// </summary>
+ [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
+ public int GetRecomputedCount()
+ {
+ int c = 0;
+ int end = bits.Length;
+ for (int i = 0; i < end; i++)
+ c += BYTE_COUNTS[bits[i] & 0xFF]; // sum bits per byte
+ return c;
+ }
+
+ private static readonly byte[] BYTE_COUNTS = new byte[]{0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
+
+
+ /// <summary>Writes this vector to the file <c>name</c> in Directory
+ /// <c>d</c>, in a format that can be read by the constructor
+ /// <see cref="BitVector(Directory, String)" />.
+ /// </summary>
+ public void Write(Directory d, System.String name)
+ {
+ IndexOutput output = d.CreateOutput(name);
+ try
+ {
+ if (IsSparse())
+ {
+ WriteDgaps(output); // sparse bit-set more efficiently saved as d-gaps.
+ }
+ else
+ {
+ WriteBits(output);
+ }
+ }
+ finally
+ {
+ output.Close();
+ }
+ }
+
+ /// <summary>Write as a bit set </summary>
+ private void WriteBits(IndexOutput output)
+ {
+ output.WriteInt(Size()); // write size
+ output.WriteInt(Count()); // write count
+ output.WriteBytes(bits, bits.Length);
+ }
+
+ /// <summary>Write as a d-gaps list </summary>
+ private void WriteDgaps(IndexOutput output)
+ {
+ output.WriteInt(- 1); // mark using d-gaps
+ output.WriteInt(Size()); // write size
+ output.WriteInt(Count()); // write count
+ int last = 0;
+ int n = Count();
+ int m = bits.Length;
+ for (int i = 0; i < m && n > 0; i++)
+ {
+ if (bits[i] != 0)
+ {
+ output.WriteVInt(i - last);
+ output.WriteByte(bits[i]);
+ last = i;
+ n -= BYTE_COUNTS[bits[i] & 0xFF];
+ }
+ }
+ }
+
+ /// <summary>Indicates if the bit vector is sparse and should be saved as a d-gaps list, or dense, and should be saved as a bit set. </summary>
+ private bool IsSparse()
+ {
+ // note: order of comparisons below set to favor smaller values (no binary range search.)
+ // note: adding 4 because we start with ((int) -1) to indicate d-gaps format.
+ // note: we write the d-gap for the byte number, and the byte (bits[i]) itself, therefore
+ // multiplying count by (8+8) or (8+16) or (8+24) etc.:
+ // - first 8 for writing bits[i] (1 byte vs. 1 bit), and
+ // - second part for writing the byte-number d-gap as vint.
+ // note: factor is for read/write of byte-arrays being faster than vints.
+ int factor = 10;
+ if (bits.Length < (1 << 7))
+ return factor * (4 + (8 + 8) * Count()) < Size();
+ if (bits.Length < (1 << 14))
+ return factor * (4 + (8 + 16) * Count()) < Size();
+ if (bits.Length < (1 << 21))
+ return factor * (4 + (8 + 24) * Count()) < Size();
+ if (bits.Length < (1 << 28))
+ return factor * (4 + (8 + 32) * Count()) < Size();
+ return factor * (4 + (8 + 40) * Count()) < Size();
+ }
+
+ /// <summary>Constructs a bit vector from the file <c>name</c> in Directory
+ /// <c>d</c>, as written by the <see cref="Write" /> method.
+ /// </summary>
+ public BitVector(Directory d, System.String name)
+ {
+ IndexInput input = d.OpenInput(name);
+ try
+ {
+ size = input.ReadInt(); // read size
+ if (size == - 1)
+ {
+ ReadDgaps(input);
+ }
+ else
+ {
+ ReadBits(input);
+ }
+ }
+ finally
+ {
+ input.Close();
+ }
+ }
+
+ /// <summary>Read as a bit set </summary>
+ private void ReadBits(IndexInput input)
+ {
+ count = input.ReadInt(); // read count
+ bits = new byte[(size >> 3) + 1]; // allocate bits
+ input.ReadBytes(bits, 0, bits.Length);
+ }
+
+ /// <summary>read as a d-gaps list </summary>
+ private void ReadDgaps(IndexInput input)
+ {
+ size = input.ReadInt(); // (re)read size
+ count = input.ReadInt(); // read count
+ bits = new byte[(size >> 3) + 1]; // allocate bits
+ int last = 0;
+ int n = Count();
+ while (n > 0)
+ {
+ last += input.ReadVInt();
+ bits[last] = input.ReadByte();
+ n -= BYTE_COUNTS[bits[last] & 0xFF];
+ }
+ }
+
+ /// <summary> Retrieve a subset of this BitVector.
+ ///
+ /// </summary>
+ /// <param name="start">starting index, inclusive
+ /// </param>
+ /// <param name="end">ending index, exclusive
+ /// </param>
+ /// <returns> subset
+ /// </returns>
+ public BitVector Subset(int start, int end)
+ {
+ if (start < 0 || end > Size() || end < start)
+ throw new System.IndexOutOfRangeException();
+ // Special case -- return empty vector is start == end
+ if (end == start)
+ return new BitVector(0);
+ byte[] bits = new byte[(Number.URShift((end - start - 1), 3)) + 1];
+ int s = Number.URShift(start, 3);
+ for (int i = 0; i < bits.Length; i++)
+ {
+ int cur = 0xFF & this.bits[i + s];
+ int next = i + s + 1 >= this.bits.Length?0:0xFF & this.bits[i + s + 1];
+ bits[i] = (byte) ((Number.URShift(cur, (start & 7))) | ((next << (8 - (start & 7)))));
+ }
+ int bitsToClear = (bits.Length * 8 - (end - start)) % 8;
+ bits[bits.Length - 1] &= (byte) (~ (0xFF << (8 - bitsToClear)));
+ return new BitVector(bits, end - start);
+ }
+ }
+} \ No newline at end of file