diff options
Diffstat (limited to 'src/core/Index/MergeDocIDRemapper.cs')
-rw-r--r-- | src/core/Index/MergeDocIDRemapper.cs | 127 |
1 files changed, 127 insertions, 0 deletions
diff --git a/src/core/Index/MergeDocIDRemapper.cs b/src/core/Index/MergeDocIDRemapper.cs new file mode 100644 index 0000000..2771b53 --- /dev/null +++ b/src/core/Index/MergeDocIDRemapper.cs @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Support; + +namespace Lucene.Net.Index +{ + + /// <summary>Remaps docIDs after a merge has completed, where the + /// merged segments had at least one deletion. This is used + /// to renumber the buffered deletes in IndexWriter when a + /// merge of segments with deletions commits. + /// </summary> + + sealed class MergeDocIDRemapper + { + internal int[] starts; // used for binary search of mapped docID + internal int[] newStarts; // starts, minus the deletes + internal int[][] docMaps; // maps docIDs in the merged set + internal int minDocID; // minimum docID that needs renumbering + internal int maxDocID; // 1+ the max docID that needs renumbering + internal int docShift; // total # deleted docs that were compacted by this merge + + public MergeDocIDRemapper(SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergedDocCount) + { + this.docMaps = docMaps; + SegmentInfo firstSegment = merge.segments.Info(0); + int i = 0; + while (true) + { + SegmentInfo info = infos.Info(i); + if (info.Equals(firstSegment)) + break; + minDocID += info.docCount; + i++; + } + + int numDocs = 0; + for (int j = 0; j < docMaps.Length; i++, j++) + { + numDocs += infos.Info(i).docCount; + System.Diagnostics.Debug.Assert(infos.Info(i).Equals(merge.segments.Info(j))); + } + maxDocID = minDocID + numDocs; + + starts = new int[docMaps.Length]; + newStarts = new int[docMaps.Length]; + + starts[0] = minDocID; + newStarts[0] = minDocID; + for (i = 1; i < docMaps.Length; i++) + { + int lastDocCount = merge.segments.Info(i - 1).docCount; + starts[i] = starts[i - 1] + lastDocCount; + newStarts[i] = newStarts[i - 1] + lastDocCount - delCounts[i - 1]; + } + docShift = numDocs - mergedDocCount; + + // There are rare cases when docShift is 0. It happens + // if you try to delete a docID that's out of bounds, + // because the SegmentReader still allocates deletedDocs + // and pretends it has deletions ... so we can't make + // this assert here + // assert docShift > 0; + + // Make sure it all adds up: + System.Diagnostics.Debug.Assert(docShift == maxDocID -(newStarts [docMaps.Length - 1] + merge.segments.Info(docMaps.Length - 1).docCount - delCounts [docMaps.Length - 1])); + } + + public int Remap(int oldDocID) + { + if (oldDocID < minDocID) + // Unaffected by merge + return oldDocID; + else if (oldDocID >= maxDocID) + // This doc was "after" the merge, so simple shift + return oldDocID - docShift; + else + { + // Binary search to locate this document & find its new docID + int lo = 0; // search starts array + int hi = docMaps.Length - 1; // for first element less + + while (hi >= lo) + { + int mid = Number.URShift((lo + hi), 1); + int midValue = starts[mid]; + if (oldDocID < midValue) + hi = mid - 1; + else if (oldDocID > midValue) + lo = mid + 1; + else + { + // found a match + while (mid + 1 < docMaps.Length && starts[mid + 1] == midValue) + { + mid++; // scan to last match + } + if (docMaps[mid] != null) + return newStarts[mid] + docMaps[mid][oldDocID - starts[mid]]; + else + return newStarts[mid] + oldDocID - starts[mid]; + } + } + if (docMaps[hi] != null) + return newStarts[hi] + docMaps[hi][oldDocID - starts[hi]]; + else + return newStarts[hi] + oldDocID - starts[hi]; + } + } + } +}
\ No newline at end of file |