diff options
Diffstat (limited to 'src/core/Index/NormsWriter.cs')
-rw-r--r-- | src/core/Index/NormsWriter.cs | 206 |
1 file changed, 206 insertions, 0 deletions
diff --git a/src/core/Index/NormsWriter.cs b/src/core/Index/NormsWriter.cs new file mode 100644 index 0000000..507d69c --- /dev/null +++ b/src/core/Index/NormsWriter.cs @@ -0,0 +1,206 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using Lucene.Net.Support; +using IndexOutput = Lucene.Net.Store.IndexOutput; +using Similarity = Lucene.Net.Search.Similarity; + +namespace Lucene.Net.Index +{ + + // TODO FI: norms could actually be stored as doc store + + /// <summary>Writes norms. Each thread X field accumulates the norms + /// for the doc/fields it saw, then the flush method below + /// merges all of these together into a single _X.nrm file. 
/// </summary>
sealed class NormsWriter : InvertedDocEndConsumer
{
    // Norm byte for a boost of 1.0f; used to fill "holes" for documents
    // that recorded no norm for a field.  Initialized in the static ctor.
    private static readonly byte defaultNorm;

    // Field metadata for the segment being written; set by the indexing
    // chain via SetFieldInfos before Flush is called.
    private FieldInfos fieldInfos;

    /// <summary>Creates the per-thread consumer that accumulates norms
    /// for the documents processed on one indexing thread.</summary>
    public override InvertedDocEndConsumerPerThread AddThread(DocInverterPerThread docInverterPerThread)
    {
        return new NormsWriterPerThread(docInverterPerThread, this);
    }

    /// <summary>No-op: this writer holds no intermediate state that
    /// needs discarding on abort.</summary>
    public override void Abort()
    {
    }

    // We only write the _X.nrm file at flush, so there are no
    // intermediate files to report.
    internal void Files(ICollection<string> files)
    {
    }

    internal override void SetFieldInfos(FieldInfos fieldInfos)
    {
        this.fieldInfos = fieldInfos;
    }

    /// <summary>Produce _X.nrm if any document had a field with norms
    /// not disabled
    /// </summary>
    public override void Flush(IDictionary<InvertedDocEndConsumerPerThread,ICollection<InvertedDocEndConsumerPerField>> threadsAndFields, SegmentWriteState state)
    {
        IDictionary<FieldInfo, IList<NormsWriterPerField>> byField = new HashMap<FieldInfo, IList<NormsWriterPerField>>();

        // Typically, each thread will have encountered the same
        // field.  So first we collate by field, ie, all
        // per-thread field instances that correspond to the
        // same FieldInfo
        foreach (var entry in threadsAndFields)
        {
            ICollection<InvertedDocEndConsumerPerField> fields = entry.Value;
            IEnumerator<InvertedDocEndConsumerPerField> fieldsIt = fields.GetEnumerator();
            // Collected here and removed after iteration, because the
            // collection cannot be mutated while it is being enumerated.
            var fieldsToRemove = new HashSet<NormsWriterPerField>();
            while (fieldsIt.MoveNext())
            {
                NormsWriterPerField perField = (NormsWriterPerField) fieldsIt.Current;

                if (perField.upto > 0)
                {
                    // It has some norms
                    // NOTE: Lucene.Net.Support.HashMap yields null (not an
                    // exception) for a missing key, unlike Dictionary<K,V>.
                    IList<NormsWriterPerField> l = byField[perField.fieldInfo];
                    if (l == null)
                    {
                        l = new List<NormsWriterPerField>();
                        byField[perField.fieldInfo] = l;
                    }
                    l.Add(perField);
                }
                // Remove this field since we haven't seen it
                // since the previous flush
                else
                {
                    fieldsToRemove.Add(perField);
                }
            }
            foreach (var field in fieldsToRemove)
            {
                fields.Remove(field);
            }
        }

        System.String normsFileName = state.segmentName + "." + IndexFileNames.NORMS_EXTENSION;
        state.flushedFiles.Add(normsFileName);
        IndexOutput normsOut = state.directory.CreateOutput(normsFileName);

        try
        {
            normsOut.WriteBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.Length);

            int numField = fieldInfos.Size();

            // Count of fields whose norms are actually written; used by the
            // file-size assertion at the bottom of the loop.
            int normCount = 0;

            for (int fieldNumber = 0; fieldNumber < numField; fieldNumber++)
            {
                FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber);

                // null when no thread saw norms for this field (HashMap
                // missing-key semantics, see note above).
                IList<NormsWriterPerField> toMerge = byField[fieldInfo];
                int upto = 0;
                if (toMerge != null)
                {
                    int numFields = toMerge.Count;

                    normCount++;

                    NormsWriterPerField[] fields = new NormsWriterPerField[numFields];
                    int[] uptos = new int[numFields];

                    for (int j = 0; j < numFields; j++)
                        fields[j] = toMerge[j];

                    // k-way merge of the per-thread norm arrays in docID
                    // order; numLeft is the number of still-active sources.
                    int numLeft = numFields;

                    while (numLeft > 0)
                    {
                        System.Diagnostics.Debug.Assert(uptos[0] < fields[0].docIDs.Length, " uptos[0]=" + uptos[0] + " len=" + (fields[0].docIDs.Length));

                        // Find the active source holding the smallest
                        // not-yet-written docID.
                        int minLoc = 0;
                        int minDocID = fields[0].docIDs[uptos[0]];

                        for (int j = 1; j < numLeft; j++)
                        {
                            int docID = fields[j].docIDs[uptos[j]];
                            if (docID < minDocID)
                            {
                                minDocID = docID;
                                minLoc = j;
                            }
                        }

                        System.Diagnostics.Debug.Assert(minDocID < state.numDocs);

                        // Fill hole: docs below minDocID with no recorded
                        // norm get the default norm byte.
                        for (; upto < minDocID; upto++)
                            normsOut.WriteByte(defaultNorm);

                        normsOut.WriteByte(fields[minLoc].norms[uptos[minLoc]]);
                        (uptos[minLoc])++;
                        upto++;

                        // Source exhausted: reset it and compact the arrays
                        // by swapping in the last active source.
                        if (uptos[minLoc] == fields[minLoc].upto)
                        {
                            fields[minLoc].Reset();
                            if (minLoc != numLeft - 1)
                            {
                                fields[minLoc] = fields[numLeft - 1];
                                uptos[minLoc] = uptos[numLeft - 1];
                            }
                            numLeft--;
                        }
                    }

                    // Fill final hole with defaultNorm
                    for (; upto < state.numDocs; upto++)
                        normsOut.WriteByte(defaultNorm);
                }
                else if (fieldInfo.isIndexed && !fieldInfo.omitNorms)
                {
                    normCount++;
                    // Fill entire field with default norm:
                    for (; upto < state.numDocs; upto++)
                        normsOut.WriteByte(defaultNorm);
                }

                // Each written field contributes exactly numDocs bytes.
                // NOTE(review): the 4 here assumes NORMS_HEADER is 4 bytes
                // long — confirm against SegmentMerger.NORMS_HEADER.
                System.Diagnostics.Debug.Assert(4 + normCount * state.numDocs == normsOut.FilePointer, ".nrm file size mismatch: expected=" + (4 + normCount * state.numDocs) + " actual=" + normsOut.FilePointer);
            }
        }
        finally
        {
            // Always close the output, even if the merge asserts/throws.
            normsOut.Close();
        }
    }

    /// <summary>No-op: norms are not part of the shared doc store.</summary>
    internal override void CloseDocStore(SegmentWriteState state)
    {
    }

    // Encode the default boost of 1.0f exactly once.
    static NormsWriter()
    {
        defaultNorm = Similarity.EncodeNorm(1.0f);
    }
}
}
\ No newline at end of file |