diff options
Diffstat (limited to 'src/core/Search/TermScorer.cs')
-rw-r--r-- | src/core/Search/TermScorer.cs | 188 |
1 files changed, 188 insertions, 0 deletions
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;

using TermDocs = Lucene.Net.Index.TermDocs;

namespace Lucene.Net.Search
{

    /// <summary>Expert: A <c>Scorer</c> for documents matching a <c>Term</c>.
    /// <para>
    /// Document numbers and term frequencies are pulled from the underlying
    /// <c>TermDocs</c> in batches into two parallel buffers, and the
    /// frequency-dependent part of the score is precomputed for small
    /// frequencies so that the common case avoids a call to <c>Tf</c>.
    /// </para>
    /// </summary>
    public sealed class TermScorer:Scorer
    {

        // Norm decoding table, fetched once for all instances by the static constructor.
        private static readonly float[] SIM_NORM_DECODER;

        // Number of (doc, freq) pairs requested from the TermDocs per refill.
        private const int BUFFER_SIZE = 32;

        // Frequencies below this value are scored from scoreCache.
        private const int SCORE_CACHE_SIZE = 32;

        private readonly Weight weight;
        private readonly TermDocs termDocs;
        private readonly byte[] norms;
        private readonly float weightValue;

        // Current document; -1 until the scorer is first positioned,
        // NO_MORE_DOCS once the iteration is exhausted.
        private int doc = - 1;

        private readonly int[] docs = new int[BUFFER_SIZE]; // buffered doc numbers
        private readonly int[] freqs = new int[BUFFER_SIZE]; // buffered term freqs
        private int pointer; // index of the current entry in docs/freqs
        private int pointerMax; // number of valid entries in docs/freqs

        // scoreCache[f] == Similarity.Tf(f) * weightValue for 0 <= f < SCORE_CACHE_SIZE
        private readonly float[] scoreCache = new float[SCORE_CACHE_SIZE];

        /// <summary> Construct a <c>TermScorer</c>.
        /// 
        /// </summary>
        /// <param name="weight">The weight of the <c>Term</c> in the query.
        /// </param>
        /// <param name="td">An iterator over the documents matching the <c>Term</c>.
        /// </param>
        /// <param name="similarity">The <c>Similarity</c> implementation to be used for score
        /// computations.
        /// </param>
        /// <param name="norms">The field norms of the document fields for the <c>Term</c>.
        /// May be <c>null</c>, in which case scores are not normalized.
        /// </param>
        public /*internal*/ TermScorer(Weight weight, TermDocs td, Similarity similarity, byte[] norms):base(similarity)
        {
            this.weight = weight;
            this.termDocs = td;
            this.norms = norms;
            this.weightValue = weight.Value;

            // Precompute the raw score for every frequency that fits in the cache.
            for (int i = 0; i < SCORE_CACHE_SIZE; i++)
            {
                scoreCache[i] = Similarity.Tf(i) * weightValue;
            }
        }

        /// <summary> Positions the scorer on its first document with <see cref="NextDoc()" />
        /// and then collects every remaining document.
        /// </summary>
        /// <param name="c">The collector that receives each matching document.
        /// </param>
        public override void Score(Collector c)
        {
            Score(c, System.Int32.MaxValue, NextDoc());
        }

        /// <summary> Collects the current document and all following ones whose number is
        /// below <paramref name="end" />. The scorer must already be positioned on a
        /// document (for example by <see cref="NextDoc()" />), because the current
        /// document is collected before the iteration advances.
        /// </summary>
        /// <param name="c">The collector that receives each matching document.
        /// </param>
        /// <param name="end">Exclusive upper bound on the document numbers to collect.
        /// </param>
        /// <param name="firstDocID">Ignored; the current position is taken from the
        /// scorer's own state.
        /// </param>
        /// <returns> <c>true</c> if collection stopped because the current document reached
        /// <paramref name="end" />; <c>false</c> if the underlying stream was exhausted
        /// (it is closed in that case).
        /// </returns>
        // firstDocID is ignored since nextDoc() sets 'doc'
        public /*protected internal*/ override bool Score(Collector c, int end, int firstDocID)
        {
            c.SetScorer(this);
            while (doc < end)
            {
                // for docs in window
                c.Collect(doc); // collect score

                if (++pointer >= pointerMax)
                {
                    pointerMax = termDocs.Read(docs, freqs); // refill buffers
                    if (pointerMax != 0)
                    {
                        pointer = 0;
                    }
                    else
                    {
                        termDocs.Close(); // close stream
                        doc = NO_MORE_DOCS; // same sentinel NextDoc()/Advance() report
                        return false;
                    }
                }
                doc = docs[pointer];
            }
            return true;
        }

        /// <summary> Returns the current document number: <c>-1</c> before the scorer has been
        /// positioned, <c>NO_MORE_DOCS</c> once it is exhausted.
        /// </summary>
        public override int DocID()
        {
            return doc;
        }

        /// <summary> Advances to the next document matching the query. <br/>
        /// The iterator over the matching documents is buffered using
        /// <see cref="TermDocs.Read(int[],int[])" />.
        /// 
        /// </summary>
        /// <returns> the document matching the query or <c>NO_MORE_DOCS</c> if there are no
        /// more documents. The underlying stream is closed when it is exhausted.
        /// </returns>
        public override int NextDoc()
        {
            pointer++;
            if (pointer >= pointerMax)
            {
                pointerMax = termDocs.Read(docs, freqs); // refill buffer
                if (pointerMax != 0)
                {
                    pointer = 0;
                }
                else
                {
                    termDocs.Close(); // close stream
                    return doc = NO_MORE_DOCS;
                }
            }
            doc = docs[pointer];
            return doc;
        }

        /// <summary> Computes the score of the current document: the term-frequency factor
        /// times the weight value, multiplied by the decoded field norm when norms
        /// were supplied.
        /// </summary>
        /// <returns> the score of the current document.
        /// </returns>
        public override float Score()
        {
            System.Diagnostics.Debug.Assert(doc != - 1);
            int f = freqs[pointer];
            float raw = f < SCORE_CACHE_SIZE?scoreCache[f]:Similarity.Tf(f) * weightValue; // cache miss

            return norms == null?raw:raw * SIM_NORM_DECODER[norms[doc] & 0xFF]; // normalize for field
        }

        /// <summary> Advances to the first match beyond the current whose document number is
        /// greater than or equal to a given target. <br/>
        /// The implementation uses <see cref="TermDocs.SkipTo(int)" />.
        /// 
        /// </summary>
        /// <param name="target">The target document number.
        /// </param>
        /// <returns> the matching document or <c>NO_MORE_DOCS</c> if none exist. Unlike
        /// <see cref="NextDoc()" />, this method does not close the underlying stream
        /// when no match is found.
        /// </returns>
        public override int Advance(int target)
        {
            // first scan in cache
            for (pointer++; pointer < pointerMax; pointer++)
            {
                if (docs[pointer] >= target)
                {
                    return doc = docs[pointer];
                }
            }

            // not found in cache, seek underlying stream
            bool result = termDocs.SkipTo(target);
            if (result)
            {
                // Re-seed the buffer with the single entry the stream is now positioned on.
                pointerMax = 1;
                pointer = 0;
                docs[pointer] = doc = termDocs.Doc;
                freqs[pointer] = termDocs.Freq;
            }
            else
            {
                doc = NO_MORE_DOCS;
            }
            return doc;
        }

        /// <summary>Returns a string representation of this <c>TermScorer</c>. </summary>
        public override System.String ToString()
        {
            return "scorer(" + weight + ")";
        }

        static TermScorer()
        {
            SIM_NORM_DECODER = Search.Similarity.GetNormDecoder();
        }
    }
}
\ No newline at end of file |