Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mono/Lucene.Net.Light.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/Search/PhraseScorer.cs')
-rw-r--r--src/core/Search/PhraseScorer.cs224
1 files changed, 224 insertions, 0 deletions
diff --git a/src/core/Search/PhraseScorer.cs b/src/core/Search/PhraseScorer.cs
new file mode 100644
index 0000000..59c9771
--- /dev/null
+++ b/src/core/Search/PhraseScorer.cs
@@ -0,0 +1,224 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using TermPositions = Lucene.Net.Index.TermPositions;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary>Expert: Scoring functionality for phrase queries.
+ /// <br/>A document is considered matching if it contains the phrase-query terms
+ /// at "valid" positons. What "valid positions" are
+ /// depends on the type of the phrase query: for an exact phrase query terms are required
+ /// to appear in adjacent locations, while for a sloppy phrase query some distance between
+ /// the terms is allowed. The abstract method <see cref="PhraseFreq()" /> of extending classes
+ /// is invoked for each document containing all the phrase query terms, in order to
+ /// compute the frequency of the phrase query in that document. A non zero frequency
+ /// means a match.
+ /// </summary>
+ abstract class PhraseScorer:Scorer
+ {
+ private Weight weight;
+ protected internal byte[] norms;
+ protected internal float value_Renamed;
+
+ private bool firstTime = true;
+ private bool more = true;
+ protected internal PhraseQueue pq;
+ protected internal PhrasePositions first, last;
+
+ private float freq; //prhase frequency in current doc as computed by phraseFreq().
+
+ internal PhraseScorer(Weight weight, TermPositions[] tps, int[] offsets, Similarity similarity, byte[] norms):base(similarity)
+ {
+ this.norms = norms;
+ this.weight = weight;
+ this.value_Renamed = weight.Value;
+
+ // convert tps to a list of phrase positions.
+ // note: phrase-position differs from term-position in that its position
+ // reflects the phrase offset: pp.pos = tp.pos - offset.
+ // this allows to easily identify a matching (exact) phrase
+ // when all PhrasePositions have exactly the same position.
+ for (int i = 0; i < tps.Length; i++)
+ {
+ PhrasePositions pp = new PhrasePositions(tps[i], offsets[i]);
+ if (last != null)
+ {
+ // add next to end of list
+ last.next = pp;
+ }
+ else
+ {
+ first = pp;
+ }
+ last = pp;
+ }
+
+ pq = new PhraseQueue(tps.Length); // construct empty pq
+ first.doc = - 1;
+ }
+
+ public override int DocID()
+ {
+ return first.doc;
+ }
+
+ public override int NextDoc()
+ {
+ if (firstTime)
+ {
+ Init();
+ firstTime = false;
+ }
+ else if (more)
+ {
+ more = last.Next(); // trigger further scanning
+ }
+ if (!DoNext())
+ {
+ first.doc = NO_MORE_DOCS;
+ }
+ return first.doc;
+ }
+
+ // next without initial increment
+ private bool DoNext()
+ {
+ while (more)
+ {
+ while (more && first.doc < last.doc)
+ {
+ // find doc w/ all the terms
+ more = first.SkipTo(last.doc); // skip first upto last
+ FirstToLast(); // and move it to the end
+ }
+
+ if (more)
+ {
+ // found a doc with all of the terms
+ freq = PhraseFreq(); // check for phrase
+ if (freq == 0.0f)
+ // no match
+ more = last.Next();
+ // trigger further scanning
+ else
+ return true; // found a match
+ }
+ }
+ return false; // no more matches
+ }
+
+ public override float Score()
+ {
+ //System.out.println("scoring " + first.doc);
+ float raw = Similarity.Tf(freq) * value_Renamed; // raw score
+ return norms == null?raw:raw * Similarity.DecodeNorm(norms[first.doc]); // normalize
+ }
+
+ public override int Advance(int target)
+ {
+ firstTime = false;
+ for (PhrasePositions pp = first; more && pp != null; pp = pp.next)
+ {
+ more = pp.SkipTo(target);
+ }
+ if (more)
+ {
+ Sort(); // re-sort
+ }
+ if (!DoNext())
+ {
+ first.doc = NO_MORE_DOCS;
+ }
+ return first.doc;
+ }
+
+ /// <summary>
+ /// Phrase frequency in current doc as computed by PhraseFreq()
+ /// </summary>
+ /// <returns></returns>
+ public float CurrentFreq()
+ {
+ return freq;
+ }
+
+ /// <summary> For a document containing all the phrase query terms, compute the
+ /// frequency of the phrase in that document.
+ /// A non zero frequency means a match.
+ /// <br/>Note, that containing all phrase terms does not guarantee a match - they have to be found in matching locations.
+ /// </summary>
+ /// <returns> frequency of the phrase in current doc, 0 if not found.
+ /// </returns>
+ protected internal abstract float PhraseFreq();
+
+ private void Init()
+ {
+ for (PhrasePositions pp = first; more && pp != null; pp = pp.next)
+ {
+ more = pp.Next();
+ }
+ if (more)
+ {
+ Sort();
+ }
+ }
+
+ private void Sort()
+ {
+ pq.Clear();
+ for (PhrasePositions pp = first; pp != null; pp = pp.next)
+ {
+ pq.Add(pp);
+ }
+ PqToList();
+ }
+
+ protected internal void PqToList()
+ {
+ last = first = null;
+ while (pq.Top() != null)
+ {
+ PhrasePositions pp = pq.Pop();
+ if (last != null)
+ {
+ // add next to end of list
+ last.next = pp;
+ }
+ else
+ first = pp;
+ last = pp;
+ pp.next = null;
+ }
+ }
+
+ protected internal void FirstToLast()
+ {
+ last.next = first; // move first to end of list
+ last = first;
+ first = first.next;
+ last.next = null;
+ }
+
+ public override System.String ToString()
+ {
+ return "scorer(" + weight + ")";
+ }
+ }
+} \ No newline at end of file