Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mono/Lucene.Net.Light.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/Search/DisjunctionMaxQuery.cs')
-rw-r--r-- src/core/Search/DisjunctionMaxQuery.cs | 344
1 files changed, 344 insertions, 0 deletions
diff --git a/src/core/Search/DisjunctionMaxQuery.cs b/src/core/Search/DisjunctionMaxQuery.cs
new file mode 100644
index 0000000..f59e3dc
--- /dev/null
+++ b/src/core/Search/DisjunctionMaxQuery.cs
@@ -0,0 +1,344 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Index;
+using Lucene.Net.Support;
+using IndexReader = Lucene.Net.Index.IndexReader;
+
+namespace Lucene.Net.Search
+{
+
+    /// <summary>
+    /// A query that generates the union of documents produced by its subqueries, and that scores each document
+    /// with the maximum score for that document as produced by any subquery, plus a tie breaking increment for
+    /// any additional matching subqueries.
+    /// This is useful when searching for a word in multiple fields with different boost factors (so that the
+    /// fields cannot be combined equivalently into a single search field). We want the primary score to be the
+    /// one associated with the highest boost, not the sum of the field scores (as BooleanQuery would give).
+    /// If the query is "albino elephant" this ensures that "albino" matching one field and "elephant" matching
+    /// another gets a higher score than "albino" matching both fields.
+    /// To get this result, use both BooleanQuery and DisjunctionMaxQuery: for each term a DisjunctionMaxQuery
+    /// searches for it in each field, while the set of these DisjunctionMaxQuery's is combined into a
+    /// BooleanQuery.
+    /// The tie breaker capability allows results that include the same term in multiple fields to be judged
+    /// better than results that include this term in only the best of those multiple fields, without confusing
+    /// this with the better case of two different terms in the multiple fields.
+    /// </summary>
+    [Serializable]
+    public class DisjunctionMaxQuery : Query, System.Collections.Generic.IEnumerable<Query>, System.ICloneable
+    {
+        /* The subqueries, in the order they were added. */
+        private EquatableList<Query> disjuncts = new EquatableList<Query>();
+
+        /* Multiple of the non-max disjunct scores added into our final score. Non-zero values support tie-breaking. */
+        private float tieBreakerMultiplier = 0.0f;
+
+        /// <summary>Creates a new empty DisjunctionMaxQuery. Use <c>Add</c> to add the subqueries.</summary>
+        /// <param name="tieBreakerMultiplier">the score of each non-maximum disjunct for a document is multiplied by this weight
+        /// and added into the final score. If non-zero, the value should be small, on the order of 0.1, which says that
+        /// 10 occurrences of word in a lower-scored field that is also in a higher scored field is just as good as a unique
+        /// word in the lower scored field (i.e., one that is not in any higher scored field).
+        /// </param>
+        public DisjunctionMaxQuery(float tieBreakerMultiplier)
+        {
+            this.tieBreakerMultiplier = tieBreakerMultiplier;
+        }
+
+        /// <summary>Creates a new DisjunctionMaxQuery that starts out with the given disjuncts.</summary>
+        /// <param name="disjuncts">a Collection&lt;Query&gt; of all the disjuncts to add
+        /// </param>
+        /// <param name="tieBreakerMultiplier">the weight to give to each matching non-maximum disjunct
+        /// </param>
+        public DisjunctionMaxQuery(System.Collections.Generic.ICollection<Query> disjuncts, float tieBreakerMultiplier)
+        {
+            this.tieBreakerMultiplier = tieBreakerMultiplier;
+            // Deliberately routed through the virtual Add overload (as before), so an
+            // overriding subclass still observes the initial disjuncts.
+            Add(disjuncts);
+        }
+
+        /// <summary>Add a subquery to this disjunction.</summary>
+        /// <param name="query">the disjunct added
+        /// </param>
+        public virtual void Add(Query query)
+        {
+            disjuncts.Add(query);
+        }
+
+        /// <summary>Add a collection of disjuncts to this disjunction; they are appended to the existing ones.</summary>
+        /// <param name="disjuncts">the disjuncts to append
+        /// </param>
+        public virtual void Add(System.Collections.Generic.ICollection<Query> disjuncts)
+        {
+            this.disjuncts.AddRange(disjuncts);
+        }
+
+        /// <summary>An enumerator over the disjuncts.</summary>
+        /// <returns>the enumerator of the underlying disjunct list
+        /// </returns>
+        public virtual System.Collections.Generic.IEnumerator<Query> GetEnumerator()
+        {
+            return disjuncts.GetEnumerator();
+        }
+
+        /// <summary>Non-generic enumerator; forwards to the generic <c>GetEnumerator</c>.</summary>
+        System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
+        {
+            return GetEnumerator();
+        }
+
+        /// <summary> Expert: the Weight for DisjunctionMaxQuery, used to
+        /// normalize, score and explain these queries.
+        ///
+        /// <p/>NOTE: this API and implementation is subject to
+        /// change suddenly in the next release.<p/>
+        /// </summary>
+        [Serializable]
+        protected internal class DisjunctionMaxWeight : Weight
+        {
+            /* The query this weight was created for. */
+            private DisjunctionMaxQuery enclosingInstance;
+
+            /// <summary>The DisjunctionMaxQuery this weight was created for.</summary>
+            public DisjunctionMaxQuery Enclosing_Instance
+            {
+                get { return enclosingInstance; }
+            }
+
+            /// <summary>The Similarity implementation. </summary>
+            protected internal Similarity similarity;
+
+            /// <summary>The Weights for our subqueries, in 1-1 correspondence with disjuncts at construction time.</summary>
+            protected internal System.Collections.Generic.List<Weight> weights = new System.Collections.Generic.List<Weight>();
+
+            /// <summary>Construct the Weight for this Query searched by searcher. Recursively construct subquery weights.</summary>
+            /// <param name="enclosingInstance">the query being weighted
+            /// </param>
+            /// <param name="searcher">supplies the Similarity and is passed on to every subquery's CreateWeight
+            /// </param>
+            public DisjunctionMaxWeight(DisjunctionMaxQuery enclosingInstance, Searcher searcher)
+            {
+                this.enclosingInstance = enclosingInstance;
+                this.similarity = searcher.Similarity;
+                foreach (Query disjunctQuery in enclosingInstance.disjuncts)
+                {
+                    weights.Add(disjunctQuery.CreateWeight(searcher));
+                }
+            }
+
+            /// <summary>Our associated DisjunctionMaxQuery.</summary>
+            public override Query Query
+            {
+                get { return Enclosing_Instance; }
+            }
+
+            /// <summary>Our boost (the boost of the associated query).</summary>
+            public override float Value
+            {
+                get { return Enclosing_Instance.Boost; }
+            }
+
+            /// <summary>Compute the sum of squared weights of us applied to our subqueries. Used for normalization.</summary>
+            /// <returns>(max + (sum - max) * tieBreaker^2) * boost^2, where sum and max are taken over the
+            /// subquery weights' own sums of squared weights
+            /// </returns>
+            public override float GetSumOfSquaredWeights()
+            {
+                float max = 0.0f, sum = 0.0f;
+                foreach (Weight currentWeight in weights)
+                {
+                    float sub = currentWeight.GetSumOfSquaredWeights();
+                    sum += sub;
+                    max = System.Math.Max(max, sub);
+                }
+                float boost = Enclosing_Instance.Boost;
+                float tieBreaker = Enclosing_Instance.tieBreakerMultiplier;
+                return (((sum - max) * tieBreaker * tieBreaker) + max) * boost * boost;
+            }
+
+            /// <summary>Apply the computed normalization factor, multiplied by our boost, to our subqueries.</summary>
+            /// <param name="norm">the normalization factor computed by the caller
+            /// </param>
+            public override void Normalize(float norm)
+            {
+                float boostedNorm = norm * Enclosing_Instance.Boost; // Incorporate our boost
+                foreach (Weight wt in weights)
+                {
+                    wt.Normalize(boostedNorm);
+                }
+            }
+
+            /// <summary>Create the scorer used to score our associated DisjunctionMaxQuery.</summary>
+            /// <param name="reader">the reader handed to every subquery weight
+            /// </param>
+            /// <param name="scoreDocsInOrder">not consulted here; sub-scorers are always requested with <c>true</c>
+            /// </param>
+            /// <param name="topScorer">not consulted here; sub-scorers are always requested with <c>false</c>
+            /// </param>
+            /// <returns>a DisjunctionMaxScorer over the sub-scorers that have at least one document,
+            /// or null when no sub-scorer has any
+            /// </returns>
+            public override Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer)
+            {
+                Scorer[] scorers = new Scorer[weights.Count];
+                int idx = 0;
+                foreach (Weight w in weights)
+                {
+                    Scorer subScorer = w.Scorer(reader, true, false);
+                    // Only keep sub-scorers that exist and have a first document. Note that
+                    // every kept scorer has therefore already been advanced once by NextDoc().
+                    if (subScorer != null && subScorer.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
+                    {
+                        scorers[idx++] = subScorer;
+                    }
+                }
+                if (idx == 0)
+                {
+                    return null; // all scorers did not have documents
+                }
+                return new DisjunctionMaxScorer(Enclosing_Instance.tieBreakerMultiplier, similarity, scorers, idx);
+            }
+
+            /// <summary>Explain the score we computed for doc.</summary>
+            /// <param name="reader">the reader handed to every subquery weight
+            /// </param>
+            /// <param name="doc">the document being explained
+            /// </param>
+            /// <returns>the single sub-weight's explanation when there is exactly one, otherwise a
+            /// ComplexExplanation whose details are the matching sub-explanations
+            /// </returns>
+            public override Explanation Explain(IndexReader reader, int doc)
+            {
+                // Test the list that is actually indexed: the query's disjuncts may have been
+                // modified after this weight was built, the weights list is not.
+                if (weights.Count == 1)
+                {
+                    return weights[0].Explain(reader, doc);
+                }
+                ComplexExplanation result = new ComplexExplanation();
+                float max = 0.0f, sum = 0.0f;
+                float tieBreaker = Enclosing_Instance.tieBreakerMultiplier;
+                // Invariant culture keeps the decimal separator a '.' regardless of the thread culture.
+                result.Description = tieBreaker == 0.0f
+                    ? "max of:"
+                    : "max plus " + tieBreaker.ToString(System.Globalization.CultureInfo.InvariantCulture) + " times others of:";
+                foreach (Weight wt in weights)
+                {
+                    Explanation e = wt.Explain(reader, doc);
+                    if (e.IsMatch)
+                    {
+                        result.Match = true;
+                        result.AddDetail(e);
+                        sum += e.Value;
+                        max = System.Math.Max(max, e.Value);
+                    }
+                }
+                result.Value = max + (sum - max) * tieBreaker;
+                return result;
+            }
+        } // end of DisjunctionMaxWeight inner class
+
+        /// <summary>Create the Weight used to score us.</summary>
+        /// <param name="searcher">the searcher the weight is built for
+        /// </param>
+        /// <returns>a new DisjunctionMaxWeight bound to this query
+        /// </returns>
+        public override Weight CreateWeight(Searcher searcher)
+        {
+            return new DisjunctionMaxWeight(this, searcher);
+        }
+
+        /// <summary>Optimize our representation and our subqueries representations</summary>
+        /// <param name="reader">the IndexReader we query
+        /// </param>
+        /// <returns> an optimized copy of us (which may not be a copy if there is nothing to optimize)
+        /// </returns>
+        public override Query Rewrite(IndexReader reader)
+        {
+            int numDisjunctions = disjuncts.Count;
+            if (numDisjunctions == 1)
+            {
+                // A single disjunct is replaced by its own rewrite, carrying our boost along.
+                Query singleton = disjuncts[0];
+                Query result = singleton.Rewrite(reader);
+                if (Boost != 1.0f)
+                {
+                    if (result == singleton)
+                    {
+                        // Rewrite returned the subquery itself: clone so that applying
+                        // our boost does not modify the caller's subquery.
+                        result = (Query) result.Clone();
+                    }
+                    result.Boost = Boost * result.Boost;
+                }
+                return result;
+            }
+
+            // Copy-on-first-change: only clone ourselves once some disjunct actually rewrites.
+            DisjunctionMaxQuery clone = null;
+            for (int i = 0; i < numDisjunctions; i++)
+            {
+                Query clause = disjuncts[i];
+                Query rewrite = clause.Rewrite(reader);
+                if (rewrite != clause)
+                {
+                    if (clone == null)
+                    {
+                        clone = (DisjunctionMaxQuery) this.Clone();
+                    }
+                    clone.disjuncts[i] = rewrite;
+                }
+            }
+            return clone != null ? clone : this;
+        }
+
+        /// <summary>Create a shallow copy of us -- used in rewriting if necessary</summary>
+        /// <returns> a copy of us (but reuse, don't copy, our subqueries)
+        /// </returns>
+        public override System.Object Clone()
+        {
+            DisjunctionMaxQuery clone = (DisjunctionMaxQuery) base.Clone();
+            // Give the copy its own list so replacing an entry there leaves our list untouched.
+            clone.disjuncts = (EquatableList<Query>) this.disjuncts.Clone();
+            return clone;
+        }
+
+        /// <summary>Asks every disjunct, in order, to add its terms to <paramref name="terms"/>.</summary>
+        /// <param name="terms">the set handed to each subquery's ExtractTerms
+        /// </param>
+        public override void ExtractTerms(System.Collections.Generic.ISet<Term> terms)
+        {
+            foreach (Query query in disjuncts)
+            {
+                query.ExtractTerms(terms);
+            }
+        }
+
+        /// <summary>Prettyprint us.</summary>
+        /// <param name="field">the field to which we are applied
+        /// </param>
+        /// <returns> a string that shows what we do, of the form "(disjunct1 | disjunct2 | ... | disjunctn)~tieBreaker^boost";
+        /// the "~tieBreaker" part is omitted when the tie breaker is 0 and the "^boost" part when the boost is 1.
+        /// Numbers are always written with the invariant culture.
+        /// </returns>
+        public override System.String ToString(System.String field)
+        {
+            System.IFormatProvider invariant = System.Globalization.CultureInfo.InvariantCulture;
+            System.Text.StringBuilder buffer = new System.Text.StringBuilder();
+            buffer.Append("(");
+            int numDisjunctions = disjuncts.Count;
+            for (int i = 0; i < numDisjunctions; i++)
+            {
+                Query subquery = disjuncts[i];
+                if (subquery is BooleanQuery)
+                {
+                    // wrap sub-bools in parens
+                    buffer.Append("(");
+                    buffer.Append(subquery.ToString(field));
+                    buffer.Append(")");
+                }
+                else
+                {
+                    buffer.Append(subquery.ToString(field));
+                }
+                if (i != numDisjunctions - 1)
+                {
+                    buffer.Append(" | ");
+                }
+            }
+            buffer.Append(")");
+            if (tieBreakerMultiplier != 0.0f)
+            {
+                buffer.Append("~");
+                // StringBuilder.Append(float) would use the current culture (e.g. "0,1").
+                buffer.Append(tieBreakerMultiplier.ToString(invariant));
+            }
+            if (Boost != 1.0)
+            {
+                buffer.Append("^");
+                buffer.Append(Boost.ToString(invariant));
+            }
+            return buffer.ToString();
+        }
+
+        /// <summary>Return true iff we represent the same query as o</summary>
+        /// <param name="o">another object
+        /// </param>
+        /// <returns> true iff o is a DisjunctionMaxQuery with the same boost, the same tie breaker and the same
+        /// subqueries, in the same order, as us
+        /// </returns>
+        public override bool Equals(System.Object o)
+        {
+            if (!(o is DisjunctionMaxQuery))
+            {
+                return false;
+            }
+            DisjunctionMaxQuery other = (DisjunctionMaxQuery) o;
+            return this.Boost == other.Boost
+                && this.tieBreakerMultiplier == other.tieBreakerMultiplier
+                && this.disjuncts.Equals(other.disjuncts);
+        }
+
+        /// <summary>Compute a hash code for hashing us</summary>
+        /// <returns> the sum of the bit patterns of the boost and the tie breaker and the hash code of the disjunct list
+        /// </returns>
+        public override int GetHashCode()
+        {
+            // The sum is expected to wrap around; unchecked keeps that true even when the
+            // assembly is compiled with arithmetic overflow checking enabled.
+            unchecked
+            {
+                return BitConverter.ToInt32(BitConverter.GetBytes(Boost), 0)
+                    + BitConverter.ToInt32(BitConverter.GetBytes(tieBreakerMultiplier), 0)
+                    + disjuncts.GetHashCode();
+            }
+        }
+    }
+} \ No newline at end of file