diff options
author | Miguel de Icaza <miguel@gnome.org> | 2015-06-11 19:34:09 +0300 |
---|---|---|
committer | Miguel de Icaza <miguel@gnome.org> | 2015-06-11 19:34:09 +0300 |
commit | 85978b7eb94738f516824341213d5e94060f5284 (patch) | |
tree | 879c92ba9e56a74ae2a0cbbaa802344b9c39e7d0 /src/core/Util/FieldCacheSanityChecker.cs |
Initial commit of lightweight Lucene.Net to be used in Mono
Diffstat (limited to 'src/core/Util/FieldCacheSanityChecker.cs')
-rw-r--r-- | src/core/Util/FieldCacheSanityChecker.cs | 439 |
1 files changed, 439 insertions, 0 deletions
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using System.Collections.Generic;

using IndexReader = Lucene.Net.Index.IndexReader;
using FieldCache = Lucene.Net.Search.FieldCache;
using CacheEntry = Lucene.Net.Search.CacheEntry;

namespace Lucene.Net.Util
{

    /// <summary> Provides methods for sanity checking that entries in the FieldCache
    /// are not wasteful or inconsistent.
    /// <p/>
    /// <p/>
    /// Lucene 2.9 introduced numerous enhancements into how the FieldCache
    /// is used by the low levels of Lucene searching (for Sorting and
    /// ValueSourceQueries) to improve both the speed for Sorting, as well
    /// as reopening of IndexReaders.  But these changes have shifted the
    /// usage of FieldCache from "top level" IndexReaders (frequently a
    /// MultiReader or DirectoryReader) down to the leaf level SegmentReaders.
    /// As a result, existing applications that directly access the FieldCache
    /// may find RAM usage increase significantly when upgrading to 2.9 or
    /// later.  This class provides an API for these applications (or their
    /// unit tests) to check at run time if the FieldCache contains "insane"
    /// usages of the FieldCache.
    /// <p/>
    /// <p/>
    /// <b>EXPERIMENTAL API:</b> This API is considered extremely advanced and
    /// experimental.  It may be removed or altered w/o warning in future releases
    /// of Lucene.
    /// <p/>
    /// </summary>
    /// <seealso cref="FieldCache">
    /// </seealso>
    /// <seealso cref="FieldCacheSanityChecker.Insanity">
    /// </seealso>
    /// <seealso cref="FieldCacheSanityChecker.InsanityType">
    /// </seealso>
    public sealed class FieldCacheSanityChecker
    {
        // Optional estimator; when non-null, Check() asks each CacheEntry to
        // estimate its RAM footprint before the sanity analysis runs.
        private RamUsageEstimator ramCalc = null;

        public FieldCacheSanityChecker()
        {
            /* NOOP */
        }

        /// <summary> If set, will be used to estimate size for all CacheEntry objects
        /// dealt with.
        /// </summary>
        public void SetRamUsageEstimator(RamUsageEstimator r)
        {
            ramCalc = r;
        }


        /// <summary> Quick and dirty convenience method.</summary>
        /// <seealso cref="Check">
        /// </seealso>
        public static Insanity[] CheckSanity(FieldCache cache)
        {
            return CheckSanity(cache.GetCacheEntries());
        }

        /// <summary> Quick and dirty convenience method that instantiates an instance with
        /// "good defaults" and uses it to test the CacheEntrys.
        /// </summary>
        /// <seealso cref="Check">
        /// </seealso>
        public static Insanity[] CheckSanity(params CacheEntry[] cacheEntries)
        {
            FieldCacheSanityChecker sanityChecker = new FieldCacheSanityChecker();
            // doesn't check for interned
            sanityChecker.SetRamUsageEstimator(new RamUsageEstimator(false));
            return sanityChecker.Check(cacheEntries);
        }


        /// <summary> Tests a CacheEntry[] for indication of "insane" cache usage.
        /// <p/>
        /// NOTE: FieldCache CreationPlaceholder objects are ignored.
        /// (:TODO: is this a bad idea? are we masking a real problem?)
        /// <p/>
        /// </summary>
        public Insanity[] Check(params CacheEntry[] cacheEntries)
        {
            if (null == cacheEntries || 0 == cacheEntries.Length)
                return new Insanity[0];

            if (null != ramCalc)
            {
                for (int i = 0; i < cacheEntries.Length; i++)
                {
                    cacheEntries[i].EstimateSize(ramCalc);
                }
            }

            // the indirect mapping lets MapOfSet dedup identical valIds for us
            //
            // maps the (valId) identityhashCode of cache values to
            // sets of CacheEntry instances
            MapOfSets<int, CacheEntry> valIdToItems = new MapOfSets<int, CacheEntry>(new Dictionary<int, HashSet<CacheEntry>>(17));
            // maps ReaderField keys to Sets of ValueIds
            MapOfSets<ReaderField, int> readerFieldToValIds = new MapOfSets<ReaderField, int>(new Dictionary<ReaderField, HashSet<int>>(17));

            // any keys that we know result in more than one valId
            HashSet<ReaderField> valMismatchKeys = new HashSet<ReaderField>();

            // iterate over all the cacheEntries to get the mappings we'll need
            for (int i = 0; i < cacheEntries.Length; i++)
            {
                CacheEntry item = cacheEntries[i];
                System.Object val = item.Value;

                // placeholders mark in-progress cache population; skip them
                if (val is Lucene.Net.Search.CreationPlaceholder)
                    continue;

                ReaderField rf = new ReaderField(item.ReaderKey, item.FieldName);

                System.Int32 valId = val.GetHashCode();

                // indirect mapping, so the MapOfSet will dedup identical valIds for us
                valIdToItems.Put(valId, item);
                // Put returns the size of the value set for rf; >1 means this
                // reader+field maps to more than one distinct cached value
                if (1 < readerFieldToValIds.Put(rf, valId))
                {
                    valMismatchKeys.Add(rf);
                }
            }

            List<Insanity> insanity = new List<Insanity>(valMismatchKeys.Count * 3);

            insanity.AddRange(CheckValueMismatch(valIdToItems, readerFieldToValIds, valMismatchKeys));
            insanity.AddRange(CheckSubreaders(valIdToItems, readerFieldToValIds));

            return insanity.ToArray();
        }

        /// <summary> Internal helper method used by Check that iterates over
        /// valMismatchKeys and generates a Collection of Insanity
        /// instances accordingly.  The MapOfSets are used to populate
        /// the Insanity objects.
        /// </summary>
        /// <seealso cref="InsanityType.VALUEMISMATCH">
        /// </seealso>
        private List<Insanity> CheckValueMismatch(MapOfSets<int, CacheEntry> valIdToItems,
                                                  MapOfSets<ReaderField, int> readerFieldToValIds,
                                                  HashSet<ReaderField> valMismatchKeys)
        {

            List<Insanity> insanity = new List<Insanity>(valMismatchKeys.Count * 3);

            if (!(valMismatchKeys.Count == 0))
            {
                // we have multiple values for some ReaderFields

                IDictionary<ReaderField, HashSet<int>> rfMap = readerFieldToValIds.Map;
                IDictionary<int, HashSet<CacheEntry>> valMap = valIdToItems.Map;
                foreach (ReaderField rf in valMismatchKeys)
                {
                    // collect every cache entry reachable from this reader+field
                    // via any of its (multiple) value ids
                    List<CacheEntry> badEntries = new List<CacheEntry>(valMismatchKeys.Count * 2);
                    foreach (int val in rfMap[rf])
                    {
                        foreach (CacheEntry entry in valMap[val])
                        {
                            badEntries.Add(entry);
                        }
                    }

                    insanity.Add(new Insanity(InsanityType.VALUEMISMATCH, "Multiple distinct value objects for " + rf.ToString(), badEntries.ToArray()));
                }
            }
            return insanity;
        }

        /// <summary> Internal helper method used by Check that iterates over
        /// the keys of readerFieldToValIds and generates a Collection
        /// of Insanity instances whenever two (or more) ReaderField instances are
        /// found that have an ancestry relationship.
        ///
        /// </summary>
        /// <seealso cref="InsanityType.SUBREADER">
        /// </seealso>
        private List<Insanity> CheckSubreaders(MapOfSets<int, CacheEntry> valIdToItems,
                                               MapOfSets<ReaderField, int> readerFieldToValIds)
        {
            List<Insanity> insanity = new List<Insanity>(23);

            Dictionary<ReaderField, HashSet<ReaderField>> badChildren = new Dictionary<ReaderField, HashSet<ReaderField>>(17);
            MapOfSets<ReaderField, ReaderField> badKids = new MapOfSets<ReaderField, ReaderField>(badChildren); // wrapper

            IDictionary<int, HashSet<CacheEntry>> viToItemSets = valIdToItems.Map;
            IDictionary<ReaderField, HashSet<int>> rfToValIdSets = readerFieldToValIds.Map;

            HashSet<ReaderField> seen = new HashSet<ReaderField>();

            foreach (ReaderField rf in rfToValIdSets.Keys)
            {
                if (seen.Contains(rf))
                    continue;

                System.Collections.IList kids = GetAllDecendentReaderKeys(rf.readerKey);
                foreach (Object kidKey in kids)
                {
                    ReaderField kid = new ReaderField(kidKey, rf.fieldName);

                    if (badChildren.ContainsKey(kid))
                    {
                        // we've already processed this kid as RF and found other problems
                        // track those problems as our own
                        badKids.Put(rf, kid);
                        badKids.PutAll(rf, badChildren[kid]);
                        badChildren.Remove(kid);
                    }
                    else if (rfToValIdSets.ContainsKey(kid))
                    {
                        // we have cache entries for the kid
                        badKids.Put(rf, kid);
                    }
                    seen.Add(kid);
                }
                seen.Add(rf);
            }

            // every mapping in badKids represents an Insanity
            foreach (ReaderField parent in badChildren.Keys)
            {
                HashSet<ReaderField> kids = badChildren[parent];

                List<CacheEntry> badEntries = new List<CacheEntry>(kids.Count * 2);

                // put parent entr(ies) in first
                foreach (int val in rfToValIdSets[parent])
                {
                    badEntries.AddRange(viToItemSets[val]);
                }

                // now the entries for the descendants
                foreach (ReaderField kid in kids)
                {
                    foreach (int val in rfToValIdSets[kid])
                    {
                        badEntries.AddRange(viToItemSets[val]);
                    }
                }

                insanity.Add(new Insanity(InsanityType.SUBREADER, "Found caches for decendents of " + parent.ToString(), badEntries.ToArray()));
            }

            return insanity;
        }

        /// <summary> Checks if the seed is an IndexReader, and if so will walk
        /// the hierarchy of subReaders building up a list of the objects
        /// returned by obj.getFieldCacheKey()
        /// </summary>
        private System.Collections.IList GetAllDecendentReaderKeys(System.Object seed)
        {
            List<object> all = new List<object>(17); // will grow as we iter
            all.Add(seed);
            // breadth-first walk: all.Count grows inside the loop as
            // sub-reader keys are appended
            for (int i = 0; i < all.Count; i++)
            {
                System.Object obj = all[i];
                if (obj is IndexReader)
                {
                    IndexReader[] subs = ((IndexReader) obj).GetSequentialSubReaders();
                    for (int j = 0; (null != subs) && (j < subs.Length); j++)
                    {
                        all.Add(subs[j].FieldCacheKey);
                    }
                }
            }
            // need to skip the first, because it was the seed
            return all.GetRange(1, all.Count - 1);
        }

        /// <summary> Simple pair object for using "readerKey + fieldName" as a Map key.</summary>
        private sealed class ReaderField
        {
            public System.Object readerKey;
            public System.String fieldName;

            public ReaderField(System.Object readerKey, System.String fieldName)
            {
                this.readerKey = readerKey;
                this.fieldName = fieldName;
            }

            public override int GetHashCode()
            {
                // consistent with Equals: both operands are compared there
                return readerKey.GetHashCode() * fieldName.GetHashCode();
            }

            public override bool Equals(System.Object that)
            {
                if (!(that is ReaderField))
                    return false;

                ReaderField other = (ReaderField) that;
                // readerKey is deliberately compared by reference identity
                return (this.readerKey == other.readerKey && this.fieldName.Equals(other.fieldName));
            }

            public override System.String ToString()
            {
                return readerKey.ToString() + "+" + fieldName;
            }
        }

        /// <summary> Simple container for a collection of related CacheEntry objects that
        /// in conjunction with each other represent some "insane" usage of the
        /// FieldCache.
        /// </summary>
        public sealed class Insanity
        {
            private InsanityType type;
            private System.String msg;
            private CacheEntry[] entries;

            public Insanity(InsanityType type, System.String msg, params CacheEntry[] entries)
            {
                if (null == type)
                {
                    throw new System.ArgumentException("Insanity requires non-null InsanityType");
                }
                if (null == entries || 0 == entries.Length)
                {
                    throw new System.ArgumentException("Insanity requires non-null/non-empty CacheEntry[]");
                }
                this.type = type;
                this.msg = msg;
                this.entries = entries;
            }

            /// <summary> Type of insane behavior this object represents.</summary>
            public InsanityType Type
            {
                get { return type; }
            }

            /// <summary> Description of the insane behavior.</summary>
            public string Msg
            {
                get { return msg; }
            }

            /// <summary> CacheEntry objects which suggest a problem.</summary>
            public CacheEntry[] GetCacheEntries()
            {
                return entries;
            }

            /// <summary> Multi-line representation of this Insanity object, starting with
            /// the Type and Msg, followed by each CacheEntry.ToString() on its
            /// own line prefaced by a tab character.
            /// </summary>
            public override System.String ToString()
            {
                System.Text.StringBuilder buf = new System.Text.StringBuilder();
                buf.Append(Type).Append(": ");

                System.String m = Msg;
                if (null != m)
                    buf.Append(m);

                buf.Append('\n');

                CacheEntry[] ce = GetCacheEntries();
                for (int i = 0; i < ce.Length; i++)
                {
                    buf.Append('\t').Append(ce[i].ToString()).Append('\n');
                }

                return buf.ToString();
            }
        }

        /// <summary> An Enumeration of the different types of "insane" behavior that
        /// may be detected in a FieldCache.
        ///
        /// </summary>
        /// <seealso cref="InsanityType.SUBREADER">
        /// </seealso>
        /// <seealso cref="InsanityType.VALUEMISMATCH">
        /// </seealso>
        /// <seealso cref="InsanityType.EXPECTED">
        /// </seealso>
        public sealed class InsanityType
        {
            private System.String label;

            internal InsanityType(System.String label)
            {
                this.label = label;
            }

            public override System.String ToString()
            {
                return label;
            }

            /// <summary> Indicates an overlap in cache usage on a given field
            /// in sub/super readers.
            /// </summary>
            public static readonly InsanityType SUBREADER = new InsanityType("SUBREADER");

            /// <summary> <p/>
            /// Indicates entries have the same reader+fieldname but
            /// different cached values.  This can happen if different datatypes,
            /// or parsers are used -- and while it's not necessarily a bug
            /// it's typically an indication of a possible problem.
            /// <p/>
            /// <p/>
            /// PNOTE: Only the reader, fieldname, and cached value are actually
            /// tested -- if two cache entries have different parsers or datatypes but
            /// the cached values are the same Object (== not just equal()) this method
            /// does not consider that a red flag.  This allows for subtle variations
            /// in the way a Parser is specified (null vs DEFAULT_LONG_PARSER, etc...)
            /// <p/>
            /// </summary>
            public static readonly InsanityType VALUEMISMATCH = new InsanityType("VALUEMISMATCH");

            /// <summary> Indicates an expected bit of "insanity".  This may be useful for
            /// clients that wish to preserve/log information about insane usage
            /// but indicate that it was expected.
            /// </summary>
            public static readonly InsanityType EXPECTED = new InsanityType("EXPECTED");
        }
    }
}