Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mono/Lucene.Net.Light.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Miguel de Icaza <miguel@gnome.org> 2015-06-11 19:34:09 +0300
committer: Miguel de Icaza <miguel@gnome.org> 2015-06-11 19:34:09 +0300
commit: 85978b7eb94738f516824341213d5e94060f5284 (patch)
tree: 879c92ba9e56a74ae2a0cbbaa802344b9c39e7d0 /src/core/Document
Initial commit of lightweight Lucene.Net to be used in Mono
Diffstat (limited to 'src/core/Document')
-rw-r--r-- src/core/Document/AbstractField.cs 312
-rw-r--r-- src/core/Document/CompressionTools.cs 150
-rw-r--r-- src/core/Document/DateField.cs 138
-rw-r--r-- src/core/Document/DateTools.cs 350
-rw-r--r-- src/core/Document/Document.cs 382
-rw-r--r-- src/core/Document/Field.cs 667
-rw-r--r-- src/core/Document/FieldSelector.cs 37
-rw-r--r-- src/core/Document/FieldSelectorResult.cs 71
-rw-r--r-- src/core/Document/Fieldable.cs 205
-rw-r--r-- src/core/Document/LoadFirstFieldSelector.cs 35
-rw-r--r-- src/core/Document/MapFieldSelector.cs 68
-rw-r--r-- src/core/Document/NumberTools.cs 221
-rw-r--r-- src/core/Document/NumericField.cs 294
-rw-r--r-- src/core/Document/SetBasedFieldSelector.cs 69
14 files changed, 2999 insertions, 0 deletions
diff --git a/src/core/Document/AbstractField.cs b/src/core/Document/AbstractField.cs
new file mode 100644
index 0000000..a526f1d
--- /dev/null
+++ b/src/core/Document/AbstractField.cs
@@ -0,0 +1,312 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.IO;
+using TokenStream = Lucene.Net.Analysis.TokenStream;
+using StringHelper = Lucene.Net.Util.StringHelper;
+using PhraseQuery = Lucene.Net.Search.PhraseQuery;
+using SpanQuery = Lucene.Net.Search.Spans.SpanQuery;
+
+namespace Lucene.Net.Documents
+{
+ /// <summary>
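+ /// Base class for <see cref="IFieldable" /> implementations. Holds the field name,
+ /// boost, value data and the stored/indexed/tokenized/term-vector flags.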
+ /// </summary>
+ [Serializable]
+ public abstract class AbstractField : IFieldable
+ {
+
+ protected internal System.String internalName = "body";
+ protected internal bool storeTermVector = false;
+ protected internal bool storeOffsetWithTermVector = false;
+ protected internal bool storePositionWithTermVector = false;
+ protected internal bool internalOmitNorms = false;
+ protected internal bool internalIsStored = false;
+ protected internal bool internalIsIndexed = true;
+ protected internal bool internalIsTokenized = true;
+ protected internal bool internalIsBinary = false;
+ protected internal bool lazy = false;
+ protected internal bool internalOmitTermFreqAndPositions = false;
+ protected internal float internalBoost = 1.0f;
+ // the data object for all different kind of field values
+ protected internal System.Object fieldsData = null;
+ // pre-analyzed tokenStream for indexed fields
+ protected internal TokenStream tokenStream;
+ // length/offset for all primitive types
+ protected internal int internalBinaryLength;
+ protected internal int internalbinaryOffset;
+
+ protected internal AbstractField()
+ {
+ }
+
+ protected internal AbstractField(System.String name, Field.Store store, Field.Index index, Field.TermVector termVector)
+ {
+ if (name == null)
+ throw new System.NullReferenceException("name cannot be null");
+ this.internalName = StringHelper.Intern(name); // field names are interned
+
+ this.internalIsStored = store.IsStored();
+ this.internalIsIndexed = index.IsIndexed();
+ this.internalIsTokenized = index.IsAnalyzed();
+ this.internalOmitNorms = index.OmitNorms();
+
+ this.internalIsBinary = false;
+
+ SetStoreTermVector(termVector);
+ }
+
+ /// <summary>Gets or sets the boost factor for hits for this field.
+ ///
+ /// <p/>The default value is 1.0.
+ ///
+ /// <p/>Note: this value is not stored directly with the document in the index.
+ /// Documents returned from <see cref="Lucene.Net.Index.IndexReader.Document(int)" /> and
+ /// <see cref="Lucene.Net.Search.Searcher.Doc(int)" /> may thus not have the same value present as when
+ /// this field was indexed.
+ /// </summary>
+ public virtual float Boost
+ {
+ get { return internalBoost; }
+ set { this.internalBoost = value; }
+ }
+
+ /// <summary>Returns the name of the field as an interned string.
+ /// For example "date", "title", "body", ...
+ /// </summary>
+ public virtual string Name
+ {
+ get { return internalName; }
+ }
+
+ protected internal virtual void SetStoreTermVector(Field.TermVector termVector)
+ {
+ this.storeTermVector = termVector.IsStored();
+ this.storePositionWithTermVector = termVector.WithPositions();
+ this.storeOffsetWithTermVector = termVector.WithOffsets();
+ }
+
+ /// <summary>True iff the value of the field is to be stored in the index for return
+ /// with search hits. It is an error for this to be true if a field is
+ /// Reader-valued.
+ /// </summary>
+ public bool IsStored
+ {
+ get { return internalIsStored; }
+ }
+
+ /// <summary>True iff the value of the field is to be indexed, so that it may be
+ /// searched on.
+ /// </summary>
+ public bool IsIndexed
+ {
+ get { return internalIsIndexed; }
+ }
+
+ /// <summary>True iff the value of the field should be tokenized as text prior to
+ /// indexing. Un-tokenized fields are indexed as a single word and may not be
+ /// Reader-valued.
+ /// </summary>
+ public bool IsTokenized
+ {
+ get { return internalIsTokenized; }
+ }
+
+ /// <summary>True iff the term or terms used to index this field are stored as a term
+ /// vector, available from <see cref="Lucene.Net.Index.IndexReader.GetTermFreqVector(int,String)" />.
+ /// These methods do not provide access to the original content of the field,
+ /// only to terms used to index it. If the original content must be
+ /// preserved, use the <c>stored</c> attribute instead.
+ ///
+ /// </summary>
+ /// <seealso cref="Lucene.Net.Index.IndexReader.GetTermFreqVector(int, String)">
+ /// </seealso>
+ public bool IsTermVectorStored
+ {
+ get { return storeTermVector; }
+ }
+
+ /// <summary> True iff terms are stored as term vector together with their offsets
+ /// (start and end position in source text).
+ /// </summary>
+ public virtual bool IsStoreOffsetWithTermVector
+ {
+ get { return storeOffsetWithTermVector; }
+ }
+
+ /// <summary> True iff terms are stored as term vector together with their token positions.</summary>
+ public virtual bool IsStorePositionWithTermVector
+ {
+ get { return storePositionWithTermVector; }
+ }
+
+ /// <summary>True iff the value of the field is stored as binary </summary>
+ public bool IsBinary
+ {
+ get { return internalIsBinary; }
+ }
+
+
+ /// <summary> Return the raw byte[] for the binary field. Note that
+ /// you must also call <see cref="BinaryLength" /> and <see cref="BinaryOffset" />
+ /// to know which range of bytes in this
+ /// returned array belong to the field.
+ /// </summary>
+ /// <returns> reference to the Field value as byte[]. </returns>
+ public virtual byte[] GetBinaryValue()
+ {
+ return GetBinaryValue(null);
+ }
+
+ public virtual byte[] GetBinaryValue(byte[] result)
+ {
+ if (internalIsBinary || fieldsData is byte[])
+ return (byte[]) fieldsData;
+ else
+ return null;
+ }
+
+ /// <summary> Returns length of byte[] segment that is used as value. If the Field is not binary,
+ /// the returned value is undefined.
+ /// </summary>
+ /// <value> length of byte[] segment that represents this Field value </value>
+ public virtual int BinaryLength
+ {
+ get
+ {
+ if (internalIsBinary)
+ {
+ return internalBinaryLength;
+ }
+ return fieldsData is byte[] ? ((byte[]) fieldsData).Length : 0;
+ }
+ }
+
+ /// <summary> Returns offset into byte[] segment that is used as value. If the Field is not binary,
+ /// the returned value is undefined.
+ /// </summary>
+ /// <value> index of the first character in byte[] segment that represents this Field value </value>
+ public virtual int BinaryOffset
+ {
+ get { return internalbinaryOffset; }
+ }
+
+ /// <summary>True if norms are omitted for this indexed field </summary>
+ public virtual bool OmitNorms
+ {
+ get { return internalOmitNorms; }
+ set { this.internalOmitNorms = value; }
+ }
+
+ /// <summary>Expert:
+ ///
+ /// If set, omit term freq, positions and payloads from
+ /// postings for this field.
+ ///
+ /// <p/><b>NOTE</b>: While this option reduces storage space
+ /// required in the index, it also means any query
+ /// requiring positional information, such as <see cref="PhraseQuery" />
+ /// or <see cref="SpanQuery" /> subclasses will
+ /// silently fail to find results.
+ /// </summary>
+ public virtual bool OmitTermFreqAndPositions
+ {
+ set { this.internalOmitTermFreqAndPositions = value; }
+ get { return internalOmitTermFreqAndPositions; }
+ }
+
+ public virtual bool IsLazy
+ {
+ get { return lazy; }
+ }
+
+ /// <summary>Prints a Field for human consumption. </summary>
+ public override System.String ToString()
+ {
+ System.Text.StringBuilder result = new System.Text.StringBuilder();
+ if (internalIsStored)
+ {
+ result.Append("stored");
+ }
+ if (internalIsIndexed)
+ {
+ if (result.Length > 0)
+ result.Append(",");
+ result.Append("indexed");
+ }
+ if (internalIsTokenized)
+ {
+ if (result.Length > 0)
+ result.Append(",");
+ result.Append("tokenized");
+ }
+ if (storeTermVector)
+ {
+ if (result.Length > 0)
+ result.Append(",");
+ result.Append("termVector");
+ }
+ if (storeOffsetWithTermVector)
+ {
+ if (result.Length > 0)
+ result.Append(",");
+ result.Append("termVectorOffsets");
+ }
+ if (storePositionWithTermVector)
+ {
+ if (result.Length > 0)
+ result.Append(",");
+ result.Append("termVectorPosition");
+ }
+ if (internalIsBinary)
+ {
+ if (result.Length > 0)
+ result.Append(",");
+ result.Append("binary");
+ }
+ if (internalOmitNorms)
+ {
+ result.Append(",omitNorms");
+ }
+ if (internalOmitTermFreqAndPositions)
+ {
+ result.Append(",omitTermFreqAndPositions");
+ }
+ if (lazy)
+ {
+ result.Append(",lazy");
+ }
+ result.Append('<');
+ result.Append(internalName);
+ result.Append(':');
+
+ if (fieldsData != null && lazy == false)
+ {
+ result.Append(fieldsData);
+ }
+
+ result.Append('>');
+ return result.ToString();
+ }
+
+ public abstract TokenStream TokenStreamValue { get; }
+ public abstract TextReader ReaderValue { get; }
+ public abstract string StringValue { get; }
+ }
+} \ No newline at end of file
diff --git a/src/core/Document/CompressionTools.cs b/src/core/Document/CompressionTools.cs
new file mode 100644
index 0000000..400633f
--- /dev/null
+++ b/src/core/Document/CompressionTools.cs
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+// To enable compression support in Lucene.Net,
+// you will need to define 'SHARP_ZIP_LIB' and reference the SharpZipLib
+// library. The SharpZipLib library can be downloaded from:
+// http://www.icsharpcode.net/OpenSource/SharpZipLib/
+
+using System;
+using Lucene.Net.Support;
+using UnicodeUtil = Lucene.Net.Util.UnicodeUtil;
+
+namespace Lucene.Net.Documents
+{
+
+ /// <summary>Simple utility class providing static methods to
+ /// compress and decompress binary data for stored fields.
+ /// This class uses the Deflater and Inflater instances created by
+ /// SharpZipLib.CreateDeflater() / CreateInflater() to compress and decompress.
+ /// </summary>
+
+ public class CompressionTools
+ {
+
+ // Export only static methods
+ private CompressionTools()
+ {
+ }
+
+ /// <summary>Compresses the specified byte range using the
+ /// specified compressionLevel (constants such as
+ /// <c>Deflater.BEST_COMPRESSION</c> are defined on Deflater).
+ /// </summary>
+ public static byte[] Compress(byte[] value_Renamed, int offset, int length, int compressionLevel)
+ {
+ /* Create an expandable byte array to hold the compressed data.
+ * You cannot use an array that's the same size as the original because
+ * there is no guarantee that the compressed data will be smaller than
+ * the uncompressed data. */
+ System.IO.MemoryStream bos = new System.IO.MemoryStream(length);
+
+ Deflater compressor = SharpZipLib.CreateDeflater();
+
+ try
+ {
+ compressor.SetLevel(compressionLevel);
+ compressor.SetInput(value_Renamed, offset, length);
+ compressor.Finish();
+
+ // Compress the data
+ byte[] buf = new byte[1024];
+ while (!compressor.IsFinished)
+ {
+ int count = compressor.Deflate(buf);
+ bos.Write(buf, 0, count);
+ }
+ }
+ finally
+ {
+ }
+
+ return bos.ToArray();
+ }
+
+ /// <summary>Compresses the specified byte range, with default BEST_COMPRESSION level </summary>
+ public static byte[] Compress(byte[] value_Renamed, int offset, int length)
+ {
+ return Compress(value_Renamed, offset, length, Deflater.BEST_COMPRESSION);
+ }
+
+ /// <summary>Compresses all bytes in the array, with default BEST_COMPRESSION level </summary>
+ public static byte[] Compress(byte[] value_Renamed)
+ {
+ return Compress(value_Renamed, 0, value_Renamed.Length, Deflater.BEST_COMPRESSION);
+ }
+
+ /// <summary>Compresses the String value, with default BEST_COMPRESSION level </summary>
+ public static byte[] CompressString(System.String value_Renamed)
+ {
+ return CompressString(value_Renamed, Deflater.BEST_COMPRESSION);
+ }
+
+ /// <summary>Compresses the String value using the specified
+ /// compressionLevel (constants such as
+ /// <c>Deflater.BEST_COMPRESSION</c> are defined on Deflater).
+ /// </summary>
+ public static byte[] CompressString(System.String value_Renamed, int compressionLevel)
+ {
+ UnicodeUtil.UTF8Result result = new UnicodeUtil.UTF8Result();
+ UnicodeUtil.UTF16toUTF8(value_Renamed, 0, value_Renamed.Length, result);
+ return Compress(result.result, 0, result.length, compressionLevel);
+ }
+
+ /// <summary>Decompress the byte array previously returned by
+ /// compress
+ /// </summary>
+ public static byte[] Decompress(byte[] value_Renamed)
+ {
+ // Create an expandable byte array to hold the decompressed data
+ System.IO.MemoryStream bos = new System.IO.MemoryStream(value_Renamed.Length);
+
+ Inflater decompressor = SharpZipLib.CreateInflater();
+
+ try
+ {
+ decompressor.SetInput(value_Renamed);
+
+ // Decompress the data
+ byte[] buf = new byte[1024];
+ while (!decompressor.IsFinished)
+ {
+ int count = decompressor.Inflate(buf);
+ bos.Write(buf, 0, count);
+ }
+ }
+ finally
+ {
+ }
+
+ return bos.ToArray();
+ }
+
+ /// <summary>Decompress the byte array previously returned by
+ /// compressString back into a String
+ /// </summary>
+ public static System.String DecompressString(byte[] value_Renamed)
+ {
+ UnicodeUtil.UTF16Result result = new UnicodeUtil.UTF16Result();
+ byte[] bytes = Decompress(value_Renamed);
+ UnicodeUtil.UTF8toUTF16(bytes, 0, bytes.Length, result);
+ return new System.String(result.result, 0, result.length);
+ }
+ }
+}
+
diff --git a/src/core/Document/DateField.cs b/src/core/Document/DateField.cs
new file mode 100644
index 0000000..6179f4c
--- /dev/null
+++ b/src/core/Document/DateField.cs
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Search;
+using Lucene.Net.Support;
+using NumericUtils = Lucene.Net.Util.NumericUtils;
+using PrefixQuery = Lucene.Net.Search.PrefixQuery;
+using TermRangeQuery = Lucene.Net.Search.TermRangeQuery;
+// for javadoc
+
+namespace Lucene.Net.Documents
+{
+ // for javadoc
+
+ // do not remove in 3.0, needed for reading old indexes!
+
+ /// <summary> Provides support for converting dates to strings and vice-versa.
+ /// The strings are structured so that lexicographic sorting orders by date,
+ /// which makes them suitable for use as field values and search terms.
+ ///
+ /// <p/>Note that this class saves dates with millisecond granularity,
+ /// which is bad for <see cref="TermRangeQuery" /> and <see cref="PrefixQuery" />, as those
+ /// queries are expanded to a BooleanQuery with a potentially large number
+ /// of terms when searching. Thus you might want to use
+ /// <see cref="DateTools" /> instead.
+ ///
+ /// <p/>
+ /// Note: dates before 1970 cannot be used, and therefore cannot be
+ /// indexed when using this class. See <see cref="DateTools" /> for an
+ /// alternative without such a limitation.
+ ///
+ /// <p/>
+ /// Another approach is <see cref="NumericUtils" />, which provides
+ /// a sortable binary representation (prefix encoded) of numeric values, which
+ /// date/time are.
+ /// For indexing a <see cref="DateTime" />, convert it to unix timestamp as
+ /// <c>long</c> and
+ /// index this as a numeric value with <see cref="NumericField" />
+ /// and use <see cref="NumericRangeQuery{T}" /> to query it.
+ ///
+ /// </summary>
+ /// <deprecated> If you build a new index, use <see cref="DateTools" /> or
+ /// <see cref="NumericField" /> instead.
+ /// This class is included for use with existing
+ /// indices and will be removed in a future (possibly Lucene 4.0)
+ /// </deprecated>
+ [Obsolete("If you build a new index, use DateTools or NumericField instead.This class is included for use with existing indices and will be removed in a future release (possibly Lucene 4.0).")]
+ public class DateField
+ {
+
+ private DateField()
+ {
+ }
+
+ // make date strings long enough to last a millennium
+ private static int DATE_LEN = Number.ToString(1000L * 365 * 24 * 60 * 60 * 1000, Number.MAX_RADIX).Length;
+
+ public static System.String MIN_DATE_STRING()
+ {
+ return TimeToString(0);
+ }
+
+ public static System.String MAX_DATE_STRING()
+ {
+ char[] buffer = new char[DATE_LEN];
+ char c = Character.ForDigit(Character.MAX_RADIX - 1, Character.MAX_RADIX);
+ for (int i = 0; i < DATE_LEN; i++)
+ buffer[i] = c;
+ return new System.String(buffer);
+ }
+
+ /// <summary> Converts a Date to a string suitable for indexing.</summary>
+ /// <exception cref="System.SystemException">if the date specified in the
+ /// method argument is before 1970
+ /// </exception>
+ public static System.String DateToString(System.DateTime date)
+ {
+ TimeSpan ts = date.Subtract(new DateTime(1970, 1, 1));
+ ts = ts.Subtract(TimeZone.CurrentTimeZone.GetUtcOffset(date));
+ return TimeToString(ts.Ticks / TimeSpan.TicksPerMillisecond);
+ }
+ /// <summary> Converts a millisecond time to a string suitable for indexing.</summary>
+ /// <exception cref="System.SystemException">if the time specified in the
+ /// method argument is negative (that is, before 1970) or too late to be encoded
+ /// </exception>
+ public static System.String TimeToString(long time)
+ {
+ if (time < 0)
+ throw new System.SystemException("time '" + time + "' is too early, must be >= 0");
+
+ System.String s = Number.ToString(time, Character.MAX_RADIX);
+
+ if (s.Length > DATE_LEN)
+ throw new System.SystemException("time '" + time + "' is too late, length of string " + "representation must be <= " + DATE_LEN);
+
+ // Pad with leading zeros
+ if (s.Length < DATE_LEN)
+ {
+ System.Text.StringBuilder sb = new System.Text.StringBuilder(s);
+ while (sb.Length < DATE_LEN)
+ sb.Insert(0, 0);
+ s = sb.ToString();
+ }
+
+ return s;
+ }
+
+ /// <summary>Converts a string-encoded date into a millisecond time. </summary>
+ public static long StringToTime(System.String s)
+ {
+ return Number.Parse(s, Number.MAX_RADIX);
+ }
+ /// <summary>Converts a string-encoded date into a Date object. </summary>
+ public static System.DateTime StringToDate(System.String s)
+ {
+ long ticks = StringToTime(s) * TimeSpan.TicksPerMillisecond;
+ System.DateTime date = new System.DateTime(1970, 1, 1);
+ date = date.AddTicks(ticks);
+ date = date.Add(TimeZone.CurrentTimeZone.GetUtcOffset(date));
+ return date;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Document/DateTools.cs b/src/core/Document/DateTools.cs
new file mode 100644
index 0000000..8263df1
--- /dev/null
+++ b/src/core/Document/DateTools.cs
@@ -0,0 +1,350 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Search;
+using NumericUtils = Lucene.Net.Util.NumericUtils;
+
+namespace Lucene.Net.Documents
+{
+
+ /// <summary> Provides support for converting dates to strings and vice-versa.
+ /// The strings are structured so that lexicographic sorting orders
+ /// them by date, which makes them suitable for use as field values
+ /// and search terms.
+ ///
+ /// <p/>This class also helps you to limit the resolution of your dates. Do not
+ /// save dates with a finer resolution than you really need, as then
+ /// RangeQuery and PrefixQuery will require more memory and become slower.
+ ///
+ /// <p/>Compared to <see cref="DateField" /> the strings generated by the methods
+ /// in this class take slightly more space, unless your selected resolution
+ /// is set to <c>Resolution.DAY</c> or lower.
+ ///
+ /// <p/>
+ /// Another approach is <see cref="NumericUtils" />, which provides
+ /// a sortable binary representation (prefix encoded) of numeric values, which
+ /// date/time are.
+ /// For indexing a <see cref="DateTime" />, convert it to unix timestamp as
+ /// <c>long</c> and
+ /// index this as a numeric value with <see cref="NumericField" />
+ /// and use <see cref="NumericRangeQuery{T}" /> to query it.
+ /// </summary>
+ public class DateTools
+ {
+
+ private static readonly System.String YEAR_FORMAT = "yyyy";
+ private static readonly System.String MONTH_FORMAT = "yyyyMM";
+ private static readonly System.String DAY_FORMAT = "yyyyMMdd";
+ private static readonly System.String HOUR_FORMAT = "yyyyMMddHH";
+ private static readonly System.String MINUTE_FORMAT = "yyyyMMddHHmm";
+ private static readonly System.String SECOND_FORMAT = "yyyyMMddHHmmss";
+ private static readonly System.String MILLISECOND_FORMAT = "yyyyMMddHHmmssfff";
+
+ private static readonly System.Globalization.Calendar calInstance = new System.Globalization.GregorianCalendar();
+
+ // cannot create, the class has static methods only
+ private DateTools()
+ {
+ }
+
+ /// <summary> Converts a Date to a string suitable for indexing.
+ ///
+ /// </summary>
+ /// <param name="date">the date to be converted
+ /// </param>
+ /// <param name="resolution">the desired resolution, see
+ /// <see cref="Round(DateTime, DateTools.Resolution)" />
+ /// </param>
+ /// <returns> a string in format <c>yyyyMMddHHmmssSSS</c> or shorter,
+ /// depending on <c>resolution</c>; using GMT as timezone
+ /// </returns>
+ public static System.String DateToString(System.DateTime date, Resolution resolution)
+ {
+ return TimeToString(date.Ticks / TimeSpan.TicksPerMillisecond, resolution);
+ }
+
+ /// <summary> Converts a millisecond time to a string suitable for indexing.
+ ///
+ /// </summary>
+ /// <param name="time">the date expressed as milliseconds since January 1, 0001, 00:00:00
+ /// (<c>DateTime.Ticks / TimeSpan.TicksPerMillisecond</c>), not since 1970 </param>
+ /// <param name="resolution">the desired resolution, see
+ /// <see cref="Round(long, DateTools.Resolution)" />
+ /// </param>
+ /// <returns> a string in format <c>yyyyMMddHHmmssSSS</c> or shorter,
+ /// depending on <c>resolution</c>; using GMT as timezone
+ /// </returns>
+ public static System.String TimeToString(long time, Resolution resolution)
+ {
+ System.DateTime date = new System.DateTime(Round(time, resolution));
+
+ if (resolution == Resolution.YEAR)
+ {
+ return date.ToString(YEAR_FORMAT, System.Globalization.CultureInfo.InvariantCulture);
+ }
+ else if (resolution == Resolution.MONTH)
+ {
+ return date.ToString(MONTH_FORMAT, System.Globalization.CultureInfo.InvariantCulture);
+ }
+ else if (resolution == Resolution.DAY)
+ {
+ return date.ToString(DAY_FORMAT, System.Globalization.CultureInfo.InvariantCulture);
+ }
+ else if (resolution == Resolution.HOUR)
+ {
+ return date.ToString(HOUR_FORMAT, System.Globalization.CultureInfo.InvariantCulture);
+ }
+ else if (resolution == Resolution.MINUTE)
+ {
+ return date.ToString(MINUTE_FORMAT, System.Globalization.CultureInfo.InvariantCulture);
+ }
+ else if (resolution == Resolution.SECOND)
+ {
+ return date.ToString(SECOND_FORMAT, System.Globalization.CultureInfo.InvariantCulture);
+ }
+ else if (resolution == Resolution.MILLISECOND)
+ {
+ return date.ToString(MILLISECOND_FORMAT, System.Globalization.CultureInfo.InvariantCulture);
+ }
+
+ throw new System.ArgumentException("unknown resolution " + resolution);
+ }
+
+ /// <summary> Converts a string produced by <c>TimeToString</c> or
+ /// <c>DateToString</c> back to a time, represented as the
+ /// <see cref="DateTime.Ticks" /> value of the parsed date.
+ ///
+ /// </summary>
+ /// <param name="dateString">the date string to be converted
+ /// </param>
+ /// <returns> the tick count of the parsed date (ticks, not milliseconds since 1970)
+ /// </returns>
+ /// <exception cref="System.FormatException">if <c>dateString</c> is not in the
+ /// expected format
+ /// </exception>
+ public static long StringToTime(System.String dateString)
+ {
+ return StringToDate(dateString).Ticks;
+ }
+
+ /// <summary> Converts a string produced by <c>timeToString</c> or
+ /// <c>DateToString</c> back to a time, represented as a
+ /// Date object.
+ ///
+ /// </summary>
+ /// <param name="dateString">the date string to be converted
+ /// </param>
+ /// <returns> the parsed time as a Date object
+ /// </returns>
+ /// <exception cref="System.FormatException">if <c>dateString</c> is not in the
+ /// expected format
+ /// </exception>
+ public static System.DateTime StringToDate(System.String dateString)
+ {
+ System.DateTime date;
+ if (dateString.Length == 4)
+ {
+ date = new System.DateTime(Convert.ToInt16(dateString.Substring(0, 4)),
+ 1, 1, 0, 0, 0, 0);
+ }
+ else if (dateString.Length == 6)
+ {
+ date = new System.DateTime(Convert.ToInt16(dateString.Substring(0, 4)),
+ Convert.ToInt16(dateString.Substring(4, 2)),
+ 1, 0, 0, 0, 0);
+ }
+ else if (dateString.Length == 8)
+ {
+ date = new System.DateTime(Convert.ToInt16(dateString.Substring(0, 4)),
+ Convert.ToInt16(dateString.Substring(4, 2)),
+ Convert.ToInt16(dateString.Substring(6, 2)),
+ 0, 0, 0, 0);
+ }
+ else if (dateString.Length == 10)
+ {
+ date = new System.DateTime(Convert.ToInt16(dateString.Substring(0, 4)),
+ Convert.ToInt16(dateString.Substring(4, 2)),
+ Convert.ToInt16(dateString.Substring(6, 2)),
+ Convert.ToInt16(dateString.Substring(8, 2)),
+ 0, 0, 0);
+ }
+ else if (dateString.Length == 12)
+ {
+ date = new System.DateTime(Convert.ToInt16(dateString.Substring(0, 4)),
+ Convert.ToInt16(dateString.Substring(4, 2)),
+ Convert.ToInt16(dateString.Substring(6, 2)),
+ Convert.ToInt16(dateString.Substring(8, 2)),
+ Convert.ToInt16(dateString.Substring(10, 2)),
+ 0, 0);
+ }
+ else if (dateString.Length == 14)
+ {
+ date = new System.DateTime(Convert.ToInt16(dateString.Substring(0, 4)),
+ Convert.ToInt16(dateString.Substring(4, 2)),
+ Convert.ToInt16(dateString.Substring(6, 2)),
+ Convert.ToInt16(dateString.Substring(8, 2)),
+ Convert.ToInt16(dateString.Substring(10, 2)),
+ Convert.ToInt16(dateString.Substring(12, 2)),
+ 0);
+ }
+ else if (dateString.Length == 17)
+ {
+ date = new System.DateTime(Convert.ToInt16(dateString.Substring(0, 4)),
+ Convert.ToInt16(dateString.Substring(4, 2)),
+ Convert.ToInt16(dateString.Substring(6, 2)),
+ Convert.ToInt16(dateString.Substring(8, 2)),
+ Convert.ToInt16(dateString.Substring(10, 2)),
+ Convert.ToInt16(dateString.Substring(12, 2)),
+ Convert.ToInt16(dateString.Substring(14, 3)));
+ }
+ else
+ {
+ throw new System.FormatException("Input is not valid date string: " + dateString);
+ }
+ return date;
+ }
+
+ /// <summary> Limit a date's resolution. For example, the date <c>2004-09-21 13:50:11</c>
+ /// will be changed to <c>2004-09-01 00:00:00</c> when using
+ /// <c>Resolution.MONTH</c>.
+ ///
+ /// </summary>
+ /// <param name="date"></param>
+ /// <param name="resolution">The desired resolution of the date to be returned
+ /// </param>
+ /// <returns> the date with all values more precise than <c>resolution</c>
+ /// set to 0 or 1
+ /// </returns>
+ public static System.DateTime Round(System.DateTime date, Resolution resolution)
+ {
+ return new System.DateTime(Round(date.Ticks / TimeSpan.TicksPerMillisecond, resolution));
+ }
+
+ /// <summary> Limit a date's resolution. For example, a time that
+ /// represents 2004-09-21 13:50:11 will be changed to one that
+ /// represents 2004-09-01 00:00:00 when using
+ /// <c>Resolution.MONTH</c>.
+ ///
+ /// </summary>
+ /// <param name="time">The time in milliseconds (not ticks).</param>
+ /// <param name="resolution">The desired resolution of the date to be returned
+ /// </param>
+ /// <returns> the date with all values more precise than <c>resolution</c>
+ /// set to 0 or 1, expressed as <see cref="DateTime.Ticks" /> (not milliseconds)
+ /// </returns>
+ public static long Round(long time, Resolution resolution)
+ {
+ System.DateTime dt = new System.DateTime(time * TimeSpan.TicksPerMillisecond);
+
+ if (resolution == Resolution.YEAR)
+ {
+ dt = dt.AddMonths(1 - dt.Month);
+ dt = dt.AddDays(1 - dt.Day);
+ dt = dt.AddHours(0 - dt.Hour);
+ dt = dt.AddMinutes(0 - dt.Minute);
+ dt = dt.AddSeconds(0 - dt.Second);
+ dt = dt.AddMilliseconds(0 - dt.Millisecond);
+ }
+ else if (resolution == Resolution.MONTH)
+ {
+ dt = dt.AddDays(1 - dt.Day);
+ dt = dt.AddHours(0 - dt.Hour);
+ dt = dt.AddMinutes(0 - dt.Minute);
+ dt = dt.AddSeconds(0 - dt.Second);
+ dt = dt.AddMilliseconds(0 - dt.Millisecond);
+ }
+ else if (resolution == Resolution.DAY)
+ {
+ dt = dt.AddHours(0 - dt.Hour);
+ dt = dt.AddMinutes(0 - dt.Minute);
+ dt = dt.AddSeconds(0 - dt.Second);
+ dt = dt.AddMilliseconds(0 - dt.Millisecond);
+ }
+ else if (resolution == Resolution.HOUR)
+ {
+ dt = dt.AddMinutes(0 - dt.Minute);
+ dt = dt.AddSeconds(0 - dt.Second);
+ dt = dt.AddMilliseconds(0 - dt.Millisecond);
+ }
+ else if (resolution == Resolution.MINUTE)
+ {
+ dt = dt.AddSeconds(0 - dt.Second);
+ dt = dt.AddMilliseconds(0 - dt.Millisecond);
+ }
+ else if (resolution == Resolution.SECOND)
+ {
+ dt = dt.AddMilliseconds(0 - dt.Millisecond);
+ }
+ else if (resolution == Resolution.MILLISECOND)
+ {
+ // don't cut off anything
+ }
+ else
+ {
+ throw new System.ArgumentException("unknown resolution " + resolution);
+ }
+ return dt.Ticks;
+ }
+
+ /// <summary>Specifies the time granularity. </summary>
+ public class Resolution
+ {
+
+ public static readonly Resolution YEAR = new Resolution("year");
+ public static readonly Resolution MONTH = new Resolution("month");
+ public static readonly Resolution DAY = new Resolution("day");
+ public static readonly Resolution HOUR = new Resolution("hour");
+ public static readonly Resolution MINUTE = new Resolution("minute");
+ public static readonly Resolution SECOND = new Resolution("second");
+ public static readonly Resolution MILLISECOND = new Resolution("millisecond");
+
+ private System.String resolution;
+
+ internal Resolution()
+ {
+ }
+
+ internal Resolution(System.String resolution)
+ {
+ this.resolution = resolution;
+ }
+
+ public override System.String ToString()
+ {
+ return resolution;
+ }
+ }
+ static DateTools()
+ {
+ {
+ // times need to be normalized so the value doesn't depend on the
+ // location the index is created/used:
+ // {{Aroush-2.1}}
+ /*
+ YEAR_FORMAT.setTimeZone(GMT);
+ MONTH_FORMAT.setTimeZone(GMT);
+ DAY_FORMAT.setTimeZone(GMT);
+ HOUR_FORMAT.setTimeZone(GMT);
+ MINUTE_FORMAT.setTimeZone(GMT);
+ SECOND_FORMAT.setTimeZone(GMT);
+ MILLISECOND_FORMAT.setTimeZone(GMT);
+ */
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Document/Document.cs b/src/core/Document/Document.cs
new file mode 100644
index 0000000..f24a46a
--- /dev/null
+++ b/src/core/Document/Document.cs
@@ -0,0 +1,382 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+// for javadoc
+using IndexReader = Lucene.Net.Index.IndexReader;
+using ScoreDoc = Lucene.Net.Search.ScoreDoc;
+using Searcher = Lucene.Net.Search.Searcher;
+
+namespace Lucene.Net.Documents
+{
+
+ /// <summary>Documents are the unit of indexing and search.
+ ///
+ /// A Document is a set of fields. Each field has a name and a textual value.
+ /// A field may be <see cref="IFieldable.IsStored()">stored</see> with the document, in which
+ /// case it is returned with search hits on the document. Thus each document
+ /// should typically contain one or more stored fields which uniquely identify
+ /// it.
+ ///
+ /// <p/>Note that fields which are <i>not</i> <see cref="IFieldable.IsStored()">stored</see> are
+ /// <i>not</i> available in documents retrieved from the index, e.g. with <see cref="ScoreDoc.Doc" />,
+ /// <see cref="Searcher.Doc(int)" /> or <see cref="IndexReader.Document(int)" />.
+ /// </summary>
+
+ [Serializable]
+ public sealed class Document
+ {
+     /// <summary>Non-generic enumerator over the fields of the enclosing
+     /// <see cref="Document" />. No other member of <see cref="Document" />
+     /// refers to this type.
+     /// </summary>
+     private class AnonymousClassEnumeration : System.Collections.IEnumerator
+     {
+         private Document enclosingInstance;
+         private System.Object tempAuxObj;
+         internal System.Collections.IEnumerator iter;
+
+         public AnonymousClassEnumeration(Document enclosingInstance)
+         {
+             InitBlock(enclosingInstance);
+         }
+
+         private void InitBlock(Document enclosingInstance)
+         {
+             this.enclosingInstance = enclosingInstance;
+             iter = Enclosing_Instance.fields.GetEnumerator();
+         }
+
+         /// <summary>The document whose fields are being enumerated. </summary>
+         public Document Enclosing_Instance
+         {
+             get
+             {
+                 return enclosingInstance;
+             }
+         }
+
+         /// <summary>The element cached by the most recent successful
+         /// <see cref="MoveNext" />; null before the first one and after
+         /// <see cref="Reset" />.
+         /// </summary>
+         public System.Object Current
+         {
+             get
+             {
+                 return tempAuxObj;
+             }
+         }
+
+         /// <summary>Advances to the next field and caches it when there is one. </summary>
+         public bool MoveNext()
+         {
+             bool result = HasMoreElements();
+             if (result)
+             {
+                 tempAuxObj = NextElement();
+             }
+             return result;
+         }
+
+         /// <summary>Restarts the enumeration from the first field. </summary>
+         public void Reset()
+         {
+             // Clearing the cached element alone would leave the underlying
+             // enumerator where it was, so a "reset" enumeration would resume
+             // mid-list. Release the old enumerator and start a fresh one.
+             System.IDisposable previous = iter as System.IDisposable;
+             if (previous != null)
+             {
+                 previous.Dispose();
+             }
+             tempAuxObj = null;
+             iter = Enclosing_Instance.fields.GetEnumerator();
+         }
+
+         /// <summary>Advances the underlying enumerator and reports whether an element is available. </summary>
+         public bool HasMoreElements()
+         {
+             return iter.MoveNext();
+         }
+
+         /// <summary>Returns the element the underlying enumerator is positioned on. </summary>
+         public System.Object NextElement()
+         {
+             return iter.Current;
+         }
+     }
+
+     internal System.Collections.Generic.IList<IFieldable> fields = new System.Collections.Generic.List<IFieldable>();
+     private float boost = 1.0f;
+
+     /// <summary>Constructs a new document with no fields. </summary>
+     public Document()
+     {
+     }
+
+
+     /// <summary>Gets or sets, at indexing time, the boost factor.
+     /// <para>
+     /// The default is 1.0
+     /// </para>
+     /// <p/>Note that once a document is indexed this value is no longer available
+     /// from the index. At search time, for retrieved documents, this method always
+     /// returns 1. This however does not mean that the boost value set at indexing
+     /// time was ignored - it was just combined with other indexing time factors and
+     /// stored elsewhere, for better indexing and search performance. (For more
+     /// information see the "norm(t,d)" part of the scoring formula in
+     /// <see cref="Lucene.Net.Search.Similarity">Similarity</see>.)
+     /// </summary>
+     public float Boost
+     {
+         get { return boost; }
+         set { this.boost = value; }
+     }
+
+     /// <summary> <p/>Adds a field to a document. Several fields may be added with
+     /// the same name. In this case, if the fields are indexed, their text is
+     /// treated as though appended for the purposes of search.<p/>
+     /// <p/> Note that add like the removeField(s) methods only makes sense
+     /// prior to adding a document to an index. These methods cannot
+     /// be used to change the content of an existing index! In order to achieve this,
+     /// a document has to be deleted from an index and a new changed version of that
+     /// document has to be added.<p/>
+     /// </summary>
+     /// <param name="field">the field to append to this document</param>
+     public void Add(IFieldable field)
+     {
+         fields.Add(field);
+     }
+
+     /// <summary> <p/>Removes field with the specified name from the document.
+     /// If multiple fields exist with this name, this method removes the first field that has been added.
+     /// If there is no field with the specified name, the document remains unchanged.<p/>
+     /// <p/> Note that the removeField(s) methods like the add method only make sense
+     /// prior to adding a document to an index. These methods cannot
+     /// be used to change the content of an existing index! In order to achieve this,
+     /// a document has to be deleted from an index and a new changed version of that
+     /// document has to be added.<p/>
+     /// </summary>
+     /// <param name="name">the name of the field to remove</param>
+     public void RemoveField(System.String name)
+     {
+         // Walk by index and remove by position: this removes exactly the
+         // matched entry, needs no second search of the list, and does not
+         // modify the list underneath an open enumerator.
+         for (int i = 0; i < fields.Count; i++)
+         {
+             if (fields[i].Name.Equals(name))
+             {
+                 fields.RemoveAt(i);
+                 return;
+             }
+         }
+     }
+
+     /// <summary> <p/>Removes all fields with the given name from the document.
+     /// If there is no field with the specified name, the document remains unchanged.<p/>
+     /// <p/> Note that the removeField(s) methods like the add method only make sense
+     /// prior to adding a document to an index. These methods cannot
+     /// be used to change the content of an existing index! In order to achieve this,
+     /// a document has to be deleted from an index and a new changed version of that
+     /// document has to be added.<p/>
+     /// </summary>
+     /// <param name="name">the name of the fields to remove</param>
+     public void RemoveFields(System.String name)
+     {
+         // Iterate backwards so that removing an entry does not shift the
+         // positions that are still to be visited.
+         for (int i = fields.Count - 1; i >= 0; i--)
+         {
+             IFieldable field = fields[i];
+             if (field.Name.Equals(name))
+             {
+                 fields.RemoveAt(i);
+             }
+         }
+     }
+
+     /// <summary>Returns a field with the given name if any exist in this document, or
+     /// null. If multiple fields exist with this name, this method returns the
+     /// first value added.
+     /// Do not use this method with lazy loaded fields: the first match is cast
+     /// to <see cref="Field" />, which fails for any other <see cref="IFieldable" />.
+     /// </summary>
+     public Field GetField(System.String name)
+     {
+         return (Field) GetFieldable(name);
+     }
+
+
+     /// <summary>Returns a field with the given name if any exist in this document, or
+     /// null. If multiple fields exist with this name, this method returns the
+     /// first value added.
+     /// </summary>
+     public IFieldable GetFieldable(System.String name)
+     {
+         foreach (IFieldable field in fields)
+         {
+             if (field.Name.Equals(name))
+                 return field;
+         }
+         return null;
+     }
+
+     /// <summary>Returns the string value of the field with the given name if any exist in
+     /// this document, or null. If multiple fields exist with this name, this
+     /// method returns the first value added. If only binary fields with this name
+     /// exist, returns null.
+     /// </summary>
+     public System.String Get(System.String name)
+     {
+         foreach (IFieldable field in fields)
+         {
+             if (field.Name.Equals(name) && (!field.IsBinary))
+                 return field.StringValue;
+         }
+         return null;
+     }
+
+     /// <summary>Returns a List of all the fields in a document. The returned
+     /// list is the document's own list, not a copy.
+     /// <p/>Note that fields which are <i>not</i> <see cref="IFieldable.IsStored()">stored</see> are
+     /// <i>not</i> available in documents retrieved from the
+     /// index, e.g. <see cref="Searcher.Doc(int)" /> or <see cref="IndexReader.Document(int)" />.
+     /// </summary>
+     public System.Collections.Generic.IList<IFieldable> GetFields()
+     {
+         return fields;
+     }
+
+     private static readonly Field[] NO_FIELDS = new Field[0];
+
+     /// <summary> Returns an array of <see cref="Field" />s with the given name.
+     /// Do not use with lazy loaded fields: every match is cast to <see cref="Field" />.
+     /// This method returns an empty array when there are no
+     /// matching fields. It never returns null.
+     ///
+     /// </summary>
+     /// <param name="name">the name of the field
+     /// </param>
+     /// <returns> a <c>Field[]</c> array
+     /// </returns>
+     public Field[] GetFields(System.String name)
+     {
+         var result = new System.Collections.Generic.List<Field>();
+         foreach (IFieldable field in fields)
+         {
+             if (field.Name.Equals(name))
+             {
+                 result.Add((Field) field);
+             }
+         }
+
+         if (result.Count == 0)
+             return NO_FIELDS;
+
+         return result.ToArray();
+     }
+
+
+     private static readonly IFieldable[] NO_FIELDABLES = new IFieldable[0];
+
+     /// <summary> Returns an array of <see cref="IFieldable" />s with the given name.
+     /// This method returns an empty array when there are no
+     /// matching fields. It never returns null.
+     ///
+     /// </summary>
+     /// <param name="name">the name of the field
+     /// </param>
+     /// <returns> an <c>IFieldable[]</c> array
+     /// </returns>
+     public IFieldable[] GetFieldables(System.String name)
+     {
+         var result = new System.Collections.Generic.List<IFieldable>();
+         foreach (IFieldable field in fields)
+         {
+             if (field.Name.Equals(name))
+             {
+                 result.Add(field);
+             }
+         }
+
+         if (result.Count == 0)
+             return NO_FIELDABLES;
+
+         return result.ToArray();
+     }
+
+
+     private static readonly System.String[] NO_STRINGS = new System.String[0];
+
+     /// <summary> Returns an array of values of the field specified as the method parameter.
+     /// Binary fields are skipped. This method returns an empty array when there are no
+     /// matching fields. It never returns null.
+     /// </summary>
+     /// <param name="name">the name of the field
+     /// </param>
+     /// <returns> a <c>String[]</c> of field values
+     /// </returns>
+     public System.String[] GetValues(System.String name)
+     {
+         var result = new System.Collections.Generic.List<string>();
+         foreach (IFieldable field in fields)
+         {
+             if (field.Name.Equals(name) && (!field.IsBinary))
+                 result.Add(field.StringValue);
+         }
+
+         if (result.Count == 0)
+             return NO_STRINGS;
+
+         return result.ToArray();
+     }
+
+     private static readonly byte[][] NO_BYTES = new byte[0][];
+
+     /// <summary> Returns an array of byte arrays for the fields that have the name specified
+     /// as the method parameter. This method returns an empty
+     /// array when there are no matching fields. It never
+     /// returns null.
+     ///
+     /// </summary>
+     /// <param name="name">the name of the field
+     /// </param>
+     /// <returns> a <c>byte[][]</c> of binary field values
+     /// </returns>
+     public byte[][] GetBinaryValues(System.String name)
+     {
+         var result = new System.Collections.Generic.List<byte[]>();
+         foreach (IFieldable field in fields)
+         {
+             if (field.Name.Equals(name) && (field.IsBinary))
+                 result.Add(field.GetBinaryValue());
+         }
+
+         if (result.Count == 0)
+             return NO_BYTES;
+
+         return result.ToArray();
+     }
+
+     /// <summary> Returns an array of bytes for the first (or only) field that has the name
+     /// specified as the method parameter. This method will return <c>null</c>
+     /// if no binary fields with the specified name are available.
+     /// There may be non-binary fields with the same name.
+     ///
+     /// </summary>
+     /// <param name="name">the name of the field.
+     /// </param>
+     /// <returns> a <c>byte[]</c> containing the binary field value or <c>null</c>
+     /// </returns>
+     public byte[] GetBinaryValue(System.String name)
+     {
+         foreach (IFieldable field in fields)
+         {
+             if (field.Name.Equals(name) && (field.IsBinary))
+                 return field.GetBinaryValue();
+         }
+         return null;
+     }
+
+     /// <summary>Prints the fields of a document for human consumption. </summary>
+     public override System.String ToString()
+     {
+         System.Text.StringBuilder buffer = new System.Text.StringBuilder();
+         buffer.Append("Document<");
+         for (int i = 0; i < fields.Count; i++)
+         {
+             IFieldable field = fields[i];
+             buffer.Append(field.ToString());
+             // Single space between fields, none after the last one.
+             if (i != fields.Count - 1)
+                 buffer.Append(" ");
+         }
+         buffer.Append(">");
+         return buffer.ToString();
+     }
+
+     /// <summary>Exposes the internal field list; the name indicates it exists for NUnit tests. </summary>
+     public System.Collections.Generic.IList<IFieldable> fields_ForNUnit
+     {
+         get { return fields; }
+     }
+ }
+} \ No newline at end of file
diff --git a/src/core/Document/Field.cs b/src/core/Document/Field.cs
new file mode 100644
index 0000000..d39d9f4
--- /dev/null
+++ b/src/core/Document/Field.cs
@@ -0,0 +1,667 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.IO;
+using TokenStream = Lucene.Net.Analysis.TokenStream;
+using IndexWriter = Lucene.Net.Index.IndexWriter;
+using StringHelper = Lucene.Net.Util.StringHelper;
+
+namespace Lucene.Net.Documents
+{
+
+ /// <summary>A field is a section of a Document. Each field has two parts, a name and a
+ /// value. Values may be free text, provided as a String or as a Reader, or they
+ /// may be atomic keywords, which are not further processed. Such keywords may
+ /// be used to represent dates, urls, etc. Fields are optionally stored in the
+ /// index, so that they may be returned with hits on the document.
+ /// </summary>
+
+ [Serializable]
+ public sealed class Field:AbstractField, IFieldable
+ {
+     /// <summary>Specifies whether and how a field should be stored. </summary>
+     public enum Store
+     {
+         /// <summary>Store the original field value in the index. This is useful for short texts
+         /// like a document's title which should be displayed with the results. The
+         /// value is stored in its original form, i.e. no analyzer is used before it is
+         /// stored.
+         /// </summary>
+         YES,
+
+         /// <summary>Do not store the field value in the index. </summary>
+         NO
+     }
+
+     /// <summary>Specifies whether and how a field should be indexed. </summary>
+
+     public enum Index
+     {
+         /// <summary>Do not index the field value. This field can thus not be searched,
+         /// but one can still access its contents provided it is
+         /// <see cref="Field.Store">stored</see>.
+         /// </summary>
+         NO,
+
+         /// <summary>Index the tokens produced by running the field's
+         /// value through an Analyzer. This is useful for
+         /// common text.
+         /// </summary>
+         ANALYZED,
+
+         /// <summary>Index the field's value without using an Analyzer, so it can be searched.
+         /// As no analyzer is used the value will be stored as a single term. This is
+         /// useful for unique Ids like product numbers.
+         /// </summary>
+         NOT_ANALYZED,
+
+         /// <summary>Expert: Index the field's value without an Analyzer,
+         /// and also disable the storing of norms. Note that you
+         /// can also separately enable/disable norms by setting
+         /// <see cref="AbstractField.OmitNorms" />. No norms means that
+         /// index-time field and document boosting and field
+         /// length normalization are disabled. The benefit is
+         /// less memory usage as norms take up one byte of RAM
+         /// per indexed field for every document in the index,
+         /// during searching. Note that once you index a given
+         /// field <i>with</i> norms enabled, disabling norms will
+         /// have no effect. In other words, for this to have the
+         /// above described effect on a field, all instances of
+         /// that field must be indexed with NOT_ANALYZED_NO_NORMS
+         /// from the beginning.
+         /// </summary>
+         NOT_ANALYZED_NO_NORMS,
+
+         /// <summary>Expert: Index the tokens produced by running the
+         /// field's value through an Analyzer, and also
+         /// separately disable the storing of norms. See
+         /// <see cref="NOT_ANALYZED_NO_NORMS" /> for what norms are
+         /// and why you may want to disable them.
+         /// </summary>
+         ANALYZED_NO_NORMS,
+     }
+
+     /// <summary>Specifies whether and how a field should have term vectors. </summary>
+     public enum TermVector
+     {
+         /// <summary>Do not store term vectors. </summary>
+         NO,
+
+         /// <summary>Store the term vectors of each document. A term vector is a list
+         /// of the document's terms and their number of occurrences in that document.
+         /// </summary>
+         YES,
+
+         /// <summary> Store the term vector + token position information
+         /// </summary>
+         /// <seealso cref="YES" />
+         WITH_POSITIONS,
+
+         /// <summary> Store the term vector + Token offset information
+         /// </summary>
+         /// <seealso cref="YES" />
+         WITH_OFFSETS,
+
+         /// <summary> Store the term vector + Token position and offset information
+         /// </summary>
+         /// <seealso cref="YES" />
+         /// <seealso cref="WITH_POSITIONS" />
+         /// <seealso cref="WITH_OFFSETS" />
+         WITH_POSITIONS_OFFSETS,
+     }
+
+
+     /// <summary>The value of the field as a String, or null. If null, the Reader value or
+     /// binary value is used. Exactly one of stringValue(),
+     /// readerValue(), and getBinaryValue() must be set.
+     /// </summary>
+     public override string StringValue
+     {
+         get { return fieldsData as System.String; }
+     }
+
+     /// <summary>The value of the field as a Reader, or null. If null, the String value or
+     /// binary value is used. Exactly one of stringValue(),
+     /// readerValue(), and getBinaryValue() must be set.
+     /// </summary>
+     public override TextReader ReaderValue
+     {
+         get { return fieldsData as System.IO.TextReader; }
+     }
+
+     /// <summary>The TokenStream for this field to be used when indexing, or null. If null, the Reader value
+     /// or String value is analyzed to produce the indexed tokens.
+     /// </summary>
+     public override TokenStream TokenStreamValue
+     {
+         get { return tokenStream; }
+     }
+
+
+     /// <summary><p/>Expert: change the value of this field. This can
+     /// be used during indexing to re-use a single Field
+     /// instance to improve indexing speed by avoiding GC cost
+     /// of new'ing and reclaiming Field instances. Typically
+     /// a single <see cref="Document" /> instance is re-used as
+     /// well. This helps most on small documents.<p/>
+     ///
+     /// <p/>Each Field instance should only be used once
+     /// within a single <see cref="Document" /> instance. See <a
+     /// href="http://wiki.apache.org/lucene-java/ImproveIndexingSpeed">ImproveIndexingSpeed</a>
+     /// for details.<p/>
+     /// </summary>
+     /// <param name="value">the new string value</param>
+     /// <exception cref="System.ArgumentException">if this is a binary field</exception>
+     public void SetValue(System.String value)
+     {
+         if (internalIsBinary)
+         {
+             throw new System.ArgumentException("cannot set a String value on a binary field");
+         }
+         fieldsData = value;
+     }
+
+     /// <summary>Expert: change the value of this field. See <see cref="SetValue(System.String)" />. </summary>
+     /// <param name="value">the new reader value</param>
+     /// <exception cref="System.ArgumentException">if this is a binary field or a stored field</exception>
+     public void SetValue(System.IO.TextReader value)
+     {
+         if (internalIsBinary)
+         {
+             throw new System.ArgumentException("cannot set a Reader value on a binary field");
+         }
+         if (internalIsStored)
+         {
+             throw new System.ArgumentException("cannot set a Reader value on a stored field");
+         }
+         fieldsData = value;
+     }
+
+     /// <summary>Expert: change the value of this field. See <see cref="SetValue(System.String)" />.
+     /// The whole array is used, starting at offset 0.
+     /// </summary>
+     /// <param name="value">the new binary value</param>
+     /// <exception cref="System.ArgumentException">if this is not a binary field</exception>
+     public void SetValue(byte[] value)
+     {
+         if (!internalIsBinary)
+         {
+             throw new System.ArgumentException("cannot set a byte[] value on a non-binary field");
+         }
+         // Read the length before touching any state, so that a null array
+         // fails without leaving the field half-updated.
+         int length = value.Length;
+         fieldsData = value;
+         internalBinaryLength = length;
+         internalbinaryOffset = 0;
+     }
+
+     /// <summary>Expert: change the value of this field. See <see cref="SetValue(System.String)" />. </summary>
+     /// <param name="value">the array holding the new binary value</param>
+     /// <param name="offset">starting offset in <c>value</c> of this field's bytes</param>
+     /// <param name="length">number of bytes to use, starting at <c>offset</c></param>
+     /// <exception cref="System.ArgumentException">if this is not a binary field</exception>
+     public void SetValue(byte[] value, int offset, int length)
+     {
+         if (!internalIsBinary)
+         {
+             throw new System.ArgumentException("cannot set a byte[] value on a non-binary field");
+         }
+         fieldsData = value;
+         internalBinaryLength = length;
+         internalbinaryOffset = offset;
+     }
+
+     /// <summary>Expert: sets the token stream to be used for indexing and causes isIndexed() and isTokenized() to return true.
+     /// May be combined with stored values from stringValue() or GetBinaryValue()
+     /// </summary>
+     public void SetTokenStream(TokenStream tokenStream)
+     {
+         this.internalIsIndexed = true;
+         this.internalIsTokenized = true;
+         this.tokenStream = tokenStream;
+     }
+
+     /// <summary> Create a field by specifying its name, value and how it will
+     /// be saved in the index. Term vectors will not be stored in the index.
+     /// </summary>
+     /// <param name="name">The name of the field
+     /// </param>
+     /// <param name="value">The string to process
+     /// </param>
+     /// <param name="store">Whether <c>value</c> should be stored in the index
+     /// </param>
+     /// <param name="index">Whether the field should be indexed, and if so, if it should
+     /// be tokenized before indexing
+     /// </param>
+     /// <exception cref="System.NullReferenceException">if name or value is <c>null</c></exception>
+     /// <exception cref="System.ArgumentException">if name and value are both empty, or the
+     /// field is neither stored nor indexed</exception>
+     public Field(System.String name, System.String value, Store store, Index index)
+         : this(name, value, store, index, TermVector.NO)
+     {
+     }
+
+     /// <summary> Create a field by specifying its name, value and how it will
+     /// be saved in the index. The field name is interned.
+     /// </summary>
+     /// <param name="name">The name of the field
+     /// </param>
+     /// <param name="value">The string to process
+     /// </param>
+     /// <param name="store">Whether <c>value</c> should be stored in the index
+     /// </param>
+     /// <param name="index">Whether the field should be indexed, and if so, if it should
+     /// be tokenized before indexing
+     /// </param>
+     /// <param name="termVector">Whether term vector should be stored
+     /// </param>
+     /// <exception cref="System.NullReferenceException">if name or value is <c>null</c></exception>
+     /// <exception cref="System.ArgumentException">in any of the following situations:
+     /// <list>
+     /// <item>name and value are both empty</item>
+     /// <item>the field is neither stored nor indexed</item>
+     /// <item>the field is not indexed but termVector is not <c>TermVector.NO</c></item>
+     /// </list>
+     /// </exception>
+     public Field(System.String name, System.String value, Store store, Index index, TermVector termVector)
+         : this(name, true, value, store, index, termVector)
+     {
+     }
+
+     /// <summary> Create a field by specifying its name, value and how it will
+     /// be saved in the index.
+     /// </summary>
+     /// <param name="name">The name of the field
+     /// </param>
+     /// <param name="internName">Whether to intern name or not
+     /// </param>
+     /// <param name="value">The string to process
+     /// </param>
+     /// <param name="store">Whether <c>value</c> should be stored in the index
+     /// </param>
+     /// <param name="index">Whether the field should be indexed, and if so, if it should
+     /// be tokenized before indexing
+     /// </param>
+     /// <param name="termVector">Whether term vector should be stored
+     /// </param>
+     /// <exception cref="System.NullReferenceException">if name or value is <c>null</c></exception>
+     /// <exception cref="System.ArgumentException">in any of the following situations:
+     /// <list>
+     /// <item>name and value are both empty</item>
+     /// <item>the field is neither stored nor indexed</item>
+     /// <item>the field is not indexed but termVector is not <c>TermVector.NO</c></item>
+     /// </list>
+     /// </exception>
+     public Field(System.String name, bool internName, System.String value, Store store, Index index, TermVector termVector)
+     {
+         // NOTE(review): NullReferenceException is an unusual type to throw
+         // explicitly; it is left unchanged here so that the exception type
+         // observed by existing callers stays the same.
+         if (name == null)
+             throw new System.NullReferenceException("name cannot be null");
+         if (value == null)
+             throw new System.NullReferenceException("value cannot be null");
+         if (name.Length == 0 && value.Length == 0)
+             throw new System.ArgumentException("name and value cannot both be empty");
+         if (index == Index.NO && store == Store.NO)
+             throw new System.ArgumentException("it doesn't make sense to have a field that " + "is neither indexed nor stored");
+         if (index == Index.NO && termVector != TermVector.NO)
+             throw new System.ArgumentException("cannot store term vector information " + "for a field that is not indexed");
+
+         if (internName)
+         {
+             // field names are optionally interned
+             name = StringHelper.Intern(name);
+         }
+
+         this.internalName = name;
+
+         this.fieldsData = value;
+
+         this.internalIsStored = store.IsStored();
+
+         this.internalIsIndexed = index.IsIndexed();
+         this.internalIsTokenized = index.IsAnalyzed();
+         this.internalOmitNorms = index.OmitNorms();
+
+         if (index == Index.NO)
+         {
+             this.internalOmitTermFreqAndPositions = false;
+         }
+
+         this.internalIsBinary = false;
+
+         SetStoreTermVector(termVector);
+     }
+
+     /// <summary> Create a tokenized and indexed field that is not stored. Term vectors will
+     /// not be stored. The Reader is read only when the Document is added to the index,
+     /// i.e. you may not close the Reader until <see cref="IndexWriter.AddDocument(Document)" />
+     /// has been called.
+     /// </summary>
+     /// <param name="name">The name of the field
+     /// </param>
+     /// <param name="reader">The reader with the content
+     /// </param>
+     /// <exception cref="System.NullReferenceException">if name or reader is <c>null</c></exception>
+     public Field(System.String name, System.IO.TextReader reader):this(name, reader, TermVector.NO)
+     {
+     }
+
+     /// <summary> Create a tokenized and indexed field that is not stored, optionally with
+     /// storing term vectors. The Reader is read only when the Document is added to the index,
+     /// i.e. you may not close the Reader until <see cref="IndexWriter.AddDocument(Document)" />
+     /// has been called.
+     /// </summary>
+     /// <param name="name">The name of the field
+     /// </param>
+     /// <param name="reader">The reader with the content
+     /// </param>
+     /// <param name="termVector">Whether term vector should be stored
+     /// </param>
+     /// <exception cref="System.NullReferenceException">if name or reader is <c>null</c></exception>
+     public Field(System.String name, System.IO.TextReader reader, TermVector termVector)
+     {
+         if (name == null)
+             throw new System.NullReferenceException("name cannot be null");
+         if (reader == null)
+             throw new System.NullReferenceException("reader cannot be null");
+
+         this.internalName = StringHelper.Intern(name); // field names are interned
+         this.fieldsData = reader;
+
+         this.internalIsStored = false;
+
+         this.internalIsIndexed = true;
+         this.internalIsTokenized = true;
+
+         this.internalIsBinary = false;
+
+         SetStoreTermVector(termVector);
+     }
+
+     /// <summary> Create a tokenized and indexed field that is not stored. Term vectors will
+     /// not be stored. This is useful for pre-analyzed fields.
+     /// The TokenStream is read only when the Document is added to the index,
+     /// i.e. you may not close the TokenStream until <see cref="IndexWriter.AddDocument(Document)" />
+     /// has been called.
+     /// </summary>
+     /// <param name="name">The name of the field
+     /// </param>
+     /// <param name="tokenStream">The TokenStream with the content
+     /// </param>
+     /// <exception cref="System.NullReferenceException">if name or tokenStream is <c>null</c></exception>
+     public Field(System.String name, TokenStream tokenStream):this(name, tokenStream, TermVector.NO)
+     {
+     }
+
+     /// <summary> Create a tokenized and indexed field that is not stored, optionally with
+     /// storing term vectors. This is useful for pre-analyzed fields.
+     /// The TokenStream is read only when the Document is added to the index,
+     /// i.e. you may not close the TokenStream until <see cref="IndexWriter.AddDocument(Document)" />
+     /// has been called.
+     /// </summary>
+     /// <param name="name">The name of the field
+     /// </param>
+     /// <param name="tokenStream">The TokenStream with the content
+     /// </param>
+     /// <param name="termVector">Whether term vector should be stored
+     /// </param>
+     /// <exception cref="System.NullReferenceException">if name or tokenStream is <c>null</c></exception>
+     public Field(System.String name, TokenStream tokenStream, TermVector termVector)
+     {
+         if (name == null)
+             throw new System.NullReferenceException("name cannot be null");
+         if (tokenStream == null)
+             throw new System.NullReferenceException("tokenStream cannot be null");
+
+         this.internalName = StringHelper.Intern(name); // field names are interned
+         this.fieldsData = null;
+         this.tokenStream = tokenStream;
+
+         this.internalIsStored = false;
+
+         this.internalIsIndexed = true;
+         this.internalIsTokenized = true;
+
+         this.internalIsBinary = false;
+
+         SetStoreTermVector(termVector);
+     }
+
+
+     /// <summary> Create a stored field with binary value. The whole array is used.
+     /// </summary>
+     /// <param name="name">The name of the field
+     /// </param>
+     /// <param name="value_Renamed">The binary value
+     /// </param>
+     /// <param name="store">Must not be <c>Store.NO</c>
+     /// </param>
+     /// <exception cref="System.ArgumentException">if name or value is <c>null</c>, or
+     /// store is <c>Store.NO</c></exception>
+     public Field(System.String name, byte[] value_Renamed, Store store)
+         // A null array must not be dereferenced here; pass 0 and let the
+         // chained constructor reject it with its own "value cannot be null".
+         : this(name, value_Renamed, 0, value_Renamed == null ? 0 : value_Renamed.Length, store)
+     {
+     }
+
+     /// <summary> Create a stored field with binary value. The field is not indexed,
+     /// not tokenized, omits norms and stores no term vectors.
+     /// </summary>
+     /// <param name="name">The name of the field
+     /// </param>
+     /// <param name="value_Renamed">The binary value
+     /// </param>
+     /// <param name="offset">Starting offset in value where this Field's bytes are
+     /// </param>
+     /// <param name="length">Number of bytes to use for this Field, starting at offset
+     /// </param>
+     /// <param name="store">Must not be <c>Store.NO</c>
+     /// </param>
+     /// <exception cref="System.ArgumentException">if name or value is <c>null</c>, or
+     /// store is <c>Store.NO</c></exception>
+     public Field(System.String name, byte[] value_Renamed, int offset, int length, Store store)
+     {
+         if (name == null)
+             throw new System.ArgumentException("name cannot be null");
+         if (value_Renamed == null)
+             throw new System.ArgumentException("value cannot be null");
+         if (store == Store.NO)
+             throw new System.ArgumentException("binary values can't be unstored");
+
+         this.internalName = StringHelper.Intern(name); // field names are interned
+         fieldsData = value_Renamed;
+
+         internalIsStored = store.IsStored();
+         internalIsIndexed = false;
+         internalIsTokenized = false;
+         internalOmitTermFreqAndPositions = false;
+         internalOmitNorms = true;
+
+         internalIsBinary = true;
+         internalBinaryLength = length;
+         internalbinaryOffset = offset;
+
+         SetStoreTermVector(TermVector.NO);
+     }
+ }
+
+ /// <summary>Extension and conversion helpers for <see cref="Field.Store" />,
+ /// <see cref="Field.Index" /> and <see cref="Field.TermVector" />.
+ /// </summary>
+ public static class FieldExtensions
+ {
+     /// <summary>True for <c>Store.YES</c>, false for <c>Store.NO</c>. </summary>
+     /// <exception cref="ArgumentOutOfRangeException">if the value is not a declared member</exception>
+     public static bool IsStored(this Field.Store store)
+     {
+         if (store == Field.Store.YES)
+         {
+             return true;
+         }
+         if (store == Field.Store.NO)
+         {
+             return false;
+         }
+         throw new ArgumentOutOfRangeException("store", "Invalid value for Field.Store");
+     }
+
+     /// <summary>True for every <see cref="Field.Index" /> member except <c>NO</c>. </summary>
+     /// <exception cref="ArgumentOutOfRangeException">if the value is not a declared member</exception>
+     public static bool IsIndexed(this Field.Index index)
+     {
+         if (index == Field.Index.NO)
+         {
+             return false;
+         }
+         bool known = index == Field.Index.ANALYZED
+             || index == Field.Index.NOT_ANALYZED
+             || index == Field.Index.NOT_ANALYZED_NO_NORMS
+             || index == Field.Index.ANALYZED_NO_NORMS;
+         if (known)
+         {
+             return true;
+         }
+         throw new ArgumentOutOfRangeException("index", "Invalid value for Field.Index");
+     }
+
+     /// <summary>True for <c>ANALYZED</c> and <c>ANALYZED_NO_NORMS</c>, false for the other members. </summary>
+     /// <exception cref="ArgumentOutOfRangeException">if the value is not a declared member</exception>
+     public static bool IsAnalyzed(this Field.Index index)
+     {
+         if (index == Field.Index.ANALYZED || index == Field.Index.ANALYZED_NO_NORMS)
+         {
+             return true;
+         }
+         bool notAnalyzed = index == Field.Index.NO
+             || index == Field.Index.NOT_ANALYZED
+             || index == Field.Index.NOT_ANALYZED_NO_NORMS;
+         if (notAnalyzed)
+         {
+             return false;
+         }
+         throw new ArgumentOutOfRangeException("index", "Invalid value for Field.Index");
+     }
+
+     /// <summary>False for <c>ANALYZED</c> and <c>NOT_ANALYZED</c>, true for the other members. </summary>
+     /// <exception cref="ArgumentOutOfRangeException">if the value is not a declared member</exception>
+     public static bool OmitNorms(this Field.Index index)
+     {
+         if (index == Field.Index.ANALYZED || index == Field.Index.NOT_ANALYZED)
+         {
+             return false;
+         }
+         bool omits = index == Field.Index.NO
+             || index == Field.Index.NOT_ANALYZED_NO_NORMS
+             || index == Field.Index.ANALYZED_NO_NORMS;
+         if (omits)
+         {
+             return true;
+         }
+         throw new ArgumentOutOfRangeException("index", "Invalid value for Field.Index");
+     }
+
+     /// <summary>True for every <see cref="Field.TermVector" /> member except <c>NO</c>. </summary>
+     /// <exception cref="ArgumentOutOfRangeException">if the value is not a declared member</exception>
+     public static bool IsStored(this Field.TermVector tv)
+     {
+         if (tv == Field.TermVector.NO)
+         {
+             return false;
+         }
+         bool known = tv == Field.TermVector.YES
+             || tv == Field.TermVector.WITH_OFFSETS
+             || tv == Field.TermVector.WITH_POSITIONS
+             || tv == Field.TermVector.WITH_POSITIONS_OFFSETS;
+         if (known)
+         {
+             return true;
+         }
+         throw new ArgumentOutOfRangeException("tv", "Invalid value for Field.TermVector");
+     }
+
+     /// <summary>True for <c>WITH_POSITIONS</c> and <c>WITH_POSITIONS_OFFSETS</c>, false for the other members. </summary>
+     /// <exception cref="ArgumentOutOfRangeException">if the value is not a declared member</exception>
+     public static bool WithPositions(this Field.TermVector tv)
+     {
+         if (tv == Field.TermVector.WITH_POSITIONS || tv == Field.TermVector.WITH_POSITIONS_OFFSETS)
+         {
+             return true;
+         }
+         bool without = tv == Field.TermVector.NO
+             || tv == Field.TermVector.YES
+             || tv == Field.TermVector.WITH_OFFSETS;
+         if (without)
+         {
+             return false;
+         }
+         throw new ArgumentOutOfRangeException("tv", "Invalid value for Field.TermVector");
+     }
+
+     /// <summary>True for <c>WITH_OFFSETS</c> and <c>WITH_POSITIONS_OFFSETS</c>, false for the other members. </summary>
+     /// <exception cref="ArgumentOutOfRangeException">if the value is not a declared member</exception>
+     public static bool WithOffsets(this Field.TermVector tv)
+     {
+         if (tv == Field.TermVector.WITH_OFFSETS || tv == Field.TermVector.WITH_POSITIONS_OFFSETS)
+         {
+             return true;
+         }
+         bool without = tv == Field.TermVector.NO
+             || tv == Field.TermVector.YES
+             || tv == Field.TermVector.WITH_POSITIONS;
+         if (without)
+         {
+             return false;
+         }
+         throw new ArgumentOutOfRangeException("tv", "Invalid value for Field.TermVector");
+     }
+
+     /// <summary>Same as <see cref="ToIndex(bool,bool,bool)" /> with norms kept. </summary>
+     public static Field.Index ToIndex(bool indexed, bool analyed)
+     {
+         return ToIndex(indexed, analyed, false);
+     }
+
+     /// <summary>Maps the three flags to the matching <see cref="Field.Index" /> member.
+     /// When <c>indexed</c> is false the other two flags are ignored.
+     /// </summary>
+     public static Field.Index ToIndex(bool indexed, bool analyzed, bool omitNorms)
+     {
+         // If it is not indexed nothing else matters
+         if (!indexed)
+         {
+             return Field.Index.NO;
+         }
+
+         // Expert: Norms omitted
+         if (omitNorms)
+         {
+             return analyzed ? Field.Index.ANALYZED_NO_NORMS : Field.Index.NOT_ANALYZED_NO_NORMS;
+         }
+
+         // typical, non-expert
+         return analyzed ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED;
+     }
+
+     /// <summary>
+     /// Get the best representation of a TermVector given the flags.
+     /// When <c>stored</c> is false the other two flags are ignored.
+     /// </summary>
+     public static Field.TermVector ToTermVector(bool stored, bool withOffsets, bool withPositions)
+     {
+         // If it is not stored, nothing else matters.
+         if (!stored)
+         {
+             return Field.TermVector.NO;
+         }
+
+         if (withOffsets && withPositions)
+         {
+             return Field.TermVector.WITH_POSITIONS_OFFSETS;
+         }
+         if (withOffsets)
+         {
+             return Field.TermVector.WITH_OFFSETS;
+         }
+         return withPositions ? Field.TermVector.WITH_POSITIONS : Field.TermVector.YES;
+     }
+ }
+} \ No newline at end of file
diff --git a/src/core/Document/FieldSelector.cs b/src/core/Document/FieldSelector.cs
new file mode 100644
index 0000000..f940f08
--- /dev/null
+++ b/src/core/Document/FieldSelector.cs
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Documents
+{
+ /// <summary> Similar to a <a href="http://download.oracle.com/javase/1.5.0/docs/api/java/io/FileFilter.html">
+ /// java.io.FileFilter</a>, the FieldSelector allows one to make decisions about
+ /// what Fields get loaded on a <see cref="Document" /> by <see cref="Lucene.Net.Index.IndexReader.Document(int,Lucene.Net.Documents.FieldSelector)" />.
+ /// </summary>
+ public interface FieldSelector
+ {
+
+ /// <summary> Decides how the field with the given name should be loaded.</summary>
+ /// <param name="fieldName">the name of the field to accept or reject
+ /// </param>
+ /// <returns> the <see cref="FieldSelectorResult" /> describing whether and how
+ /// the <see cref="Field" /> named <c>fieldName</c> should be loaded.
+ /// </returns>
+ FieldSelectorResult Accept(System.String fieldName);
+ }
+} \ No newline at end of file
diff --git a/src/core/Document/FieldSelectorResult.cs b/src/core/Document/FieldSelectorResult.cs
new file mode 100644
index 0000000..7d3a889
--- /dev/null
+++ b/src/core/Document/FieldSelectorResult.cs
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using System.Runtime.InteropServices;
+
+namespace Lucene.Net.Documents
+{
+ /// <summary>Provides information about what should be done with this Field</summary>
+ public enum FieldSelectorResult
+ {
+ /// <summary>
+ /// Default (zero) value of the enum. It stands in for "no result", because an enum value cannot be null.
+ /// </summary>
+ INVALID, // TODO: This is kinda a kludgy workaround for the fact enums can't be null
+
+ /// <summary> Load this <see cref="Field" /> every time the <see cref="Document" /> is loaded, reading in the data as it is encountered.
+ /// <see cref="Document.GetField(String)" /> and <see cref="Document.GetFieldable(String)" /> should not return null.
+ /// <p/>
+ /// <see cref="Document.Add(IFieldable)" /> should be called by the Reader.
+ /// </summary>
+ LOAD,
+
+ /// <summary> Lazily load this <see cref="Field" />. This means the <see cref="Field" /> is valid, but it may not actually contain its data until
+ /// invoked. <see cref="Document.GetField(String)" /> SHOULD NOT BE USED. <see cref="Document.GetFieldable(String)" /> is safe to use and should
+ /// return a valid instance of a <see cref="IFieldable" />.
+ /// <p/>
+ /// <see cref="Document.Add(IFieldable)" /> should be called by the Reader.
+ /// </summary>
+ LAZY_LOAD,
+
+ /// <summary> Do not load the <see cref="Field" />. <see cref="Document.GetField(String)" /> and <see cref="Document.GetFieldable(String)" /> should return null.
+ /// <p/>
+ /// <see cref="Document.Add(IFieldable)" /> should not be called by the Reader.
+ /// </summary>
+ NO_LOAD,
+
+ /// <summary> Load this field as in the <see cref="LOAD" /> case, but immediately return from <see cref="Field" /> loading for the <see cref="Document" />. Thus, the
+ /// Document may not have its complete set of Fields. <see cref="Document.GetField(String)" /> and <see cref="Document.GetFieldable(String)" /> should
+ /// both be valid for this <see cref="Field" />.
+ /// <p/>
+ /// <see cref="Document.Add(IFieldable)" /> should be called by the Reader.
+ /// </summary>
+ LOAD_AND_BREAK,
+
+ /// <summary>Expert: Load the size of this <see cref="Field" /> rather than its value.
+ /// Size is measured as number of bytes required to store the field == bytes for a binary or any compressed value, and 2*chars for a String value.
+ /// The size is stored as a binary value, represented as an int in a byte[], with the higher order byte first in [0].
+ /// </summary>
+ SIZE,
+
+ /// <summary>Expert: Like <see cref="SIZE" /> but immediately break from the field loading loop, i.e., stop loading further fields, after the size is loaded.</summary>
+ SIZE_AND_BREAK
+ }
+} \ No newline at end of file
diff --git a/src/core/Document/Fieldable.cs b/src/core/Document/Fieldable.cs
new file mode 100644
index 0000000..89d37d1
--- /dev/null
+++ b/src/core/Document/Fieldable.cs
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.IO;
+using TokenStream = Lucene.Net.Analysis.TokenStream;
+using FieldInvertState = Lucene.Net.Index.FieldInvertState;
+
+namespace Lucene.Net.Documents
+{
+ /// <summary> Synonymous with <see cref="Field" />.
+ ///
+ /// <p/><b>WARNING</b>: This interface may change within minor versions, despite Lucene's backward compatibility requirements.
+ /// This means new methods may be added from version to version. This change only affects the Fieldable API; other backwards
+ /// compatibility promises remain intact. For example, Lucene can still
+ /// read and write indices created within the same major version.
+ /// <p/>
+ ///
+ ///
+ /// </summary>
+ public interface IFieldable
+ {
+ /// <summary>Gets or sets the boost factor for hits for this field. This value will be
+ /// multiplied into the score of all hits on this field of this
+ /// document.
+ ///
+ /// <p/>The boost is multiplied by <see cref="Lucene.Net.Documents.Document.Boost" /> of the document
+ /// containing this field. If a document has multiple fields with the same
+ /// name, all such values are multiplied together. This product is then
+ /// used to compute the norm factor for the field. By
+ /// default, in the <see cref="Lucene.Net.Search.Similarity.ComputeNorm(String,Lucene.Net.Index.FieldInvertState)"/>
+ /// method, the boost value is multiplied
+ /// by the <see cref="Lucene.Net.Search.Similarity.LengthNorm(String,int)"/>
+ /// and then rounded by <see cref="Lucene.Net.Search.Similarity.EncodeNorm(float)" /> before it is stored in the
+ /// index. One should attempt to ensure that this product does not overflow
+ /// the range of that encoding.
+ ///
+ /// <p/>The default value is 1.0.
+ ///
+ /// <p/>Note: this value is not stored directly with the document in the index.
+ /// Documents returned from <see cref="Lucene.Net.Index.IndexReader.Document(int)" /> and
+ /// <see cref="Lucene.Net.Search.Searcher.Doc(int)" /> may thus not have the same value present as when
+ /// this field was indexed.
+ ///
+ /// </summary>
+ /// <seealso cref="Lucene.Net.Documents.Document.Boost">
+ /// </seealso>
+ /// <seealso cref="Lucene.Net.Search.Similarity.ComputeNorm(String, FieldInvertState)">
+ /// </seealso>
+ /// <seealso cref="Lucene.Net.Search.Similarity.EncodeNorm(float)">
+ /// </seealso>
+ float Boost { get; set; }
+
+ /// <summary>Returns the name of the field as an interned string.
+ /// For example "date", "title", "body", ...
+ /// </summary>
+ string Name { get; }
+
+ /// <summary>The value of the field as a String, or null.
+ /// <p/>
+ /// For indexing, if <see cref="IsStored" /> is true, <see cref="StringValue" /> will be used as the stored field value
+ /// unless <see cref="IsBinary" /> is true, in which case <see cref="GetBinaryValue()" /> will be used.
+ ///
+ /// If <see cref="IsIndexed" /> is true and <see cref="IsTokenized" /> is false, this String value will be indexed as a single token.
+ /// If <see cref="IsIndexed" /> is true and <see cref="IsTokenized" /> is true, then <see cref="TokenStreamValue" /> will be used to generate indexed tokens if not null,
+ /// else <see cref="ReaderValue" /> will be used to generate indexed tokens if not null, else <see cref="StringValue" /> will be used to generate tokens.
+ /// </summary>
+ string StringValue { get; }
+
+ /// <summary>The value of the field as a Reader, which can be used at index time to generate indexed tokens.</summary>
+ /// <seealso cref="StringValue">
+ /// </seealso>
+ TextReader ReaderValue { get; }
+
+ /// <summary>The TokenStream for this field to be used when indexing, or null.</summary>
+ /// <seealso cref="StringValue">
+ /// </seealso>
+ TokenStream TokenStreamValue { get; }
+
+ /// <summary>True if the value of the field is to be stored in the index for return
+ /// with search hits.
+ /// </summary>
+ bool IsStored { get; }
+
+ /// <summary>True if the value of the field is to be indexed, so that it may be
+ /// searched on.
+ /// </summary>
+ bool IsIndexed { get; }
+
+ /// <summary>True if the value of the field should be tokenized as text prior to
+ /// indexing. Un-tokenized fields are indexed as a single word and may not be
+ /// Reader-valued.
+ /// </summary>
+ bool IsTokenized { get; }
+
+ /// <summary>True if the term or terms used to index this field are stored as a term
+ /// vector, available from <see cref="Lucene.Net.Index.IndexReader.GetTermFreqVector(int,String)" />.
+ /// These methods do not provide access to the original content of the field,
+ /// only to terms used to index it. If the original content must be
+ /// preserved, use the <c>stored</c> attribute instead.
+ ///
+ /// </summary>
+ /// <seealso cref="Lucene.Net.Index.IndexReader.GetTermFreqVector(int, String)">
+ /// </seealso>
+ bool IsTermVectorStored { get; }
+
+ /// <summary> True if terms are stored as term vector together with their offsets
+ /// (start and end position in source text).
+ /// </summary>
+ bool IsStoreOffsetWithTermVector { get; }
+
+ /// <summary> True if terms are stored as term vector together with their token positions.</summary>
+ bool IsStorePositionWithTermVector { get; }
+
+ /// <summary>True if the value of the field is stored as binary </summary>
+ bool IsBinary { get; }
+
+ /// <summary>
+ /// True if norms are omitted for this indexed field.
+ /// <para>
+ /// Expert:
+ /// If set, omit normalization factors associated with this indexed field.
+ /// This effectively disables indexing boosts and length normalization for this field.
+ /// </para>
+ /// </summary>
+ bool OmitNorms { get; set; }
+
+
+ /// <summary> Indicates whether a Field is Lazy or not. The semantics of Lazy loading are such that if a Field is lazily loaded, retrieving
+ /// its values via <see cref="StringValue" /> or <see cref="GetBinaryValue()" /> is only valid as long as the <see cref="Lucene.Net.Index.IndexReader" /> that
+ /// retrieved the <see cref="Document" /> is still open.
+ ///
+ /// </summary>
+ /// <value> true if this field can be loaded lazily </value>
+ bool IsLazy { get; }
+
+ /// <summary> Returns offset into byte[] segment that is used as value. If the Field is not binary,
+ /// the returned value is undefined.
+ /// </summary>
+ /// <value> index of the first byte in the byte[] segment that represents this Field value </value>
+ int BinaryOffset { get; }
+
+ /// <summary> Returns length of byte[] segment that is used as value. If the Field is not binary,
+ /// the returned value is undefined.
+ /// </summary>
+ /// <value> length of byte[] segment that represents this Field value </value>
+ int BinaryLength { get; }
+
+ /// <summary> Return the raw byte[] for the binary field. Note that
+ /// you must also call <see cref="BinaryLength" /> and <see cref="BinaryOffset" />
+ /// to know which range of bytes in this
+ /// returned array belong to the field.
+ /// </summary>
+ /// <returns> reference to the Field value as byte[]. </returns>
+ byte[] GetBinaryValue();
+
+ /// <summary> Return the raw byte[] for the binary field. Note that
+ /// you must also call <see cref="BinaryLength" /> and <see cref="BinaryOffset" />
+ /// to know which range of bytes in this
+ /// returned array belong to the field.<p/>
+ /// About reuse: if you pass in the result byte[] and it is
+ /// used, likely the underlying implementation will hold
+ /// onto this byte[] and return it in future calls to
+ /// <see cref="GetBinaryValue()" /> or <see cref="GetBinaryValue(byte[])" />.
+ /// So if you subsequently re-use the same byte[] elsewhere
+ /// it will alter this Fieldable's value.
+ /// </summary>
+ /// <param name="result"> User defined buffer that will be used if
+ /// possible. If this is null or not large enough, a new
+ /// buffer is allocated
+ /// </param>
+ /// <returns> reference to the Field value as byte[].
+ /// </returns>
+ byte[] GetBinaryValue(byte[] result);
+
+ /// <summary>
+ /// Expert:
+ /// <para>
+ /// If set, omit term freq, positions and payloads from
+ /// postings for this field.
+ /// </para>
+ /// <para>
+ /// <b>NOTE</b>: While this option reduces storage space
+ /// required in the index, it also means any query
+ /// requiring positional information, such as
+ /// <see cref="Lucene.Net.Search.PhraseQuery"/> or
+ /// <see cref="Lucene.Net.Search.Spans.SpanQuery"/>
+ /// subclasses will silently fail to find results.
+ /// </para>
+ /// </summary>
+ bool OmitTermFreqAndPositions { set; get; }
+ }
+} \ No newline at end of file
diff --git a/src/core/Document/LoadFirstFieldSelector.cs b/src/core/Document/LoadFirstFieldSelector.cs
new file mode 100644
index 0000000..4f353f6
--- /dev/null
+++ b/src/core/Document/LoadFirstFieldSelector.cs
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+namespace Lucene.Net.Documents
+{
+
+ /// <summary> A <see cref="FieldSelector" /> that loads the first field it is asked about and then stops.
+ /// <p/>
+ /// See <see cref="FieldSelectorResult.LOAD_AND_BREAK" />
+ /// </summary>
+ [Serializable]
+ public class LoadFirstFieldSelector : FieldSelector
+ {
+     /// <summary> Answers <see cref="FieldSelectorResult.LOAD_AND_BREAK" /> for every field name.</summary>
+     /// <param name="fieldName">the field name; it is not inspected</param>
+     public virtual FieldSelectorResult Accept(string fieldName)
+     {
+         return FieldSelectorResult.LOAD_AND_BREAK;
+     }
+ }
+} \ No newline at end of file
diff --git a/src/core/Document/MapFieldSelector.cs b/src/core/Document/MapFieldSelector.cs
new file mode 100644
index 0000000..92a8959
--- /dev/null
+++ b/src/core/Document/MapFieldSelector.cs
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Linq;
+using System.Collections.Generic;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Documents
+{
+ /// <summary>A <see cref="FieldSelector" /> based on a Map of field names to <see cref="FieldSelectorResult" />s</summary>
+ [Serializable]
+ public class MapFieldSelector : FieldSelector
+ {
+     internal IDictionary<string, FieldSelectorResult> fieldSelections;
+
+     /// <summary>Create a MapFieldSelector</summary>
+     /// <param name="fieldSelections">maps from field names (String) to <see cref="FieldSelectorResult" />s.
+     /// The dictionary is used as-is (not copied).
+     /// </param>
+     public MapFieldSelector(IDictionary<string, FieldSelectorResult> fieldSelections)
+     {
+         this.fieldSelections = fieldSelections;
+     }
+
+     /// <summary>Create a MapFieldSelector</summary>
+     /// <param name="fields">fields to LOAD. List of Strings. All other fields are NO_LOAD.
+     /// </param>
+     /// <exception cref="ArgumentNullException">if <paramref name="fields"/> is null</exception>
+     public MapFieldSelector(IList<string> fields)
+     {
+         if (fields == null)
+         {
+             throw new ArgumentNullException("fields");
+         }
+
+         fieldSelections = new HashMap<string, FieldSelectorResult>(fields.Count * 5 / 3);
+         foreach (var field in fields)
+         {
+             fieldSelections[field] = FieldSelectorResult.LOAD;
+         }
+     }
+
+     /// <summary>Create a MapFieldSelector</summary>
+     /// <param name="fields">fields to LOAD. All other fields are NO_LOAD.
+     /// </param>
+     public MapFieldSelector(params System.String[] fields)
+         : this((IList<string>)fields) // a string[] already is an IList<string>; no copy needed
+     {
+     }
+
+     /// <summary>Load field according to its associated value in fieldSelections</summary>
+     /// <param name="field">a field name
+     /// </param>
+     /// <returns> the fieldSelections value that field maps to or NO_LOAD if none.
+     /// </returns>
+     public virtual FieldSelectorResult Accept(System.String field)
+     {
+         // TryGetValue instead of the indexer: a caller-supplied dictionary may throw
+         // KeyNotFoundException from its indexer for a field name it does not contain.
+         FieldSelectorResult selection;
+         if (!fieldSelections.TryGetValue(field, out selection))
+         {
+             return FieldSelectorResult.NO_LOAD;
+         }
+
+         // INVALID is the enum's default value and is treated as "no selection".
+         return selection != FieldSelectorResult.INVALID ? selection : FieldSelectorResult.NO_LOAD;
+     }
+ }
+} \ No newline at end of file
diff --git a/src/core/Document/NumberTools.cs b/src/core/Document/NumberTools.cs
new file mode 100644
index 0000000..f877120
--- /dev/null
+++ b/src/core/Document/NumberTools.cs
@@ -0,0 +1,221 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Search;
+using NumericUtils = Lucene.Net.Util.NumericUtils;
+
+namespace Lucene.Net.Documents
+{
+
+ // do not remove this class in 3.0, it may be needed to decode old indexes!
+
+ /// <summary> Provides support for converting longs to Strings, and back again. The strings
+ /// are structured so that lexicographic sorting order is preserved.
+ ///
+ /// <p/>
+ /// That is, if l1 is less than l2 for any two longs l1 and l2, then
+ /// NumberTools.LongToString(l1) is lexicographically less than
+ /// NumberTools.LongToString(l2). (Similarly for "greater than" and "equals".)
+ ///
+ /// <p/>
+ /// This class handles <b>all</b> long values (unlike
+ /// <see cref="Lucene.Net.Documents.DateField" />).
+ ///
+ /// </summary>
+ /// <remarks> Deprecated: for new indexes use <see cref="NumericUtils" /> instead, which
+ /// provides a sortable binary representation (prefix encoded) of numeric
+ /// values.
+ /// To index and efficiently query numeric values use <see cref="NumericField" />
+ /// and <see cref="NumericRangeQuery{T}" />.
+ /// This class is included for use with existing
+ /// indices and will be removed in a future release (possibly Lucene 4.0).
+ /// </remarks>
+ [Obsolete("For new indexes use NumericUtils instead, which provides a sortable binary representation (prefix encoded) of numeric values. To index and efficiently query numeric values use NumericField and NumericRangeQuery. This class is included for use with existing indices and will be removed in a future release (possibly Lucene 4.0).")]
+ public class NumberTools
+ {
+
+     private const int RADIX = 36;
+
+     private const char NEGATIVE_PREFIX = '-';
+
+     // NB: NEGATIVE_PREFIX must be < POSITIVE_PREFIX
+     private const char POSITIVE_PREFIX = '0';
+
+     /// <summary> Equivalent to LongToString(Int64.MinValue)</summary>
+#if !PRE_LUCENE_NET_2_0_0_COMPATIBLE
+     public static readonly System.String MIN_STRING_VALUE = NEGATIVE_PREFIX + "0000000000000";
+#else
+     public static readonly System.String MIN_STRING_VALUE = NEGATIVE_PREFIX + "0000000000000000";
+#endif
+
+     /// <summary> Equivalent to LongToString(Int64.MaxValue)</summary>
+#if !PRE_LUCENE_NET_2_0_0_COMPATIBLE
+     public static readonly System.String MAX_STRING_VALUE = POSITIVE_PREFIX + "1y2p0ij32e8e7";
+#else
+     public static readonly System.String MAX_STRING_VALUE = POSITIVE_PREFIX + "7fffffffffffffff";
+#endif
+
+     /// <summary> The length of (all) strings returned by <see cref="LongToString" /></summary>
+     public static readonly int STR_SIZE = MIN_STRING_VALUE.Length;
+
+     /// <summary> Converts a long to a String suitable for indexing.</summary>
+     /// <param name="l">the value to encode</param>
+     /// <returns> a string of exactly <see cref="STR_SIZE" /> characters: a sign prefix followed by zero-padded digits</returns>
+     public static System.String LongToString(long l)
+     {
+
+         if (l == System.Int64.MinValue)
+         {
+             // special case, because long is not symmetric around zero
+             return MIN_STRING_VALUE;
+         }
+
+         char prefix;
+         if (l < 0)
+         {
+             prefix = NEGATIVE_PREFIX;
+             // Shift negatives into the non-negative range so that larger
+             // (closer to zero) values produce lexicographically larger digits.
+             l = System.Int64.MaxValue + l + 1;
+         }
+         else
+         {
+             prefix = POSITIVE_PREFIX;
+         }
+#if !PRE_LUCENE_NET_2_0_0_COMPATIBLE
+         System.String num = ToString(l);
+#else
+         System.String num = System.Convert.ToString(l, RADIX);
+#endif
+
+         // Left-pad the digits with '0' so that prefix + digits is STR_SIZE long.
+         return prefix + num.PadLeft(STR_SIZE - 1, '0');
+     }
+
+     /// <summary> Converts a String that was returned by <see cref="LongToString" /> back to a
+     /// long.
+     /// </summary>
+     /// <param name="str">the encoded string</param>
+     /// <returns> the decoded value</returns>
+     /// <exception cref="System.NullReferenceException"> if the input is null
+     /// (exception type kept for compatibility with existing callers)</exception>
+     /// <exception cref="System.FormatException"> if the input does not parse (it was not a String returned by
+     /// LongToString()).
+     /// </exception>
+     public static long StringToLong(System.String str)
+     {
+         if (str == null)
+         {
+             throw new System.NullReferenceException("string cannot be null");
+         }
+         if (str.Length != STR_SIZE)
+         {
+             throw new System.FormatException("string is the wrong size");
+         }
+
+         if (str.Equals(MIN_STRING_VALUE))
+         {
+             return System.Int64.MinValue;
+         }
+
+         char prefix = str[0];
+#if !PRE_LUCENE_NET_2_0_0_COMPATIBLE
+         long l = ToLong(str.Substring(1));
+#else
+         long l = System.Convert.ToInt64(str.Substring(1), RADIX);
+#endif
+
+         if (prefix == POSITIVE_PREFIX)
+         {
+             // nop
+         }
+         else if (prefix == NEGATIVE_PREFIX)
+         {
+             // Undo the shift applied by LongToString for negative values.
+             l = l - System.Int64.MaxValue - 1;
+         }
+         else
+         {
+             throw new System.FormatException("string does not begin with the correct prefix");
+         }
+
+         return l;
+     }
+
+#if !PRE_LUCENE_NET_2_0_0_COMPATIBLE
+     #region BASE36 OPS
+     static readonly System.String digits = "0123456789abcdefghijklmnopqrstuvwxyz";
+     static readonly long[] powersOf36 =
+     {
+         1L,
+         36L,
+         36L*36L,
+         36L*36L*36L,
+         36L*36L*36L*36L,
+         36L*36L*36L*36L*36L,
+         36L*36L*36L*36L*36L*36L,
+         36L*36L*36L*36L*36L*36L*36L,
+         36L*36L*36L*36L*36L*36L*36L*36L,
+         36L*36L*36L*36L*36L*36L*36L*36L*36L,
+         36L*36L*36L*36L*36L*36L*36L*36L*36L*36L,
+         36L*36L*36L*36L*36L*36L*36L*36L*36L*36L*36L,
+         36L*36L*36L*36L*36L*36L*36L*36L*36L*36L*36L*36L
+     };
+
+     /// <summary> Formats a non-negative long in base 36 (digits 0-9, a-z) without leading zeros.</summary>
+     /// <param name="lval">the non-negative value to format</param>
+     /// <returns> the base-36 digits; "0" for zero</returns>
+     /// <exception cref="System.ArgumentOutOfRangeException"> if <paramref name="lval"/> is negative</exception>
+     public static System.String ToString(long lval)
+     {
+         if (lval == 0)
+         {
+             return "0";
+         }
+         if (lval < 0)
+         {
+             // A negative value has no digit in the table; fail explicitly.
+             throw new System.ArgumentOutOfRangeException("lval", "value must not be negative");
+         }
+
+         int maxStrLen = powersOf36.Length;
+         long curval = lval;
+
+         // Emit one digit per power, most significant first.
+         char[] tb = new char[maxStrLen];
+         for (int i = 0; i < maxStrLen; i++)
+         {
+             long pval = powersOf36[maxStrLen - i - 1];
+             tb[i] = digits[(int)(curval / pval)];
+             curval = curval % pval;
+         }
+         return new System.String(tb).TrimStart('0');
+     }
+
+     /// <summary> Parses a string of base-36 digits (0-9, lowercase a-z) into a long.</summary>
+     /// <param name="t">the digits, most significant first</param>
+     /// <returns> the parsed value</returns>
+     /// <exception cref="System.FormatException"> if <paramref name="t"/> has more digits than the power
+     /// table covers, or contains a character that is not a base-36 digit</exception>
+     public static long ToLong(System.String t)
+     {
+         if (t.Length > powersOf36.Length)
+         {
+             throw new System.FormatException("string has too many base-36 digits");
+         }
+
+         long ival = 0;
+         for (int i = 0; i < t.Length; i++)
+         {
+             // IndexOf yields -1 for a character outside the digit table; reject it
+             // instead of folding -1 into the result.
+             int digit = digits.IndexOf(t[t.Length - i - 1]);
+             if (digit < 0)
+             {
+                 throw new System.FormatException("string contains an invalid base-36 digit");
+             }
+             ival += powersOf36[i] * digit;
+         }
+         return ival;
+     }
+     #endregion
+#endif
+ }
+} \ No newline at end of file
diff --git a/src/core/Document/NumericField.cs b/src/core/Document/NumericField.cs
new file mode 100644
index 0000000..e77dee4
--- /dev/null
+++ b/src/core/Document/NumericField.cs
@@ -0,0 +1,294 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.IO;
+using Lucene.Net.Search;
+using NumericTokenStream = Lucene.Net.Analysis.NumericTokenStream;
+using TokenStream = Lucene.Net.Analysis.TokenStream;
+using NumericUtils = Lucene.Net.Util.NumericUtils;
+using FieldCache = Lucene.Net.Search.FieldCache;
+using SortField = Lucene.Net.Search.SortField;
+
+namespace Lucene.Net.Documents
+{
+ // javadocs
+
+ /// <summary> <p/>This class provides a <see cref="Field" /> that enables indexing
+ /// of numeric values for efficient range filtering and
+ /// sorting. Here's an example usage, adding an int value:
+ /// <code>
+ /// document.Add(new NumericField(name).SetIntValue(value));
+ /// </code>
+ ///
+ /// For optimal performance, re-use the
+ /// <c>NumericField</c> and <see cref="Document" /> instance for more than
+ /// one document:
+ ///
+ /// <code>
+ /// NumericField field = new NumericField(name);
+ /// Document document = new Document();
+ /// document.Add(field);
+ ///
+ /// for(all documents) {
+ /// ...
+ /// field.SetIntValue(value);
+ /// writer.AddDocument(document);
+ /// ...
+ /// }
+ /// </code>
+ ///
+ /// <p/>The .Net native types <c>int</c>, <c>long</c>,
+ /// <c>float</c> and <c>double</c> are
+ /// directly supported. However, any value that can be
+ /// converted into these native types can also be indexed.
+ /// For example, date/time values represented by a
+ /// <see cref="System.DateTime" /> can be translated into a long
+ /// value using the <see cref="System.DateTime.Ticks" /> property. If you
+ /// don't need full tick (100-nanosecond) precision, you can quantize the
+ /// value, either by dividing the value of
+ /// <see cref="System.DateTime.Ticks" /> or using the separate properties
+ /// (<c>Year</c>, <c>Month</c>, etc.) to construct an <c>int</c> or
+ /// <c>long</c> value.<p/>
+ ///
+ /// <p/>To perform range querying or filtering against a
+ /// <c>NumericField</c>, use <see cref="NumericRangeQuery{T}" /> or <see cref="NumericRangeFilter{T}" />.
+ /// To sort according to a
+ /// <c>NumericField</c>, use the normal numeric sort types, e.g.
+ /// <see cref="SortField.INT" />. <c>NumericField</c> values
+ /// can also be loaded directly from <see cref="FieldCache" />.<p/>
+ ///
+ /// <p/>By default, a <c>NumericField</c>'s value is not stored but
+ /// is indexed for range filtering and sorting. You can use
+ /// the <see cref="NumericField(String,Field.Store,bool)" />
+ /// constructor if you need to change these defaults.<p/>
+ ///
+ /// <p/>You may add the same field name as a <c>NumericField</c> to
+ /// the same document more than once. Range querying and
+ /// filtering will be the logical OR of all values; so a range query
+ /// will hit all documents that have at least one value in
+ /// the range. However sort behavior is not defined. If you need to sort,
+ /// you should separately index a single-valued <c>NumericField</c>.<p/>
+ ///
+ /// <p/>A <c>NumericField</c> will consume somewhat more disk space
+ /// in the index than an ordinary single-valued field.
+ /// However, for a typical index that includes substantial
+ /// textual content per document, this increase will likely
+ /// be in the noise. <p/>
+ ///
+ /// <p/>Within Lucene, each numeric value is indexed as a
+ /// <em>trie</em> structure, where each term is logically
+ /// assigned to larger and larger pre-defined brackets (which
+ /// are simply lower-precision representations of the value).
+ /// The step size between each successive bracket is called the
+ /// <c>precisionStep</c>, measured in bits. Smaller
+ /// <c>precisionStep</c> values result in larger number
+ /// of brackets, which consumes more disk space in the index
+ /// but may result in faster range search performance. The
+ /// default value, 4, was selected for a reasonable tradeoff
+ /// of disk space consumption versus performance. You can
+ /// use the expert constructor <see cref="NumericField(String,int,Field.Store,bool)" />
+ /// if you'd
+ /// like to change the value. Note that you must also
+ /// specify a congruent value when creating <see cref="NumericRangeQuery{T}" />
+ /// or <see cref="NumericRangeFilter{T}" />.
+ /// For low cardinality fields larger precision steps are good.
+ /// If the cardinality is &lt; 100, it is fair
+ /// to use <see cref="int.MaxValue" />, which produces one
+ /// term per value.
+ ///
+ /// <p/>For more information on the internals of numeric trie
+ /// indexing, including the <a
+ /// href="../search/NumericRangeQuery.html#precisionStepDesc"><c>precisionStep</c></a>
+ /// configuration, see <see cref="NumericRangeQuery{T}" />. The format of
+ /// indexed values is described in <see cref="NumericUtils" />.
+ ///
+ /// <p/>If you only need to sort by numeric value, and never
+ /// run range querying/filtering, you can index using a
+ /// <c>precisionStep</c> of <see cref="int.MaxValue" />.
+ /// This will minimize disk space consumed. <p/>
+ ///
+ /// <p/>More advanced users can instead use <see cref="NumericTokenStream" />
+ /// directly, when indexing numbers. This
+ /// class is a wrapper around this token stream type for
+ /// easier, more intuitive usage.<p/>
+ ///
+ /// <p/><b>NOTE:</b> This class is only used during
+ /// indexing. When retrieving the stored field value from a
+ /// <see cref="Document" /> instance after search, you will get a
+ /// conventional <see cref="IFieldable" /> instance where the numeric
+ /// values are returned as <see cref="String" />s (according to
+ /// <c>toString(value)</c> of the used data type).
+ ///
+ /// <p/><font color="red"><b>NOTE:</b> This API is
+ /// experimental and might change in incompatible ways in the
+ /// next release.</font>
+ ///
+ /// </summary>
+ /// <since> 2.9
+ /// </since>
+ [Serializable]
+ public sealed class NumericField:AbstractField
+ {
+
+     // Token stream wrapped by this field; the 'new' modifier hides the inherited member of the same name.
+     new private readonly NumericTokenStream tokenStream;
+
+     /// <summary> Creates a field for numeric values using the default <c>precisionStep</c>
+     /// <see cref="NumericUtils.PRECISION_STEP_DEFAULT" /> (4). The instance is not yet initialized with
+     /// a numeric value; before indexing a document containing this field,
+     /// set a value using one of the Set<em>???</em>Value() methods.
+     /// This constructor creates an indexed, but not stored field.
+     /// </summary>
+     /// <param name="name">the field name
+     /// </param>
+     public NumericField(System.String name):this(name, NumericUtils.PRECISION_STEP_DEFAULT, Field.Store.NO, true)
+     {
+     }
+
+     /// <summary> Creates a field for numeric values using the default <c>precisionStep</c>
+     /// <see cref="NumericUtils.PRECISION_STEP_DEFAULT" /> (4). The instance is not yet initialized with
+     /// a numeric value; before indexing a document containing this field,
+     /// set a value using one of the Set<em>???</em>Value() methods.
+     /// </summary>
+     /// <param name="name">the field name
+     /// </param>
+     /// <param name="store">if the field should be stored in plain text form
+     /// (see <see cref="StringValue" />)
+     /// </param>
+     /// <param name="index">if the field should be indexed using <see cref="NumericTokenStream" />
+     /// </param>
+     public NumericField(System.String name, Field.Store store, bool index):this(name, NumericUtils.PRECISION_STEP_DEFAULT, store, index)
+     {
+     }
+
+     /// <summary> Creates a field for numeric values with the specified
+     /// <c>precisionStep</c>. The instance is not yet initialized with
+     /// a numeric value; before indexing a document containing this field,
+     /// set a value using one of the Set<em>???</em>Value() methods.
+     /// This constructor creates an indexed, but not stored field.
+     /// </summary>
+     /// <param name="name">the field name
+     /// </param>
+     /// <param name="precisionStep">the used <a href="../search/NumericRangeQuery.html#precisionStepDesc">precision step</a>
+     /// </param>
+     public NumericField(System.String name, int precisionStep):this(name, precisionStep, Field.Store.NO, true)
+     {
+     }
+
+     /// <summary> Creates a field for numeric values with the specified
+     /// <c>precisionStep</c>. The instance is not yet initialized with
+     /// a numeric value; before indexing a document containing this field,
+     /// set a value using one of the Set<em>???</em>Value() methods.
+     /// </summary>
+     /// <param name="name">the field name
+     /// </param>
+     /// <param name="precisionStep">the used <a href="../search/NumericRangeQuery.html#precisionStepDesc">precision step</a>
+     /// </param>
+     /// <param name="store">if the field should be stored in plain text form
+     /// (see <see cref="StringValue" />)
+     /// </param>
+     /// <param name="index">if the field should be indexed using <see cref="NumericTokenStream" />
+     /// </param>
+     public NumericField(System.String name, int precisionStep, Field.Store store, bool index):base(name, store, index?Field.Index.ANALYZED_NO_NORMS:Field.Index.NO, Field.TermVector.NO)
+     {
+         OmitTermFreqAndPositions = true;
+         tokenStream = new NumericTokenStream(precisionStep);
+     }
+
+     /// <summary>Returns the <see cref="NumericTokenStream" /> for indexing the numeric value,
+     /// or <c>null</c> if this field is not indexed. </summary>
+     public override TokenStream TokenStreamValue
+     {
+         get { return IsIndexed ? tokenStream : null; }
+     }
+
+     /// <summary>Always returns <c>null</c> for numeric fields. </summary>
+     public override byte[] GetBinaryValue(byte[] result)
+     {
+         return null;
+     }
+
+     /// <summary>Always returns <c>null</c> for numeric fields. </summary>
+     public override TextReader ReaderValue
+     {
+         get { return null; }
+     }
+
+     /// <summary>Returns the numeric value as a string (how it is stored, when <see cref="Field.Store.YES" /> is chosen),
+     /// or <c>null</c> if no value has been set yet. The value is formatted with the invariant culture
+     /// so that the stored text does not depend on the culture of the indexing thread. </summary>
+     public override string StringValue
+     {
+         get
+         {
+             if (fieldsData == null)
+             {
+                 return null;
+             }
+
+             // The setters store boxed int/long/float/double, all of which implement IConvertible.
+             // A plain ToString() would use the current culture (e.g. "1,5" for 1.5 under de-DE).
+             IConvertible convertible = fieldsData as IConvertible;
+             if (convertible != null)
+             {
+                 return convertible.ToString(System.Globalization.CultureInfo.InvariantCulture);
+             }
+             return fieldsData.ToString();
+         }
+     }
+
+     /// <summary>Returns the current numeric value as a boxed <see cref="ValueType" />, <c>null</c> if not yet initialized. </summary>
+     public ValueType NumericValue
+     {
+         get { return (System.ValueType) fieldsData; }
+     }
+
+     /// <summary> Initializes the field with the supplied <c>long</c> value.</summary>
+     /// <param name="value_Renamed">the numeric value
+     /// </param>
+     /// <returns> this instance, because of this you can use it the following way:
+     /// <c>document.Add(new NumericField(name, precisionStep).SetLongValue(value))</c>
+     /// </returns>
+     public NumericField SetLongValue(long value_Renamed)
+     {
+         tokenStream.SetLongValue(value_Renamed);
+         fieldsData = value_Renamed;
+         return this;
+     }
+
+     /// <summary> Initializes the field with the supplied <c>int</c> value.</summary>
+     /// <param name="value_Renamed">the numeric value
+     /// </param>
+     /// <returns> this instance, because of this you can use it the following way:
+     /// <c>document.Add(new NumericField(name, precisionStep).SetIntValue(value))</c>
+     /// </returns>
+     public NumericField SetIntValue(int value_Renamed)
+     {
+         tokenStream.SetIntValue(value_Renamed);
+         fieldsData = value_Renamed;
+         return this;
+     }
+
+     /// <summary> Initializes the field with the supplied <c>double</c> value.</summary>
+     /// <param name="value_Renamed">the numeric value
+     /// </param>
+     /// <returns> this instance, because of this you can use it the following way:
+     /// <c>document.Add(new NumericField(name, precisionStep).SetDoubleValue(value))</c>
+     /// </returns>
+     public NumericField SetDoubleValue(double value_Renamed)
+     {
+         tokenStream.SetDoubleValue(value_Renamed);
+         fieldsData = value_Renamed;
+         return this;
+     }
+
+     /// <summary> Initializes the field with the supplied <c>float</c> value.</summary>
+     /// <param name="value_Renamed">the numeric value
+     /// </param>
+     /// <returns> this instance, because of this you can use it the following way:
+     /// <c>document.Add(new NumericField(name, precisionStep).SetFloatValue(value))</c>
+     /// </returns>
+     public NumericField SetFloatValue(float value_Renamed)
+     {
+         tokenStream.SetFloatValue(value_Renamed);
+         fieldsData = value_Renamed;
+         return this;
+     }
+ }
+} \ No newline at end of file
diff --git a/src/core/Document/SetBasedFieldSelector.cs b/src/core/Document/SetBasedFieldSelector.cs
new file mode 100644
index 0000000..14e3e02
--- /dev/null
+++ b/src/core/Document/SetBasedFieldSelector.cs
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Documents
+{
+ /// <summary> Declares which fields to load normally and which fields to load lazily,
+ /// based on two sets of field names.
+ /// </summary>
+ [Serializable]
+ public class SetBasedFieldSelector : FieldSelector
+ {
+     private readonly ISet<string> fieldsToLoad;
+     private readonly ISet<string> lazyFieldsToLoad;
+
+     /// <summary> Pass in the Set of <see cref="Field" /> names to load and the Set of <see cref="Field" /> names to load lazily.
+     /// A <c>null</c> set is treated like an empty one; if both sets are null or empty, the
+     /// Document will not have any <see cref="Field" /> on it.
+     /// </summary>
+     /// <param name="fieldsToLoad">A Set of <see cref="String" /> field names to load. May be empty or null
+     /// </param>
+     /// <param name="lazyFieldsToLoad">A Set of <see cref="String" /> field names to load lazily. May be empty or null
+     /// </param>
+     public SetBasedFieldSelector(ISet<string> fieldsToLoad, ISet<string> lazyFieldsToLoad)
+     {
+         this.fieldsToLoad = fieldsToLoad;
+         this.lazyFieldsToLoad = lazyFieldsToLoad;
+     }
+
+     /// <summary> Indicate whether to load the field with the given name or not. If the field name is not in either of the
+     /// initializing Sets, then <see cref="Lucene.Net.Documents.FieldSelectorResult.NO_LOAD" /> is returned. If a Field name
+     /// is in both <c>fieldsToLoad</c> and <c>lazyFieldsToLoad</c>, lazy has precedence.
+     ///
+     /// </summary>
+     /// <param name="fieldName">The <see cref="Field" /> name to check
+     /// </param>
+     /// <returns> The <see cref="FieldSelectorResult" />
+     /// </returns>
+     public virtual FieldSelectorResult Accept(System.String fieldName)
+     {
+         // Check the lazy set first so that it takes precedence over the eager set.
+         // The null checks keep a selector built with a null set from throwing here.
+         if (lazyFieldsToLoad != null && lazyFieldsToLoad.Contains(fieldName))
+         {
+             return FieldSelectorResult.LAZY_LOAD;
+         }
+         if (fieldsToLoad != null && fieldsToLoad.Contains(fieldName))
+         {
+             return FieldSelectorResult.LOAD;
+         }
+         return FieldSelectorResult.NO_LOAD;
+     }
+ }
+} \ No newline at end of file