github.com/mono/Lucene.Net.Light.git

author    Miguel de Icaza <miguel@gnome.org>  2015-06-11 19:34:09 +0300
committer Miguel de Icaza <miguel@gnome.org>  2015-06-11 19:34:09 +0300
commit    85978b7eb94738f516824341213d5e94060f5284 (patch)
tree      879c92ba9e56a74ae2a0cbbaa802344b9c39e7d0

Initial commit of lightweight Lucene.Net to be used in Mono
-rw-r--r--README.md6
-rw-r--r--src/core/Analysis/ASCIIFoldingFilter.cs3285
-rw-r--r--src/core/Analysis/Analyzer.cs171
-rw-r--r--src/core/Analysis/BaseCharFilter.cs105
-rw-r--r--src/core/Analysis/CachingTokenFilter.cs86
-rw-r--r--src/core/Analysis/CharArraySet.cs517
-rw-r--r--src/core/Analysis/CharFilter.cs95
-rw-r--r--src/core/Analysis/CharReader.cs94
-rw-r--r--src/core/Analysis/CharStream.cs45
-rw-r--r--src/core/Analysis/CharTokenizer.cs135
-rw-r--r--src/core/Analysis/ISOLatin1AccentFilter.cs344
-rw-r--r--src/core/Analysis/KeywordAnalyzer.cs54
-rw-r--r--src/core/Analysis/KeywordTokenizer.cs99
-rw-r--r--src/core/Analysis/LengthFilter.cs60
-rw-r--r--src/core/Analysis/LetterTokenizer.cs57
-rw-r--r--src/core/Analysis/LowerCaseFilter.cs49
-rw-r--r--src/core/Analysis/LowerCaseTokenizer.cs60
-rw-r--r--src/core/Analysis/MappingCharFilter.cs166
-rw-r--r--src/core/Analysis/NormalizeCharMap.cs68
-rw-r--r--src/core/Analysis/NumericTokenStream.cs270
-rw-r--r--src/core/Analysis/PerFieldAnalyzerWrapper.cs135
-rw-r--r--src/core/Analysis/PorterStemFilter.cs62
-rw-r--r--src/core/Analysis/PorterStemmer.cs746
-rw-r--r--src/core/Analysis/SimpleAnalyzer.cs45
-rw-r--r--src/core/Analysis/Standard/StandardAnalyzer.cs174
-rw-r--r--src/core/Analysis/Standard/StandardFilter.cs88
-rw-r--r--src/core/Analysis/Standard/StandardTokenizer.cs232
-rw-r--r--src/core/Analysis/Standard/StandardTokenizerImpl.cs707
-rw-r--r--src/core/Analysis/StopAnalyzer.cs141
-rw-r--r--src/core/Analysis/StopFilter.cs178
-rw-r--r--src/core/Analysis/TeeSinkTokenFilter.cs266
-rw-r--r--src/core/Analysis/Token.cs852
-rw-r--r--src/core/Analysis/TokenFilter.cs72
-rw-r--r--src/core/Analysis/TokenStream.cs162
-rw-r--r--src/core/Analysis/Tokenattributes/FlagsAttribute.cs85
-rw-r--r--src/core/Analysis/Tokenattributes/IFlagsAttribute.cs41
-rw-r--r--src/core/Analysis/Tokenattributes/IOffsetAttribute.cs48
-rw-r--r--src/core/Analysis/Tokenattributes/IPayloadAttribute.cs31
-rw-r--r--src/core/Analysis/Tokenattributes/IPositionIncrementAttribute.cs59
-rw-r--r--src/core/Analysis/Tokenattributes/ITermAttribute.cs104
-rw-r--r--src/core/Analysis/Tokenattributes/ITypeAttribute.cs30
-rw-r--r--src/core/Analysis/Tokenattributes/OffsetAttribute.cs106
-rw-r--r--src/core/Analysis/Tokenattributes/PayloadAttribute.cs100
-rw-r--r--src/core/Analysis/Tokenattributes/PositionIncrementAttribute.cs107
-rw-r--r--src/core/Analysis/Tokenattributes/TermAttribute.cs268
-rw-r--r--src/core/Analysis/Tokenattributes/TypeAttribute.cs85
-rw-r--r--src/core/Analysis/Tokenizer.cs112
-rw-r--r--src/core/Analysis/WhitespaceAnalyzer.cs43
-rw-r--r--src/core/Analysis/WhitespaceTokenizer.cs55
-rw-r--r--src/core/Analysis/WordlistLoader.cs146
-rw-r--r--src/core/Document/AbstractField.cs312
-rw-r--r--src/core/Document/CompressionTools.cs150
-rw-r--r--src/core/Document/DateField.cs138
-rw-r--r--src/core/Document/DateTools.cs350
-rw-r--r--src/core/Document/Document.cs382
-rw-r--r--src/core/Document/Field.cs667
-rw-r--r--src/core/Document/FieldSelector.cs37
-rw-r--r--src/core/Document/FieldSelectorResult.cs71
-rw-r--r--src/core/Document/Fieldable.cs205
-rw-r--r--src/core/Document/LoadFirstFieldSelector.cs35
-rw-r--r--src/core/Document/MapFieldSelector.cs68
-rw-r--r--src/core/Document/NumberTools.cs221
-rw-r--r--src/core/Document/NumericField.cs294
-rw-r--r--src/core/Document/SetBasedFieldSelector.cs69
-rw-r--r--src/core/Index/AbstractAllTermDocs.cs118
-rw-r--r--src/core/Index/AllTermDocs.cs45
-rw-r--r--src/core/Index/BufferedDeletes.cs196
-rw-r--r--src/core/Index/ByteBlockPool.cs172
-rw-r--r--src/core/Index/ByteSliceReader.cs185
-rw-r--r--src/core/Index/ByteSliceWriter.cs97
-rw-r--r--src/core/Index/CharBlockPool.cs69
-rw-r--r--src/core/Index/CheckIndex.cs1017
-rw-r--r--src/core/Index/CompoundFileReader.cs317
-rw-r--r--src/core/Index/CompoundFileWriter.cs275
-rw-r--r--src/core/Index/ConcurrentMergeScheduler.cs504
-rw-r--r--src/core/Index/CorruptIndexException.cs36
-rw-r--r--src/core/Index/DefaultSkipListReader.cs128
-rw-r--r--src/core/Index/DefaultSkipListWriter.cs143
-rw-r--r--src/core/Index/DirectoryReader.cs1548
-rw-r--r--src/core/Index/DocConsumer.cs31
-rw-r--r--src/core/Index/DocConsumerPerThread.cs37
-rw-r--r--src/core/Index/DocFieldConsumer.cs56
-rw-r--r--src/core/Index/DocFieldConsumerPerField.cs30
-rw-r--r--src/core/Index/DocFieldConsumerPerThread.cs30
-rw-r--r--src/core/Index/DocFieldConsumers.cs221
-rw-r--r--src/core/Index/DocFieldConsumersPerField.cs56
-rw-r--r--src/core/Index/DocFieldConsumersPerThread.cs82
-rw-r--r--src/core/Index/DocFieldProcessor.cs92
-rw-r--r--src/core/Index/DocFieldProcessorPerField.cs49
-rw-r--r--src/core/Index/DocFieldProcessorPerThread.cs478
-rw-r--r--src/core/Index/DocInverter.cs97
-rw-r--r--src/core/Index/DocInverterPerField.cs235
-rw-r--r--src/core/Index/DocInverterPerThread.cs107
-rw-r--r--src/core/Index/DocumentsWriter.cs2075
-rw-r--r--src/core/Index/DocumentsWriterThreadState.cs56
-rw-r--r--src/core/Index/FieldInfo.cs136
-rw-r--r--src/core/Index/FieldInfos.cs491
-rw-r--r--src/core/Index/FieldInvertState.cs110
-rw-r--r--src/core/Index/FieldReaderException.cs90
-rw-r--r--src/core/Index/FieldSortedTermVectorMapper.cs78
-rw-r--r--src/core/Index/FieldsReader.cs641
-rw-r--r--src/core/Index/FieldsWriter.cs290
-rw-r--r--src/core/Index/FilterIndexReader.cs388
-rw-r--r--src/core/Index/FormatPostingsDocsConsumer.cs36
-rw-r--r--src/core/Index/FormatPostingsDocsWriter.cs134
-rw-r--r--src/core/Index/FormatPostingsFieldsConsumer.cs39
-rw-r--r--src/core/Index/FormatPostingsFieldsWriter.cs71
-rw-r--r--src/core/Index/FormatPostingsPositionsConsumer.cs32
-rw-r--r--src/core/Index/FormatPostingsPositionsWriter.cs101
-rw-r--r--src/core/Index/FormatPostingsTermsConsumer.cs52
-rw-r--r--src/core/Index/FormatPostingsTermsWriter.cs77
-rw-r--r--src/core/Index/FreqProxFieldMergeState.cs117
-rw-r--r--src/core/Index/FreqProxTermsWriter.cs303
-rw-r--r--src/core/Index/FreqProxTermsWriterPerField.cs196
-rw-r--r--src/core/Index/FreqProxTermsWriterPerThread.cs52
-rw-r--r--src/core/Index/IndexCommit.cs119
-rw-r--r--src/core/Index/IndexDeletionPolicy.cs99
-rw-r--r--src/core/Index/IndexFileDeleter.cs808
-rw-r--r--src/core/Index/IndexFileNameFilter.cs107
-rw-r--r--src/core/Index/IndexFileNames.cs165
-rw-r--r--src/core/Index/IndexReader.cs1374
-rw-r--r--src/core/Index/IndexWriter.cs5928
-rw-r--r--src/core/Index/IntBlockPool.cs79
-rw-r--r--src/core/Index/InvertedDocConsumer.cs53
-rw-r--r--src/core/Index/InvertedDocConsumerPerField.cs46
-rw-r--r--src/core/Index/InvertedDocConsumerPerThread.cs30
-rw-r--r--src/core/Index/InvertedDocEndConsumer.cs32
-rw-r--r--src/core/Index/InvertedDocEndConsumerPerField.cs28
-rw-r--r--src/core/Index/InvertedDocEndConsumerPerThread.cs30
-rw-r--r--src/core/Index/KeepOnlyLastCommitDeletionPolicy.cs51
-rw-r--r--src/core/Index/LogByteSizeMergePolicy.cs99
-rw-r--r--src/core/Index/LogDocMergePolicy.cs69
-rw-r--r--src/core/Index/LogMergePolicy.cs580
-rw-r--r--src/core/Index/MergeDocIDRemapper.cs127
-rw-r--r--src/core/Index/MergePolicy.cs309
-rw-r--r--src/core/Index/MergeScheduler.cs58
-rw-r--r--src/core/Index/MultiLevelSkipListReader.cs341
-rw-r--r--src/core/Index/MultiLevelSkipListWriter.cs171
-rw-r--r--src/core/Index/MultiReader.cs494
-rw-r--r--src/core/Index/MultipleTermPositions.cs256
-rw-r--r--src/core/Index/NormsWriter.cs206
-rw-r--r--src/core/Index/NormsWriterPerField.cs90
-rw-r--r--src/core/Index/NormsWriterPerThread.cs55
-rw-r--r--src/core/Index/ParallelReader.cs822
-rw-r--r--src/core/Index/Payload.cs217
-rw-r--r--src/core/Index/PositionBasedTermVectorMapper.cs176
-rw-r--r--src/core/Index/RawPostingList.cs46
-rw-r--r--src/core/Index/ReadOnlyDirectoryReader.cs45
-rw-r--r--src/core/Index/ReadOnlySegmentReader.cs42
-rw-r--r--src/core/Index/ReusableStringReader.cs136
-rw-r--r--src/core/Index/SegmentInfo.cs875
-rw-r--r--src/core/Index/SegmentInfos.cs1074
-rw-r--r--src/core/Index/SegmentMergeInfo.cs108
-rw-r--r--src/core/Index/SegmentMergeQueue.cs47
-rw-r--r--src/core/Index/SegmentMerger.cs934
-rw-r--r--src/core/Index/SegmentReader.cs1692
-rw-r--r--src/core/Index/SegmentTermDocs.cs282
-rw-r--r--src/core/Index/SegmentTermEnum.cs247
-rw-r--r--src/core/Index/SegmentTermPositionVector.cs73
-rw-r--r--src/core/Index/SegmentTermPositions.cs226
-rw-r--r--src/core/Index/SegmentTermVector.cs102
-rw-r--r--src/core/Index/SegmentWriteState.cs53
-rw-r--r--src/core/Index/SerialMergeScheduler.cs49
-rw-r--r--src/core/Index/SnapshotDeletionPolicy.cs203
-rw-r--r--src/core/Index/SortedTermVectorMapper.cs133
-rw-r--r--src/core/Index/StaleReaderException.cs39
-rw-r--r--src/core/Index/StoredFieldsWriter.cs266
-rw-r--r--src/core/Index/StoredFieldsWriterPerThread.cs93
-rw-r--r--src/core/Index/Term.cs168
-rw-r--r--src/core/Index/TermBuffer.cs166
-rw-r--r--src/core/Index/TermDocs.cs86
-rw-r--r--src/core/Index/TermEnum.cs53
-rw-r--r--src/core/Index/TermFreqVector.cs73
-rw-r--r--src/core/Index/TermInfo.cs69
-rw-r--r--src/core/Index/TermInfosReader.cs325
-rw-r--r--src/core/Index/TermInfosWriter.cs250
-rw-r--r--src/core/Index/TermPositionVector.cs50
-rw-r--r--src/core/Index/TermPositions.cs79
-rw-r--r--src/core/Index/TermVectorEntry.cs108
-rw-r--r--src/core/Index/TermVectorEntryFreqSortedComparator.cs45
-rw-r--r--src/core/Index/TermVectorMapper.cs112
-rw-r--r--src/core/Index/TermVectorOffsetInfo.cs134
-rw-r--r--src/core/Index/TermVectorsReader.cs731
-rw-r--r--src/core/Index/TermVectorsTermsWriter.cs380
-rw-r--r--src/core/Index/TermVectorsTermsWriterPerField.cs290
-rw-r--r--src/core/Index/TermVectorsTermsWriterPerThread.cs106
-rw-r--r--src/core/Index/TermVectorsWriter.cs246
-rw-r--r--src/core/Index/TermsHash.cs278
-rw-r--r--src/core/Index/TermsHashConsumer.cs40
-rw-r--r--src/core/Index/TermsHashConsumerPerField.cs38
-rw-r--r--src/core/Index/TermsHashConsumerPerThread.cs30
-rw-r--r--src/core/Index/TermsHashPerField.cs639
-rw-r--r--src/core/Index/TermsHashPerThread.cs140
-rw-r--r--src/core/LZOCompressor.cs135
-rw-r--r--src/core/LucenePackage.cs40
-rw-r--r--src/core/Messages/INLSException.cs36
-rw-r--r--src/core/Messages/Message.cs36
-rw-r--r--src/core/Messages/MessageImpl.cs81
-rw-r--r--src/core/Messages/NLS.cs254
-rw-r--r--src/core/QueryParser/CharStream.cs124
-rw-r--r--src/core/QueryParser/FastCharStream.cs159
-rw-r--r--src/core/QueryParser/MultiFieldQueryParser.cs370
-rw-r--r--src/core/QueryParser/ParseException.cs244
-rw-r--r--src/core/QueryParser/QueryParser.cs2095
-rw-r--r--src/core/QueryParser/QueryParserConstants.cs143
-rw-r--r--src/core/QueryParser/QueryParserTokenManager.cs1462
-rw-r--r--src/core/QueryParser/Token.cs133
-rw-r--r--src/core/QueryParser/TokenMgrError.cs169
-rw-r--r--src/core/Search/BooleanClause.cs102
-rw-r--r--src/core/Search/BooleanQuery.cs599
-rw-r--r--src/core/Search/BooleanScorer.cs405
-rw-r--r--src/core/Search/BooleanScorer2.cs417
-rw-r--r--src/core/Search/CachingSpanFilter.cs124
-rw-r--r--src/core/Search/CachingWrapperFilter.cs279
-rw-r--r--src/core/Search/Collector.cs176
-rw-r--r--src/core/Search/ComplexExplanation.cs76
-rw-r--r--src/core/Search/ConjunctionScorer.cs147
-rw-r--r--src/core/Search/ConstantScoreQuery.cs236
-rw-r--r--src/core/Search/DefaultSimilarity.cs108
-rw-r--r--src/core/Search/DisjunctionMaxQuery.cs344
-rw-r--r--src/core/Search/DisjunctionMaxScorer.cs215
-rw-r--r--src/core/Search/DisjunctionSumScorer.cs278
-rw-r--r--src/core/Search/DocIdSet.cs112
-rw-r--r--src/core/Search/DocIdSetIterator.cs90
-rw-r--r--src/core/Search/ExactPhraseScorer.cs67
-rw-r--r--src/core/Search/Explanation.cs168
-rw-r--r--src/core/Search/FieldCache.cs708
-rw-r--r--src/core/Search/FieldCacheImpl.cs858
-rw-r--r--src/core/Search/FieldCacheRangeFilter.cs964
-rw-r--r--src/core/Search/FieldCacheTermsFilter.cs223
-rw-r--r--src/core/Search/FieldComparator.cs1065
-rw-r--r--src/core/Search/FieldComparatorSource.cs45
-rw-r--r--src/core/Search/FieldDoc.cs113
-rw-r--r--src/core/Search/FieldDocSortedHitQueue.cs148
-rw-r--r--src/core/Search/FieldValueHitQueue.cs235
-rw-r--r--src/core/Search/Filter.cs54
-rw-r--r--src/core/Search/FilterManager.cs203
-rw-r--r--src/core/Search/FilteredDocIdSet.cs107
-rw-r--r--src/core/Search/FilteredDocIdSetIterator.cs96
-rw-r--r--src/core/Search/FilteredQuery.cs293
-rw-r--r--src/core/Search/FilteredTermEnum.cs127
-rw-r--r--src/core/Search/Function/ByteFieldSource.cs136
-rw-r--r--src/core/Search/Function/CustomScoreProvider.cs175
-rw-r--r--src/core/Search/Function/CustomScoreQuery.cs579
-rw-r--r--src/core/Search/Function/DocValues.cs206
-rw-r--r--src/core/Search/Function/FieldCacheSource.cs110
-rw-r--r--src/core/Search/Function/FieldScoreQuery.cs139
-rw-r--r--src/core/Search/Function/FloatFieldSource.cs131
-rw-r--r--src/core/Search/Function/IntFieldSource.cs136
-rw-r--r--src/core/Search/Function/OrdFieldSource.cs146
-rw-r--r--src/core/Search/Function/ReverseOrdFieldSource.cs158
-rw-r--r--src/core/Search/Function/ShortFieldSource.cs136
-rw-r--r--src/core/Search/Function/ValueSource.cs69
-rw-r--r--src/core/Search/Function/ValueSourceQuery.cs235
-rw-r--r--src/core/Search/FuzzyQuery.cs256
-rw-r--r--src/core/Search/FuzzyTermEnum.cs318
-rw-r--r--src/core/Search/HitQueue.cs95
-rw-r--r--src/core/Search/IndexSearcher.cs343
-rw-r--r--src/core/Search/MatchAllDocsQuery.cs198
-rw-r--r--src/core/Search/MultiPhraseQuery.cs496
-rw-r--r--src/core/Search/MultiSearcher.cs458
-rw-r--r--src/core/Search/MultiTermQuery.cs465
-rw-r--r--src/core/Search/MultiTermQueryWrapperFilter.cs161
-rw-r--r--src/core/Search/NumericRangeFilter.cs185
-rw-r--r--src/core/Search/NumericRangeQuery.cs665
-rw-r--r--src/core/Search/ParallelMultiSearcher.cs217
-rw-r--r--src/core/Search/Payloads/AveragePayloadFunction.cs63
-rw-r--r--src/core/Search/Payloads/MaxPayloadFunction.cs69
-rw-r--r--src/core/Search/Payloads/MinPayloadFunction.cs67
-rw-r--r--src/core/Search/Payloads/PayloadFunction.cs78
-rw-r--r--src/core/Search/Payloads/PayloadNearQuery.cs284
-rw-r--r--src/core/Search/Payloads/PayloadSpanUtil.cs211
-rw-r--r--src/core/Search/Payloads/PayloadTermQuery.cs255
-rw-r--r--src/core/Search/PhrasePositions.cs93
-rw-r--r--src/core/Search/PhraseQuery.cs370
-rw-r--r--src/core/Search/PhraseQueue.cs44
-rw-r--r--src/core/Search/PhraseScorer.cs224
-rw-r--r--src/core/Search/PositiveScoresOnlyCollector.cs66
-rw-r--r--src/core/Search/PrefixFilter.cs51
-rw-r--r--src/core/Search/PrefixQuery.cs100
-rw-r--r--src/core/Search/PrefixTermEnum.cs71
-rw-r--r--src/core/Search/Query.cs257
-rw-r--r--src/core/Search/QueryTermVector.cs167
-rw-r--r--src/core/Search/QueryWrapperFilter.cs106
-rw-r--r--src/core/Search/ReqExclScorer.cs140
-rw-r--r--src/core/Search/ReqOptSumScorer.cs87
-rw-r--r--src/core/Search/ScoreCachingWrappingScorer.cs88
-rw-r--r--src/core/Search/ScoreDoc.cs50
-rw-r--r--src/core/Search/Scorer.cs106
-rw-r--r--src/core/Search/Searchable.cs169
-rw-r--r--src/core/Search/Searcher.cs192
-rw-r--r--src/core/Search/Similarity.cs697
-rw-r--r--src/core/Search/SimilarityDelegator.cs80
-rw-r--r--src/core/Search/SingleTermEnum.cs70
-rw-r--r--src/core/Search/SloppyPhraseScorer.cs244
-rw-r--r--src/core/Search/Sort.cs214
-rw-r--r--src/core/Search/SortField.cs512
-rw-r--r--src/core/Search/SpanFilter.cs47
-rw-r--r--src/core/Search/SpanFilterResult.cs116
-rw-r--r--src/core/Search/SpanQueryFilter.cs109
-rw-r--r--src/core/Search/Spans/FieldMaskingSpanQuery.cs162
-rw-r--r--src/core/Search/Spans/NearSpansOrdered.cs436
-rw-r--r--src/core/Search/Spans/NearSpansUnordered.cs415
-rw-r--r--src/core/Search/Spans/SpanFirstQuery.cs211
-rw-r--r--src/core/Search/Spans/SpanNearQuery.cs230
-rw-r--r--src/core/Search/Spans/SpanNotQuery.cs260
-rw-r--r--src/core/Search/Spans/SpanOrQuery.cs345
-rw-r--r--src/core/Search/Spans/SpanQuery.cs45
-rw-r--r--src/core/Search/Spans/SpanScorer.cs130
-rw-r--r--src/core/Search/Spans/SpanTermQuery.cs100
-rw-r--r--src/core/Search/Spans/SpanWeight.cs138
-rw-r--r--src/core/Search/Spans/Spans.cs92
-rw-r--r--src/core/Search/Spans/TermSpans.cs126
-rw-r--r--src/core/Search/TermQuery.cs237
-rw-r--r--src/core/Search/TermRangeFilter.cs137
-rw-r--r--src/core/Search/TermRangeQuery.cs238
-rw-r--r--src/core/Search/TermRangeTermEnum.cs161
-rw-r--r--src/core/Search/TermScorer.cs188
-rw-r--r--src/core/Search/TimeLimitingCollector.cs234
-rw-r--r--src/core/Search/TopDocs.cs71
-rw-r--r--src/core/Search/TopDocsCollector.cs155
-rw-r--r--src/core/Search/TopFieldCollector.cs1137
-rw-r--r--src/core/Search/TopFieldDocs.cs47
-rw-r--r--src/core/Search/TopScoreDocCollector.cs177
-rw-r--r--src/core/Search/Weight.cs127
-rw-r--r--src/core/Search/WildcardQuery.cs136
-rw-r--r--src/core/Search/WildcardTermEnum.cs196
-rw-r--r--src/core/Store/AlreadyClosedException.cs47
-rw-r--r--src/core/Store/BufferedIndexInput.cs241
-rw-r--r--src/core/Store/BufferedIndexOutput.cs165
-rw-r--r--src/core/Store/CheckSumIndexInput.cs89
-rw-r--r--src/core/Store/CheckSumIndexOutput.cs115
-rw-r--r--src/core/Store/Directory.cs264
-rw-r--r--src/core/Store/FSDirectory.cs533
-rw-r--r--src/core/Store/FSLockFactory.cs52
-rw-r--r--src/core/Store/FileSwitchDirectory.cs167
-rw-r--r--src/core/Store/IndexInput.cs290
-rw-r--r--src/core/Store/IndexOutput.cs285
-rw-r--r--src/core/Store/Lock.cs163
-rw-r--r--src/core/Store/LockFactory.cs71
-rw-r--r--src/core/Store/LockObtainFailedException.cs41
-rw-r--r--src/core/Store/LockReleaseFailedException.cs35
-rw-r--r--src/core/Store/LockStressTest.cs128
-rw-r--r--src/core/Store/LockVerifyServer.cs110
-rw-r--r--src/core/Store/MMapDirectory.cs535
-rw-r--r--src/core/Store/NIOFSDirectory.cs269
-rw-r--r--src/core/Store/NativeFSLockFactory.cs440
-rw-r--r--src/core/Store/NoLockFactory.cs76
-rw-r--r--src/core/Store/NoSuchDirectoryException.cs34
-rw-r--r--src/core/Store/RAMDirectory.cs262
-rw-r--r--src/core/Store/RAMFile.cs147
-rw-r--r--src/core/Store/RAMInputStream.cs138
-rw-r--r--src/core/Store/RAMOutputStream.cs191
-rw-r--r--src/core/Store/SimpleFSDirectory.cs319
-rw-r--r--src/core/Store/SimpleFSLockFactory.cs232
-rw-r--r--src/core/Store/SingleInstanceLockFactory.cs107
-rw-r--r--src/core/Store/VerifyingLockFactory.cs165
-rw-r--r--src/core/Support/AppSettings.cs159
-rw-r--r--src/core/Support/AttributeImplItem.cs41
-rw-r--r--src/core/Support/BitSetSupport.cs88
-rw-r--r--src/core/Support/BuildType.cs32
-rw-r--r--src/core/Support/CRC32.cs83
-rw-r--r--src/core/Support/Character.cs81
-rw-r--r--src/core/Support/CloseableThreadLocalProfiler.cs45
-rw-r--r--src/core/Support/CollectionsHelper.cs339
-rw-r--r--src/core/Support/Compare.cs49
-rw-r--r--src/core/Support/Compatibility/ConcurrentDictionary.cs312
-rw-r--r--src/core/Support/Compatibility/Func.cs29
-rw-r--r--src/core/Support/Compatibility/ISet.cs59
-rw-r--r--src/core/Support/Compatibility/SetFactory.cs42
-rw-r--r--src/core/Support/Compatibility/SortedSet.cs187
-rw-r--r--src/core/Support/Compatibility/ThreadLocal.cs55
-rw-r--r--src/core/Support/Compatibility/WrappedHashSet.cs44
-rw-r--r--src/core/Support/Cryptography.cs45
-rw-r--r--src/core/Support/Deflater.cs97
-rw-r--r--src/core/Support/Double.cs44
-rw-r--r--src/core/Support/EquatableList.cs339
-rw-r--r--src/core/Support/FileSupport.cs121
-rw-r--r--src/core/Support/GeneralKeyedCollection.cs96
-rw-r--r--src/core/Support/HashMap.cs449
-rw-r--r--src/core/Support/IChecksum.cs32
-rw-r--r--src/core/Support/IThreadRunnable.cs36
-rw-r--r--src/core/Support/Inflater.cs71
-rw-r--r--src/core/Support/Number.cs252
-rw-r--r--src/core/Support/OS.cs62
-rw-r--r--src/core/Support/SharpZipLib.cs51
-rw-r--r--src/core/Support/Single.cs131
-rw-r--r--src/core/Support/TextSupport.cs49
-rw-r--r--src/core/Support/ThreadClass.cs315
-rw-r--r--src/core/Support/ThreadLock.cs82
-rw-r--r--src/core/Support/WeakDictionary.cs296
-rw-r--r--src/core/Util/ArrayUtil.cs282
-rw-r--r--src/core/Util/Attribute.cs131
-rw-r--r--src/core/Util/AttributeSource.cs510
-rw-r--r--src/core/Util/AverageGuessMemoryModel.cs90
-rw-r--r--src/core/Util/BitUtil.cs894
-rw-r--r--src/core/Util/BitVector.cs315
-rw-r--r--src/core/Util/Cache/Cache.cs129
-rw-r--r--src/core/Util/Cache/SimpleLRUCache.cs166
-rw-r--r--src/core/Util/Cache/SimpleMapCache.cs141
-rw-r--r--src/core/Util/CloseableThreadLocal.cs205
-rw-r--r--src/core/Util/Constants.cs107
-rw-r--r--src/core/Util/DocIdBitSet.cs87
-rw-r--r--src/core/Util/FieldCacheSanityChecker.cs439
-rw-r--r--src/core/Util/IAttribute.cs27
-rw-r--r--src/core/Util/IdentityDictionary.cs64
-rw-r--r--src/core/Util/IndexableBinaryStringTools.cs342
-rw-r--r--src/core/Util/MapOfSets.cs76
-rw-r--r--src/core/Util/MemoryModel.cs44
-rw-r--r--src/core/Util/NumericUtils.cs488
-rw-r--r--src/core/Util/OpenBitSet.cs944
-rw-r--r--src/core/Util/OpenBitSetDISI.cs112
-rw-r--r--src/core/Util/OpenBitSetIterator.cs233
-rw-r--r--src/core/Util/PriorityQueue.cs280
-rw-r--r--src/core/Util/RamUsageEstimator.cs220
-rw-r--r--src/core/Util/ReaderUtil.cs122
-rw-r--r--src/core/Util/ScorerDocQueue.cs275
-rw-r--r--src/core/Util/SimpleStringInterner.cs95
-rw-r--r--src/core/Util/SmallFloat.cs152
-rw-r--r--src/core/Util/SortedVIntList.cs289
-rw-r--r--src/core/Util/SorterTemplate.cs224
-rw-r--r--src/core/Util/StringHelper.cs89
-rw-r--r--src/core/Util/StringInterner.cs44
-rw-r--r--src/core/Util/ToStringUtils.cs40
-rw-r--r--src/core/Util/UnicodeUtil.cs505
-rw-r--r--src/core/Util/Version.cs86
426 files changed, 100956 insertions, 0 deletions
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..250cada
--- /dev/null
+++ b/README.md
@@ -0,0 +1,6 @@
+This is a subset of Lucene.Net as used by Mono's Monodoc tool.
+
+This module is a checkout of:
+
+ git://github.com/mono/lucene.net.git
+ 88fb67b07621dfed054d8d75fd50672fb26349df
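
The next and largest file in this commit, ASCIIFoldingFilter.cs, folds accented and otherwise decorated Latin characters down to their plain ASCII equivalents. As a rough illustration of how the filter is typically driven (a hypothetical sketch, not part of the commit; it only uses the WhitespaceTokenizer, TokenStream and ITermAttribute types that appear in the file list above):

    using System;
    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Tokenattributes;

    class AsciiFoldingDemo
    {
        static void Main()
        {
            // Split on whitespace, then fold each term to its ASCII form.
            TokenStream stream = new ASCIIFoldingFilter(
                new WhitespaceTokenizer(new StringReader("résumé naïve façade")));
            ITermAttribute term = stream.AddAttribute<ITermAttribute>();
            while (stream.IncrementToken())
            {
                // Prints: resume, naive, facade
                Console.WriteLine(new string(term.TermBuffer(), 0, term.TermLength()));
            }
        }
    }
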
diff --git a/src/core/Analysis/ASCIIFoldingFilter.cs b/src/core/Analysis/ASCIIFoldingFilter.cs
new file mode 100644
index 0000000..6133870
--- /dev/null
+++ b/src/core/Analysis/ASCIIFoldingFilter.cs
@@ -0,0 +1,3285 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Lucene.Net.Analysis.Tokenattributes;
+using ArrayUtil = Lucene.Net.Util.ArrayUtil;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// <summary> This class converts alphabetic, numeric, and symbolic Unicode characters
+ /// which are not in the first 127 ASCII characters (the "Basic Latin" Unicode
+ /// block) into their ASCII equivalents, if one exists.
+ ///
+ /// Characters from the following Unicode blocks are converted; however, only
+ /// those characters with reasonable ASCII alternatives are converted:
+ ///
+ /// <list type="bullet">
+ /// <item>C1 Controls and Latin-1 Supplement: <a href="http://www.unicode.org/charts/PDF/U0080.pdf">http://www.unicode.org/charts/PDF/U0080.pdf</a></item>
+ /// <item>Latin Extended-A: <a href="http://www.unicode.org/charts/PDF/U0100.pdf">http://www.unicode.org/charts/PDF/U0100.pdf</a></item>
+ /// <item>Latin Extended-B: <a href="http://www.unicode.org/charts/PDF/U0180.pdf">http://www.unicode.org/charts/PDF/U0180.pdf</a></item>
+ /// <item>Latin Extended Additional: <a href="http://www.unicode.org/charts/PDF/U1E00.pdf">http://www.unicode.org/charts/PDF/U1E00.pdf</a></item>
+ /// <item>Latin Extended-C: <a href="http://www.unicode.org/charts/PDF/U2C60.pdf">http://www.unicode.org/charts/PDF/U2C60.pdf</a></item>
+ /// <item>Latin Extended-D: <a href="http://www.unicode.org/charts/PDF/UA720.pdf">http://www.unicode.org/charts/PDF/UA720.pdf</a></item>
+ /// <item>IPA Extensions: <a href="http://www.unicode.org/charts/PDF/U0250.pdf">http://www.unicode.org/charts/PDF/U0250.pdf</a></item>
+ /// <item>Phonetic Extensions: <a href="http://www.unicode.org/charts/PDF/U1D00.pdf">http://www.unicode.org/charts/PDF/U1D00.pdf</a></item>
+ /// <item>Phonetic Extensions Supplement: <a href="http://www.unicode.org/charts/PDF/U1D80.pdf">http://www.unicode.org/charts/PDF/U1D80.pdf</a></item>
+ /// <item>General Punctuation: <a href="http://www.unicode.org/charts/PDF/U2000.pdf">http://www.unicode.org/charts/PDF/U2000.pdf</a></item>
+ /// <item>Superscripts and Subscripts: <a href="http://www.unicode.org/charts/PDF/U2070.pdf">http://www.unicode.org/charts/PDF/U2070.pdf</a></item>
+ /// <item>Enclosed Alphanumerics: <a href="http://www.unicode.org/charts/PDF/U2460.pdf">http://www.unicode.org/charts/PDF/U2460.pdf</a></item>
+ /// <item>Dingbats: <a href="http://www.unicode.org/charts/PDF/U2700.pdf">http://www.unicode.org/charts/PDF/U2700.pdf</a></item>
+ /// <item>Supplemental Punctuation: <a href="http://www.unicode.org/charts/PDF/U2E00.pdf">http://www.unicode.org/charts/PDF/U2E00.pdf</a></item>
+ /// <item>Alphabetic Presentation Forms: <a href="http://www.unicode.org/charts/PDF/UFB00.pdf">http://www.unicode.org/charts/PDF/UFB00.pdf</a></item>
+ /// <item>Halfwidth and Fullwidth Forms: <a href="http://www.unicode.org/charts/PDF/UFF00.pdf">http://www.unicode.org/charts/PDF/UFF00.pdf</a></item>
+ /// </list>
+ ///
+ /// See: <a href="http://en.wikipedia.org/wiki/Latin_characters_in_Unicode">http://en.wikipedia.org/wiki/Latin_characters_in_Unicode</a>
+ ///
+ /// The set of character conversions supported by this class is a superset of
+ /// those supported by Lucene's <see cref="ISOLatin1AccentFilter" />, which strips
+ /// accents from Latin1 characters. For example, '&#192;' (LATIN CAPITAL LETTER A
+ /// WITH GRAVE) will be replaced by 'A'.
+ /// </summary>
+ public sealed class ASCIIFoldingFilter : TokenFilter
+ {
+ public ASCIIFoldingFilter(TokenStream input):base(input)
+ {
+ termAtt = AddAttribute<ITermAttribute>();
+ }
+
+ private char[] output = new char[512];
+ private int outputPos;
+ private ITermAttribute termAtt;
+
+ public override bool IncrementToken()
+ {
+ if (input.IncrementToken())
+ {
+ char[] buffer = termAtt.TermBuffer();
+ int length = termAtt.TermLength();
+
+ // If no characters actually require rewriting then we
+ // just return token as-is:
+ for (int i = 0; i < length; ++i)
+ {
+ char c = buffer[i];
+ if (c >= '\u0080')
+ {
+ FoldToASCII(buffer, length);
+ termAtt.SetTermBuffer(output, 0, outputPos);
+ break;
+ }
+ }
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+ }
+
+ /// <summary> Converts characters above ASCII to their ASCII equivalents. For example,
+ /// accents are removed from accented characters.
+ /// </summary>
+ /// <param name="input">The characters to fold
+ /// </param>
+ /// <param name="length">The number of valid characters in the input buffer
+ /// </param>
+ public void FoldToASCII(char[] input, int length)
+ {
+ // Worst-case length required: a single input character can expand to
+ // several output characters (e.g. the parenthesized forms below become
+ // "(a)"), so reserve four output slots per input character.
+ int maxSizeNeeded = 4 * length;
+ if (output.Length < maxSizeNeeded)
+ {
+ output = new char[ArrayUtil.GetNextSize(maxSizeNeeded)];
+ }
+
+ outputPos = 0;
+
+ for (int pos = 0; pos < length; ++pos)
+ {
+ char c = input[pos];
+
+ // Quick test: if it's not in range then just keep current character
+ if (c < '\u0080')
+ {
+ output[outputPos++] = c;
+ }
+ else
+ {
+ switch (c)
+ {
+
+ case '\u00C0':
+ // À [LATIN CAPITAL LETTER A WITH GRAVE]
+ case '\u00C1':
+ // � [LATIN CAPITAL LETTER A WITH ACUTE]
+ case '\u00C2':
+ // Â [LATIN CAPITAL LETTER A WITH CIRCUMFLEX]
+ case '\u00C3':
+ // Ã [LATIN CAPITAL LETTER A WITH TILDE]
+ case '\u00C4':
+ // Ä [LATIN CAPITAL LETTER A WITH DIAERESIS]
+ case '\u00C5':
+ // Ã… [LATIN CAPITAL LETTER A WITH RING ABOVE]
+ case '\u0100':
+ // Ā [LATIN CAPITAL LETTER A WITH MACRON]
+ case '\u0102':
+ // Ä‚ [LATIN CAPITAL LETTER A WITH BREVE]
+ case '\u0104':
+ // Ä„ [LATIN CAPITAL LETTER A WITH OGONEK]
+ case '\u018F':
+ // � http://en.wikipedia.org/wiki/Schwa [LATIN CAPITAL LETTER SCHWA]
+ case '\u01CD':
+ // � [LATIN CAPITAL LETTER A WITH CARON]
+ case '\u01DE':
+ // Çž [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON]
+ case '\u01E0':
+ // Ç  [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON]
+ case '\u01FA':
+ // Ǻ [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE]
+ case '\u0200':
+ // Ȁ [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE]
+ case '\u0202':
+ // È‚ [LATIN CAPITAL LETTER A WITH INVERTED BREVE]
+ case '\u0226':
+ // Ȧ [LATIN CAPITAL LETTER A WITH DOT ABOVE]
+ case '\u023A':
+ // Ⱥ [LATIN CAPITAL LETTER A WITH STROKE]
+ case '\u1D00':
+ // á´€ [LATIN LETTER SMALL CAPITAL A]
+ case '\u1E00':
+ // Ḁ [LATIN CAPITAL LETTER A WITH RING BELOW]
+ case '\u1EA0':
+ // Ạ [LATIN CAPITAL LETTER A WITH DOT BELOW]
+ case '\u1EA2':
+ // Ả [LATIN CAPITAL LETTER A WITH HOOK ABOVE]
+ case '\u1EA4':
+ // Ấ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE]
+ case '\u1EA6':
+ // Ầ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE]
+ case '\u1EA8':
+ // Ẩ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
+ case '\u1EAA':
+ // Ẫ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE]
+ case '\u1EAC':
+ // Ậ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
+ case '\u1EAE':
+ // Ắ [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE]
+ case '\u1EB0':
+ // Ằ [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE]
+ case '\u1EB2':
+ // Ẳ [LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE]
+ case '\u1EB4':
+ // Ẵ [LATIN CAPITAL LETTER A WITH BREVE AND TILDE]
+ case '\u1EB6':
+ // Ặ [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW]
+ case '\u24B6':
+ // â’¶ [CIRCLED LATIN CAPITAL LETTER A]
+ case '\uFF21': // A [FULLWIDTH LATIN CAPITAL LETTER A]
+ output[outputPos++] = 'A';
+ break;
+
+ case '\u00E0':
+ // à [LATIN SMALL LETTER A WITH GRAVE]
+ case '\u00E1':
+ // á [LATIN SMALL LETTER A WITH ACUTE]
+ case '\u00E2':
+ // â [LATIN SMALL LETTER A WITH CIRCUMFLEX]
+ case '\u00E3':
+ // ã [LATIN SMALL LETTER A WITH TILDE]
+ case '\u00E4':
+ // ä [LATIN SMALL LETTER A WITH DIAERESIS]
+ case '\u00E5':
+ // å [LATIN SMALL LETTER A WITH RING ABOVE]
+ case '\u0101':
+ // � [LATIN SMALL LETTER A WITH MACRON]
+ case '\u0103':
+ // ă [LATIN SMALL LETTER A WITH BREVE]
+ case '\u0105':
+ // Ä… [LATIN SMALL LETTER A WITH OGONEK]
+ case '\u01CE':
+ // ÇŽ [LATIN SMALL LETTER A WITH CARON]
+ case '\u01DF':
+ // ÇŸ [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON]
+ case '\u01E1':
+ // Ç¡ [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON]
+ case '\u01FB':
+ // Ç» [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE]
+ case '\u0201':
+ // � [LATIN SMALL LETTER A WITH DOUBLE GRAVE]
+ case '\u0203':
+ // ȃ [LATIN SMALL LETTER A WITH INVERTED BREVE]
+ case '\u0227':
+ // ȧ [LATIN SMALL LETTER A WITH DOT ABOVE]
+ case '\u0250':
+ // � [LATIN SMALL LETTER TURNED A]
+ case '\u0259':
+ // É™ [LATIN SMALL LETTER SCHWA]
+ case '\u025A':
+ // Éš [LATIN SMALL LETTER SCHWA WITH HOOK]
+ case '\u1D8F':
+ // � [LATIN SMALL LETTER A WITH RETROFLEX HOOK]
+ case '\u1D95':
+ // ᶕ [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK]
+ case '\u1E01':
+ // ạ [LATIN SMALL LETTER A WITH RING BELOW]
+ case '\u1E9A':
+ // ả [LATIN SMALL LETTER A WITH RIGHT HALF RING]
+ case '\u1EA1':
+ // ạ [LATIN SMALL LETTER A WITH DOT BELOW]
+ case '\u1EA3':
+ // ả [LATIN SMALL LETTER A WITH HOOK ABOVE]
+ case '\u1EA5':
+ // ấ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE]
+ case '\u1EA7':
+ // ầ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE]
+ case '\u1EA9':
+ // ẩ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
+ case '\u1EAB':
+ // ẫ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE]
+ case '\u1EAD':
+ // ậ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
+ case '\u1EAF':
+ // ắ [LATIN SMALL LETTER A WITH BREVE AND ACUTE]
+ case '\u1EB1':
+ // ằ [LATIN SMALL LETTER A WITH BREVE AND GRAVE]
+ case '\u1EB3':
+ // ẳ [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE]
+ case '\u1EB5':
+ // ẵ [LATIN SMALL LETTER A WITH BREVE AND TILDE]
+ case '\u1EB7':
+ // ặ [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW]
+ case '\u2090':
+ // � [LATIN SUBSCRIPT SMALL LETTER A]
+ case '\u2094':
+ // �? [LATIN SUBSCRIPT SMALL LETTER SCHWA]
+ case '\u24D0':
+ // � [CIRCLED LATIN SMALL LETTER A]
+ case '\u2C65':
+ // â±¥ [LATIN SMALL LETTER A WITH STROKE]
+ case '\u2C6F':
+ // Ɐ [LATIN CAPITAL LETTER TURNED A]
+ case '\uFF41': // � [FULLWIDTH LATIN SMALL LETTER A]
+ output[outputPos++] = 'a';
+ break;
+
+ case '\uA732': // Ꜳ [LATIN CAPITAL LETTER AA]
+ output[outputPos++] = 'A';
+ output[outputPos++] = 'A';
+ break;
+
+ case '\u00C6':
+ // Æ [LATIN CAPITAL LETTER AE]
+ case '\u01E2':
+ // Ǣ [LATIN CAPITAL LETTER AE WITH MACRON]
+ case '\u01FC':
+ // Ǽ [LATIN CAPITAL LETTER AE WITH ACUTE]
+ case '\u1D01': // á´� [LATIN LETTER SMALL CAPITAL AE]
+ output[outputPos++] = 'A';
+ output[outputPos++] = 'E';
+ break;
+
+ case '\uA734': // Ꜵ [LATIN CAPITAL LETTER AO]
+ output[outputPos++] = 'A';
+ output[outputPos++] = 'O';
+ break;
+
+ case '\uA736': // Ꜷ [LATIN CAPITAL LETTER AU]
+ output[outputPos++] = 'A';
+ output[outputPos++] = 'U';
+ break;
+
+ case '\uA738':
+ // Ꜹ [LATIN CAPITAL LETTER AV]
+ case '\uA73A': // Ꜻ [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR]
+ output[outputPos++] = 'A';
+ output[outputPos++] = 'V';
+ break;
+
+ case '\uA73C': // Ꜽ [LATIN CAPITAL LETTER AY]
+ output[outputPos++] = 'A';
+ output[outputPos++] = 'Y';
+ break;
+
+ case '\u249C': // ⒜ [PARENTHESIZED LATIN SMALL LETTER A]
+ output[outputPos++] = '(';
+ output[outputPos++] = 'a';
+ output[outputPos++] = ')';
+ break;
+
+ case '\uA733': // ꜳ [LATIN SMALL LETTER AA]
+ output[outputPos++] = 'a';
+ output[outputPos++] = 'a';
+ break;
+
+ case '\u00E6':
+ // æ [LATIN SMALL LETTER AE]
+ case '\u01E3':
+ // ǣ [LATIN SMALL LETTER AE WITH MACRON]
+ case '\u01FD':
+ // ǽ [LATIN SMALL LETTER AE WITH ACUTE]
+ case '\u1D02': // á´‚ [LATIN SMALL LETTER TURNED AE]
+ output[outputPos++] = 'a';
+ output[outputPos++] = 'e';
+ break;
+
+ case '\uA735': // ꜵ [LATIN SMALL LETTER AO]
+ output[outputPos++] = 'a';
+ output[outputPos++] = 'o';
+ break;
+
+ case '\uA737': // ꜷ [LATIN SMALL LETTER AU]
+ output[outputPos++] = 'a';
+ output[outputPos++] = 'u';
+ break;
+
+ case '\uA739':
+ // ꜹ [LATIN SMALL LETTER AV]
+ case '\uA73B': // ꜻ [LATIN SMALL LETTER AV WITH HORIZONTAL BAR]
+ output[outputPos++] = 'a';
+ output[outputPos++] = 'v';
+ break;
+
+ case '\uA73D': // ꜽ [LATIN SMALL LETTER AY]
+ output[outputPos++] = 'a';
+ output[outputPos++] = 'y';
+ break;
+
+ case '\u0181':
+ // � [LATIN CAPITAL LETTER B WITH HOOK]
+ case '\u0182':
+ // Æ‚ [LATIN CAPITAL LETTER B WITH TOPBAR]
+ case '\u0243':
+ // Ƀ [LATIN CAPITAL LETTER B WITH STROKE]
+ case '\u0299':
+ // Ê™ [LATIN LETTER SMALL CAPITAL B]
+ case '\u1D03':
+ // á´ƒ [LATIN LETTER SMALL CAPITAL BARRED B]
+ case '\u1E02':
+ // Ḃ [LATIN CAPITAL LETTER B WITH DOT ABOVE]
+ case '\u1E04':
+ // Ḅ [LATIN CAPITAL LETTER B WITH DOT BELOW]
+ case '\u1E06':
+ // Ḇ [LATIN CAPITAL LETTER B WITH LINE BELOW]
+ case '\u24B7':
+ // â’· [CIRCLED LATIN CAPITAL LETTER B]
+ case '\uFF22': // ï¼¢ [FULLWIDTH LATIN CAPITAL LETTER B]
+ output[outputPos++] = 'B';
+ break;
+
+ case '\u0180':
+ // ƀ [LATIN SMALL LETTER B WITH STROKE]
+ case '\u0183':
+ // ƃ [LATIN SMALL LETTER B WITH TOPBAR]
+ case '\u0253':
+ // É“ [LATIN SMALL LETTER B WITH HOOK]
+ case '\u1D6C':
+ // ᵬ [LATIN SMALL LETTER B WITH MIDDLE TILDE]
+ case '\u1D80':
+ // ᶀ [LATIN SMALL LETTER B WITH PALATAL HOOK]
+ case '\u1E03':
+ // ḃ [LATIN SMALL LETTER B WITH DOT ABOVE]
+ case '\u1E05':
+ // ḅ [LATIN SMALL LETTER B WITH DOT BELOW]
+ case '\u1E07':
+ // ḇ [LATIN SMALL LETTER B WITH LINE BELOW]
+ case '\u24D1':
+ // â“‘ [CIRCLED LATIN SMALL LETTER B]
+ case '\uFF42': // b [FULLWIDTH LATIN SMALL LETTER B]
+ output[outputPos++] = 'b';
+ break;
+
+ case '\u249D': // â’� [PARENTHESIZED LATIN SMALL LETTER B]
+ output[outputPos++] = '(';
+ output[outputPos++] = 'b';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u00C7':
+ // Ç [LATIN CAPITAL LETTER C WITH CEDILLA]
+ case '\u0106':
+ // Ć [LATIN CAPITAL LETTER C WITH ACUTE]
+ case '\u0108':
+ // Ĉ [LATIN CAPITAL LETTER C WITH CIRCUMFLEX]
+ case '\u010A':
+ // ÄŠ [LATIN CAPITAL LETTER C WITH DOT ABOVE]
+ case '\u010C':
+ // Č [LATIN CAPITAL LETTER C WITH CARON]
+ case '\u0187':
+ // Ƈ [LATIN CAPITAL LETTER C WITH HOOK]
+ case '\u023B':
+ // È» [LATIN CAPITAL LETTER C WITH STROKE]
+ case '\u0297':
+ // Ê— [LATIN LETTER STRETCHED C]
+ case '\u1D04':
+ // á´„ [LATIN LETTER SMALL CAPITAL C]
+ case '\u1E08':
+ // Ḉ [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE]
+ case '\u24B8':
+ // â’¸ [CIRCLED LATIN CAPITAL LETTER C]
+ case '\uFF23': // ï¼£ [FULLWIDTH LATIN CAPITAL LETTER C]
+ output[outputPos++] = 'C';
+ break;
+
+ case '\u00E7':
+ // ç [LATIN SMALL LETTER C WITH CEDILLA]
+ case '\u0107':
+ // ć [LATIN SMALL LETTER C WITH ACUTE]
+ case '\u0109':
+ // ĉ [LATIN SMALL LETTER C WITH CIRCUMFLEX]
+ case '\u010B':
+ // Ä‹ [LATIN SMALL LETTER C WITH DOT ABOVE]
+ case '\u010D':
+ // � [LATIN SMALL LETTER C WITH CARON]
+ case '\u0188':
+ // ƈ [LATIN SMALL LETTER C WITH HOOK]
+ case '\u023C':
+ // ȼ [LATIN SMALL LETTER C WITH STROKE]
+ case '\u0255':
+ // É• [LATIN SMALL LETTER C WITH CURL]
+ case '\u1E09':
+ // ḉ [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE]
+ case '\u2184':
+ // ↄ [LATIN SMALL LETTER REVERSED C]
+ case '\u24D2':
+ // â“’ [CIRCLED LATIN SMALL LETTER C]
+ case '\uA73E':
+ // Ꜿ [LATIN CAPITAL LETTER REVERSED C WITH DOT]
+ case '\uA73F':
+ // ꜿ [LATIN SMALL LETTER REVERSED C WITH DOT]
+ case '\uFF43': // c [FULLWIDTH LATIN SMALL LETTER C]
+ output[outputPos++] = 'c';
+ break;
+
+ case '\u249E': // â’ž [PARENTHESIZED LATIN SMALL LETTER C]
+ output[outputPos++] = '(';
+ output[outputPos++] = 'c';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u00D0':
+ // � [LATIN CAPITAL LETTER ETH]
+ case '\u010E':
+ // ÄŽ [LATIN CAPITAL LETTER D WITH CARON]
+ case '\u0110':
+ // � [LATIN CAPITAL LETTER D WITH STROKE]
+ case '\u0189':
+ // Ɖ [LATIN CAPITAL LETTER AFRICAN D]
+ case '\u018A':
+ // ÆŠ [LATIN CAPITAL LETTER D WITH HOOK]
+ case '\u018B':
+ // Æ‹ [LATIN CAPITAL LETTER D WITH TOPBAR]
+ case '\u1D05':
+ // á´… [LATIN LETTER SMALL CAPITAL D]
+ case '\u1D06':
+ // á´† [LATIN LETTER SMALL CAPITAL ETH]
+ case '\u1E0A':
+ // Ḋ [LATIN CAPITAL LETTER D WITH DOT ABOVE]
+ case '\u1E0C':
+ // Ḍ [LATIN CAPITAL LETTER D WITH DOT BELOW]
+ case '\u1E0E':
+ // Ḏ [LATIN CAPITAL LETTER D WITH LINE BELOW]
+ case '\u1E10':
+ // � [LATIN CAPITAL LETTER D WITH CEDILLA]
+ case '\u1E12':
+ // Ḓ [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW]
+ case '\u24B9':
+ // â’¹ [CIRCLED LATIN CAPITAL LETTER D]
+ case '\uA779':
+ // � [LATIN CAPITAL LETTER INSULAR D]
+ case '\uFF24': // D [FULLWIDTH LATIN CAPITAL LETTER D]
+ output[outputPos++] = 'D';
+ break;
+
+ case '\u00F0':
+ // ð [LATIN SMALL LETTER ETH]
+ case '\u010F':
+ // � [LATIN SMALL LETTER D WITH CARON]
+ case '\u0111':
+ // Ä‘ [LATIN SMALL LETTER D WITH STROKE]
+ case '\u018C':
+ // ƌ [LATIN SMALL LETTER D WITH TOPBAR]
+ case '\u0221':
+ // È¡ [LATIN SMALL LETTER D WITH CURL]
+ case '\u0256':
+ // É– [LATIN SMALL LETTER D WITH TAIL]
+ case '\u0257':
+ // É— [LATIN SMALL LETTER D WITH HOOK]
+ case '\u1D6D':
+ // áµ­ [LATIN SMALL LETTER D WITH MIDDLE TILDE]
+ case '\u1D81':
+ // � [LATIN SMALL LETTER D WITH PALATAL HOOK]
+ case '\u1D91':
+ // ᶑ [LATIN SMALL LETTER D WITH HOOK AND TAIL]
+ case '\u1E0B':
+ // ḋ [LATIN SMALL LETTER D WITH DOT ABOVE]
+ case '\u1E0D':
+ // � [LATIN SMALL LETTER D WITH DOT BELOW]
+ case '\u1E0F':
+ // � [LATIN SMALL LETTER D WITH LINE BELOW]
+ case '\u1E11':
+ // ḑ [LATIN SMALL LETTER D WITH CEDILLA]
+ case '\u1E13':
+ // ḓ [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW]
+ case '\u24D3':
+ // â““ [CIRCLED LATIN SMALL LETTER D]
+ case '\uA77A':
+ // � [LATIN SMALL LETTER INSULAR D]
+ case '\uFF44': // d [FULLWIDTH LATIN SMALL LETTER D]
+ output[outputPos++] = 'd';
+ break;
+
+ case '\u01C4':
+ // Ç„ [LATIN CAPITAL LETTER DZ WITH CARON]
+ case '\u01F1': // DZ [LATIN CAPITAL LETTER DZ]
+ output[outputPos++] = 'D';
+ output[outputPos++] = 'Z';
+ break;
+
+ case '\u01C5':
+ // Ç… [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON]
+ case '\u01F2': // Dz [LATIN CAPITAL LETTER D WITH SMALL LETTER Z]
+ output[outputPos++] = 'D';
+ output[outputPos++] = 'z';
+ break;
+
+ case '\u249F': // â’Ÿ [PARENTHESIZED LATIN SMALL LETTER D]
+ output[outputPos++] = '(';
+ output[outputPos++] = 'd';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u0238': // ȸ [LATIN SMALL LETTER DB DIGRAPH]
+ output[outputPos++] = 'd';
+ output[outputPos++] = 'b';
+ break;
+
+ case '\u01C6':
+ // dž [LATIN SMALL LETTER DZ WITH CARON]
+ case '\u01F3':
+ // dz [LATIN SMALL LETTER DZ]
+ case '\u02A3':
+ // ʣ [LATIN SMALL LETTER DZ DIGRAPH]
+ case '\u02A5': // ʥ [LATIN SMALL LETTER DZ DIGRAPH WITH CURL]
+ output[outputPos++] = 'd';
+ output[outputPos++] = 'z';
+ break;
+
+ case '\u00C8':
+ // È [LATIN CAPITAL LETTER E WITH GRAVE]
+ case '\u00C9':
+ // É [LATIN CAPITAL LETTER E WITH ACUTE]
+ case '\u00CA':
+ // Ê [LATIN CAPITAL LETTER E WITH CIRCUMFLEX]
+ case '\u00CB':
+ // Ë [LATIN CAPITAL LETTER E WITH DIAERESIS]
+ case '\u0112':
+ // Ä’ [LATIN CAPITAL LETTER E WITH MACRON]
+ case '\u0114':
+ // �? [LATIN CAPITAL LETTER E WITH BREVE]
+ case '\u0116':
+ // Ä– [LATIN CAPITAL LETTER E WITH DOT ABOVE]
+ case '\u0118':
+ // Ę [LATIN CAPITAL LETTER E WITH OGONEK]
+ case '\u011A':
+ // Äš [LATIN CAPITAL LETTER E WITH CARON]
+ case '\u018E':
+ // ÆŽ [LATIN CAPITAL LETTER REVERSED E]
+ case '\u0190':
+ // � [LATIN CAPITAL LETTER OPEN E]
+ case '\u0204':
+ // È„ [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE]
+ case '\u0206':
+ // Ȇ [LATIN CAPITAL LETTER E WITH INVERTED BREVE]
+ case '\u0228':
+ // Ȩ [LATIN CAPITAL LETTER E WITH CEDILLA]
+ case '\u0246':
+ // Ɇ [LATIN CAPITAL LETTER E WITH STROKE]
+ case '\u1D07':
+ // á´‡ [LATIN LETTER SMALL CAPITAL E]
+ case '\u1E14':
+ // �? [LATIN CAPITAL LETTER E WITH MACRON AND GRAVE]
+ case '\u1E16':
+ // Ḗ [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE]
+ case '\u1E18':
+ // Ḙ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW]
+ case '\u1E1A':
+ // Ḛ [LATIN CAPITAL LETTER E WITH TILDE BELOW]
+ case '\u1E1C':
+ // Ḝ [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE]
+ case '\u1EB8':
+ // Ẹ [LATIN CAPITAL LETTER E WITH DOT BELOW]
+ case '\u1EBA':
+ // Ẻ [LATIN CAPITAL LETTER E WITH HOOK ABOVE]
+ case '\u1EBC':
+ // Ẽ [LATIN CAPITAL LETTER E WITH TILDE]
+ case '\u1EBE':
+ // Ế [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE]
+ case '\u1EC0':
+ // Ề [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE]
+ case '\u1EC2':
+ // Ể [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
+ case '\u1EC4':
+ // Ễ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE]
+ case '\u1EC6':
+ // Ệ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
+ case '\u24BA':
+ // â’º [CIRCLED LATIN CAPITAL LETTER E]
+ case '\u2C7B':
+ // â±» [LATIN LETTER SMALL CAPITAL TURNED E]
+ case '\uFF25': // ï¼¥ [FULLWIDTH LATIN CAPITAL LETTER E]
+ output[outputPos++] = 'E';
+ break;
+
+ case '\u00E8':
+ // è [LATIN SMALL LETTER E WITH GRAVE]
+ case '\u00E9':
+ // é [LATIN SMALL LETTER E WITH ACUTE]
+ case '\u00EA':
+ // ê [LATIN SMALL LETTER E WITH CIRCUMFLEX]
+ case '\u00EB':
+ // ë [LATIN SMALL LETTER E WITH DIAERESIS]
+ case '\u0113':
+ // Ä“ [LATIN SMALL LETTER E WITH MACRON]
+ case '\u0115':
+ // Ä• [LATIN SMALL LETTER E WITH BREVE]
+ case '\u0117':
+ // Ä— [LATIN SMALL LETTER E WITH DOT ABOVE]
+ case '\u0119':
+ // Ä™ [LATIN SMALL LETTER E WITH OGONEK]
+ case '\u011B':
+ // Ä› [LATIN SMALL LETTER E WITH CARON]
+ case '\u01DD':
+ // � [LATIN SMALL LETTER TURNED E]
+ case '\u0205':
+ // È… [LATIN SMALL LETTER E WITH DOUBLE GRAVE]
+ case '\u0207':
+ // ȇ [LATIN SMALL LETTER E WITH INVERTED BREVE]
+ case '\u0229':
+ // È© [LATIN SMALL LETTER E WITH CEDILLA]
+ case '\u0247':
+ // ɇ [LATIN SMALL LETTER E WITH STROKE]
+ case '\u0258':
+ // ɘ [LATIN SMALL LETTER REVERSED E]
+ case '\u025B':
+ // É› [LATIN SMALL LETTER OPEN E]
+ case '\u025C':
+ // ɜ [LATIN SMALL LETTER REVERSED OPEN E]
+ case '\u025D':
+ // � [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK]
+ case '\u025E':
+ // Éž [LATIN SMALL LETTER CLOSED REVERSED OPEN E]
+ case '\u029A':
+ // Êš [LATIN SMALL LETTER CLOSED OPEN E]
+ case '\u1D08':
+ // á´ˆ [LATIN SMALL LETTER TURNED OPEN E]
+ case '\u1D92':
+ // ᶒ [LATIN SMALL LETTER E WITH RETROFLEX HOOK]
+ case '\u1D93':
+ // ᶓ [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK]
+ case '\u1D94':
+ // �? [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK]
+ case '\u1E15':
+ // ḕ [LATIN SMALL LETTER E WITH MACRON AND GRAVE]
+ case '\u1E17':
+ // ḗ [LATIN SMALL LETTER E WITH MACRON AND ACUTE]
+ case '\u1E19':
+ // ḙ [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW]
+ case '\u1E1B':
+ // ḛ [LATIN SMALL LETTER E WITH TILDE BELOW]
+ case '\u1E1D':
+ // � [LATIN SMALL LETTER E WITH CEDILLA AND BREVE]
+ case '\u1EB9':
+ // ẹ [LATIN SMALL LETTER E WITH DOT BELOW]
+ case '\u1EBB':
+ // ẻ [LATIN SMALL LETTER E WITH HOOK ABOVE]
+ case '\u1EBD':
+ // ẽ [LATIN SMALL LETTER E WITH TILDE]
+ case '\u1EBF':
+ // ế [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE]
+ case '\u1EC1':
+ // � [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE]
+ case '\u1EC3':
+ // ể [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
+ case '\u1EC5':
+ // á»… [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE]
+ case '\u1EC7':
+ // ệ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
+ case '\u2091':
+ // â‚‘ [LATIN SUBSCRIPT SMALL LETTER E]
+ case '\u24D4':
+ // �? [CIRCLED LATIN SMALL LETTER E]
+ case '\u2C78':
+ // ⱸ [LATIN SMALL LETTER E WITH NOTCH]
+ case '\uFF45': // ï½… [FULLWIDTH LATIN SMALL LETTER E]
+ output[outputPos++] = 'e';
+ break;
+
+ case '\u24A0': // â’  [PARENTHESIZED LATIN SMALL LETTER E]
+ output[outputPos++] = '(';
+ output[outputPos++] = 'e';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u0191':
+ // Æ‘ [LATIN CAPITAL LETTER F WITH HOOK]
+ case '\u1E1E':
+ // Ḟ [LATIN CAPITAL LETTER F WITH DOT ABOVE]
+ case '\u24BB':
+ // â’» [CIRCLED LATIN CAPITAL LETTER F]
+ case '\uA730':
+ // ꜰ [LATIN LETTER SMALL CAPITAL F]
+ case '\uA77B':
+ // � [LATIN CAPITAL LETTER INSULAR F]
+ case '\uA7FB':
+ // ꟻ [LATIN EPIGRAPHIC LETTER REVERSED F]
+ case '\uFF26': // F [FULLWIDTH LATIN CAPITAL LETTER F]
+ output[outputPos++] = 'F';
+ break;
+
+ case '\u0192':
+ // Æ’ [LATIN SMALL LETTER F WITH HOOK]
+ case '\u1D6E':
+ // áµ® [LATIN SMALL LETTER F WITH MIDDLE TILDE]
+ case '\u1D82':
+ // ᶂ [LATIN SMALL LETTER F WITH PALATAL HOOK]
+ case '\u1E1F':
+ // ḟ [LATIN SMALL LETTER F WITH DOT ABOVE]
+ case '\u1E9B':
+ // ẛ [LATIN SMALL LETTER LONG S WITH DOT ABOVE]
+ case '\u24D5':
+ // â“• [CIRCLED LATIN SMALL LETTER F]
+ case '\uA77C':
+ // � [LATIN SMALL LETTER INSULAR F]
+ case '\uFF46': // f [FULLWIDTH LATIN SMALL LETTER F]
+ output[outputPos++] = 'f';
+ break;
+
+ case '\u24A1': // â’¡ [PARENTHESIZED LATIN SMALL LETTER F]
+ output[outputPos++] = '(';
+ output[outputPos++] = 'f';
+ output[outputPos++] = ')';
+ break;
+
+ case '\uFB00': // ff [LATIN SMALL LIGATURE FF]
+ output[outputPos++] = 'f';
+ output[outputPos++] = 'f';
+ break;
+
+ case '\uFB03': // ffi [LATIN SMALL LIGATURE FFI]
+ output[outputPos++] = 'f';
+ output[outputPos++] = 'f';
+ output[outputPos++] = 'i';
+ break;
+
+ case '\uFB04': // ffl [LATIN SMALL LIGATURE FFL]
+ output[outputPos++] = 'f';
+ output[outputPos++] = 'f';
+ output[outputPos++] = 'l';
+ break;
+
+ case '\uFB01': // � [LATIN SMALL LIGATURE FI]
+ output[outputPos++] = 'f';
+ output[outputPos++] = 'i';
+ break;
+
+ case '\uFB02': // fl [LATIN SMALL LIGATURE FL]
+ output[outputPos++] = 'f';
+ output[outputPos++] = 'l';
+ break;
+
+ case '\u011C':
+ // Ĝ [LATIN CAPITAL LETTER G WITH CIRCUMFLEX]
+ case '\u011E':
+ // Äž [LATIN CAPITAL LETTER G WITH BREVE]
+ case '\u0120':
+ // Ä  [LATIN CAPITAL LETTER G WITH DOT ABOVE]
+ case '\u0122':
+ // Ģ [LATIN CAPITAL LETTER G WITH CEDILLA]
+ case '\u0193':
+ // Æ“ [LATIN CAPITAL LETTER G WITH HOOK]
+ case '\u01E4':
+ // Ǥ [LATIN CAPITAL LETTER G WITH STROKE]
+ case '\u01E5':
+ // ǥ [LATIN SMALL LETTER G WITH STROKE]
+ case '\u01E6':
+ // Ǧ [LATIN CAPITAL LETTER G WITH CARON]
+ case '\u01E7':
+ // ǧ [LATIN SMALL LETTER G WITH CARON]
+ case '\u01F4':
+ // Ç´ [LATIN CAPITAL LETTER G WITH ACUTE]
+ case '\u0262':
+ // ɢ [LATIN LETTER SMALL CAPITAL G]
+ case '\u029B':
+ // Ê› [LATIN LETTER SMALL CAPITAL G WITH HOOK]
+ case '\u1E20':
+ // Ḡ [LATIN CAPITAL LETTER G WITH MACRON]
+ case '\u24BC':
+ // â’¼ [CIRCLED LATIN CAPITAL LETTER G]
+ case '\uA77D':
+ // � [LATIN CAPITAL LETTER INSULAR G]
+ case '\uA77E':
+ // � [LATIN CAPITAL LETTER TURNED INSULAR G]
+ case '\uFF27': // G [FULLWIDTH LATIN CAPITAL LETTER G]
+ output[outputPos++] = 'G';
+ break;
+
+ case '\u011D':
+ // � [LATIN SMALL LETTER G WITH CIRCUMFLEX]
+ case '\u011F':
+ // ÄŸ [LATIN SMALL LETTER G WITH BREVE]
+ case '\u0121':
+ // Ä¡ [LATIN SMALL LETTER G WITH DOT ABOVE]
+ case '\u0123':
+ // ģ [LATIN SMALL LETTER G WITH CEDILLA]
+ case '\u01F5':
+ // ǵ [LATIN SMALL LETTER G WITH ACUTE]
+ case '\u0260':
+ // É  [LATIN SMALL LETTER G WITH HOOK]
+ case '\u0261':
+ // É¡ [LATIN SMALL LETTER SCRIPT G]
+ case '\u1D77':
+ // áµ· [LATIN SMALL LETTER TURNED G]
+ case '\u1D79':
+ // áµ¹ [LATIN SMALL LETTER INSULAR G]
+ case '\u1D83':
+ // ᶃ [LATIN SMALL LETTER G WITH PALATAL HOOK]
+ case '\u1E21':
+ // ḡ [LATIN SMALL LETTER G WITH MACRON]
+ case '\u24D6':
+ // â“– [CIRCLED LATIN SMALL LETTER G]
+ case '\uA77F':
+ // � [LATIN SMALL LETTER TURNED INSULAR G]
+ case '\uFF47': // g [FULLWIDTH LATIN SMALL LETTER G]
+ output[outputPos++] = 'g';
+ break;
+
+ case '\u24A2': // â’¢ [PARENTHESIZED LATIN SMALL LETTER G]
+ output[outputPos++] = '(';
+ output[outputPos++] = 'g';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u0124':
+ // Ĥ [LATIN CAPITAL LETTER H WITH CIRCUMFLEX]
+ case '\u0126':
+ // Ħ [LATIN CAPITAL LETTER H WITH STROKE]
+ case '\u021E':
+ // Èž [LATIN CAPITAL LETTER H WITH CARON]
+ case '\u029C':
+ // ʜ [LATIN LETTER SMALL CAPITAL H]
+ case '\u1E22':
+ // Ḣ [LATIN CAPITAL LETTER H WITH DOT ABOVE]
+ case '\u1E24':
+ // Ḥ [LATIN CAPITAL LETTER H WITH DOT BELOW]
+ case '\u1E26':
+ // Ḧ [LATIN CAPITAL LETTER H WITH DIAERESIS]
+ case '\u1E28':
+ // Ḩ [LATIN CAPITAL LETTER H WITH CEDILLA]
+ case '\u1E2A':
+ // Ḫ [LATIN CAPITAL LETTER H WITH BREVE BELOW]
+ case '\u24BD':
+ // â’½ [CIRCLED LATIN CAPITAL LETTER H]
+ case '\u2C67':
+ // Ⱨ [LATIN CAPITAL LETTER H WITH DESCENDER]
+ case '\u2C75':
+ // â±µ [LATIN CAPITAL LETTER HALF H]
+ case '\uFF28': // H [FULLWIDTH LATIN CAPITAL LETTER H]
+ output[outputPos++] = 'H';
+ break;
+
+ case '\u0125':
+ // ĥ [LATIN SMALL LETTER H WITH CIRCUMFLEX]
+ case '\u0127':
+ // ħ [LATIN SMALL LETTER H WITH STROKE]
+ case '\u021F':
+ // ÈŸ [LATIN SMALL LETTER H WITH CARON]
+ case '\u0265':
+ // ɥ [LATIN SMALL LETTER TURNED H]
+ case '\u0266':
+ // ɦ [LATIN SMALL LETTER H WITH HOOK]
+ case '\u02AE':
+ // Ê® [LATIN SMALL LETTER TURNED H WITH FISHHOOK]
+ case '\u02AF':
+ // ʯ [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL]
+ case '\u1E23':
+ // ḣ [LATIN SMALL LETTER H WITH DOT ABOVE]
+ case '\u1E25':
+ // ḥ [LATIN SMALL LETTER H WITH DOT BELOW]
+ case '\u1E27':
+ // ḧ [LATIN SMALL LETTER H WITH DIAERESIS]
+ case '\u1E29':
+ // ḩ [LATIN SMALL LETTER H WITH CEDILLA]
+ case '\u1E2B':
+ // ḫ [LATIN SMALL LETTER H WITH BREVE BELOW]
+ case '\u1E96':
+ // ẖ [LATIN SMALL LETTER H WITH LINE BELOW]
+ case '\u24D7':
+ // â“— [CIRCLED LATIN SMALL LETTER H]
+ case '\u2C68':
+ // ⱨ [LATIN SMALL LETTER H WITH DESCENDER]
+ case '\u2C76':
+ // ⱶ [LATIN SMALL LETTER HALF H]
+ case '\uFF48': // h [FULLWIDTH LATIN SMALL LETTER H]
+ output[outputPos++] = 'h';
+ break;
+
+ case '\u01F6': // Ƕ http://en.wikipedia.org/wiki/Hwair [LATIN CAPITAL LETTER HWAIR]
+ output[outputPos++] = 'H';
+ output[outputPos++] = 'V';
+ break;
+
+ case '\u24A3': // â’£ [PARENTHESIZED LATIN SMALL LETTER H]
+ output[outputPos++] = '(';
+ output[outputPos++] = 'h';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u0195': // Æ• [LATIN SMALL LETTER HV]
+ output[outputPos++] = 'h';
+ output[outputPos++] = 'v';
+ break;
+
+ case '\u00CC':
+ // Ì [LATIN CAPITAL LETTER I WITH GRAVE]
+ case '\u00CD':
+ // � [LATIN CAPITAL LETTER I WITH ACUTE]
+ case '\u00CE':
+ // ÃŽ [LATIN CAPITAL LETTER I WITH CIRCUMFLEX]
+ case '\u00CF':
+ // � [LATIN CAPITAL LETTER I WITH DIAERESIS]
+ case '\u0128':
+ // Ĩ [LATIN CAPITAL LETTER I WITH TILDE]
+ case '\u012A':
+ // Ī [LATIN CAPITAL LETTER I WITH MACRON]
+ case '\u012C':
+ // Ĭ [LATIN CAPITAL LETTER I WITH BREVE]
+ case '\u012E':
+ // Ä® [LATIN CAPITAL LETTER I WITH OGONEK]
+ case '\u0130':
+ // Ä° [LATIN CAPITAL LETTER I WITH DOT ABOVE]
+ case '\u0196':
+ // Æ– [LATIN CAPITAL LETTER IOTA]
+ case '\u0197':
+ // Æ— [LATIN CAPITAL LETTER I WITH STROKE]
+ case '\u01CF':
+ // � [LATIN CAPITAL LETTER I WITH CARON]
+ case '\u0208':
+ // Ȉ [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE]
+ case '\u020A':
+ // ÈŠ [LATIN CAPITAL LETTER I WITH INVERTED BREVE]
+ case '\u026A':
+ // ɪ [LATIN LETTER SMALL CAPITAL I]
+ case '\u1D7B':
+ // áµ» [LATIN SMALL CAPITAL LETTER I WITH STROKE]
+ case '\u1E2C':
+ // Ḭ [LATIN CAPITAL LETTER I WITH TILDE BELOW]
+ case '\u1E2E':
+ // Ḯ [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE]
+ case '\u1EC8':
+ // Ỉ [LATIN CAPITAL LETTER I WITH HOOK ABOVE]
+ case '\u1ECA':
+ // Ị [LATIN CAPITAL LETTER I WITH DOT BELOW]
+ case '\u24BE':
+ // â’¾ [CIRCLED LATIN CAPITAL LETTER I]
+ case '\uA7FE':
+ // ꟾ [LATIN EPIGRAPHIC LETTER I LONGA]
+ case '\uFF29': // I [FULLWIDTH LATIN CAPITAL LETTER I]
+ output[outputPos++] = 'I';
+ break;
+
+ case '\u00EC':
+ // ì [LATIN SMALL LETTER I WITH GRAVE]
+ case '\u00ED':
+ // í [LATIN SMALL LETTER I WITH ACUTE]
+ case '\u00EE':
+ // î [LATIN SMALL LETTER I WITH CIRCUMFLEX]
+ case '\u00EF':
+ // ï [LATIN SMALL LETTER I WITH DIAERESIS]
+ case '\u0129':
+ // Ä© [LATIN SMALL LETTER I WITH TILDE]
+ case '\u012B':
+ // Ä« [LATIN SMALL LETTER I WITH MACRON]
+ case '\u012D':
+ // Ä­ [LATIN SMALL LETTER I WITH BREVE]
+ case '\u012F':
+ // į [LATIN SMALL LETTER I WITH OGONEK]
+ case '\u0131':
+ // ı [LATIN SMALL LETTER DOTLESS I]
+ case '\u01D0':
+ // � [LATIN SMALL LETTER I WITH CARON]
+ case '\u0209':
+ // ȉ [LATIN SMALL LETTER I WITH DOUBLE GRAVE]
+ case '\u020B':
+ // È‹ [LATIN SMALL LETTER I WITH INVERTED BREVE]
+ case '\u0268':
+ // ɨ [LATIN SMALL LETTER I WITH STROKE]
+ case '\u1D09':
+ // á´‰ [LATIN SMALL LETTER TURNED I]
+ case '\u1D62':
+ // áµ¢ [LATIN SUBSCRIPT SMALL LETTER I]
+ case '\u1D7C':
+ // áµ¼ [LATIN SMALL LETTER IOTA WITH STROKE]
+ case '\u1D96':
+ // ᶖ [LATIN SMALL LETTER I WITH RETROFLEX HOOK]
+ case '\u1E2D':
+ // ḭ [LATIN SMALL LETTER I WITH TILDE BELOW]
+ case '\u1E2F':
+ // ḯ [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE]
+ case '\u1EC9':
+ // ỉ [LATIN SMALL LETTER I WITH HOOK ABOVE]
+ case '\u1ECB':
+ // ị [LATIN SMALL LETTER I WITH DOT BELOW]
+ case '\u2071':
+ // � [SUPERSCRIPT LATIN SMALL LETTER I]
+ case '\u24D8':
+ // ⓘ [CIRCLED LATIN SMALL LETTER I]
+ case '\uFF49': // i [FULLWIDTH LATIN SMALL LETTER I]
+ output[outputPos++] = 'i';
+ break;
+
+ case '\u0132': // IJ [LATIN CAPITAL LIGATURE IJ]
+ output[outputPos++] = 'I';
+ output[outputPos++] = 'J';
+ break;
+
+ case '\u24A4': // â’¤ [PARENTHESIZED LATIN SMALL LETTER I]
+ output[outputPos++] = '(';
+ output[outputPos++] = 'i';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u0133': // ij [LATIN SMALL LIGATURE IJ]
+ output[outputPos++] = 'i';
+ output[outputPos++] = 'j';
+ break;
+
+ case '\u0134':
+ // Ä´ [LATIN CAPITAL LETTER J WITH CIRCUMFLEX]
+ case '\u0248':
+ // Ɉ [LATIN CAPITAL LETTER J WITH STROKE]
+ case '\u1D0A':
+ // á´Š [LATIN LETTER SMALL CAPITAL J]
+ case '\u24BF':
+ // â’¿ [CIRCLED LATIN CAPITAL LETTER J]
+ case '\uFF2A': // J [FULLWIDTH LATIN CAPITAL LETTER J]
+ output[outputPos++] = 'J';
+ break;
+
+ case '\u0135':
+ // ĵ [LATIN SMALL LETTER J WITH CIRCUMFLEX]
+ case '\u01F0':
+ // Ç° [LATIN SMALL LETTER J WITH CARON]
+ case '\u0237':
+ // È· [LATIN SMALL LETTER DOTLESS J]
+ case '\u0249':
+ // ɉ [LATIN SMALL LETTER J WITH STROKE]
+ case '\u025F':
+ // ÉŸ [LATIN SMALL LETTER DOTLESS J WITH STROKE]
+ case '\u0284':
+ // Ê„ [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK]
+ case '\u029D':
+ // � [LATIN SMALL LETTER J WITH CROSSED-TAIL]
+ case '\u24D9':
+ // â“™ [CIRCLED LATIN SMALL LETTER J]
+ case '\u2C7C':
+ // â±¼ [LATIN SUBSCRIPT SMALL LETTER J]
+ case '\uFF4A': // j [FULLWIDTH LATIN SMALL LETTER J]
+ output[outputPos++] = 'j';
+ break;
+
+ case '\u24A5': // â’¥ [PARENTHESIZED LATIN SMALL LETTER J]
+ output[outputPos++] = '(';
+ output[outputPos++] = 'j';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u0136':
+ // Ķ [LATIN CAPITAL LETTER K WITH CEDILLA]
+ case '\u0198':
+ // Ƙ [LATIN CAPITAL LETTER K WITH HOOK]
+ case '\u01E8':
+ // Ǩ [LATIN CAPITAL LETTER K WITH CARON]
+ case '\u1D0B':
+ // á´‹ [LATIN LETTER SMALL CAPITAL K]
+ case '\u1E30':
+ // Ḱ [LATIN CAPITAL LETTER K WITH ACUTE]
+ case '\u1E32':
+ // Ḳ [LATIN CAPITAL LETTER K WITH DOT BELOW]
+ case '\u1E34':
+ // Ḵ [LATIN CAPITAL LETTER K WITH LINE BELOW]
+ case '\u24C0':
+ // â“€ [CIRCLED LATIN CAPITAL LETTER K]
+ case '\u2C69':
+ // Ⱪ [LATIN CAPITAL LETTER K WITH DESCENDER]
+ case '\uA740':
+ // Ꝁ [LATIN CAPITAL LETTER K WITH STROKE]
+ case '\uA742':
+ // Ꝃ [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE]
+ case '\uA744':
+ // Ꝅ [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE]
+ case '\uFF2B': // K [FULLWIDTH LATIN CAPITAL LETTER K]
+ output[outputPos++] = 'K';
+ break;
+
+ case '\u0137':
+ // Ä· [LATIN SMALL LETTER K WITH CEDILLA]
+ case '\u0199':
+ // Æ™ [LATIN SMALL LETTER K WITH HOOK]
+ case '\u01E9':
+ // Ç© [LATIN SMALL LETTER K WITH CARON]
+ case '\u029E':
+ // Êž [LATIN SMALL LETTER TURNED K]
+ case '\u1D84':
+ // ᶄ [LATIN SMALL LETTER K WITH PALATAL HOOK]
+ case '\u1E31':
+ // ḱ [LATIN SMALL LETTER K WITH ACUTE]
+ case '\u1E33':
+ // ḳ [LATIN SMALL LETTER K WITH DOT BELOW]
+ case '\u1E35':
+ // ḵ [LATIN SMALL LETTER K WITH LINE BELOW]
+ case '\u24DA':
+ // â“š [CIRCLED LATIN SMALL LETTER K]
+ case '\u2C6A':
+ // ⱪ [LATIN SMALL LETTER K WITH DESCENDER]
+ case '\uA741':
+ // ꝁ [LATIN SMALL LETTER K WITH STROKE]
+ case '\uA743':
+ // ꝃ [LATIN SMALL LETTER K WITH DIAGONAL STROKE]
+ case '\uA745':
+ // ꝅ [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE]
+ case '\uFF4B': // k [FULLWIDTH LATIN SMALL LETTER K]
+ output[outputPos++] = 'k';
+ break;
+
+ case '\u24A6': // â’¦ [PARENTHESIZED LATIN SMALL LETTER K]
+ output[outputPos++] = '(';
+ output[outputPos++] = 'k';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u0139':
+ // Ĺ [LATIN CAPITAL LETTER L WITH ACUTE]
+ case '\u013B':
+ // Ä» [LATIN CAPITAL LETTER L WITH CEDILLA]
+ case '\u013D':
+ // Ľ [LATIN CAPITAL LETTER L WITH CARON]
+ case '\u013F':
+ // Ä¿ [LATIN CAPITAL LETTER L WITH MIDDLE DOT]
+ case '\u0141':
+ // Ł [LATIN CAPITAL LETTER L WITH STROKE]
+ case '\u023D':
+ // Ƚ [LATIN CAPITAL LETTER L WITH BAR]
+ case '\u029F':
+ // ÊŸ [LATIN LETTER SMALL CAPITAL L]
+ case '\u1D0C':
+ // ᴌ [LATIN LETTER SMALL CAPITAL L WITH STROKE]
+ case '\u1E36':
+ // Ḷ [LATIN CAPITAL LETTER L WITH DOT BELOW]
+ case '\u1E38':
+ // Ḹ [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON]
+ case '\u1E3A':
+ // Ḻ [LATIN CAPITAL LETTER L WITH LINE BELOW]
+ case '\u1E3C':
+ // Ḽ [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW]
+ case '\u24C1':
+ // Ⓛ [CIRCLED LATIN CAPITAL LETTER L]
+ case '\u2C60':
+ // â±  [LATIN CAPITAL LETTER L WITH DOUBLE BAR]
+ case '\u2C62':
+ // â±¢ [LATIN CAPITAL LETTER L WITH MIDDLE TILDE]
+ case '\uA746':
+ // Ꝇ [LATIN CAPITAL LETTER BROKEN L]
+ case '\uA748':
+ // Ꝉ [LATIN CAPITAL LETTER L WITH HIGH STROKE]
+ case '\uA780':
+ // Ꞁ [LATIN CAPITAL LETTER TURNED L]
+ case '\uFF2C': // L [FULLWIDTH LATIN CAPITAL LETTER L]
+ output[outputPos++] = 'L';
+ break;
+
+ case '\u013A':
+ // ĺ [LATIN SMALL LETTER L WITH ACUTE]
+ case '\u013C':
+ // ļ [LATIN SMALL LETTER L WITH CEDILLA]
+ case '\u013E':
+ // ľ [LATIN SMALL LETTER L WITH CARON]
+ case '\u0140':
+ // ŀ [LATIN SMALL LETTER L WITH MIDDLE DOT]
+ case '\u0142':
+ // Å‚ [LATIN SMALL LETTER L WITH STROKE]
+ case '\u019A':
+ // Æš [LATIN SMALL LETTER L WITH BAR]
+ case '\u0234':
+ // È´ [LATIN SMALL LETTER L WITH CURL]
+ case '\u026B':
+ // É« [LATIN SMALL LETTER L WITH MIDDLE TILDE]
+ case '\u026C':
+ // ɬ [LATIN SMALL LETTER L WITH BELT]
+ case '\u026D':
+ // É­ [LATIN SMALL LETTER L WITH RETROFLEX HOOK]
+ case '\u1D85':
+ // ᶅ [LATIN SMALL LETTER L WITH PALATAL HOOK]
+ case '\u1E37':
+ // ḷ [LATIN SMALL LETTER L WITH DOT BELOW]
+ case '\u1E39':
+ // ḹ [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON]
+ case '\u1E3B':
+ // ḻ [LATIN SMALL LETTER L WITH LINE BELOW]
+ case '\u1E3D':
+ // ḽ [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW]
+ case '\u24DB':
+ // â“› [CIRCLED LATIN SMALL LETTER L]
+ case '\u2C61':
+ // ⱡ [LATIN SMALL LETTER L WITH DOUBLE BAR]
+ case '\uA747':
+ // ꝇ [LATIN SMALL LETTER BROKEN L]
+ case '\uA749':
+ // ꝉ [LATIN SMALL LETTER L WITH HIGH STROKE]
+ case '\uA781':
+ // ꞁ [LATIN SMALL LETTER TURNED L]
+ case '\uFF4C': // l [FULLWIDTH LATIN SMALL LETTER L]
+ output[outputPos++] = 'l';
+ break;
+
+ case '\u01C7': // LJ [LATIN CAPITAL LETTER LJ]
+ output[outputPos++] = 'L';
+ output[outputPos++] = 'J';
+ break;
+
+ case '\u1EFA': // Ỻ [LATIN CAPITAL LETTER MIDDLE-WELSH LL]
+ output[outputPos++] = 'L';
+ output[outputPos++] = 'L';
+ break;
+
+ case '\u01C8': // Lj [LATIN CAPITAL LETTER L WITH SMALL LETTER J]
+ output[outputPos++] = 'L';
+ output[outputPos++] = 'j';
+ break;
+
+ case '\u24A7': // â’§ [PARENTHESIZED LATIN SMALL LETTER L]
+ output[outputPos++] = '(';
+ output[outputPos++] = 'l';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u01C9': // lj [LATIN SMALL LETTER LJ]
+ output[outputPos++] = 'l';
+ output[outputPos++] = 'j';
+ break;
+
+ case '\u1EFB': // á»» [LATIN SMALL LETTER MIDDLE-WELSH LL]
+ output[outputPos++] = 'l';
+ output[outputPos++] = 'l';
+ break;
+
+ case '\u02AA': // ʪ [LATIN SMALL LETTER LS DIGRAPH]
+ output[outputPos++] = 'l';
+ output[outputPos++] = 's';
+ break;
+
+ case '\u02AB': // Ê« [LATIN SMALL LETTER LZ DIGRAPH]
+ output[outputPos++] = 'l';
+ output[outputPos++] = 'z';
+ break;
+
+ case '\u019C':
+ // Ɯ [LATIN CAPITAL LETTER TURNED M]
+ case '\u1D0D':
+ // ᴍ [LATIN LETTER SMALL CAPITAL M]
+ case '\u1E3E':
+ // Ḿ [LATIN CAPITAL LETTER M WITH ACUTE]
+ case '\u1E40':
+ // á¹€ [LATIN CAPITAL LETTER M WITH DOT ABOVE]
+ case '\u1E42':
+ // Ṃ [LATIN CAPITAL LETTER M WITH DOT BELOW]
+ case '\u24C2':
+ // â“‚ [CIRCLED LATIN CAPITAL LETTER M]
+ case '\u2C6E':
+ // â±® [LATIN CAPITAL LETTER M WITH HOOK]
+ case '\uA7FD':
+ // ꟽ [LATIN EPIGRAPHIC LETTER INVERTED M]
+ case '\uA7FF':
+ // ꟿ [LATIN EPIGRAPHIC LETTER ARCHAIC M]
+ case '\uFF2D': // ï¼­ [FULLWIDTH LATIN CAPITAL LETTER M]
+ output[outputPos++] = 'M';
+ break;
+
+ case '\u026F':
+ // ɯ [LATIN SMALL LETTER TURNED M]
+ case '\u0270':
+ // É° [LATIN SMALL LETTER TURNED M WITH LONG LEG]
+ case '\u0271':
+ // ɱ [LATIN SMALL LETTER M WITH HOOK]
+ case '\u1D6F':
+ // ᵯ [LATIN SMALL LETTER M WITH MIDDLE TILDE]
+ case '\u1D86':
+ // ᶆ [LATIN SMALL LETTER M WITH PALATAL HOOK]
+ case '\u1E3F':
+ // ḿ [LATIN SMALL LETTER M WITH ACUTE]
+ case '\u1E41':
+ // ṁ [LATIN SMALL LETTER M WITH DOT ABOVE]
+ case '\u1E43':
+ // ṃ [LATIN SMALL LETTER M WITH DOT BELOW]
+ case '\u24DC':
+ // ⓜ [CIRCLED LATIN SMALL LETTER M]
+ case '\uFF4D': // ｍ [FULLWIDTH LATIN SMALL LETTER M]
+ output[outputPos++] = 'm';
+ break;
+
+ case '\u24A8': // â’¨ [PARENTHESIZED LATIN SMALL LETTER M]
+ output[outputPos++] = '(';
+ output[outputPos++] = 'm';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u00D1':
+ // Ñ [LATIN CAPITAL LETTER N WITH TILDE]
+ case '\u0143':
+ // Ń [LATIN CAPITAL LETTER N WITH ACUTE]
+ case '\u0145':
+ // Å… [LATIN CAPITAL LETTER N WITH CEDILLA]
+ case '\u0147':
+ // Ň [LATIN CAPITAL LETTER N WITH CARON]
+ case '\u014A':
+ // Ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN CAPITAL LETTER ENG]
+ case '\u019D':
+ // Ɲ [LATIN CAPITAL LETTER N WITH LEFT HOOK]
+ case '\u01F8':
+ // Ǹ [LATIN CAPITAL LETTER N WITH GRAVE]
+ case '\u0220':
+ // È  [LATIN CAPITAL LETTER N WITH LONG RIGHT LEG]
+ case '\u0274':
+ // É´ [LATIN LETTER SMALL CAPITAL N]
+ case '\u1D0E':
+ // á´Ž [LATIN LETTER SMALL CAPITAL REVERSED N]
+ case '\u1E44':
+ // Ṅ [LATIN CAPITAL LETTER N WITH DOT ABOVE]
+ case '\u1E46':
+ // Ṇ [LATIN CAPITAL LETTER N WITH DOT BELOW]
+ case '\u1E48':
+ // Ṉ [LATIN CAPITAL LETTER N WITH LINE BELOW]
+ case '\u1E4A':
+ // Ṋ [LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW]
+ case '\u24C3':
+ // Ⓝ [CIRCLED LATIN CAPITAL LETTER N]
+ case '\uFF2E': // ï¼® [FULLWIDTH LATIN CAPITAL LETTER N]
+ output[outputPos++] = 'N';
+ break;
+
+ case '\u00F1':
+ // ñ [LATIN SMALL LETTER N WITH TILDE]
+ case '\u0144':
+ // Å„ [LATIN SMALL LETTER N WITH ACUTE]
+ case '\u0146':
+ // ņ [LATIN SMALL LETTER N WITH CEDILLA]
+ case '\u0148':
+ // ň [LATIN SMALL LETTER N WITH CARON]
+ case '\u0149':
+ // ʼn [LATIN SMALL LETTER N PRECEDED BY APOSTROPHE]
+ case '\u014B':
+ // Å‹ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN SMALL LETTER ENG]
+ case '\u019E':
+ // Æž [LATIN SMALL LETTER N WITH LONG RIGHT LEG]
+ case '\u01F9':
+ // ǹ [LATIN SMALL LETTER N WITH GRAVE]
+ case '\u0235':
+ // ȵ [LATIN SMALL LETTER N WITH CURL]
+ case '\u0272':
+ // ɲ [LATIN SMALL LETTER N WITH LEFT HOOK]
+ case '\u0273':
+ // ɳ [LATIN SMALL LETTER N WITH RETROFLEX HOOK]
+ case '\u1D70':
+ // áµ° [LATIN SMALL LETTER N WITH MIDDLE TILDE]
+ case '\u1D87':
+ // ᶇ [LATIN SMALL LETTER N WITH PALATAL HOOK]
+ case '\u1E45':
+ // á¹… [LATIN SMALL LETTER N WITH DOT ABOVE]
+ case '\u1E47':
+ // ṇ [LATIN SMALL LETTER N WITH DOT BELOW]
+ case '\u1E49':
+ // ṉ [LATIN SMALL LETTER N WITH LINE BELOW]
+ case '\u1E4B':
+ // ṋ [LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW]
+ case '\u207F':
+ // ⁿ [SUPERSCRIPT LATIN SMALL LETTER N]
+ case '\u24DD':
+ // ⓝ [CIRCLED LATIN SMALL LETTER N]
+ case '\uFF4E': // n [FULLWIDTH LATIN SMALL LETTER N]
+ output[outputPos++] = 'n';
+ break;
+
+ case '\u01CA': // ÇŠ [LATIN CAPITAL LETTER NJ]
+ output[outputPos++] = 'N';
+ output[outputPos++] = 'J';
+ break;
+
+ case '\u01CB': // Ç‹ [LATIN CAPITAL LETTER N WITH SMALL LETTER J]
+ output[outputPos++] = 'N';
+ output[outputPos++] = 'j';
+ break;
+
+ case '\u24A9': // â’© [PARENTHESIZED LATIN SMALL LETTER N]
+ output[outputPos++] = '(';
+ output[outputPos++] = 'n';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u01CC': // nj [LATIN SMALL LETTER NJ]
+ output[outputPos++] = 'n';
+ output[outputPos++] = 'j';
+ break;
+
+ case '\u00D2':
+ // Ã’ [LATIN CAPITAL LETTER O WITH GRAVE]
+ case '\u00D3':
+ // Ó [LATIN CAPITAL LETTER O WITH ACUTE]
+ case '\u00D4':
+ // Ô [LATIN CAPITAL LETTER O WITH CIRCUMFLEX]
+ case '\u00D5':
+ // Õ [LATIN CAPITAL LETTER O WITH TILDE]
+ case '\u00D6':
+ // Ö [LATIN CAPITAL LETTER O WITH DIAERESIS]
+ case '\u00D8':
+ // Ø [LATIN CAPITAL LETTER O WITH STROKE]
+ case '\u014C':
+ // Ō [LATIN CAPITAL LETTER O WITH MACRON]
+ case '\u014E':
+ // ÅŽ [LATIN CAPITAL LETTER O WITH BREVE]
+ case '\u0150':
+ // Ő [LATIN CAPITAL LETTER O WITH DOUBLE ACUTE]
+ case '\u0186':
+ // Ɔ [LATIN CAPITAL LETTER OPEN O]
+ case '\u019F':
+ // ÆŸ [LATIN CAPITAL LETTER O WITH MIDDLE TILDE]
+ case '\u01A0':
+ // Æ  [LATIN CAPITAL LETTER O WITH HORN]
+ case '\u01D1':
+ // Ç‘ [LATIN CAPITAL LETTER O WITH CARON]
+ case '\u01EA':
+ // Ǫ [LATIN CAPITAL LETTER O WITH OGONEK]
+ case '\u01EC':
+ // Ǭ [LATIN CAPITAL LETTER O WITH OGONEK AND MACRON]
+ case '\u01FE':
+ // Ǿ [LATIN CAPITAL LETTER O WITH STROKE AND ACUTE]
+ case '\u020C':
+ // Ȍ [LATIN CAPITAL LETTER O WITH DOUBLE GRAVE]
+ case '\u020E':
+ // ÈŽ [LATIN CAPITAL LETTER O WITH INVERTED BREVE]
+ case '\u022A':
+ // Ȫ [LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON]
+ case '\u022C':
+ // Ȭ [LATIN CAPITAL LETTER O WITH TILDE AND MACRON]
+ case '\u022E':
+ // È® [LATIN CAPITAL LETTER O WITH DOT ABOVE]
+ case '\u0230':
+ // È° [LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON]
+ case '\u1D0F':
+ // ᴏ [LATIN LETTER SMALL CAPITAL O]
+ case '\u1D10':
+ // ᴐ [LATIN LETTER SMALL CAPITAL OPEN O]
+ case '\u1E4C':
+ // Ṍ [LATIN CAPITAL LETTER O WITH TILDE AND ACUTE]
+ case '\u1E4E':
+ // Ṏ [LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS]
+ case '\u1E50':
+ // Ṑ [LATIN CAPITAL LETTER O WITH MACRON AND GRAVE]
+ case '\u1E52':
+ // á¹’ [LATIN CAPITAL LETTER O WITH MACRON AND ACUTE]
+ case '\u1ECC':
+ // Ọ [LATIN CAPITAL LETTER O WITH DOT BELOW]
+ case '\u1ECE':
+ // Ỏ [LATIN CAPITAL LETTER O WITH HOOK ABOVE]
+ case '\u1ED0':
+ // Ố [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE]
+ case '\u1ED2':
+ // á»’ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE]
+ case '\u1ED4':
+ // Ổ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
+ case '\u1ED6':
+ // á»– [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE]
+ case '\u1ED8':
+ // Ộ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
+ case '\u1EDA':
+ // Ớ [LATIN CAPITAL LETTER O WITH HORN AND ACUTE]
+ case '\u1EDC':
+ // Ờ [LATIN CAPITAL LETTER O WITH HORN AND GRAVE]
+ case '\u1EDE':
+ // Ở [LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE]
+ case '\u1EE0':
+ // á»  [LATIN CAPITAL LETTER O WITH HORN AND TILDE]
+ case '\u1EE2':
+ // Ợ [LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW]
+ case '\u24C4':
+ // â“„ [CIRCLED LATIN CAPITAL LETTER O]
+ case '\uA74A':
+ // Ꝋ [LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY]
+ case '\uA74C':
+ // Ꝍ [LATIN CAPITAL LETTER O WITH LOOP]
+ case '\uFF2F': // O [FULLWIDTH LATIN CAPITAL LETTER O]
+ output[outputPos++] = 'O';
+ break;
+
+ case '\u00F2':
+ // ò [LATIN SMALL LETTER O WITH GRAVE]
+ case '\u00F3':
+ // ó [LATIN SMALL LETTER O WITH ACUTE]
+ case '\u00F4':
+ // ô [LATIN SMALL LETTER O WITH CIRCUMFLEX]
+ case '\u00F5':
+ // õ [LATIN SMALL LETTER O WITH TILDE]
+ case '\u00F6':
+ // ö [LATIN SMALL LETTER O WITH DIAERESIS]
+ case '\u00F8':
+ // ø [LATIN SMALL LETTER O WITH STROKE]
+ case '\u014D':
+ // ō [LATIN SMALL LETTER O WITH MACRON]
+ case '\u014F':
+ // ŏ [LATIN SMALL LETTER O WITH BREVE]
+ case '\u0151':
+ // Å‘ [LATIN SMALL LETTER O WITH DOUBLE ACUTE]
+ case '\u01A1':
+ // Æ¡ [LATIN SMALL LETTER O WITH HORN]
+ case '\u01D2':
+ // Ç’ [LATIN SMALL LETTER O WITH CARON]
+ case '\u01EB':
+ // Ç« [LATIN SMALL LETTER O WITH OGONEK]
+ case '\u01ED':
+ // Ç­ [LATIN SMALL LETTER O WITH OGONEK AND MACRON]
+ case '\u01FF':
+ // Ç¿ [LATIN SMALL LETTER O WITH STROKE AND ACUTE]
+ case '\u020D':
+ // ȍ [LATIN SMALL LETTER O WITH DOUBLE GRAVE]
+ case '\u020F':
+ // ȏ [LATIN SMALL LETTER O WITH INVERTED BREVE]
+ case '\u022B':
+ // È« [LATIN SMALL LETTER O WITH DIAERESIS AND MACRON]
+ case '\u022D':
+ // È­ [LATIN SMALL LETTER O WITH TILDE AND MACRON]
+ case '\u022F':
+ // ȯ [LATIN SMALL LETTER O WITH DOT ABOVE]
+ case '\u0231':
+ // ȱ [LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON]
+ case '\u0254':
+ // ɔ [LATIN SMALL LETTER OPEN O]
+ case '\u0275':
+ // ɵ [LATIN SMALL LETTER BARRED O]
+ case '\u1D16':
+ // á´– [LATIN SMALL LETTER TOP HALF O]
+ case '\u1D17':
+ // á´— [LATIN SMALL LETTER BOTTOM HALF O]
+ case '\u1D97':
+ // ᶗ [LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK]
+ case '\u1E4D':
+ // ṍ [LATIN SMALL LETTER O WITH TILDE AND ACUTE]
+ case '\u1E4F':
+ // ṏ [LATIN SMALL LETTER O WITH TILDE AND DIAERESIS]
+ case '\u1E51':
+ // ṑ [LATIN SMALL LETTER O WITH MACRON AND GRAVE]
+ case '\u1E53':
+ // ṓ [LATIN SMALL LETTER O WITH MACRON AND ACUTE]
+ case '\u1ECD':
+ // ọ [LATIN SMALL LETTER O WITH DOT BELOW]
+ case '\u1ECF':
+ // ỏ [LATIN SMALL LETTER O WITH HOOK ABOVE]
+ case '\u1ED1':
+ // ố [LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE]
+ case '\u1ED3':
+ // ồ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE]
+ case '\u1ED5':
+ // ổ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
+ case '\u1ED7':
+ // á»— [LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE]
+ case '\u1ED9':
+ // á»™ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
+ case '\u1EDB':
+ // á»› [LATIN SMALL LETTER O WITH HORN AND ACUTE]
+ case '\u1EDD':
+ // ờ [LATIN SMALL LETTER O WITH HORN AND GRAVE]
+ case '\u1EDF':
+ // ở [LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE]
+ case '\u1EE1':
+ // ỡ [LATIN SMALL LETTER O WITH HORN AND TILDE]
+ case '\u1EE3':
+ // ợ [LATIN SMALL LETTER O WITH HORN AND DOT BELOW]
+ case '\u2092':
+ // â‚’ [LATIN SUBSCRIPT SMALL LETTER O]
+ case '\u24DE':
+ // â“ž [CIRCLED LATIN SMALL LETTER O]
+ case '\u2C7A':
+ // ⱺ [LATIN SMALL LETTER O WITH LOW RING INSIDE]
+ case '\uA74B':
+ // ꝋ [LATIN SMALL LETTER O WITH LONG STROKE OVERLAY]
+ case '\uA74D':
+ // ꝍ [LATIN SMALL LETTER O WITH LOOP]
+ case '\uFF4F': // ｏ [FULLWIDTH LATIN SMALL LETTER O]
+ output[outputPos++] = 'o';
+ break;
+
+ case '\u0152':
+ // Å’ [LATIN CAPITAL LIGATURE OE]
+ case '\u0276': // ɶ [LATIN LETTER SMALL CAPITAL OE]
+ output[outputPos++] = 'O';
+ output[outputPos++] = 'E';
+ break;
+
+ case '\uA74E': // Ꝏ [LATIN CAPITAL LETTER OO]
+ output[outputPos++] = 'O';
+ output[outputPos++] = 'O';
+ break;
+
+ case '\u0222':
+ // Ȣ http://en.wikipedia.org/wiki/OU [LATIN CAPITAL LETTER OU]
+ case '\u1D15': // á´• [LATIN LETTER SMALL CAPITAL OU]
+ output[outputPos++] = 'O';
+ output[outputPos++] = 'U';
+ break;
+
+ case '\u24AA': // â’ª [PARENTHESIZED LATIN SMALL LETTER O]
+ output[outputPos++] = '(';
+ output[outputPos++] = 'o';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u0153':
+ // Å“ [LATIN SMALL LIGATURE OE]
+ case '\u1D14': // ᴔ [LATIN SMALL LETTER TURNED OE]
+ output[outputPos++] = 'o';
+ output[outputPos++] = 'e';
+ break;
+
+ case '\uA74F': // ꝏ [LATIN SMALL LETTER OO]
+ output[outputPos++] = 'o';
+ output[outputPos++] = 'o';
+ break;
+
+ case '\u0223': // ȣ http://en.wikipedia.org/wiki/OU [LATIN SMALL LETTER OU]
+ output[outputPos++] = 'o';
+ output[outputPos++] = 'u';
+ break;
+
+ case '\u01A4':
+ // Ƥ [LATIN CAPITAL LETTER P WITH HOOK]
+ case '\u1D18':
+ // á´˜ [LATIN LETTER SMALL CAPITAL P]
+ case '\u1E54':
+ // Ṕ [LATIN CAPITAL LETTER P WITH ACUTE]
+ case '\u1E56':
+ // á¹– [LATIN CAPITAL LETTER P WITH DOT ABOVE]
+ case '\u24C5':
+ // â“… [CIRCLED LATIN CAPITAL LETTER P]
+ case '\u2C63':
+ // â±£ [LATIN CAPITAL LETTER P WITH STROKE]
+ case '\uA750':
+ // Ꝑ [LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER]
+ case '\uA752':
+ // Ꝓ [LATIN CAPITAL LETTER P WITH FLOURISH]
+ case '\uA754':
+ // Ꝕ [LATIN CAPITAL LETTER P WITH SQUIRREL TAIL]
+ case '\uFF30': // ï¼° [FULLWIDTH LATIN CAPITAL LETTER P]
+ output[outputPos++] = 'P';
+ break;
+
+ case '\u01A5':
+ // ƥ [LATIN SMALL LETTER P WITH HOOK]
+ case '\u1D71':
+ // áµ± [LATIN SMALL LETTER P WITH MIDDLE TILDE]
+ case '\u1D7D':
+ // áµ½ [LATIN SMALL LETTER P WITH STROKE]
+ case '\u1D88':
+ // ᶈ [LATIN SMALL LETTER P WITH PALATAL HOOK]
+ case '\u1E55':
+ // ṕ [LATIN SMALL LETTER P WITH ACUTE]
+ case '\u1E57':
+ // á¹— [LATIN SMALL LETTER P WITH DOT ABOVE]
+ case '\u24DF':
+ // â“Ÿ [CIRCLED LATIN SMALL LETTER P]
+ case '\uA751':
+ // ꝑ [LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER]
+ case '\uA753':
+ // ꝓ [LATIN SMALL LETTER P WITH FLOURISH]
+ case '\uA755':
+ // ꝕ [LATIN SMALL LETTER P WITH SQUIRREL TAIL]
+ case '\uA7FC':
+ // ꟼ [LATIN EPIGRAPHIC LETTER REVERSED P]
+ case '\uFF50': // ｐ [FULLWIDTH LATIN SMALL LETTER P]
+ output[outputPos++] = 'p';
+ break;
+
+ case '\u24AB': // â’« [PARENTHESIZED LATIN SMALL LETTER P]
+ output[outputPos++] = '(';
+ output[outputPos++] = 'p';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u024A':
+ // ÉŠ [LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL]
+ case '\u24C6':
+ // Ⓠ [CIRCLED LATIN CAPITAL LETTER Q]
+ case '\uA756':
+ // Ꝗ [LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER]
+ case '\uA758':
+ // Ꝙ [LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE]
+ case '\uFF31': // ï¼± [FULLWIDTH LATIN CAPITAL LETTER Q]
+ output[outputPos++] = 'Q';
+ break;
+
+ case '\u0138':
+ // ĸ http://en.wikipedia.org/wiki/Kra_(letter) [LATIN SMALL LETTER KRA]
+ case '\u024B':
+ // É‹ [LATIN SMALL LETTER Q WITH HOOK TAIL]
+ case '\u02A0':
+ // Ê  [LATIN SMALL LETTER Q WITH HOOK]
+ case '\u24E0':
+ // â“  [CIRCLED LATIN SMALL LETTER Q]
+ case '\uA757':
+ // ꝗ [LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER]
+ case '\uA759':
+ // ꝙ [LATIN SMALL LETTER Q WITH DIAGONAL STROKE]
+ case '\uFF51': // q [FULLWIDTH LATIN SMALL LETTER Q]
+ output[outputPos++] = 'q';
+ break;
+
+ case '\u24AC': // â’¬ [PARENTHESIZED LATIN SMALL LETTER Q]
+ output[outputPos++] = '(';
+ output[outputPos++] = 'q';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u0239': // ȹ [LATIN SMALL LETTER QP DIGRAPH]
+ output[outputPos++] = 'q';
+ output[outputPos++] = 'p';
+ break;
+
+ case '\u0154':
+ // Ŕ [LATIN CAPITAL LETTER R WITH ACUTE]
+ case '\u0156':
+ // Å– [LATIN CAPITAL LETTER R WITH CEDILLA]
+ case '\u0158':
+ // Ř [LATIN CAPITAL LETTER R WITH CARON]
+ case '\u0210':
+ // Ȑ [LATIN CAPITAL LETTER R WITH DOUBLE GRAVE]
+ case '\u0212':
+ // Ȓ [LATIN CAPITAL LETTER R WITH INVERTED BREVE]
+ case '\u024C':
+ // Ɍ [LATIN CAPITAL LETTER R WITH STROKE]
+ case '\u0280':
+ // ʀ [LATIN LETTER SMALL CAPITAL R]
+ case '\u0281':
+ // ʁ [LATIN LETTER SMALL CAPITAL INVERTED R]
+ case '\u1D19':
+ // á´™ [LATIN LETTER SMALL CAPITAL REVERSED R]
+ case '\u1D1A':
+ // á´š [LATIN LETTER SMALL CAPITAL TURNED R]
+ case '\u1E58':
+ // Ṙ [LATIN CAPITAL LETTER R WITH DOT ABOVE]
+ case '\u1E5A':
+ // Ṛ [LATIN CAPITAL LETTER R WITH DOT BELOW]
+ case '\u1E5C':
+ // Ṝ [LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON]
+ case '\u1E5E':
+ // Ṟ [LATIN CAPITAL LETTER R WITH LINE BELOW]
+ case '\u24C7':
+ // Ⓡ [CIRCLED LATIN CAPITAL LETTER R]
+ case '\u2C64':
+ // Ɽ [LATIN CAPITAL LETTER R WITH TAIL]
+ case '\uA75A':
+ // Ꝛ [LATIN CAPITAL LETTER R ROTUNDA]
+ case '\uA782':
+ // êž‚ [LATIN CAPITAL LETTER INSULAR R]
+ case '\uFF32': // ï¼² [FULLWIDTH LATIN CAPITAL LETTER R]
+ output[outputPos++] = 'R';
+ break;
+
+ case '\u0155':
+ // Å• [LATIN SMALL LETTER R WITH ACUTE]
+ case '\u0157':
+ // Å— [LATIN SMALL LETTER R WITH CEDILLA]
+ case '\u0159':
+ // ř [LATIN SMALL LETTER R WITH CARON]
+ case '\u0211':
+ // È‘ [LATIN SMALL LETTER R WITH DOUBLE GRAVE]
+ case '\u0213':
+ // È“ [LATIN SMALL LETTER R WITH INVERTED BREVE]
+ case '\u024D':
+ // ɍ [LATIN SMALL LETTER R WITH STROKE]
+ case '\u027C':
+ // ɼ [LATIN SMALL LETTER R WITH LONG LEG]
+ case '\u027D':
+ // ɽ [LATIN SMALL LETTER R WITH TAIL]
+ case '\u027E':
+ // ɾ [LATIN SMALL LETTER R WITH FISHHOOK]
+ case '\u027F':
+ // É¿ [LATIN SMALL LETTER REVERSED R WITH FISHHOOK]
+ case '\u1D63':
+ // áµ£ [LATIN SUBSCRIPT SMALL LETTER R]
+ case '\u1D72':
+ // áµ² [LATIN SMALL LETTER R WITH MIDDLE TILDE]
+ case '\u1D73':
+ // áµ³ [LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE]
+ case '\u1D89':
+ // ᶉ [LATIN SMALL LETTER R WITH PALATAL HOOK]
+ case '\u1E59':
+ // á¹™ [LATIN SMALL LETTER R WITH DOT ABOVE]
+ case '\u1E5B':
+ // á¹› [LATIN SMALL LETTER R WITH DOT BELOW]
+ case '\u1E5D':
+ // ṝ [LATIN SMALL LETTER R WITH DOT BELOW AND MACRON]
+ case '\u1E5F':
+ // ṟ [LATIN SMALL LETTER R WITH LINE BELOW]
+ case '\u24E1':
+ // â“¡ [CIRCLED LATIN SMALL LETTER R]
+ case '\uA75B':
+ // ꝛ [LATIN SMALL LETTER R ROTUNDA]
+ case '\uA783':
+ // ꞃ [LATIN SMALL LETTER INSULAR R]
+ case '\uFF52': // ï½’ [FULLWIDTH LATIN SMALL LETTER R]
+ output[outputPos++] = 'r';
+ break;
+
+ case '\u24AD': // â’­ [PARENTHESIZED LATIN SMALL LETTER R]
+ output[outputPos++] = '(';
+ output[outputPos++] = 'r';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u015A':
+ // Ś [LATIN CAPITAL LETTER S WITH ACUTE]
+ case '\u015C':
+ // Ŝ [LATIN CAPITAL LETTER S WITH CIRCUMFLEX]
+ case '\u015E':
+ // Åž [LATIN CAPITAL LETTER S WITH CEDILLA]
+ case '\u0160':
+ // Å  [LATIN CAPITAL LETTER S WITH CARON]
+ case '\u0218':
+ // Ș [LATIN CAPITAL LETTER S WITH COMMA BELOW]
+ case '\u1E60':
+ // á¹  [LATIN CAPITAL LETTER S WITH DOT ABOVE]
+ case '\u1E62':
+ // á¹¢ [LATIN CAPITAL LETTER S WITH DOT BELOW]
+ case '\u1E64':
+ // Ṥ [LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE]
+ case '\u1E66':
+ // Ṧ [LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE]
+ case '\u1E68':
+ // Ṩ [LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE]
+ case '\u24C8':
+ // Ⓢ [CIRCLED LATIN CAPITAL LETTER S]
+ case '\uA731':
+ // ꜱ [LATIN LETTER SMALL CAPITAL S]
+ case '\uA785':
+ // êž… [LATIN SMALL LETTER INSULAR S]
+ case '\uFF33': // ï¼³ [FULLWIDTH LATIN CAPITAL LETTER S]
+ output[outputPos++] = 'S';
+ break;
+
+ case '\u015B':
+ // Å› [LATIN SMALL LETTER S WITH ACUTE]
+ case '\u015D':
+ // ŝ [LATIN SMALL LETTER S WITH CIRCUMFLEX]
+ case '\u015F':
+ // ÅŸ [LATIN SMALL LETTER S WITH CEDILLA]
+ case '\u0161':
+ // Å¡ [LATIN SMALL LETTER S WITH CARON]
+ case '\u017F':
+ // Å¿ http://en.wikipedia.org/wiki/Long_S [LATIN SMALL LETTER LONG S]
+ case '\u0219':
+ // È™ [LATIN SMALL LETTER S WITH COMMA BELOW]
+ case '\u023F':
+ // È¿ [LATIN SMALL LETTER S WITH SWASH TAIL]
+ case '\u0282':
+ // Ê‚ [LATIN SMALL LETTER S WITH HOOK]
+ case '\u1D74':
+ // áµ´ [LATIN SMALL LETTER S WITH MIDDLE TILDE]
+ case '\u1D8A':
+ // ᶊ [LATIN SMALL LETTER S WITH PALATAL HOOK]
+ case '\u1E61':
+ // ṡ [LATIN SMALL LETTER S WITH DOT ABOVE]
+ case '\u1E63':
+ // á¹£ [LATIN SMALL LETTER S WITH DOT BELOW]
+ case '\u1E65':
+ // á¹¥ [LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE]
+ case '\u1E67':
+ // ṧ [LATIN SMALL LETTER S WITH CARON AND DOT ABOVE]
+ case '\u1E69':
+ // ṩ [LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE]
+ case '\u1E9C':
+ // ẜ [LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE]
+ case '\u1E9D':
+ // ẝ [LATIN SMALL LETTER LONG S WITH HIGH STROKE]
+ case '\u24E2':
+ // â“¢ [CIRCLED LATIN SMALL LETTER S]
+ case '\uA784':
+ // êž„ [LATIN CAPITAL LETTER INSULAR S]
+ case '\uFF53': // s [FULLWIDTH LATIN SMALL LETTER S]
+ output[outputPos++] = 's';
+ break;
+
+ case '\u1E9E': // ẞ [LATIN CAPITAL LETTER SHARP S]
+ output[outputPos++] = 'S';
+ output[outputPos++] = 'S';
+ break;
+
+ case '\u24AE': // â’® [PARENTHESIZED LATIN SMALL LETTER S]
+ output[outputPos++] = '(';
+ output[outputPos++] = 's';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u00DF': // ß [LATIN SMALL LETTER SHARP S]
+ output[outputPos++] = 's';
+ output[outputPos++] = 's';
+ break;
+
+ case '\uFB06': // st [LATIN SMALL LIGATURE ST]
+ output[outputPos++] = 's';
+ output[outputPos++] = 't';
+ break;
+
+ case '\u0162':
+ // Ţ [LATIN CAPITAL LETTER T WITH CEDILLA]
+ case '\u0164':
+ // Ť [LATIN CAPITAL LETTER T WITH CARON]
+ case '\u0166':
+ // Ŧ [LATIN CAPITAL LETTER T WITH STROKE]
+ case '\u01AC':
+ // Ƭ [LATIN CAPITAL LETTER T WITH HOOK]
+ case '\u01AE':
+ // Æ® [LATIN CAPITAL LETTER T WITH RETROFLEX HOOK]
+ case '\u021A':
+ // Èš [LATIN CAPITAL LETTER T WITH COMMA BELOW]
+ case '\u023E':
+ // Ⱦ [LATIN CAPITAL LETTER T WITH DIAGONAL STROKE]
+ case '\u1D1B':
+ // á´› [LATIN LETTER SMALL CAPITAL T]
+ case '\u1E6A':
+ // Ṫ [LATIN CAPITAL LETTER T WITH DOT ABOVE]
+ case '\u1E6C':
+ // Ṭ [LATIN CAPITAL LETTER T WITH DOT BELOW]
+ case '\u1E6E':
+ // á¹® [LATIN CAPITAL LETTER T WITH LINE BELOW]
+ case '\u1E70':
+ // á¹° [LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW]
+ case '\u24C9':
+ // Ⓣ [CIRCLED LATIN CAPITAL LETTER T]
+ case '\uA786':
+ // Ꞇ [LATIN CAPITAL LETTER INSULAR T]
+ case '\uFF34': // ï¼´ [FULLWIDTH LATIN CAPITAL LETTER T]
+ output[outputPos++] = 'T';
+ break;
+
+ case '\u0163':
+ // ţ [LATIN SMALL LETTER T WITH CEDILLA]
+ case '\u0165':
+ // ť [LATIN SMALL LETTER T WITH CARON]
+ case '\u0167':
+ // ŧ [LATIN SMALL LETTER T WITH STROKE]
+ case '\u01AB':
+ // Æ« [LATIN SMALL LETTER T WITH PALATAL HOOK]
+ case '\u01AD':
+ // Æ­ [LATIN SMALL LETTER T WITH HOOK]
+ case '\u021B':
+ // È› [LATIN SMALL LETTER T WITH COMMA BELOW]
+ case '\u0236':
+ // ȶ [LATIN SMALL LETTER T WITH CURL]
+ case '\u0287':
+ // ʇ [LATIN SMALL LETTER TURNED T]
+ case '\u0288':
+ // ʈ [LATIN SMALL LETTER T WITH RETROFLEX HOOK]
+ case '\u1D75':
+ // áµµ [LATIN SMALL LETTER T WITH MIDDLE TILDE]
+ case '\u1E6B':
+ // ṫ [LATIN SMALL LETTER T WITH DOT ABOVE]
+ case '\u1E6D':
+ // á¹­ [LATIN SMALL LETTER T WITH DOT BELOW]
+ case '\u1E6F':
+ // ṯ [LATIN SMALL LETTER T WITH LINE BELOW]
+ case '\u1E71':
+ // á¹± [LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW]
+ case '\u1E97':
+ // ẗ [LATIN SMALL LETTER T WITH DIAERESIS]
+ case '\u24E3':
+ // â“£ [CIRCLED LATIN SMALL LETTER T]
+ case '\u2C66':
+ // ⱦ [LATIN SMALL LETTER T WITH DIAGONAL STROKE]
+ case '\uFF54': // ｔ [FULLWIDTH LATIN SMALL LETTER T]
+ output[outputPos++] = 't';
+ break;
+
+ case '\u00DE':
+ // Þ [LATIN CAPITAL LETTER THORN]
+ case '\uA766': // Ꝧ [LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER]
+ output[outputPos++] = 'T';
+ output[outputPos++] = 'H';
+ break;
+
+ case '\uA728': // Ꜩ [LATIN CAPITAL LETTER TZ]
+ output[outputPos++] = 'T';
+ output[outputPos++] = 'Z';
+ break;
+
+ case '\u24AF': // â’¯ [PARENTHESIZED LATIN SMALL LETTER T]
+ output[outputPos++] = '(';
+ output[outputPos++] = 't';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u02A8': // ʨ [LATIN SMALL LETTER TC DIGRAPH WITH CURL]
+ output[outputPos++] = 't';
+ output[outputPos++] = 'c';
+ break;
+
+ case '\u00FE':
+ // þ [LATIN SMALL LETTER THORN]
+ case '\u1D7A':
+ // ᵺ [LATIN SMALL LETTER TH WITH STRIKETHROUGH]
+ case '\uA767': // ꝧ [LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER]
+ output[outputPos++] = 't';
+ output[outputPos++] = 'h';
+ break;
+
+ case '\u02A6': // ʦ [LATIN SMALL LETTER TS DIGRAPH]
+ output[outputPos++] = 't';
+ output[outputPos++] = 's';
+ break;
+
+ case '\uA729': // ꜩ [LATIN SMALL LETTER TZ]
+ output[outputPos++] = 't';
+ output[outputPos++] = 'z';
+ break;
+
+ case '\u00D9':
+ // Ù [LATIN CAPITAL LETTER U WITH GRAVE]
+ case '\u00DA':
+ // Ú [LATIN CAPITAL LETTER U WITH ACUTE]
+ case '\u00DB':
+ // Û [LATIN CAPITAL LETTER U WITH CIRCUMFLEX]
+ case '\u00DC':
+ // Ü [LATIN CAPITAL LETTER U WITH DIAERESIS]
+ case '\u0168':
+ // Ũ [LATIN CAPITAL LETTER U WITH TILDE]
+ case '\u016A':
+ // Ū [LATIN CAPITAL LETTER U WITH MACRON]
+ case '\u016C':
+ // Ŭ [LATIN CAPITAL LETTER U WITH BREVE]
+ case '\u016E':
+ // Å® [LATIN CAPITAL LETTER U WITH RING ABOVE]
+ case '\u0170':
+ // Å° [LATIN CAPITAL LETTER U WITH DOUBLE ACUTE]
+ case '\u0172':
+ // Ų [LATIN CAPITAL LETTER U WITH OGONEK]
+ case '\u01AF':
+ // Ư [LATIN CAPITAL LETTER U WITH HORN]
+ case '\u01D3':
+ // Ç“ [LATIN CAPITAL LETTER U WITH CARON]
+ case '\u01D5':
+ // Ç• [LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON]
+ case '\u01D7':
+ // Ç— [LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE]
+ case '\u01D9':
+ // Ç™ [LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON]
+ case '\u01DB':
+ // Ç› [LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE]
+ case '\u0214':
+ // Ȕ [LATIN CAPITAL LETTER U WITH DOUBLE GRAVE]
+ case '\u0216':
+ // È– [LATIN CAPITAL LETTER U WITH INVERTED BREVE]
+ case '\u0244':
+ // É„ [LATIN CAPITAL LETTER U BAR]
+ case '\u1D1C':
+ // ᴜ [LATIN LETTER SMALL CAPITAL U]
+ case '\u1D7E':
+ // áµ¾ [LATIN SMALL CAPITAL LETTER U WITH STROKE]
+ case '\u1E72':
+ // á¹² [LATIN CAPITAL LETTER U WITH DIAERESIS BELOW]
+ case '\u1E74':
+ // á¹´ [LATIN CAPITAL LETTER U WITH TILDE BELOW]
+ case '\u1E76':
+ // Ṷ [LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW]
+ case '\u1E78':
+ // Ṹ [LATIN CAPITAL LETTER U WITH TILDE AND ACUTE]
+ case '\u1E7A':
+ // Ṻ [LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS]
+ case '\u1EE4':
+ // Ụ [LATIN CAPITAL LETTER U WITH DOT BELOW]
+ case '\u1EE6':
+ // Ủ [LATIN CAPITAL LETTER U WITH HOOK ABOVE]
+ case '\u1EE8':
+ // Ứ [LATIN CAPITAL LETTER U WITH HORN AND ACUTE]
+ case '\u1EEA':
+ // Ừ [LATIN CAPITAL LETTER U WITH HORN AND GRAVE]
+ case '\u1EEC':
+ // Ử [LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE]
+ case '\u1EEE':
+ // á»® [LATIN CAPITAL LETTER U WITH HORN AND TILDE]
+ case '\u1EF0':
+ // á»° [LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW]
+ case '\u24CA':
+ // â“Š [CIRCLED LATIN CAPITAL LETTER U]
+ case '\uFF35': // ï¼µ [FULLWIDTH LATIN CAPITAL LETTER U]
+ output[outputPos++] = 'U';
+ break;
+
+ case '\u00F9':
+ // ù [LATIN SMALL LETTER U WITH GRAVE]
+ case '\u00FA':
+ // ú [LATIN SMALL LETTER U WITH ACUTE]
+ case '\u00FB':
+ // û [LATIN SMALL LETTER U WITH CIRCUMFLEX]
+ case '\u00FC':
+ // ü [LATIN SMALL LETTER U WITH DIAERESIS]
+ case '\u0169':
+ // Å© [LATIN SMALL LETTER U WITH TILDE]
+ case '\u016B':
+ // Å« [LATIN SMALL LETTER U WITH MACRON]
+ case '\u016D':
+ // Å­ [LATIN SMALL LETTER U WITH BREVE]
+ case '\u016F':
+ // ů [LATIN SMALL LETTER U WITH RING ABOVE]
+ case '\u0171':
+ // ű [LATIN SMALL LETTER U WITH DOUBLE ACUTE]
+ case '\u0173':
+ // ų [LATIN SMALL LETTER U WITH OGONEK]
+ case '\u01B0':
+ // Æ° [LATIN SMALL LETTER U WITH HORN]
+ case '\u01D4':
+ // ǔ [LATIN SMALL LETTER U WITH CARON]
+ case '\u01D6':
+ // Ç– [LATIN SMALL LETTER U WITH DIAERESIS AND MACRON]
+ case '\u01D8':
+ // ǘ [LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE]
+ case '\u01DA':
+ // Çš [LATIN SMALL LETTER U WITH DIAERESIS AND CARON]
+ case '\u01DC':
+ // ǜ [LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE]
+ case '\u0215':
+ // È• [LATIN SMALL LETTER U WITH DOUBLE GRAVE]
+ case '\u0217':
+ // È— [LATIN SMALL LETTER U WITH INVERTED BREVE]
+ case '\u0289':
+ // ʉ [LATIN SMALL LETTER U BAR]
+ case '\u1D64':
+ // ᵤ [LATIN SUBSCRIPT SMALL LETTER U]
+ case '\u1D99':
+ // ᶙ [LATIN SMALL LETTER U WITH RETROFLEX HOOK]
+ case '\u1E73':
+ // á¹³ [LATIN SMALL LETTER U WITH DIAERESIS BELOW]
+ case '\u1E75':
+ // á¹µ [LATIN SMALL LETTER U WITH TILDE BELOW]
+ case '\u1E77':
+ // á¹· [LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW]
+ case '\u1E79':
+ // á¹¹ [LATIN SMALL LETTER U WITH TILDE AND ACUTE]
+ case '\u1E7B':
+ // á¹» [LATIN SMALL LETTER U WITH MACRON AND DIAERESIS]
+ case '\u1EE5':
+ // ụ [LATIN SMALL LETTER U WITH DOT BELOW]
+ case '\u1EE7':
+ // ủ [LATIN SMALL LETTER U WITH HOOK ABOVE]
+ case '\u1EE9':
+ // ứ [LATIN SMALL LETTER U WITH HORN AND ACUTE]
+ case '\u1EEB':
+ // ừ [LATIN SMALL LETTER U WITH HORN AND GRAVE]
+ case '\u1EED':
+ // á»­ [LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE]
+ case '\u1EEF':
+ // ữ [LATIN SMALL LETTER U WITH HORN AND TILDE]
+ case '\u1EF1':
+ // á»± [LATIN SMALL LETTER U WITH HORN AND DOT BELOW]
+ case '\u24E4':
+ // ⓤ [CIRCLED LATIN SMALL LETTER U]
+ case '\uFF55': // u [FULLWIDTH LATIN SMALL LETTER U]
+ output[outputPos++] = 'u';
+ break;
+
+ case '\u24B0': // â’° [PARENTHESIZED LATIN SMALL LETTER U]
+ output[outputPos++] = '(';
+ output[outputPos++] = 'u';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u1D6B': // ᵫ [LATIN SMALL LETTER UE]
+ output[outputPos++] = 'u';
+ output[outputPos++] = 'e';
+ break;
+
+ case '\u01B2':
+ // Ʋ [LATIN CAPITAL LETTER V WITH HOOK]
+ case '\u0245':
+ // É… [LATIN CAPITAL LETTER TURNED V]
+ case '\u1D20':
+ // á´  [LATIN LETTER SMALL CAPITAL V]
+ case '\u1E7C':
+ // á¹¼ [LATIN CAPITAL LETTER V WITH TILDE]
+ case '\u1E7E':
+ // á¹¾ [LATIN CAPITAL LETTER V WITH DOT BELOW]
+ case '\u1EFC':
+ // Ỽ [LATIN CAPITAL LETTER MIDDLE-WELSH V]
+ case '\u24CB':
+ // â“‹ [CIRCLED LATIN CAPITAL LETTER V]
+ case '\uA75E':
+ // Ꝟ [LATIN CAPITAL LETTER V WITH DIAGONAL STROKE]
+ case '\uA768':
+ // Ꝩ [LATIN CAPITAL LETTER VEND]
+ case '\uFF36': // V [FULLWIDTH LATIN CAPITAL LETTER V]
+ output[outputPos++] = 'V';
+ break;
+
+ case '\u028B':
+ // Ê‹ [LATIN SMALL LETTER V WITH HOOK]
+ case '\u028C':
+ // ʌ [LATIN SMALL LETTER TURNED V]
+ case '\u1D65':
+ // áµ¥ [LATIN SUBSCRIPT SMALL LETTER V]
+ case '\u1D8C':
+ // ᶌ [LATIN SMALL LETTER V WITH PALATAL HOOK]
+ case '\u1E7D':
+ // á¹½ [LATIN SMALL LETTER V WITH TILDE]
+ case '\u1E7F':
+ // ṿ [LATIN SMALL LETTER V WITH DOT BELOW]
+ case '\u24E5':
+ // â“¥ [CIRCLED LATIN SMALL LETTER V]
+ case '\u2C71':
+ // â±± [LATIN SMALL LETTER V WITH RIGHT HOOK]
+ case '\u2C74':
+ // â±´ [LATIN SMALL LETTER V WITH CURL]
+ case '\uA75F':
+ // ꝟ [LATIN SMALL LETTER V WITH DIAGONAL STROKE]
+ case '\uFF56': // ï½– [FULLWIDTH LATIN SMALL LETTER V]
+ output[outputPos++] = 'v';
+ break;
+
+ case '\uA760': // Ꝡ [LATIN CAPITAL LETTER VY]
+ output[outputPos++] = 'V';
+ output[outputPos++] = 'Y';
+ break;
+
+ case '\u24B1': // â’± [PARENTHESIZED LATIN SMALL LETTER V]
+ output[outputPos++] = '(';
+ output[outputPos++] = 'v';
+ output[outputPos++] = ')';
+ break;
+
+ case '\uA761': // ꝡ [LATIN SMALL LETTER VY]
+ output[outputPos++] = 'v';
+ output[outputPos++] = 'y';
+ break;
+
+ case '\u0174':
+ // Å´ [LATIN CAPITAL LETTER W WITH CIRCUMFLEX]
+ case '\u01F7':
+ // Ç· http://en.wikipedia.org/wiki/Wynn [LATIN CAPITAL LETTER WYNN]
+ case '\u1D21':
+ // á´¡ [LATIN LETTER SMALL CAPITAL W]
+ case '\u1E80':
+ // Ẁ [LATIN CAPITAL LETTER W WITH GRAVE]
+ case '\u1E82':
+ // Ẃ [LATIN CAPITAL LETTER W WITH ACUTE]
+ case '\u1E84':
+ // Ẅ [LATIN CAPITAL LETTER W WITH DIAERESIS]
+ case '\u1E86':
+ // Ẇ [LATIN CAPITAL LETTER W WITH DOT ABOVE]
+ case '\u1E88':
+ // Ẉ [LATIN CAPITAL LETTER W WITH DOT BELOW]
+ case '\u24CC':
+ // Ⓦ [CIRCLED LATIN CAPITAL LETTER W]
+ case '\u2C72':
+ // â±² [LATIN CAPITAL LETTER W WITH HOOK]
+ case '\uFF37': // ï¼· [FULLWIDTH LATIN CAPITAL LETTER W]
+ output[outputPos++] = 'W';
+ break;
+
+ case '\u0175':
+ // ŵ [LATIN SMALL LETTER W WITH CIRCUMFLEX]
+ case '\u01BF':
+ // Æ¿ http://en.wikipedia.org/wiki/Wynn [LATIN LETTER WYNN]
+ case '\u028D':
+ // ʍ [LATIN SMALL LETTER TURNED W]
+ case '\u1E81':
+ // ẁ [LATIN SMALL LETTER W WITH GRAVE]
+ case '\u1E83':
+ // ẃ [LATIN SMALL LETTER W WITH ACUTE]
+ case '\u1E85':
+ // ẅ [LATIN SMALL LETTER W WITH DIAERESIS]
+ case '\u1E87':
+ // ẇ [LATIN SMALL LETTER W WITH DOT ABOVE]
+ case '\u1E89':
+ // ẉ [LATIN SMALL LETTER W WITH DOT BELOW]
+ case '\u1E98':
+ // ẘ [LATIN SMALL LETTER W WITH RING ABOVE]
+ case '\u24E6':
+ // ⓦ [CIRCLED LATIN SMALL LETTER W]
+ case '\u2C73':
+ // â±³ [LATIN SMALL LETTER W WITH HOOK]
+ case '\uFF57': // ï½— [FULLWIDTH LATIN SMALL LETTER W]
+ output[outputPos++] = 'w';
+ break;
+
+ case '\u24B2': // â’² [PARENTHESIZED LATIN SMALL LETTER W]
+ output[outputPos++] = '(';
+ output[outputPos++] = 'w';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u1E8A':
+ // Ẋ [LATIN CAPITAL LETTER X WITH DOT ABOVE]
+ case '\u1E8C':
+ // Ẍ [LATIN CAPITAL LETTER X WITH DIAERESIS]
+ case '\u24CD':
+ // Ⓧ [CIRCLED LATIN CAPITAL LETTER X]
+ case '\uFF38': // X [FULLWIDTH LATIN CAPITAL LETTER X]
+ output[outputPos++] = 'X';
+ break;
+
+ case '\u1D8D':
+ // ᶍ [LATIN SMALL LETTER X WITH PALATAL HOOK]
+ case '\u1E8B':
+ // ẋ [LATIN SMALL LETTER X WITH DOT ABOVE]
+ case '\u1E8D':
+ // ẍ [LATIN SMALL LETTER X WITH DIAERESIS]
+ case '\u2093':
+ // â‚“ [LATIN SUBSCRIPT SMALL LETTER X]
+ case '\u24E7':
+ // ⓧ [CIRCLED LATIN SMALL LETTER X]
+ case '\uFF58': // x [FULLWIDTH LATIN SMALL LETTER X]
+ output[outputPos++] = 'x';
+ break;
+
+ case '\u24B3': // â’³ [PARENTHESIZED LATIN SMALL LETTER X]
+ output[outputPos++] = '(';
+ output[outputPos++] = 'x';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u00DD':
+ // Ý [LATIN CAPITAL LETTER Y WITH ACUTE]
+ case '\u0176':
+ // Ŷ [LATIN CAPITAL LETTER Y WITH CIRCUMFLEX]
+ case '\u0178':
+ // Ÿ [LATIN CAPITAL LETTER Y WITH DIAERESIS]
+ case '\u01B3':
+ // Ƴ [LATIN CAPITAL LETTER Y WITH HOOK]
+ case '\u0232':
+ // Ȳ [LATIN CAPITAL LETTER Y WITH MACRON]
+ case '\u024E':
+ // ÉŽ [LATIN CAPITAL LETTER Y WITH STROKE]
+ case '\u028F':
+ // ʏ [LATIN LETTER SMALL CAPITAL Y]
+ case '\u1E8E':
+ // Ẏ [LATIN CAPITAL LETTER Y WITH DOT ABOVE]
+ case '\u1EF2':
+ // Ỳ [LATIN CAPITAL LETTER Y WITH GRAVE]
+ case '\u1EF4':
+ // á»´ [LATIN CAPITAL LETTER Y WITH DOT BELOW]
+ case '\u1EF6':
+ // Ỷ [LATIN CAPITAL LETTER Y WITH HOOK ABOVE]
+ case '\u1EF8':
+ // Ỹ [LATIN CAPITAL LETTER Y WITH TILDE]
+ case '\u1EFE':
+ // Ỿ [LATIN CAPITAL LETTER Y WITH LOOP]
+ case '\u24CE':
+ // â“Ž [CIRCLED LATIN CAPITAL LETTER Y]
+ case '\uFF39': // ï¼¹ [FULLWIDTH LATIN CAPITAL LETTER Y]
+ output[outputPos++] = 'Y';
+ break;
+
+ case '\u00FD':
+ // ý [LATIN SMALL LETTER Y WITH ACUTE]
+ case '\u00FF':
+ // ÿ [LATIN SMALL LETTER Y WITH DIAERESIS]
+ case '\u0177':
+ // Å· [LATIN SMALL LETTER Y WITH CIRCUMFLEX]
+ case '\u01B4':
+ // Æ´ [LATIN SMALL LETTER Y WITH HOOK]
+ case '\u0233':
+ // ȳ [LATIN SMALL LETTER Y WITH MACRON]
+ case '\u024F':
+ // ɏ [LATIN SMALL LETTER Y WITH STROKE]
+ case '\u028E':
+ // ʎ [LATIN SMALL LETTER TURNED Y]
+ case '\u1E8F':
+ // ẏ [LATIN SMALL LETTER Y WITH DOT ABOVE]
+ case '\u1E99':
+ // ẙ [LATIN SMALL LETTER Y WITH RING ABOVE]
+ case '\u1EF3':
+ // ỳ [LATIN SMALL LETTER Y WITH GRAVE]
+ case '\u1EF5':
+ // ỵ [LATIN SMALL LETTER Y WITH DOT BELOW]
+ case '\u1EF7':
+ // á»· [LATIN SMALL LETTER Y WITH HOOK ABOVE]
+ case '\u1EF9':
+ // ỹ [LATIN SMALL LETTER Y WITH TILDE]
+ case '\u1EFF':
+ // ỿ [LATIN SMALL LETTER Y WITH LOOP]
+ case '\u24E8':
+ // ⓨ [CIRCLED LATIN SMALL LETTER Y]
+ case '\uFF59': // ï½™ [FULLWIDTH LATIN SMALL LETTER Y]
+ output[outputPos++] = 'y';
+ break;
+
+ case '\u24B4': // â’´ [PARENTHESIZED LATIN SMALL LETTER Y]
+ output[outputPos++] = '(';
+ output[outputPos++] = 'y';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u0179':
+ // Ź [LATIN CAPITAL LETTER Z WITH ACUTE]
+ case '\u017B':
+ // Å» [LATIN CAPITAL LETTER Z WITH DOT ABOVE]
+ case '\u017D':
+ // Ž [LATIN CAPITAL LETTER Z WITH CARON]
+ case '\u01B5':
+ // Ƶ [LATIN CAPITAL LETTER Z WITH STROKE]
+ case '\u021C':
+ // Ȝ http://en.wikipedia.org/wiki/Yogh [LATIN CAPITAL LETTER YOGH]
+ case '\u0224':
+ // Ȥ [LATIN CAPITAL LETTER Z WITH HOOK]
+ case '\u1D22':
+ // á´¢ [LATIN LETTER SMALL CAPITAL Z]
+ case '\u1E90':
+ // Ẑ [LATIN CAPITAL LETTER Z WITH CIRCUMFLEX]
+ case '\u1E92':
+ // Ẓ [LATIN CAPITAL LETTER Z WITH DOT BELOW]
+ case '\u1E94':
+ // Ẕ [LATIN CAPITAL LETTER Z WITH LINE BELOW]
+ case '\u24CF':
+ // Ⓩ [CIRCLED LATIN CAPITAL LETTER Z]
+ case '\u2C6B':
+ // Ⱬ [LATIN CAPITAL LETTER Z WITH DESCENDER]
+ case '\uA762':
+ // Ꝣ [LATIN CAPITAL LETTER VISIGOTHIC Z]
+ case '\uFF3A': // Z [FULLWIDTH LATIN CAPITAL LETTER Z]
+ output[outputPos++] = 'Z';
+ break;
+
+ case '\u017A':
+ // ź [LATIN SMALL LETTER Z WITH ACUTE]
+ case '\u017C':
+ // ż [LATIN SMALL LETTER Z WITH DOT ABOVE]
+ case '\u017E':
+ // ž [LATIN SMALL LETTER Z WITH CARON]
+ case '\u01B6':
+ // ƶ [LATIN SMALL LETTER Z WITH STROKE]
+ case '\u021D':
+ // ȝ http://en.wikipedia.org/wiki/Yogh [LATIN SMALL LETTER YOGH]
+ case '\u0225':
+ // ȥ [LATIN SMALL LETTER Z WITH HOOK]
+ case '\u0240':
+ // ɀ [LATIN SMALL LETTER Z WITH SWASH TAIL]
+ case '\u0290':
+ // ʐ [LATIN SMALL LETTER Z WITH RETROFLEX HOOK]
+ case '\u0291':
+ // Ê‘ [LATIN SMALL LETTER Z WITH CURL]
+ case '\u1D76':
+ // ᵶ [LATIN SMALL LETTER Z WITH MIDDLE TILDE]
+ case '\u1D8E':
+ // ᶎ [LATIN SMALL LETTER Z WITH PALATAL HOOK]
+ case '\u1E91':
+ // ẑ [LATIN SMALL LETTER Z WITH CIRCUMFLEX]
+ case '\u1E93':
+ // ẓ [LATIN SMALL LETTER Z WITH DOT BELOW]
+ case '\u1E95':
+ // ẕ [LATIN SMALL LETTER Z WITH LINE BELOW]
+ case '\u24E9':
+ // â“© [CIRCLED LATIN SMALL LETTER Z]
+ case '\u2C6C':
+ // ⱬ [LATIN SMALL LETTER Z WITH DESCENDER]
+ case '\uA763':
+ // ꝣ [LATIN SMALL LETTER VISIGOTHIC Z]
+ case '\uFF5A': // z [FULLWIDTH LATIN SMALL LETTER Z]
+ output[outputPos++] = 'z';
+ break;
+
+ case '\u24B5': // â’µ [PARENTHESIZED LATIN SMALL LETTER Z]
+ output[outputPos++] = '(';
+ output[outputPos++] = 'z';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u2070':
+ // ⁰ [SUPERSCRIPT ZERO]
+ case '\u2080':
+ // ₀ [SUBSCRIPT ZERO]
+ case '\u24EA':
+ // ⓪ [CIRCLED DIGIT ZERO]
+ case '\u24FF':
+ // ⓿ [NEGATIVE CIRCLED DIGIT ZERO]
+ case '\uFF10': // ０ [FULLWIDTH DIGIT ZERO]
+ output[outputPos++] = '0';
+ break;
+
+ case '\u00B9':
+ // ¹ [SUPERSCRIPT ONE]
+ case '\u2081':
+ // ₁ [SUBSCRIPT ONE]
+ case '\u2460':
+ // ① [CIRCLED DIGIT ONE]
+ case '\u24F5':
+ // ⓵ [DOUBLE CIRCLED DIGIT ONE]
+ case '\u2776':
+ // ❶ [DINGBAT NEGATIVE CIRCLED DIGIT ONE]
+ case '\u2780':
+ // ➀ [DINGBAT CIRCLED SANS-SERIF DIGIT ONE]
+ case '\u278A':
+ // ➊ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE]
+ case '\uFF11': // 1 [FULLWIDTH DIGIT ONE]
+ output[outputPos++] = '1';
+ break;
+
+ case '\u2488': // â’ˆ [DIGIT ONE FULL STOP]
+ output[outputPos++] = '1';
+ output[outputPos++] = '.';
+ break;
+
+ case '\u2474': // â‘´ [PARENTHESIZED DIGIT ONE]
+ output[outputPos++] = '(';
+ output[outputPos++] = '1';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u00B2':
+ // ² [SUPERSCRIPT TWO]
+ case '\u2082':
+ // â‚‚ [SUBSCRIPT TWO]
+ case '\u2461':
+ // â‘¡ [CIRCLED DIGIT TWO]
+ case '\u24F6':
+ // ⓶ [DOUBLE CIRCLED DIGIT TWO]
+ case '\u2777':
+ // ❷ [DINGBAT NEGATIVE CIRCLED DIGIT TWO]
+ case '\u2781':
+ // ➁ [DINGBAT CIRCLED SANS-SERIF DIGIT TWO]
+ case '\u278B':
+ // âž‹ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO]
+ case '\uFF12': // ï¼’ [FULLWIDTH DIGIT TWO]
+ output[outputPos++] = '2';
+ break;
+
+ case '\u2489': // â’‰ [DIGIT TWO FULL STOP]
+ output[outputPos++] = '2';
+ output[outputPos++] = '.';
+ break;
+
+ case '\u2475': // ⑵ [PARENTHESIZED DIGIT TWO]
+ output[outputPos++] = '(';
+ output[outputPos++] = '2';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u00B3':
+ // ³ [SUPERSCRIPT THREE]
+ case '\u2083':
+ // ₃ [SUBSCRIPT THREE]
+ case '\u2462':
+ // â‘¢ [CIRCLED DIGIT THREE]
+ case '\u24F7':
+ // â“· [DOUBLE CIRCLED DIGIT THREE]
+ case '\u2778':
+ // ❸ [DINGBAT NEGATIVE CIRCLED DIGIT THREE]
+ case '\u2782':
+ // âž‚ [DINGBAT CIRCLED SANS-SERIF DIGIT THREE]
+ case '\u278C':
+ // ➌ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE]
+ case '\uFF13': // 3 [FULLWIDTH DIGIT THREE]
+ output[outputPos++] = '3';
+ break;
+
+ case '\u248A': // â’Š [DIGIT THREE FULL STOP]
+ output[outputPos++] = '3';
+ output[outputPos++] = '.';
+ break;
+
+ case '\u2476': // ⑶ [PARENTHESIZED DIGIT THREE]
+ output[outputPos++] = '(';
+ output[outputPos++] = '3';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u2074':
+ // ⁴ [SUPERSCRIPT FOUR]
+ case '\u2084':
+ // ₄ [SUBSCRIPT FOUR]
+ case '\u2463':
+ // ④ [CIRCLED DIGIT FOUR]
+ case '\u24F8':
+ // ⓸ [DOUBLE CIRCLED DIGIT FOUR]
+ case '\u2779':
+ // ❹ [DINGBAT NEGATIVE CIRCLED DIGIT FOUR]
+ case '\u2783':
+ // ➃ [DINGBAT CIRCLED SANS-SERIF DIGIT FOUR]
+ case '\u278D':
+ // ➍ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR]
+ case '\uFF14': // ４ [FULLWIDTH DIGIT FOUR]
+ output[outputPos++] = '4';
+ break;
+
+ case '\u248B': // â’‹ [DIGIT FOUR FULL STOP]
+ output[outputPos++] = '4';
+ output[outputPos++] = '.';
+ break;
+
+ case '\u2477': // â‘· [PARENTHESIZED DIGIT FOUR]
+ output[outputPos++] = '(';
+ output[outputPos++] = '4';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u2075':
+ // ⁵ [SUPERSCRIPT FIVE]
+ case '\u2085':
+ // ₅ [SUBSCRIPT FIVE]
+ case '\u2464':
+ // ⑤ [CIRCLED DIGIT FIVE]
+ case '\u24F9':
+ // ⓹ [DOUBLE CIRCLED DIGIT FIVE]
+ case '\u277A':
+ // ❺ [DINGBAT NEGATIVE CIRCLED DIGIT FIVE]
+ case '\u2784':
+ // âž„ [DINGBAT CIRCLED SANS-SERIF DIGIT FIVE]
+ case '\u278E':
+ // ➎ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE]
+ case '\uFF15': // 5 [FULLWIDTH DIGIT FIVE]
+ output[outputPos++] = '5';
+ break;
+
+ case '\u248C': // ⒌ [DIGIT FIVE FULL STOP]
+ output[outputPos++] = '5';
+ output[outputPos++] = '.';
+ break;
+
+ case '\u2478': // ⑸ [PARENTHESIZED DIGIT FIVE]
+ output[outputPos++] = '(';
+ output[outputPos++] = '5';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u2076':
+ // ⁶ [SUPERSCRIPT SIX]
+ case '\u2086':
+ // ₆ [SUBSCRIPT SIX]
+ case '\u2465':
+ // ⑥ [CIRCLED DIGIT SIX]
+ case '\u24FA':
+ // ⓺ [DOUBLE CIRCLED DIGIT SIX]
+ case '\u277B':
+ // ❻ [DINGBAT NEGATIVE CIRCLED DIGIT SIX]
+ case '\u2785':
+ // ➅ [DINGBAT CIRCLED SANS-SERIF DIGIT SIX]
+ case '\u278F':
+ // ➏ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX]
+ case '\uFF16': // ６ [FULLWIDTH DIGIT SIX]
+ output[outputPos++] = '6';
+ break;
+
+ case '\u248D': // ⒍ [DIGIT SIX FULL STOP]
+ output[outputPos++] = '6';
+ output[outputPos++] = '.';
+ break;
+
+ case '\u2479': // ⑹ [PARENTHESIZED DIGIT SIX]
+ output[outputPos++] = '(';
+ output[outputPos++] = '6';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u2077':
+ // ⁷ [SUPERSCRIPT SEVEN]
+ case '\u2087':
+ // ₇ [SUBSCRIPT SEVEN]
+ case '\u2466':
+ // ⑦ [CIRCLED DIGIT SEVEN]
+ case '\u24FB':
+ // ⓻ [DOUBLE CIRCLED DIGIT SEVEN]
+ case '\u277C':
+ // ❼ [DINGBAT NEGATIVE CIRCLED DIGIT SEVEN]
+ case '\u2786':
+ // ➆ [DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN]
+ case '\u2790':
+ // ➐ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN]
+ case '\uFF17': // ７ [FULLWIDTH DIGIT SEVEN]
+ output[outputPos++] = '7';
+ break;
+
+ case '\u248E': // â’Ž [DIGIT SEVEN FULL STOP]
+ output[outputPos++] = '7';
+ output[outputPos++] = '.';
+ break;
+
+ case '\u247A': // ⑺ [PARENTHESIZED DIGIT SEVEN]
+ output[outputPos++] = '(';
+ output[outputPos++] = '7';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u2078':
+ // ⁸ [SUPERSCRIPT EIGHT]
+ case '\u2088':
+ // ₈ [SUBSCRIPT EIGHT]
+ case '\u2467':
+ // ⑧ [CIRCLED DIGIT EIGHT]
+ case '\u24FC':
+ // ⓼ [DOUBLE CIRCLED DIGIT EIGHT]
+ case '\u277D':
+ // ❽ [DINGBAT NEGATIVE CIRCLED DIGIT EIGHT]
+ case '\u2787':
+ // ➇ [DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT]
+ case '\u2791':
+ // âž‘ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT]
+ case '\uFF18': // 8 [FULLWIDTH DIGIT EIGHT]
+ output[outputPos++] = '8';
+ break;
+
+ case '\u248F': // ⒏ [DIGIT EIGHT FULL STOP]
+ output[outputPos++] = '8';
+ output[outputPos++] = '.';
+ break;
+
+ case '\u247B': // â‘» [PARENTHESIZED DIGIT EIGHT]
+ output[outputPos++] = '(';
+ output[outputPos++] = '8';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u2079':
+ // ⁹ [SUPERSCRIPT NINE]
+ case '\u2089':
+ // ₉ [SUBSCRIPT NINE]
+ case '\u2468':
+ // ⑨ [CIRCLED DIGIT NINE]
+ case '\u24FD':
+ // ⓽ [DOUBLE CIRCLED DIGIT NINE]
+ case '\u277E':
+ // ❾ [DINGBAT NEGATIVE CIRCLED DIGIT NINE]
+ case '\u2788':
+ // ➈ [DINGBAT CIRCLED SANS-SERIF DIGIT NINE]
+ case '\u2792':
+ // âž’ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE]
+ case '\uFF19': // ï¼™ [FULLWIDTH DIGIT NINE]
+ output[outputPos++] = '9';
+ break;
+
+ case '\u2490': // ⒐ [DIGIT NINE FULL STOP]
+ output[outputPos++] = '9';
+ output[outputPos++] = '.';
+ break;
+
+ case '\u247C': // ⑼ [PARENTHESIZED DIGIT NINE]
+ output[outputPos++] = '(';
+ output[outputPos++] = '9';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u2469':
+ // â‘© [CIRCLED NUMBER TEN]
+ case '\u24FE':
+ // ⓾ [DOUBLE CIRCLED NUMBER TEN]
+ case '\u277F':
+ // ❿ [DINGBAT NEGATIVE CIRCLED NUMBER TEN]
+ case '\u2789':
+ // ➉ [DINGBAT CIRCLED SANS-SERIF NUMBER TEN]
+ case '\u2793': // âž“ [DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN]
+ output[outputPos++] = '1';
+ output[outputPos++] = '0';
+ break;
+
+ case '\u2491': // â’‘ [NUMBER TEN FULL STOP]
+ output[outputPos++] = '1';
+ output[outputPos++] = '0';
+ output[outputPos++] = '.';
+ break;
+
+ case '\u247D': // ⑽ [PARENTHESIZED NUMBER TEN]
+ output[outputPos++] = '(';
+ output[outputPos++] = '1';
+ output[outputPos++] = '0';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u246A':
+ // ⑪ [CIRCLED NUMBER ELEVEN]
+ case '\u24EB': // â“« [NEGATIVE CIRCLED NUMBER ELEVEN]
+ output[outputPos++] = '1';
+ output[outputPos++] = '1';
+ break;
+
+ case '\u2492': // â’’ [NUMBER ELEVEN FULL STOP]
+ output[outputPos++] = '1';
+ output[outputPos++] = '1';
+ output[outputPos++] = '.';
+ break;
+
+ case '\u247E': // ⑾ [PARENTHESIZED NUMBER ELEVEN]
+ output[outputPos++] = '(';
+ output[outputPos++] = '1';
+ output[outputPos++] = '1';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u246B':
+ // â‘« [CIRCLED NUMBER TWELVE]
+ case '\u24EC': // ⓬ [NEGATIVE CIRCLED NUMBER TWELVE]
+ output[outputPos++] = '1';
+ output[outputPos++] = '2';
+ break;
+
+ case '\u2493': // â’“ [NUMBER TWELVE FULL STOP]
+ output[outputPos++] = '1';
+ output[outputPos++] = '2';
+ output[outputPos++] = '.';
+ break;
+
+ case '\u247F': // â‘¿ [PARENTHESIZED NUMBER TWELVE]
+ output[outputPos++] = '(';
+ output[outputPos++] = '1';
+ output[outputPos++] = '2';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u246C':
+ // ⑬ [CIRCLED NUMBER THIRTEEN]
+ case '\u24ED': // â“­ [NEGATIVE CIRCLED NUMBER THIRTEEN]
+ output[outputPos++] = '1';
+ output[outputPos++] = '3';
+ break;
+
+ case '\u2494': // ⒔ [NUMBER THIRTEEN FULL STOP]
+ output[outputPos++] = '1';
+ output[outputPos++] = '3';
+ output[outputPos++] = '.';
+ break;
+
+ case '\u2480': // â’€ [PARENTHESIZED NUMBER THIRTEEN]
+ output[outputPos++] = '(';
+ output[outputPos++] = '1';
+ output[outputPos++] = '3';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u246D':
+ // â‘­ [CIRCLED NUMBER FOURTEEN]
+ case '\u24EE': // â“® [NEGATIVE CIRCLED NUMBER FOURTEEN]
+ output[outputPos++] = '1';
+ output[outputPos++] = '4';
+ break;
+
+ case '\u2495': // â’• [NUMBER FOURTEEN FULL STOP]
+ output[outputPos++] = '1';
+ output[outputPos++] = '4';
+ output[outputPos++] = '.';
+ break;
+
+ case '\u2481': // ⒁ [PARENTHESIZED NUMBER FOURTEEN]
+ output[outputPos++] = '(';
+ output[outputPos++] = '1';
+ output[outputPos++] = '4';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u246E':
+ // â‘® [CIRCLED NUMBER FIFTEEN]
+ case '\u24EF': // ⓯ [NEGATIVE CIRCLED NUMBER FIFTEEN]
+ output[outputPos++] = '1';
+ output[outputPos++] = '5';
+ break;
+
+ case '\u2496': // â’– [NUMBER FIFTEEN FULL STOP]
+ output[outputPos++] = '1';
+ output[outputPos++] = '5';
+ output[outputPos++] = '.';
+ break;
+
+ case '\u2482': // â’‚ [PARENTHESIZED NUMBER FIFTEEN]
+ output[outputPos++] = '(';
+ output[outputPos++] = '1';
+ output[outputPos++] = '5';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u246F':
+ // ⑯ [CIRCLED NUMBER SIXTEEN]
+ case '\u24F0': // â“° [NEGATIVE CIRCLED NUMBER SIXTEEN]
+ output[outputPos++] = '1';
+ output[outputPos++] = '6';
+ break;
+
+ case '\u2497': // â’— [NUMBER SIXTEEN FULL STOP]
+ output[outputPos++] = '1';
+ output[outputPos++] = '6';
+ output[outputPos++] = '.';
+ break;
+
+ case '\u2483': // â’ƒ [PARENTHESIZED NUMBER SIXTEEN]
+ output[outputPos++] = '(';
+ output[outputPos++] = '1';
+ output[outputPos++] = '6';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u2470':
+ // â‘° [CIRCLED NUMBER SEVENTEEN]
+ case '\u24F1': // ⓱ [NEGATIVE CIRCLED NUMBER SEVENTEEN]
+ output[outputPos++] = '1';
+ output[outputPos++] = '7';
+ break;
+
+ case '\u2498': // â’˜ [NUMBER SEVENTEEN FULL STOP]
+ output[outputPos++] = '1';
+ output[outputPos++] = '7';
+ output[outputPos++] = '.';
+ break;
+
+ case '\u2484': // â’„ [PARENTHESIZED NUMBER SEVENTEEN]
+ output[outputPos++] = '(';
+ output[outputPos++] = '1';
+ output[outputPos++] = '7';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u2471':
+ // ⑱ [CIRCLED NUMBER EIGHTEEN]
+ case '\u24F2': // ⓲ [NEGATIVE CIRCLED NUMBER EIGHTEEN]
+ output[outputPos++] = '1';
+ output[outputPos++] = '8';
+ break;
+
+ case '\u2499': // â’™ [NUMBER EIGHTEEN FULL STOP]
+ output[outputPos++] = '1';
+ output[outputPos++] = '8';
+ output[outputPos++] = '.';
+ break;
+
+ case '\u2485': // â’… [PARENTHESIZED NUMBER EIGHTEEN]
+ output[outputPos++] = '(';
+ output[outputPos++] = '1';
+ output[outputPos++] = '8';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u2472':
+ // ⑲ [CIRCLED NUMBER NINETEEN]
+ case '\u24F3': // ⓳ [NEGATIVE CIRCLED NUMBER NINETEEN]
+ output[outputPos++] = '1';
+ output[outputPos++] = '9';
+ break;
+
+ case '\u249A': // â’š [NUMBER NINETEEN FULL STOP]
+ output[outputPos++] = '1';
+ output[outputPos++] = '9';
+ output[outputPos++] = '.';
+ break;
+
+ case '\u2486': // â’† [PARENTHESIZED NUMBER NINETEEN]
+ output[outputPos++] = '(';
+ output[outputPos++] = '1';
+ output[outputPos++] = '9';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u2473':
+ // ⑳ [CIRCLED NUMBER TWENTY]
+ case '\u24F4': // â“´ [NEGATIVE CIRCLED NUMBER TWENTY]
+ output[outputPos++] = '2';
+ output[outputPos++] = '0';
+ break;
+
+ case '\u249B': // â’› [NUMBER TWENTY FULL STOP]
+ output[outputPos++] = '2';
+ output[outputPos++] = '0';
+ output[outputPos++] = '.';
+ break;
+
+ case '\u2487': // â’‡ [PARENTHESIZED NUMBER TWENTY]
+ output[outputPos++] = '(';
+ output[outputPos++] = '2';
+ output[outputPos++] = '0';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u00AB':
+ // « [LEFT-POINTING DOUBLE ANGLE QUOTATION MARK]
+ case '\u00BB':
+ // » [RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK]
+ case '\u201C':
+ // “ [LEFT DOUBLE QUOTATION MARK]
+ case '\u201D':
+ // ” [RIGHT DOUBLE QUOTATION MARK]
+ case '\u201E':
+ // „ [DOUBLE LOW-9 QUOTATION MARK]
+ case '\u2033':
+ // ″ [DOUBLE PRIME]
+ case '\u2036':
+ // ‶ [REVERSED DOUBLE PRIME]
+ case '\u275D':
+ // ❝ [HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT]
+ case '\u275E':
+ // ❞ [HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT]
+ case '\u276E':
+ // ❮ [HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT]
+ case '\u276F':
+ // ❯ [HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT]
+ case '\uFF02': // " [FULLWIDTH QUOTATION MARK]
+ output[outputPos++] = '"';
+ break;
+
+ case '\u2018':
+ // ‘ [LEFT SINGLE QUOTATION MARK]
+ case '\u2019':
+ // ’ [RIGHT SINGLE QUOTATION MARK]
+ case '\u201A':
+ // ‚ [SINGLE LOW-9 QUOTATION MARK]
+ case '\u201B':
+ // ‛ [SINGLE HIGH-REVERSED-9 QUOTATION MARK]
+ case '\u2032':
+ // ′ [PRIME]
+ case '\u2035':
+ // ‵ [REVERSED PRIME]
+ case '\u2039':
+ // ‹ [SINGLE LEFT-POINTING ANGLE QUOTATION MARK]
+ case '\u203A':
+ // › [SINGLE RIGHT-POINTING ANGLE QUOTATION MARK]
+ case '\u275B':
+ // ❛ [HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT]
+ case '\u275C':
+ // ❜ [HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT]
+ case '\uFF07': // ' [FULLWIDTH APOSTROPHE]
+ output[outputPos++] = '\'';
+ break;
+
+ case '\u2010':
+ // ‐ [HYPHEN]
+ case '\u2011':
+ // ‑ [NON-BREAKING HYPHEN]
+ case '\u2012':
+ // ‒ [FIGURE DASH]
+ case '\u2013':
+ // – [EN DASH]
+ case '\u2014':
+ // — [EM DASH]
+ case '\u207B':
+ // ⁻ [SUPERSCRIPT MINUS]
+ case '\u208B':
+ // ₋ [SUBSCRIPT MINUS]
+ case '\uFF0D': // － [FULLWIDTH HYPHEN-MINUS]
+ output[outputPos++] = '-';
+ break;
+
+ case '\u2045':
+ // ⁅ [LEFT SQUARE BRACKET WITH QUILL]
+ case '\u2772':
+ // ❲ [LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT]
+ case '\uFF3B': // ï¼» [FULLWIDTH LEFT SQUARE BRACKET]
+ output[outputPos++] = '[';
+ break;
+
+ case '\u2046':
+ // ⁆ [RIGHT SQUARE BRACKET WITH QUILL]
+ case '\u2773':
+ // ❳ [LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT]
+ case '\uFF3D': // ］ [FULLWIDTH RIGHT SQUARE BRACKET]
+ output[outputPos++] = ']';
+ break;
+
+ case '\u207D':
+ // ⁽ [SUPERSCRIPT LEFT PARENTHESIS]
+ case '\u208D':
+ // ₍ [SUBSCRIPT LEFT PARENTHESIS]
+ case '\u2768':
+ // ❨ [MEDIUM LEFT PARENTHESIS ORNAMENT]
+ case '\u276A':
+ // ❪ [MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT]
+ case '\uFF08': // ( [FULLWIDTH LEFT PARENTHESIS]
+ output[outputPos++] = '(';
+ break;
+
+ case '\u2E28': // ⸨ [LEFT DOUBLE PARENTHESIS]
+ output[outputPos++] = '(';
+ output[outputPos++] = '(';
+ break;
+
+ case '\u207E':
+ // ⁾ [SUPERSCRIPT RIGHT PARENTHESIS]
+ case '\u208E':
+ // ₎ [SUBSCRIPT RIGHT PARENTHESIS]
+ case '\u2769':
+ // ❩ [MEDIUM RIGHT PARENTHESIS ORNAMENT]
+ case '\u276B':
+ // ❫ [MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT]
+ case '\uFF09': // ) [FULLWIDTH RIGHT PARENTHESIS]
+ output[outputPos++] = ')';
+ break;
+
+ case '\u2E29': // ⸩ [RIGHT DOUBLE PARENTHESIS]
+ output[outputPos++] = ')';
+ output[outputPos++] = ')';
+ break;
+
+ case '\u276C':
+ // ❬ [MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT]
+ case '\u2770':
+ // ❰ [HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT]
+ case '\uFF1C': // < [FULLWIDTH LESS-THAN SIGN]
+ output[outputPos++] = '<';
+ break;
+
+ case '\u276D':
+ // ❭ [MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT]
+ case '\u2771':
+ // ❱ [HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT]
+ case '\uFF1E': // > [FULLWIDTH GREATER-THAN SIGN]
+ output[outputPos++] = '>';
+ break;
+
+ case '\u2774':
+ // ❴ [MEDIUM LEFT CURLY BRACKET ORNAMENT]
+ case '\uFF5B': // ｛ [FULLWIDTH LEFT CURLY BRACKET]
+ output[outputPos++] = '{';
+ break;
+
+ case '\u2775':
+ // ❵ [MEDIUM RIGHT CURLY BRACKET ORNAMENT]
+ case '\uFF5D': // ｝ [FULLWIDTH RIGHT CURLY BRACKET]
+ output[outputPos++] = '}';
+ break;
+
+ case '\u207A':
+ // ⁺ [SUPERSCRIPT PLUS SIGN]
+ case '\u208A':
+ // ₊ [SUBSCRIPT PLUS SIGN]
+ case '\uFF0B': // + [FULLWIDTH PLUS SIGN]
+ output[outputPos++] = '+';
+ break;
+
+ case '\u207C':
+ // ⁼ [SUPERSCRIPT EQUALS SIGN]
+ case '\u208C':
+ // ₌ [SUBSCRIPT EQUALS SIGN]
+ case '\uFF1D': // ＝ [FULLWIDTH EQUALS SIGN]
+ output[outputPos++] = '=';
+ break;
+
+ case '\uFF01': // ！ [FULLWIDTH EXCLAMATION MARK]
+ output[outputPos++] = '!';
+ break;
+
+ case '\u203C': // ‼ [DOUBLE EXCLAMATION MARK]
+ output[outputPos++] = '!';
+ output[outputPos++] = '!';
+ break;
+
+ case '\u2049': // ⁉ [EXCLAMATION QUESTION MARK]
+ output[outputPos++] = '!';
+ output[outputPos++] = '?';
+ break;
+
+ case '\uFF03': // # [FULLWIDTH NUMBER SIGN]
+ output[outputPos++] = '#';
+ break;
+
+ case '\uFF04': // $ [FULLWIDTH DOLLAR SIGN]
+ output[outputPos++] = '$';
+ break;
+
+ case '\u2052':
+ // ⁒ [COMMERCIAL MINUS SIGN]
+ case '\uFF05': // ％ [FULLWIDTH PERCENT SIGN]
+ output[outputPos++] = '%';
+ break;
+
+ case '\uFF06': // & [FULLWIDTH AMPERSAND]
+ output[outputPos++] = '&';
+ break;
+
+ case '\u204E':
+ // ⁎ [LOW ASTERISK]
+ case '\uFF0A': // * [FULLWIDTH ASTERISK]
+ output[outputPos++] = '*';
+ break;
+
+ case '\uFF0C': // , [FULLWIDTH COMMA]
+ output[outputPos++] = ',';
+ break;
+
+ case '\uFF0E': // . [FULLWIDTH FULL STOP]
+ output[outputPos++] = '.';
+ break;
+
+ case '\u2044':
+ // ⁄ [FRACTION SLASH]
+ case '\uFF0F': // ／ [FULLWIDTH SOLIDUS]
+ output[outputPos++] = '/';
+ break;
+
+ case '\uFF1A': // : [FULLWIDTH COLON]
+ output[outputPos++] = ':';
+ break;
+
+ case '\u204F':
+ // ⁏ [REVERSED SEMICOLON]
+ case '\uFF1B': // ； [FULLWIDTH SEMICOLON]
+ output[outputPos++] = ';';
+ break;
+
+ case '\uFF1F': // ? [FULLWIDTH QUESTION MARK]
+ output[outputPos++] = '?';
+ break;
+
+ case '\u2047': // ⁇ [DOUBLE QUESTION MARK]
+ output[outputPos++] = '?';
+ output[outputPos++] = '?';
+ break;
+
+ case '\u2048': // ⁈ [QUESTION EXCLAMATION MARK]
+ output[outputPos++] = '?';
+ output[outputPos++] = '!';
+ break;
+
+ case '\uFF20': // ＠ [FULLWIDTH COMMERCIAL AT]
+ output[outputPos++] = '@';
+ break;
+
+ case '\uFF3C': // ＼ [FULLWIDTH REVERSE SOLIDUS]
+ output[outputPos++] = '\\';
+ break;
+
+ case '\u2038':
+ // ‸ [CARET]
+ case '\uFF3E': // ＾ [FULLWIDTH CIRCUMFLEX ACCENT]
+ output[outputPos++] = '^';
+ break;
+
+ case '\uFF3F': // _ [FULLWIDTH LOW LINE]
+ output[outputPos++] = '_';
+ break;
+
+ case '\u2053':
+ // ⁓ [SWUNG DASH]
+ case '\uFF5E': // ~ [FULLWIDTH TILDE]
+ output[outputPos++] = '~';
+ break;
+
+ default:
+ output[outputPos++] = c;
+ break;
+
+ }
+ }
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Analysis/Analyzer.cs b/src/core/Analysis/Analyzer.cs
new file mode 100644
index 0000000..cea0ee3
--- /dev/null
+++ b/src/core/Analysis/Analyzer.cs
@@ -0,0 +1,171 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Documents;
+using Lucene.Net.Store;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis
+{
+ /// <summary>An Analyzer builds TokenStreams, which analyze text. It thus represents a
+ /// policy for extracting index terms from text.
+ /// <p/>
+ /// Typical implementations first build a Tokenizer, which breaks the stream of
+ /// characters from the Reader into raw Tokens. One or more TokenFilters may
+ /// then be applied to the output of the Tokenizer.
+ /// </summary>
+ public abstract class Analyzer : IDisposable
+ {
+ /// <summary>Creates a TokenStream which tokenizes all the text in the provided
+ /// Reader. Must be able to handle null field name for
+ /// backward compatibility.
+ /// </summary>
+ public abstract TokenStream TokenStream(String fieldName, System.IO.TextReader reader);
+
+ /// <summary>Creates a TokenStream that is allowed to be re-used
+ /// from the previous time that the same thread called
+ /// this method. Callers that do not need to use more
+ /// than one TokenStream at the same time from this
+ /// analyzer should use this method for better
+ /// performance.
+ /// </summary>
+ public virtual TokenStream ReusableTokenStream(String fieldName, System.IO.TextReader reader)
+ {
+ return TokenStream(fieldName, reader);
+ }
+
+ private CloseableThreadLocal<Object> tokenStreams = new CloseableThreadLocal<Object>();
+ private bool isDisposed;
+
+ /// <summary>Used by Analyzers that implement reusableTokenStream
+ /// to retrieve previously saved TokenStreams for re-use
+ /// by the same thread.
+ /// </summary>
+ protected internal virtual object PreviousTokenStream
+ {
+ get
+ {
+ if (tokenStreams == null)
+ {
+ throw new AlreadyClosedException("this Analyzer is closed");
+ }
+ return tokenStreams.Get();
+ }
+ set
+ {
+ if (tokenStreams == null)
+ {
+ throw new AlreadyClosedException("this Analyzer is closed");
+ }
+ tokenStreams.Set(value);
+ }
+ }
+
+ [Obsolete()]
+ protected internal bool overridesTokenStreamMethod = false;
+
+ /// <deprecated> This is only present to preserve
+ /// back-compat of classes that subclass a core analyzer
+ /// and override tokenStream but not reusableTokenStream
+ /// </deprecated>
+ /// <summary>
+ /// Java uses Class&lt;? extends Analyzer&gt; to constrain <typeparamref name="TClass"/> to
+ /// only Types that inherit from Analyzer. C# does not have a generic type class,
+ /// ie Type&lt;t&gt;. The method signature stays the same, and an exception may
+ /// still be thrown, if the method doesn't exist.
+ /// </summary>
+ [Obsolete("This is only present to preserve back-compat of classes that subclass a core analyzer and override tokenStream but not reusableTokenStream ")]
+ protected internal virtual void SetOverridesTokenStreamMethod<TClass>()
+ where TClass : Analyzer
+ {
+ try
+ {
+ System.Reflection.MethodInfo m = this.GetType().GetMethod("TokenStream", new[] { typeof(string), typeof(System.IO.TextReader) });
+ overridesTokenStreamMethod = m.DeclaringType != typeof(TClass);
+ }
+ catch (MethodAccessException)
+ {
+ // cannot happen, as TClass is constrained to be a subclass of Analyzer
+ overridesTokenStreamMethod = false;
+ }
+ }
+
+
+ /// <summary> Invoked before indexing a Fieldable instance if
+ /// terms have already been added to that field. This allows custom
+ /// analyzers to place an automatic position increment gap between
+ /// Fieldable instances using the same field name. The default value
+ /// position increment gap is 0. With a 0 position increment gap and
+ /// the typical default token position increment of 1, all terms in a field,
+ /// including across Fieldable instances, are in successive positions, allowing
+ /// exact PhraseQuery matches, for instance, across Fieldable instance boundaries.
+ ///
+ /// </summary>
+ /// <param name="fieldName">Fieldable name being indexed.
+ /// </param>
+ /// <returns> position increment gap, added to the next token emitted from <see cref="TokenStream(String,System.IO.TextReader)" />
+ /// </returns>
+ public virtual int GetPositionIncrementGap(String fieldName)
+ {
+ return 0;
+ }
+
+ /// <summary> Just like <see cref="GetPositionIncrementGap" />, except for
+ /// Token offsets instead. By default this returns 1 for
+ /// tokenized fields (as if the fields were joined by an
+ /// extra space character) and 0 for un-tokenized fields.
+ /// This method is only called if the field
+ /// produced at least one token for indexing.
+ ///
+ /// </summary>
+ /// <param name="field">the field just indexed
+ /// </param>
+ /// <returns> offset gap, added to the next token emitted from <see cref="TokenStream(String,System.IO.TextReader)" />
+ /// </returns>
+ public virtual int GetOffsetGap(IFieldable field)
+ {
+ return field.IsTokenized ? 1 : 0;
+ }
+
+ /// <summary>Frees persistent resources used by this Analyzer </summary>
+ public void Close()
+ {
+ Dispose();
+ }
+
+ public virtual void Dispose()
+ {
+ Dispose(true);
+ }
+
+ protected virtual void Dispose(bool disposing)
+ {
+ if (isDisposed) return;
+
+ if (disposing)
+ {
+ if (tokenStreams != null)
+ {
+ tokenStreams.Close();
+ tokenStreams = null;
+ }
+ }
+ isDisposed = true;
+ }
+ }
+} \ No newline at end of file
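A minimal usage sketch (not part of this commit) of how an Analyzer is typically composed from a Tokenizer plus TokenFilters and how its TokenStream is consumed. The class name SimpleLowercaseAnalyzer, the field name "body", and the sample text are illustrative assumptions, not code from this repository.

using System;
using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;

// Hypothetical analyzer: LetterTokenizer splits on non-letters, LowerCaseFilter normalizes case.
public sealed class SimpleLowercaseAnalyzer : Analyzer
{
    public override TokenStream TokenStream(string fieldName, TextReader reader)
    {
        return new LowerCaseFilter(new LetterTokenizer(reader));
    }
}

public static class AnalyzerDemo
{
    public static void Main()
    {
        Analyzer analyzer = new SimpleLowercaseAnalyzer();
        TokenStream ts = analyzer.TokenStream("body", new StringReader("Hello Lucene Net"));
        ITermAttribute term = ts.AddAttribute<ITermAttribute>();
        while (ts.IncrementToken())
        {
            // Prints: hello, lucene, net
            Console.WriteLine(new string(term.TermBuffer(), 0, term.TermLength()));
        }
        ts.End();
        analyzer.Close();
    }
}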
diff --git a/src/core/Analysis/BaseCharFilter.cs b/src/core/Analysis/BaseCharFilter.cs
new file mode 100644
index 0000000..b84fce0
--- /dev/null
+++ b/src/core/Analysis/BaseCharFilter.cs
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// <summary>
+ /// Base utility class for implementing a <see cref="CharFilter" />.
+ /// You subclass this, and then record mappings by calling
+ /// <see cref="AddOffCorrectMap" />, and then invoke the correct
+ /// method to correct an offset.
+ /// </summary>
+ public abstract class BaseCharFilter : CharFilter
+ {
+
+ private int[] offsets;
+ private int[] diffs;
+ private int size = 0;
+
+ protected BaseCharFilter(CharStream @in) : base(@in)
+ {
+ }
+
+ /* Retrieve the corrected offset. */
+ //@Override
+ protected internal override int Correct(int currentOff)
+ {
+ if (offsets == null || currentOff < offsets[0])
+ {
+ return currentOff;
+ }
+
+ int hi = size - 1;
+ if (currentOff >= offsets[hi])
+ return currentOff + diffs[hi];
+
+ int lo = 0;
+ int mid = -1;
+
+ while (hi >= lo)
+ {
+ mid = Number.URShift(lo + hi, 1);
+ if (currentOff < offsets[mid])
+ hi = mid - 1;
+ else if (currentOff > offsets[mid])
+ lo = mid + 1;
+ else
+ return currentOff + diffs[mid];
+ }
+
+ if (currentOff < offsets[mid])
+ return mid == 0 ? currentOff : currentOff + diffs[mid - 1];
+ return currentOff + diffs[mid];
+ }
+
+ protected int LastCumulativeDiff
+ {
+ get
+ {
+ return offsets == null ? 0 : diffs[size - 1];
+ }
+ }
+
+ [Obsolete("Use LastCumulativeDiff property instead")]
+ protected int GetLastCumulativeDiff()
+ {
+ return LastCumulativeDiff;
+ }
+
+ protected void AddOffCorrectMap(int off, int cumulativeDiff)
+ {
+ if (offsets == null)
+ {
+ offsets = new int[64];
+ diffs = new int[64];
+ }
+ else if (size == offsets.Length)
+ {
+ offsets = ArrayUtil.Grow(offsets);
+ diffs = ArrayUtil.Grow(diffs);
+ }
+
+ offsets[size] = off;
+ diffs[size++] = cumulativeDiff;
+ }
+ }
+} \ No newline at end of file
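A hedged sketch (an assumption, not repository code) of how a BaseCharFilter subclass records offset corrections: each time a character is dropped from the output, AddOffCorrectMap is given the output offset and the cumulative difference, so Correct can later map token offsets back to the original reader. The class name and the carriage-return-stripping behaviour are purely illustrative.

using Lucene.Net.Analysis;

// Hypothetical filter that drops '\r' characters while keeping offsets correctable.
public sealed class StripCarriageReturnFilter : BaseCharFilter
{
    private int removed;  // how many characters have been dropped so far
    private int outPos;   // current position in the corrected (output) stream

    public StripCarriageReturnFilter(CharStream input) : base(input)
    {
    }

    public override int Read(char[] cbuf, int off, int len)
    {
        char[] tmp = new char[len];
        int n = base.Read(tmp, 0, len);
        if (n <= 0)
            return n;

        int written = 0;
        for (int i = 0; i < n; i++)
        {
            if (tmp[i] == '\r')
            {
                removed++;
                // From output offset (outPos + written) onwards, original offsets are larger by 'removed'.
                AddOffCorrectMap(outPos + written, removed);
            }
            else
            {
                cbuf[off + written++] = tmp[i];
            }
        }
        outPos += written;
        // If everything in this chunk was dropped, read again so that 0 still means end of stream.
        return written > 0 ? written : Read(cbuf, off, len);
    }
}

// Usage sketch: wrap any TextReader via CharReader.Get to obtain a CharStream, e.g.
// var stream = new StripCarriageReturnFilter(CharReader.Get(new System.IO.StringReader("a\r\nb")));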
diff --git a/src/core/Analysis/CachingTokenFilter.cs b/src/core/Analysis/CachingTokenFilter.cs
new file mode 100644
index 0000000..c5f7694
--- /dev/null
+++ b/src/core/Analysis/CachingTokenFilter.cs
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Lucene.Net.Analysis
+{
+
+ /// <summary> This class can be used if the token attributes of a TokenStream
+ /// are intended to be consumed more than once. It caches
+ /// all token attribute states locally in a List.
+ ///
+ /// <p/>CachingTokenFilter implements the optional method
+ /// <see cref="TokenStream.Reset()" />, which repositions the
+ /// stream to the first Token.
+ /// </summary>
+ public sealed class CachingTokenFilter : TokenFilter
+ {
+ private System.Collections.Generic.LinkedList<State> cache = null;
+ private System.Collections.Generic.IEnumerator<State> iterator = null;
+ private State finalState;
+
+ public CachingTokenFilter(TokenStream input):base(input)
+ {
+ }
+
+ public override bool IncrementToken()
+ {
+ if (cache == null)
+ {
+ // fill cache lazily
+ cache = new System.Collections.Generic.LinkedList<State>();
+ FillCache();
+ iterator = cache.GetEnumerator();
+ }
+
+ if (!iterator.MoveNext())
+ {
+ // the cache is exhausted, return false
+ return false;
+ }
+ // Since the TokenFilter can be reset, the tokens need to be preserved as immutable.
+ RestoreState(iterator.Current);
+ return true;
+ }
+
+ public override void End()
+ {
+ if (finalState != null)
+ {
+ RestoreState(finalState);
+ }
+ }
+
+ public override void Reset()
+ {
+ if (cache != null)
+ {
+ iterator = cache.GetEnumerator();
+ }
+ }
+
+ private void FillCache()
+ {
+ while (input.IncrementToken())
+ {
+ cache.AddLast(CaptureState());
+ }
+ // capture final state
+ input.End();
+ finalState = CaptureState();
+ }
+ }
+} \ No newline at end of file
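An illustrative sketch (not from the commit) of consuming the same token sequence twice through CachingTokenFilter; the sample text and variable names are assumptions.

using System;
using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;

public static class CachingTokenFilterDemo
{
    public static void Main()
    {
        TokenStream source = new LowerCaseFilter(new LetterTokenizer(new StringReader("One Two Three")));
        var cached = new CachingTokenFilter(source);
        ITermAttribute term = cached.AddAttribute<ITermAttribute>();

        // First pass: the cache is filled lazily from the underlying stream.
        while (cached.IncrementToken())
            Console.WriteLine("pass 1: " + new string(term.TermBuffer(), 0, term.TermLength()));

        // Reset repositions the cached stream to the first token for a second pass.
        cached.Reset();
        while (cached.IncrementToken())
            Console.WriteLine("pass 2: " + new string(term.TermBuffer(), 0, term.TermLength()));
    }
}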
diff --git a/src/core/Analysis/CharArraySet.cs b/src/core/Analysis/CharArraySet.cs
new file mode 100644
index 0000000..e7df0ba
--- /dev/null
+++ b/src/core/Analysis/CharArraySet.cs
@@ -0,0 +1,517 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections;
+using System.Linq;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis
+{
+ /// <summary> A simple class that stores Strings as char[]'s in a
+ /// hash table. Note that this is not a general purpose
+ /// class. For example, it cannot remove items from the
+ /// set, nor does it resize its hash table to be smaller,
+ /// etc. It is designed to be quick to test if a char[]
+ /// is in the set without the necessity of converting it
+ /// to a String first.
+ /// <p/>
+ /// <em>Please note:</em> This class implements <see cref="System.Collections.Generic.ISet{T}"/> but
+ /// does not behave like it should in all cases. The generic type is
+ /// <see cref="System.Collections.Generic.ICollection{T}"/>, because you can add any object to it,
+ /// that has a string representation. The add methods will use
+ /// <see cref="object.ToString()"/> and store the result using a <see cref="char"/>
+ /// buffer. The same behaviour have the <see cref="Contains(object)"/> methods.
+ /// The <see cref="GetEnumerator"/> method returns an <see cref="string"/> IEnumerable.
+ /// For type safety also {@link #stringIterator()} is provided.
+ /// </summary>
+ // TODO: java uses wildcards, .net doesn't have this, easiest way is to
+ // make the entire class generic. Ultimately, though, since this
+ // works with strings, I can't think of a reason not to just declare
+ // this as an ISet<string>.
+ public class CharArraySet : ISet<string>
+ {
+ bool _ReadOnly = false;
+ const int INIT_SIZE = 8;
+ char[][] _Entries;
+ int _Count;
+ bool _IgnoreCase;
+ public static CharArraySet EMPTY_SET = UnmodifiableSet(new CharArraySet(0, false));
+
+ private void Init(int startSize, bool ignoreCase)
+ {
+ this._IgnoreCase = ignoreCase;
+ int size = INIT_SIZE;
+ while (startSize + (startSize >> 2) > size)
+ size <<= 1;
+ _Entries = new char[size][];
+ }
+
+ /// <summary>Create set with enough capacity to hold startSize
+ /// terms
+ /// </summary>
+ public CharArraySet(int startSize, bool ignoreCase)
+ {
+ Init(startSize, ignoreCase);
+ }
+
+ public CharArraySet(IEnumerable<string> c, bool ignoreCase)
+ {
+ Init(c.Count(), ignoreCase);
+ AddItems(c);
+ }
+
+ /// <summary>Create set from a Collection of char[] or String </summary>
+ public CharArraySet(IEnumerable<object> c, bool ignoreCase)
+ {
+ Init(c.Count(), ignoreCase);
+ AddItems(c);
+ }
+
+ private void AddItems<T>(IEnumerable<T> items)
+ {
+ foreach(var item in items)
+ {
+ Add(item.ToString());
+ }
+ }
+
+ /// <summary>Create set from entries </summary>
+ private CharArraySet(char[][] entries, bool ignoreCase, int count)
+ {
+ this._Entries = entries;
+ this._IgnoreCase = ignoreCase;
+ this._Count = count;
+ }
+
+ /// <summary>true if the <c>len</c> chars of <c>text</c> starting at <c>off</c>
+ /// are in the set
+ /// </summary>
+ public virtual bool Contains(char[] text, int off, int len)
+ {
+ return _Entries[GetSlot(text, off, len)] != null;
+ }
+
+ public virtual bool Contains(string text)
+ {
+ return _Entries[GetSlot(text)] != null;
+ }
+
+
+ private int GetSlot(char[] text, int off, int len)
+ {
+ int code = GetHashCode(text, off, len);
+ int pos = code & (_Entries.Length - 1);
+ char[] text2 = _Entries[pos];
+ if (text2 != null && !Equals(text, off, len, text2))
+ {
+ int inc = ((code >> 8) + code) | 1;
+ do
+ {
+ code += inc;
+ pos = code & (_Entries.Length - 1);
+ text2 = _Entries[pos];
+ }
+ while (text2 != null && !Equals(text, off, len, text2));
+ }
+ return pos;
+ }
+
+ /// <summary>Returns true if the String is in the set </summary>
+ private int GetSlot(string text)
+ {
+ int code = GetHashCode(text);
+ int pos = code & (_Entries.Length - 1);
+ char[] text2 = _Entries[pos];
+ if (text2 != null && !Equals(text, text2))
+ {
+ int inc = ((code >> 8) + code) | 1;
+ do
+ {
+ code += inc;
+ pos = code & (_Entries.Length - 1);
+ text2 = _Entries[pos];
+ }
+ while (text2 != null && !Equals(text, text2));
+ }
+ return pos;
+ }
+
+ public bool Add(string text)
+ {
+ if (_ReadOnly) throw new NotSupportedException();
+ return Add(text.ToCharArray());
+ }
+
+ /// <summary>Add this char[] directly to the set.
+ /// If ignoreCase is true for this Set, the text array will be directly modified.
+ /// The user should never modify this text array after calling this method.
+ /// </summary>
+ public bool Add(char[] text)
+ {
+ if (_ReadOnly) throw new NotSupportedException();
+
+ if (_IgnoreCase)
+ for (int i = 0; i < text.Length; i++)
+ text[i] = Char.ToLower(text[i]);
+ int slot = GetSlot(text, 0, text.Length);
+ if (_Entries[slot] != null)
+ return false;
+ _Entries[slot] = text;
+ _Count++;
+
+ if (_Count + (_Count >> 2) > _Entries.Length)
+ {
+ Rehash();
+ }
+
+ return true;
+ }
+
+ private bool Equals(char[] text1, int off, int len, char[] text2)
+ {
+ if (len != text2.Length)
+ return false;
+ if (_IgnoreCase)
+ {
+ for (int i = 0; i < len; i++)
+ {
+ if (char.ToLower(text1[off + i]) != text2[i])
+ return false;
+ }
+ }
+ else
+ {
+ for (int i = 0; i < len; i++)
+ {
+ if (text1[off + i] != text2[i])
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private bool Equals(string text1, char[] text2)
+ {
+ int len = text1.Length;
+ if (len != text2.Length)
+ return false;
+ if (_IgnoreCase)
+ {
+ for (int i = 0; i < len; i++)
+ {
+ if (char.ToLower(text1[i]) != text2[i])
+ return false;
+ }
+ }
+ else
+ {
+ for (int i = 0; i < len; i++)
+ {
+ if (text1[i] != text2[i])
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private void Rehash()
+ {
+ int newSize = 2 * _Entries.Length;
+ char[][] oldEntries = _Entries;
+ _Entries = new char[newSize][];
+
+ for (int i = 0; i < oldEntries.Length; i++)
+ {
+ char[] text = oldEntries[i];
+ if (text != null)
+ {
+ // todo: could be faster... no need to compare strings on collision
+ _Entries[GetSlot(text, 0, text.Length)] = text;
+ }
+ }
+ }
+
+ private int GetHashCode(char[] text, int offset, int len)
+ {
+ int code = 0;
+ int stop = offset + len;
+ if (_IgnoreCase)
+ {
+ for (int i = offset; i < stop; i++)
+ {
+ code = code * 31 + char.ToLower(text[i]);
+ }
+ }
+ else
+ {
+ for (int i = offset; i < stop; i++)
+ {
+ code = code * 31 + text[i];
+ }
+ }
+ return code;
+ }
+
+ private int GetHashCode(string text)
+ {
+ int code = 0;
+ int len = text.Length;
+ if (_IgnoreCase)
+ {
+ for (int i = 0; i < len; i++)
+ {
+ code = code * 31 + char.ToLower(text[i]);
+ }
+ }
+ else
+ {
+ for (int i = 0; i < len; i++)
+ {
+ code = code * 31 + text[i];
+ }
+ }
+ return code;
+ }
+
+ public int Count
+ {
+ get { return _Count; }
+ }
+
+ public bool IsEmpty
+ {
+ get { return _Count == 0; }
+ }
+
+ public bool Contains(object item)
+ {
+ var text = item as char[];
+ return text != null ? Contains(text, 0, text.Length) : Contains(item.ToString());
+ }
+
+ public bool Add(object item)
+ {
+ return Add(item.ToString());
+ }
+
+ void ICollection<string>.Add(string item)
+ {
+ this.Add(item);
+ }
+
+ /// <summary>
+ /// Returns an unmodifiable <see cref="CharArraySet"/>. This allows providing
+ /// unmodifiable views of internal sets for "read-only" use.
+ /// </summary>
+ /// <param name="set">The set for which an unmodifiable view is returned.</param>
+ /// <returns>A new unmodifiable <see cref="CharArraySet"/></returns>
+ /// <throws>ArgumentNullException if the given set is <c>null</c></throws>
+ public static CharArraySet UnmodifiableSet(CharArraySet set)
+ {
+ if(set == null)
+ throw new ArgumentNullException("Given set is null");
+ if (set == EMPTY_SET)
+ return EMPTY_SET;
+ if (set._ReadOnly)
+ return set;
+
+ var newSet = new CharArraySet(set._Entries, set._IgnoreCase, set.Count) {IsReadOnly = true};
+ return newSet;
+ }
+
+ /// <summary>
+ /// returns a copy of the given set as a <see cref="CharArraySet"/>. If the given set
+ /// is a <see cref="CharArraySet"/> the ignoreCase property will be preserved.
+ /// </summary>
+ /// <param name="set">A set to copy</param>
+ /// <returns>a copy of the given set as a <see cref="CharArraySet"/>. If the given set
+ /// is a <see cref="CharArraySet"/> the ignoreCase property will be preserved.</returns>
+ public static CharArraySet Copy<T>(ISet<T> set)
+ {
+ if (set == null)
+ throw new ArgumentNullException("set", "Given set is null!");
+ if (set == EMPTY_SET)
+ return EMPTY_SET;
+ bool ignoreCase = set is CharArraySet && ((CharArraySet)set)._IgnoreCase;
+ var arrSet = new CharArraySet(set.Count, ignoreCase);
+ arrSet.AddItems(set);
+ return arrSet;
+ }
+
+ public void Clear()
+ {
+ throw new NotSupportedException("Remove not supported!");
+ }
+
+ public bool IsReadOnly
+ {
+ get { return _ReadOnly; }
+ private set { _ReadOnly = value; }
+ }
+
+ /// <summary>Adds all of the elements in the specified collection to this collection </summary>
+ public void UnionWith(IEnumerable<string> other)
+ {
+ if (_ReadOnly) throw new NotSupportedException();
+
+ foreach (string s in other)
+ {
+ Add(s.ToCharArray());
+ }
+ }
+
+ /// <summary>Wrapper that calls UnionWith</summary>
+ public void AddAll(IEnumerable<string> coll)
+ {
+ UnionWith(coll);
+ }
+
+ #region Unneeded methods
+ public void RemoveAll(ICollection<string> c)
+ {
+ throw new NotSupportedException();
+ }
+
+ public void RetainAll(ICollection<string> c)
+ {
+ throw new NotSupportedException();
+ }
+
+ void ICollection<string>.CopyTo(string[] array, int arrayIndex)
+ {
+ throw new NotSupportedException();
+ }
+
+ void ISet<string>.IntersectWith(IEnumerable<string> other)
+ {
+ throw new NotSupportedException();
+ }
+
+ void ISet<string>.ExceptWith(IEnumerable<string> other)
+ {
+ throw new NotSupportedException();
+ }
+
+ void ISet<string>.SymmetricExceptWith(IEnumerable<string> other)
+ {
+ throw new NotSupportedException();
+ }
+
+ bool ISet<string>.IsSubsetOf(IEnumerable<string> other)
+ {
+ throw new NotSupportedException();
+ }
+
+ bool ISet<string>.IsSupersetOf(IEnumerable<string> other)
+ {
+ throw new NotSupportedException();
+ }
+
+ bool ISet<string>.IsProperSupersetOf(IEnumerable<string> other)
+ {
+ throw new NotSupportedException();
+ }
+
+ bool ISet<string>.IsProperSubsetOf(IEnumerable<string> other)
+ {
+ throw new NotSupportedException();
+ }
+
+ bool ISet<string>.Overlaps(IEnumerable<string> other)
+ {
+ throw new NotSupportedException();
+ }
+
+ bool ISet<string>.SetEquals(IEnumerable<string> other)
+ {
+ throw new NotSupportedException();
+ }
+
+ bool ICollection<string>.Remove(string item)
+ {
+ throw new NotSupportedException();
+ }
+ #endregion
+
+ /// <summary>
+ /// The IEnumerator&lt;String&gt; for this set. Strings are constructed on the fly,
+ /// so use <c>NextCharArray</c> for more efficient access
+ /// </summary>
+ public class CharArraySetEnumerator : IEnumerator<string>
+ {
+ readonly CharArraySet _Creator;
+ int pos = -1;
+ char[] cur;
+
+ protected internal CharArraySetEnumerator(CharArraySet creator)
+ {
+ _Creator = creator;
+ }
+
+ public bool MoveNext()
+ {
+ cur = null;
+ pos++;
+ while (pos < _Creator._Entries.Length && (cur = _Creator._Entries[pos]) == null)
+ pos++;
+ return cur != null;
+ }
+
+ /// <summary>do not modify the returned char[] </summary>
+ public char[] NextCharArray()
+ {
+ return cur;
+ }
+
+ public string Current
+ {
+ get { return new string(NextCharArray()); }
+ }
+
+ public void Dispose()
+ {
+ }
+
+ object IEnumerator.Current
+ {
+ get { return new string(NextCharArray()); }
+ }
+
+ public void Reset()
+ {
+ throw new NotImplementedException();
+ }
+ }
+
+ public IEnumerator<string> StringEnumerator()
+ {
+ return new CharArraySetEnumerator(this);
+ }
+
+ public IEnumerator<string> GetEnumerator()
+ {
+ return new CharArraySetEnumerator(this);
+ }
+
+ IEnumerator IEnumerable.GetEnumerator()
+ {
+ return GetEnumerator();
+ }
+ }
+
+} \ No newline at end of file
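A short usage sketch (an assumption, not part of the commit) of case-insensitive membership tests against a char[] slice without allocating a string per lookup; the words used are illustrative.

using System;
using Lucene.Net.Analysis;

public static class CharArraySetDemo
{
    public static void Main()
    {
        // true = ignore case; entries are lower-cased as they are added.
        var stops = new CharArraySet(new[] { "The", "And" }, true);

        char[] buffer = "The quick fox".ToCharArray();
        // Test the first three chars ("The") directly against the set, no string allocated.
        Console.WriteLine(stops.Contains(buffer, 0, 3)); // True
        Console.WriteLine(stops.Contains("fox"));        // False

        // Freeze the set for shared, read-only use.
        CharArraySet readOnly = CharArraySet.UnmodifiableSet(stops);
        Console.WriteLine(readOnly.IsReadOnly);          // True
    }
}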
diff --git a/src/core/Analysis/CharFilter.cs b/src/core/Analysis/CharFilter.cs
new file mode 100644
index 0000000..039f841
--- /dev/null
+++ b/src/core/Analysis/CharFilter.cs
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Lucene.Net.Analysis
+{
+
+ /// <summary> Subclasses of CharFilter can be chained to filter CharStream.
+ /// They can be used as <see cref="System.IO.TextReader" /> with additional offset
+ /// correction. <see cref="Tokenizer" />s will automatically use <see cref="CorrectOffset" />
+ /// if a CharFilter/CharStream subclass is used.
+ ///
+ /// </summary>
+ /// <version> $Id$
+ ///
+ /// </version>
+ public abstract class CharFilter : CharStream
+ {
+ private long currentPosition = -1;
+ private bool isDisposed;
+ protected internal CharStream input;
+
+ protected internal CharFilter(CharStream in_Renamed) : base(in_Renamed)
+ {
+ input = in_Renamed;
+ }
+
+ /// <summary>Subclass may want to override to correct the current offset.</summary>
+ /// <param name="currentOff">current offset</param>
+ /// <returns>corrected offset</returns>
+ protected internal virtual int Correct(int currentOff)
+ {
+ return currentOff;
+ }
+
+ /// <summary> Chains the corrected offset through the input
+ /// CharFilter.
+ /// </summary>
+ public override int CorrectOffset(int currentOff)
+ {
+ return input.CorrectOffset(Correct(currentOff));
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ if (isDisposed) return;
+
+ if (disposing)
+ {
+ if (input != null)
+ {
+ input.Close();
+ }
+ }
+
+ input = null;
+ isDisposed = true;
+ base.Dispose(disposing);
+ }
+
+ public override int Read(System.Char[] cbuf, int off, int len)
+ {
+ return input.Read(cbuf, off, len);
+ }
+
+ public bool MarkSupported()
+ {
+ return input.BaseStream.CanSeek;
+ }
+
+ public void Mark(int readAheadLimit)
+ {
+ currentPosition = input.BaseStream.Position;
+ input.BaseStream.Position = readAheadLimit;
+ }
+
+ public void Reset()
+ {
+ input.BaseStream.Position = currentPosition;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Analysis/CharReader.cs b/src/core/Analysis/CharReader.cs
new file mode 100644
index 0000000..2120bd4
--- /dev/null
+++ b/src/core/Analysis/CharReader.cs
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Lucene.Net.Analysis
+{
+
+ /// <summary> CharReader is a Reader wrapper. It reads chars from
+ /// Reader and outputs <see cref="CharStream" />, defining an
+ /// identify function <see cref="CorrectOffset" /> method that
+ /// simply returns the provided offset.
+ /// </summary>
+ public sealed class CharReader:CharStream
+ {
+ private long currentPosition = -1;
+
+ private bool isDisposed;
+
+ internal System.IO.StreamReader input;
+
+ public static CharStream Get(System.IO.TextReader input)
+ {
+ var charStream = input as CharStream;
+ if (charStream != null)
+ return charStream;
+
+ // {{Aroush-2.9}} isn't there a better (faster) way to do this?
+ var theString = new System.IO.MemoryStream(System.Text.Encoding.UTF8.GetBytes(input.ReadToEnd()));
+ return new CharReader(new System.IO.StreamReader(theString));
+ //return input is CharStream?(CharStream) input:new CharReader(input);
+ }
+
+ private CharReader(System.IO.StreamReader in_Renamed) : base(in_Renamed)
+ {
+ input = in_Renamed;
+ }
+
+ public override int CorrectOffset(int currentOff)
+ {
+ return currentOff;
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ if (isDisposed) return;
+
+ if (disposing)
+ {
+ if (input != null)
+ {
+ input.Close();
+ }
+ }
+
+ input = null;
+ isDisposed = true;
+ base.Dispose(disposing);
+ }
+
+ public override int Read(System.Char[] cbuf, int off, int len)
+ {
+ return input.Read(cbuf, off, len);
+ }
+
+ public bool MarkSupported()
+ {
+ return input.BaseStream.CanSeek;
+ }
+
+ public void Mark(int readAheadLimit)
+ {
+ currentPosition = input.BaseStream.Position;
+ input.BaseStream.Position = readAheadLimit;
+ }
+
+ public void Reset()
+ {
+ input.BaseStream.Position = currentPosition;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Analysis/CharStream.cs b/src/core/Analysis/CharStream.cs
new file mode 100644
index 0000000..0b36fe2
--- /dev/null
+++ b/src/core/Analysis/CharStream.cs
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Lucene.Net.Analysis
+{
+
+ /// <summary> CharStream adds <see cref="CorrectOffset" />
+ /// functionality over <see cref="System.IO.TextReader" />. All Tokenizers accept a
+ /// CharStream instead of <see cref="System.IO.TextReader" /> as input, which enables
+ /// arbitrary character based filtering before tokenization.
+ /// The <see cref="CorrectOffset" /> method fixed offsets to account for
+ /// removal or insertion of characters, so that the offsets
+ /// reported in the tokens match the character offsets of the
+ /// original Reader.
+ /// </summary>
+ public abstract class CharStream : System.IO.StreamReader
+ {
+ protected CharStream(System.IO.StreamReader reader) : base(reader.BaseStream)
+ {
+ }
+
+ /// <summary> Called by CharFilter(s) and Tokenizer to correct token offset.
+ ///
+ /// </summary>
+ /// <param name="currentOff">offset as seen in the output
+ /// </param>
+ /// <returns> corrected offset based on the input
+ /// </returns>
+ public abstract int CorrectOffset(int currentOff);
+ }
+} \ No newline at end of file
diff --git a/src/core/Analysis/CharTokenizer.cs b/src/core/Analysis/CharTokenizer.cs
new file mode 100644
index 0000000..22423ec
--- /dev/null
+++ b/src/core/Analysis/CharTokenizer.cs
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Lucene.Net.Analysis.Tokenattributes;
+using AttributeSource = Lucene.Net.Util.AttributeSource;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// <summary>An abstract base class for simple, character-oriented tokenizers.</summary>
+ public abstract class CharTokenizer:Tokenizer
+ {
+ protected CharTokenizer(System.IO.TextReader input):base(input)
+ {
+ offsetAtt = AddAttribute<IOffsetAttribute>();
+ termAtt = AddAttribute<ITermAttribute>();
+ }
+
+ protected CharTokenizer(AttributeSource source, System.IO.TextReader input):base(source, input)
+ {
+ offsetAtt = AddAttribute<IOffsetAttribute>();
+ termAtt = AddAttribute<ITermAttribute>();
+ }
+
+ protected CharTokenizer(AttributeFactory factory, System.IO.TextReader input):base(factory, input)
+ {
+ offsetAtt = AddAttribute<IOffsetAttribute>();
+ termAtt = AddAttribute<ITermAttribute>();
+ }
+
+ private int offset = 0, bufferIndex = 0, dataLen = 0;
+ private const int MAX_WORD_LEN = 255;
+ private const int IO_BUFFER_SIZE = 4096;
+ private readonly char[] ioBuffer = new char[IO_BUFFER_SIZE];
+
+ private readonly ITermAttribute termAtt;
+ private readonly IOffsetAttribute offsetAtt;
+
+ /// <summary>Returns true iff a character should be included in a token. This
+ /// tokenizer generates as tokens adjacent sequences of characters which
+ /// satisfy this predicate. Characters for which this is false are used to
+ /// define token boundaries and are not included in tokens.
+ /// </summary>
+ protected internal abstract bool IsTokenChar(char c);
+
+ /// <summary>Called on each token character to normalize it before it is added to the
+ /// token. The default implementation does nothing. Subclasses may use this
+ /// to, e.g., lowercase tokens.
+ /// </summary>
+ protected internal virtual char Normalize(char c)
+ {
+ return c;
+ }
+
+ public override bool IncrementToken()
+ {
+ ClearAttributes();
+ int length = 0;
+ int start = bufferIndex;
+ char[] buffer = termAtt.TermBuffer();
+ while (true)
+ {
+
+ if (bufferIndex >= dataLen)
+ {
+ offset += dataLen;
+ dataLen = input.Read(ioBuffer, 0, ioBuffer.Length);
+ if (dataLen <= 0)
+ {
+ dataLen = 0; // so next offset += dataLen won't decrement offset
+ if (length > 0)
+ break;
+ return false;
+ }
+ bufferIndex = 0;
+ }
+
+ char c = ioBuffer[bufferIndex++];
+
+ if (IsTokenChar(c))
+ {
+ // if it's a token char
+
+ if (length == 0)
+ // start of token
+ start = offset + bufferIndex - 1;
+ else if (length == buffer.Length)
+ buffer = termAtt.ResizeTermBuffer(1 + length);
+
+ buffer[length++] = Normalize(c); // buffer it, normalized
+
+ if (length == MAX_WORD_LEN)
+ // buffer overflow!
+ break;
+ }
+ else if (length > 0)
+ // at non-Letter w/ chars
+ break; // return 'em
+ }
+
+ termAtt.SetTermLength(length);
+ offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(start + length));
+ return true;
+ }
+
+ public override void End()
+ {
+ // set final offset
+ int finalOffset = CorrectOffset(offset);
+ offsetAtt.SetOffset(finalOffset, finalOffset);
+ }
+
+ public override void Reset(System.IO.TextReader input)
+ {
+ base.Reset(input);
+ bufferIndex = 0;
+ offset = 0;
+ dataLen = 0;
+ }
+ }
+} \ No newline at end of file
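A hedged sketch (not repository code) of a CharTokenizer subclass: IsTokenChar decides which characters belong to a token, so this hypothetical tokenizer emits maximal runs of digits. Note that an override of a protected internal member from another assembly is declared protected.

using System;
using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;

public sealed class DigitTokenizer : CharTokenizer
{
    public DigitTokenizer(TextReader input) : base(input)
    {
    }

    // Only digits are token characters; everything else is a boundary.
    protected override bool IsTokenChar(char c)
    {
        return char.IsDigit(c);
    }
}

public static class DigitTokenizerDemo
{
    public static void Main()
    {
        var ts = new DigitTokenizer(new StringReader("order 42, items 7 and 1337"));
        ITermAttribute term = ts.AddAttribute<ITermAttribute>();
        while (ts.IncrementToken())
            Console.WriteLine(new string(term.TermBuffer(), 0, term.TermLength())); // 42, 7, 1337
    }
}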
diff --git a/src/core/Analysis/ISOLatin1AccentFilter.cs b/src/core/Analysis/ISOLatin1AccentFilter.cs
new file mode 100644
index 0000000..5fd839e
--- /dev/null
+++ b/src/core/Analysis/ISOLatin1AccentFilter.cs
@@ -0,0 +1,344 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Analysis.Tokenattributes;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// <summary> A filter that replaces accented characters in the ISO Latin 1 character set
+ /// (ISO-8859-1) by their unaccented equivalent. The case will not be altered.
+ /// <p/>
+ /// For instance, '&#224;' will be replaced by 'a'.
+ /// <p/>
+ ///
+ /// </summary>
+ /// <deprecated> If you build a new index, use <see cref="ASCIIFoldingFilter"/>
+ /// which covers a superset of Latin 1.
+ /// This class is included for use with existing indexes and will be removed
+ /// in a future release (possible Lucene 4.0)
+ /// </deprecated>
+ [Obsolete("If you build a new index, use ASCIIFoldingFilter which covers a superset of Latin 1. This class is included for use with existing indexes and will be removed in a future release (possible Lucene 4.0).")]
+ public class ISOLatin1AccentFilter : TokenFilter
+ {
+ public ISOLatin1AccentFilter(TokenStream input):base(input)
+ {
+ termAtt = AddAttribute<ITermAttribute>();
+ }
+
+ private char[] output = new char[256];
+ private int outputPos;
+ private readonly ITermAttribute termAtt;
+
+ public override bool IncrementToken()
+ {
+ if (input.IncrementToken())
+ {
+ char[] buffer = termAtt.TermBuffer();
+ int length = termAtt.TermLength();
+ // If no characters actually require rewriting then we
+ // just return token as-is:
+ for (int i = 0; i < length; i++)
+ {
+ char c = buffer[i];
+ if (c >= '\u00c0' && c <= '\uFB06')
+ {
+ RemoveAccents(buffer, length);
+ termAtt.SetTermBuffer(output, 0, outputPos);
+ break;
+ }
+ }
+ return true;
+ }
+ return false;
+ }
+
+ /// <summary> To replace accented characters in a String by unaccented equivalents.</summary>
+ public void RemoveAccents(char[] input, int length)
+ {
+
+ // Worst-case length required:
+ int maxSizeNeeded = 2 * length;
+
+ int size = output.Length;
+ while (size < maxSizeNeeded)
+ size *= 2;
+
+ if (size != output.Length)
+ output = new char[size];
+
+ outputPos = 0;
+
+ int pos = 0;
+
+ for (int i = 0; i < length; i++, pos++)
+ {
+ char c = input[pos];
+
+ // Quick test: if it's not in range then just keep
+ // current character
+ if (c < '\u00c0' || c > '\uFB06')
+ output[outputPos++] = c;
+ else
+ {
+ switch (c)
+ {
+
+ case '\u00C0':
+ // À
+ case '\u00C1':
+ // Á
+ case '\u00C2':
+ // Â
+ case '\u00C3':
+ // Ã
+ case '\u00C4':
+ // Ä
+ case '\u00C5': // Å
+ output[outputPos++] = 'A';
+ break;
+
+ case '\u00C6': // Æ
+ output[outputPos++] = 'A';
+ output[outputPos++] = 'E';
+ break;
+
+ case '\u00C7': // Ç
+ output[outputPos++] = 'C';
+ break;
+
+ case '\u00C8':
+ // È
+ case '\u00C9':
+ // É
+ case '\u00CA':
+ // Ê
+ case '\u00CB': // Ë
+ output[outputPos++] = 'E';
+ break;
+
+ case '\u00CC':
+ // Ì
+ case '\u00CD':
+ // Í
+ case '\u00CE':
+ // Î
+ case '\u00CF': // Ï
+ output[outputPos++] = 'I';
+ break;
+
+ case '\u0132': // IJ
+ output[outputPos++] = 'I';
+ output[outputPos++] = 'J';
+ break;
+
+ case '\u00D0': // Ð
+ output[outputPos++] = 'D';
+ break;
+
+ case '\u00D1': // Ñ
+ output[outputPos++] = 'N';
+ break;
+
+ case '\u00D2':
+ // Ò
+ case '\u00D3':
+ // Ó
+ case '\u00D4':
+ // Ô
+ case '\u00D5':
+ // Õ
+ case '\u00D6':
+ // Ö
+ case '\u00D8': // Ø
+ output[outputPos++] = 'O';
+ break;
+
+ case '\u0152': // Œ
+ output[outputPos++] = 'O';
+ output[outputPos++] = 'E';
+ break;
+
+ case '\u00DE': // Þ
+ output[outputPos++] = 'T';
+ output[outputPos++] = 'H';
+ break;
+
+ case '\u00D9':
+ // Ù
+ case '\u00DA':
+ // Ú
+ case '\u00DB':
+ // Û
+ case '\u00DC': // Ü
+ output[outputPos++] = 'U';
+ break;
+
+ case '\u00DD':
+ // Ý
+ case '\u0178': // Ÿ
+ output[outputPos++] = 'Y';
+ break;
+
+ case '\u00E0':
+ // à
+ case '\u00E1':
+ // á
+ case '\u00E2':
+ // â
+ case '\u00E3':
+ // ã
+ case '\u00E4':
+ // ä
+ case '\u00E5': // å
+ output[outputPos++] = 'a';
+ break;
+
+ case '\u00E6': // æ
+ output[outputPos++] = 'a';
+ output[outputPos++] = 'e';
+ break;
+
+ case '\u00E7': // ç
+ output[outputPos++] = 'c';
+ break;
+
+ case '\u00E8':
+ // è
+ case '\u00E9':
+ // é
+ case '\u00EA':
+ // ê
+ case '\u00EB': // ë
+ output[outputPos++] = 'e';
+ break;
+
+ case '\u00EC':
+ // ì
+ case '\u00ED':
+ // í
+ case '\u00EE':
+ // î
+ case '\u00EF': // ï
+ output[outputPos++] = 'i';
+ break;
+
+ case '\u0133': // ij
+ output[outputPos++] = 'i';
+ output[outputPos++] = 'j';
+ break;
+
+ case '\u00F0': // ð
+ output[outputPos++] = 'd';
+ break;
+
+ case '\u00F1': // ñ
+ output[outputPos++] = 'n';
+ break;
+
+ case '\u00F2':
+ // ò
+ case '\u00F3':
+ // ó
+ case '\u00F4':
+ // ô
+ case '\u00F5':
+ // õ
+ case '\u00F6':
+ // ö
+ case '\u00F8': // ø
+ output[outputPos++] = 'o';
+ break;
+
+ case '\u0153': // œ
+ output[outputPos++] = 'o';
+ output[outputPos++] = 'e';
+ break;
+
+ case '\u00DF': // ß
+ output[outputPos++] = 's';
+ output[outputPos++] = 's';
+ break;
+
+ case '\u00FE': // þ
+ output[outputPos++] = 't';
+ output[outputPos++] = 'h';
+ break;
+
+ case '\u00F9':
+ // ù
+ case '\u00FA':
+ // ú
+ case '\u00FB':
+ // û
+ case '\u00FC': // ü
+ output[outputPos++] = 'u';
+ break;
+
+ case '\u00FD':
+ // ý
+ case '\u00FF': // ÿ
+ output[outputPos++] = 'y';
+ break;
+
+ case '\uFB00': // ff
+ output[outputPos++] = 'f';
+ output[outputPos++] = 'f';
+ break;
+
+ case '\uFB01': // ﬁ
+ output[outputPos++] = 'f';
+ output[outputPos++] = 'i';
+ break;
+
+ case '\uFB02': // fl
+ output[outputPos++] = 'f';
+ output[outputPos++] = 'l';
+ break;
+ // following 2 are commented as they can break the maxSizeNeeded (and doing *3 could be expensive)
+ // case '\uFB03': // ffi
+ // output[outputPos++] = 'f';
+ // output[outputPos++] = 'f';
+ // output[outputPos++] = 'i';
+ // break;
+ // case '\uFB04': // ffl
+ // output[outputPos++] = 'f';
+ // output[outputPos++] = 'f';
+ // output[outputPos++] = 'l';
+ // break;
+
+ case '\uFB05': // ſt
+ output[outputPos++] = 'f';
+ output[outputPos++] = 't';
+ break;
+
+ case '\uFB06': // st
+ output[outputPos++] = 's';
+ output[outputPos++] = 't';
+ break;
+
+ default:
+ output[outputPos++] = c;
+ break;
+
+ }
+ }
+ }
+ }
+ }
+} \ No newline at end of file
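A usage sketch (illustrative, not from the commit): folding accented Latin-1 input to its unaccented form. New code would use ASCIIFoldingFilter instead; the deprecated filter is shown only because this is its file, and compiling this produces an obsolescence warning. The sample text is an assumption.

using System;
using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;

public static class AccentFoldingDemo
{
    public static void Main()
    {
        TokenStream ts = new ISOLatin1AccentFilter(new LetterTokenizer(new StringReader("Café Zürich")));
        ITermAttribute term = ts.AddAttribute<ITermAttribute>();
        while (ts.IncrementToken())
            Console.WriteLine(new string(term.TermBuffer(), 0, term.TermLength())); // Cafe, Zurich
    }
}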
diff --git a/src/core/Analysis/KeywordAnalyzer.cs b/src/core/Analysis/KeywordAnalyzer.cs
new file mode 100644
index 0000000..116babb
--- /dev/null
+++ b/src/core/Analysis/KeywordAnalyzer.cs
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Lucene.Net.Analysis
+{
+
+ /// <summary> "Tokenizes" the entire stream as a single token. This is useful
+ /// for data like zip codes, ids, and some product names.
+ /// </summary>
+ public class KeywordAnalyzer:Analyzer
+ {
+ public KeywordAnalyzer()
+ {
+ SetOverridesTokenStreamMethod<KeywordAnalyzer>();
+ }
+ public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
+ {
+ return new KeywordTokenizer(reader);
+ }
+ public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
+ {
+ if (overridesTokenStreamMethod)
+ {
+ // LUCENE-1678: force fallback to tokenStream() if we
+ // have been subclassed and that subclass overrides
+ // tokenStream but not reusableTokenStream
+ return TokenStream(fieldName, reader);
+ }
+ var tokenizer = (Tokenizer) PreviousTokenStream;
+ if (tokenizer == null)
+ {
+ tokenizer = new KeywordTokenizer(reader);
+ PreviousTokenStream = tokenizer;
+ }
+ else
+ tokenizer.Reset(reader);
+ return tokenizer;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Analysis/KeywordTokenizer.cs b/src/core/Analysis/KeywordTokenizer.cs
new file mode 100644
index 0000000..f97ff95
--- /dev/null
+++ b/src/core/Analysis/KeywordTokenizer.cs
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Lucene.Net.Analysis.Tokenattributes;
+using AttributeSource = Lucene.Net.Util.AttributeSource;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// <summary> Emits the entire input as a single token.</summary>
+ public sealed class KeywordTokenizer:Tokenizer
+ {
+
+ private const int DEFAULT_BUFFER_SIZE = 256;
+
+ private bool done;
+ private int finalOffset;
+ private ITermAttribute termAtt;
+ private IOffsetAttribute offsetAtt;
+
+ public KeywordTokenizer(System.IO.TextReader input):this(input, DEFAULT_BUFFER_SIZE)
+ {
+ }
+
+ public KeywordTokenizer(System.IO.TextReader input, int bufferSize):base(input)
+ {
+ Init(bufferSize);
+ }
+
+ public KeywordTokenizer(AttributeSource source, System.IO.TextReader input, int bufferSize):base(source, input)
+ {
+ Init(bufferSize);
+ }
+
+ public KeywordTokenizer(AttributeFactory factory, System.IO.TextReader input, int bufferSize):base(factory, input)
+ {
+ Init(bufferSize);
+ }
+
+ private void Init(int bufferSize)
+ {
+ this.done = false;
+ termAtt = AddAttribute<ITermAttribute>();
+ offsetAtt = AddAttribute<IOffsetAttribute>();
+ termAtt.ResizeTermBuffer(bufferSize);
+ }
+
+ public override bool IncrementToken()
+ {
+ if (!done)
+ {
+ ClearAttributes();
+ done = true;
+ int upto = 0;
+ char[] buffer = termAtt.TermBuffer();
+ while (true)
+ {
+ int length = input.Read(buffer, upto, buffer.Length - upto);
+ if (length == 0)
+ break;
+ upto += length;
+ if (upto == buffer.Length)
+ buffer = termAtt.ResizeTermBuffer(1 + buffer.Length);
+ }
+ termAtt.SetTermLength(upto);
+ finalOffset = CorrectOffset(upto);
+ offsetAtt.SetOffset(CorrectOffset(0), finalOffset);
+ return true;
+ }
+ return false;
+ }
+
+ public override void End()
+ {
+ // set final offset
+ offsetAtt.SetOffset(finalOffset, finalOffset);
+ }
+
+ public override void Reset(System.IO.TextReader input)
+ {
+ base.Reset(input);
+ this.done = false;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Analysis/LengthFilter.cs b/src/core/Analysis/LengthFilter.cs
new file mode 100644
index 0000000..c4f60ad
--- /dev/null
+++ b/src/core/Analysis/LengthFilter.cs
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Lucene.Net.Analysis.Tokenattributes;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// <summary>Removes words that are too long or too short from the stream.</summary>
+ public sealed class LengthFilter:TokenFilter
+ {
+
+ internal int min;
+ internal int max;
+
+ private readonly ITermAttribute termAtt;
+
+ /// <summary> Build a filter that removes words that are too long or too
+ /// short from the text.
+ /// </summary>
+ public LengthFilter(TokenStream in_Renamed, int min, int max)
+ : base(in_Renamed)
+ {
+ this.min = min;
+ this.max = max;
+ termAtt = AddAttribute<ITermAttribute>();
+ }
+
+ /// <summary> Returns the next input Token whose term is of the right length</summary>
+ public override bool IncrementToken()
+ {
+ // return the first non-stop word found
+ while (input.IncrementToken())
+ {
+ var len = termAtt.TermLength();
+ if (len >= min && len <= max)
+ {
+ return true;
+ }
+ // note: else we ignore it but should we index each part of it?
+ }
+ // reached EOS -- return false
+ return false;
+ }
+ }
+} \ No newline at end of file
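A brief sketch (an assumption, not repository code) of chaining LengthFilter behind a tokenizer so only terms between min and max characters survive; the bounds and sample text are illustrative.

using System;
using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;

public static class LengthFilterDemo
{
    public static void Main()
    {
        TokenStream ts = new LengthFilter(
            new LowerCaseFilter(new LetterTokenizer(new StringReader("a quick brown foxes jumped"))),
            3, 5); // keep terms of 3 to 5 characters
        ITermAttribute term = ts.AddAttribute<ITermAttribute>();
        while (ts.IncrementToken())
            Console.WriteLine(new string(term.TermBuffer(), 0, term.TermLength())); // quick, brown, foxes
    }
}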
diff --git a/src/core/Analysis/LetterTokenizer.cs b/src/core/Analysis/LetterTokenizer.cs
new file mode 100644
index 0000000..77629a8
--- /dev/null
+++ b/src/core/Analysis/LetterTokenizer.cs
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using AttributeSource = Lucene.Net.Util.AttributeSource;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// <summary>A LetterTokenizer is a tokenizer that divides text at non-letters. That's
+ /// to say, it defines tokens as maximal strings of adjacent letters, as defined
+ /// by the <see cref="char.IsLetter(char)" /> predicate.
+ /// Note: this does a decent job for most European languages, but does a terrible
+ /// job for some Asian languages, where words are not separated by spaces.
+ /// </summary>
+
+ public class LetterTokenizer:CharTokenizer
+ {
+ /// <summary>Construct a new LetterTokenizer. </summary>
+ public LetterTokenizer(System.IO.TextReader @in):base(@in)
+ {
+ }
+
+ /// <summary>Construct a new LetterTokenizer using a given <see cref="AttributeSource" />. </summary>
+ public LetterTokenizer(AttributeSource source, System.IO.TextReader @in)
+ : base(source, @in)
+ {
+ }
+
+ /// <summary>Construct a new LetterTokenizer using a given <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory" />. </summary>
+ public LetterTokenizer(AttributeFactory factory, System.IO.TextReader @in)
+ : base(factory, @in)
+ {
+ }
+
+ /// <summary>Collects only characters which satisfy
+ /// <see cref="char.IsLetter(char)" />.
+ /// </summary>
+ protected internal override bool IsTokenChar(char c)
+ {
+ return System.Char.IsLetter(c);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Analysis/LowerCaseFilter.cs b/src/core/Analysis/LowerCaseFilter.cs
new file mode 100644
index 0000000..cad0197
--- /dev/null
+++ b/src/core/Analysis/LowerCaseFilter.cs
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Lucene.Net.Analysis.Tokenattributes;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// <summary>Normalizes token text to lower case.</summary>
+ public sealed class LowerCaseFilter:TokenFilter
+ {
+ public LowerCaseFilter(TokenStream @in)
+ : base(@in)
+ {
+ termAtt = AddAttribute<ITermAttribute>();
+ }
+
+ private readonly ITermAttribute termAtt;
+
+ public override bool IncrementToken()
+ {
+ if (input.IncrementToken())
+ {
+
+ char[] buffer = termAtt.TermBuffer();
+ int length = termAtt.TermLength();
+ for (int i = 0; i < length; i++)
+ buffer[i] = System.Char.ToLower(buffer[i]);
+
+ return true;
+ }
+ return false;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Analysis/LowerCaseTokenizer.cs b/src/core/Analysis/LowerCaseTokenizer.cs
new file mode 100644
index 0000000..4cea217
--- /dev/null
+++ b/src/core/Analysis/LowerCaseTokenizer.cs
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using AttributeSource = Lucene.Net.Util.AttributeSource;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// <summary> LowerCaseTokenizer performs the function of LetterTokenizer
+ /// and LowerCaseFilter together. It divides text at non-letters and converts
+ /// the letters to lower case. While it is functionally equivalent to the combination
+ /// of LetterTokenizer and LowerCaseFilter, there is a performance advantage
+ /// to doing the two tasks at once, hence this (redundant) implementation.
+ /// <p/>
+ /// Note: this does a decent job for most European languages, but does a terrible
+ /// job for some Asian languages, where words are not separated by spaces.
+ /// </summary>
+ public sealed class LowerCaseTokenizer:LetterTokenizer
+ {
+ /// <summary>Construct a new LowerCaseTokenizer. </summary>
+ public LowerCaseTokenizer(System.IO.TextReader @in)
+ : base(@in)
+ {
+ }
+
+ /// <summary>Construct a new LowerCaseTokenizer using a given <see cref="AttributeSource" />. </summary>
+ public LowerCaseTokenizer(AttributeSource source, System.IO.TextReader @in)
+ : base(source, @in)
+ {
+ }
+
+ /// <summary>Construct a new LowerCaseTokenizer using a given <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory" />. </summary>
+ public LowerCaseTokenizer(AttributeFactory factory, System.IO.TextReader @in)
+ : base(factory, @in)
+ {
+ }
+
+ /// <summary>Converts char to lower case
+ /// <see cref="char.ToLower(char)" />.
+ /// </summary>
+ protected internal override char Normalize(char c)
+ {
+ return System.Char.ToLower(c);
+ }
+ }
+} \ No newline at end of file
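A small sketch of the equivalence described in the summary above (the sample text is a placeholder); both chains should yield the same lowercased, letter-only tokens, with the single-tokenizer form saving one pass:

    string text = "The Quick BROWN fox";
    // Combined form:
    TokenStream a = new LowerCaseTokenizer(new System.IO.StringReader(text));
    // Functionally equivalent two-step form:
    TokenStream b = new LowerCaseFilter(new LetterTokenizer(new System.IO.StringReader(text)));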
diff --git a/src/core/Analysis/MappingCharFilter.cs b/src/core/Analysis/MappingCharFilter.cs
new file mode 100644
index 0000000..9705719
--- /dev/null
+++ b/src/core/Analysis/MappingCharFilter.cs
@@ -0,0 +1,166 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// <summary> Simplistic <see cref="CharFilter" /> that applies the mappings
+ /// contained in a <see cref="NormalizeCharMap" /> to the character
+ /// stream, correcting the resulting changes to the
+ /// offsets.
+ /// </summary>
+ public class MappingCharFilter : BaseCharFilter
+ {
+ private readonly NormalizeCharMap normMap;
+ private LinkedList<char> buffer;
+ private System.String replacement;
+ private int charPointer;
+ private int nextCharCounter;
+
+ /// <summary>Default constructor that takes a <see cref="CharStream" />.</summary>
+ public MappingCharFilter(NormalizeCharMap normMap, CharStream @in)
+ : base(@in)
+ {
+ this.normMap = normMap;
+ }
+
+ /// <summary>Easy-use constructor that takes a <see cref="System.IO.TextReader" />.</summary>
+ public MappingCharFilter(NormalizeCharMap normMap, System.IO.TextReader @in)
+ : base(CharReader.Get(@in))
+ {
+ this.normMap = normMap;
+ }
+
+ public override int Read()
+ {
+ while (true)
+ {
+ if (replacement != null && charPointer < replacement.Length)
+ {
+ return replacement[charPointer++];
+ }
+
+ int firstChar = NextChar();
+ if (firstChar == - 1)
+ return - 1;
+ NormalizeCharMap nm = normMap.submap != null
+ ? normMap.submap[(char) firstChar]
+ : null;
+ if (nm == null)
+ return firstChar;
+ NormalizeCharMap result = Match(nm);
+ if (result == null)
+ return firstChar;
+ replacement = result.normStr;
+ charPointer = 0;
+ if (result.diff != 0)
+ {
+ int prevCumulativeDiff = LastCumulativeDiff;
+ if (result.diff < 0)
+ {
+ for (int i = 0; i < - result.diff; i++)
+ AddOffCorrectMap(nextCharCounter + i - prevCumulativeDiff, prevCumulativeDiff - 1 - i);
+ }
+ else
+ {
+ AddOffCorrectMap(nextCharCounter - result.diff - prevCumulativeDiff, prevCumulativeDiff + result.diff);
+ }
+ }
+ }
+ }
+
+ private int NextChar()
+ {
+ nextCharCounter++;
+ if (buffer != null && buffer.Count != 0)
+ {
+ char tempObject = buffer.First.Value;
+ buffer.RemoveFirst();
+ return (tempObject);
+ }
+ return input.Read();
+ }
+
+ private void PushChar(int c)
+ {
+ nextCharCounter--;
+ if (buffer == null)
+ {
+ buffer = new LinkedList<char>();
+ }
+ buffer.AddFirst((char)c);
+ }
+
+ private void PushLastChar(int c)
+ {
+ if (buffer == null)
+ {
+ buffer = new LinkedList<char>();
+ }
+ buffer.AddLast((char)c);
+ }
+
+ private NormalizeCharMap Match(NormalizeCharMap map)
+ {
+ NormalizeCharMap result = null;
+ if (map.submap != null)
+ {
+ int chr = NextChar();
+ if (chr != - 1)
+ {
+ NormalizeCharMap subMap = map.submap[(char)chr];
+ if (subMap != null)
+ {
+ result = Match(subMap);
+ }
+ if (result == null)
+ {
+ PushChar(chr);
+ }
+ }
+ }
+ if (result == null && map.normStr != null)
+ {
+ result = map;
+ }
+ return result;
+ }
+
+ public override int Read(System.Char[] cbuf, int off, int len)
+ {
+ var tmp = new char[len];
+ int l = input.Read(tmp, 0, len);
+ if (l != 0)
+ {
+ for (int i = 0; i < l; i++)
+ PushLastChar(tmp[i]);
+ }
+ l = 0;
+ for (int i = off; i < off + len; i++)
+ {
+ int c = Read();
+ if (c == - 1)
+ break;
+ cbuf[i] = (char) c;
+ l++;
+ }
+ return l == 0?- 1:l;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Analysis/NormalizeCharMap.cs b/src/core/Analysis/NormalizeCharMap.cs
new file mode 100644
index 0000000..7fd520c
--- /dev/null
+++ b/src/core/Analysis/NormalizeCharMap.cs
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// <summary> Holds a map of String input to String output, to be used
+ /// with <see cref="MappingCharFilter" />.
+ /// </summary>
+ public class NormalizeCharMap
+ {
+ internal System.Collections.Generic.IDictionary<char, NormalizeCharMap> submap;
+ internal System.String normStr;
+ internal int diff;
+
+ /// <summary>Records a replacement to be applied to the input
+ /// stream. Whenever <c>singleMatch</c> occurs in
+ /// the input, it will be replaced with
+ /// <c>replacement</c>.
+ ///
+ /// </summary>
+ /// <param name="singleMatch">input String to be replaced
+ /// </param>
+ /// <param name="replacement">output String
+ /// </param>
+ public virtual void Add(System.String singleMatch, System.String replacement)
+ {
+ NormalizeCharMap currMap = this;
+ for (var i = 0; i < singleMatch.Length; i++)
+ {
+ char c = singleMatch[i];
+ if (currMap.submap == null)
+ {
+ currMap.submap = new HashMap<char, NormalizeCharMap>(1);
+ }
+ var map = currMap.submap[c];
+ if (map == null)
+ {
+ map = new NormalizeCharMap();
+ currMap.submap[c] = map;
+ }
+ currMap = map;
+ }
+ if (currMap.normStr != null)
+ {
+ throw new System.SystemException("MappingCharFilter: there is already a mapping for " + singleMatch);
+ }
+ currMap.normStr = replacement;
+ currMap.diff = singleMatch.Length - replacement.Length;
+ }
+ }
+} \ No newline at end of file
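A minimal sketch of how the two classes combine (the mappings shown are illustrative): replacements are registered on a NormalizeCharMap and then applied by a MappingCharFilter wrapped around the reader, with offsets corrected for the length changes.

    var map = new NormalizeCharMap();
    map.Add("ß", "ss");   // one input string replaced by one output string
    map.Add("æ", "ae");
    var filtered = new MappingCharFilter(map, new System.IO.StringReader("Encyclopædia Straße"));
    // 'filtered' is a CharStream and can be handed to a Tokenizer in place of the raw reader.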
diff --git a/src/core/Analysis/NumericTokenStream.cs b/src/core/Analysis/NumericTokenStream.cs
new file mode 100644
index 0000000..90b6e72
--- /dev/null
+++ b/src/core/Analysis/NumericTokenStream.cs
@@ -0,0 +1,270 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Search;
+using AttributeSource = Lucene.Net.Util.AttributeSource;
+using NumericUtils = Lucene.Net.Util.NumericUtils;
+using NumericField = Lucene.Net.Documents.NumericField;
+// javadocs
+
+namespace Lucene.Net.Analysis
+{
+
+ /// <summary> <b>Expert:</b> This class provides a <see cref="TokenStream" />
+ /// for indexing numeric values that can be used by <see cref="NumericRangeQuery{T}" />
+ /// or <see cref="NumericRangeFilter{T}" />.
+ ///
+ /// <p/>Note that for simple usage, <see cref="NumericField" /> is
+ /// recommended. <see cref="NumericField" /> disables norms and
+ /// term freqs, as they are not usually needed during
+ /// searching. If you need to change these settings, you
+ /// should use this class.
+ ///
+ /// <p/>See <see cref="NumericField" /> for capabilities of fields
+ /// indexed numerically.<p/>
+ ///
+ /// <p/>Here's an example usage, for an <c>int</c> field:
+ ///
+ /// <code>
+ /// Field field = new Field(name, new NumericTokenStream(precisionStep).setIntValue(value));
+ /// field.setOmitNorms(true);
+ /// field.setOmitTermFreqAndPositions(true);
+ /// document.add(field);
+ /// </code>
+ ///
+ /// <p/>For optimal performance, re-use the TokenStream and Field instance
+ /// for more than one document:
+ ///
+ /// <code>
+ /// NumericTokenStream stream = new NumericTokenStream(precisionStep);
+ /// Field field = new Field(name, stream);
+ /// field.setOmitNorms(true);
+ /// field.setOmitTermFreqAndPositions(true);
+ /// Document document = new Document();
+ /// document.add(field);
+ ///
+ /// for(all documents) {
+ /// stream.setIntValue(value)
+ /// writer.addDocument(document);
+ /// }
+ /// </code>
+ ///
+ /// <p/>This stream is not intended to be used in analyzers;
+ /// it's more for iterating over the different precisions while
+ /// indexing a specific numeric value.<p/>
+ ///
+ /// <p/><b>NOTE</b>: as token streams are only consumed once
+ /// the document is added to the index, if you index more
+ /// than one numeric field, use a separate <c>NumericTokenStream</c>
+ /// instance for each.<p/>
+ ///
+ /// <p/>See <see cref="NumericRangeQuery{T}" /> for more details on the
+ /// <a href="../search/NumericRangeQuery.html#precisionStepDesc"><c>precisionStep</c></a>
+ /// parameter as well as how numeric fields work under the hood.<p/>
+ ///
+ /// <p/><font color="red"><b>NOTE:</b> This API is experimental and
+ /// might change in incompatible ways in the next release.</font>
+ /// Since 2.9
+ /// </summary>
+ public sealed class NumericTokenStream : TokenStream
+ {
+ private void InitBlock()
+ {
+ termAtt = AddAttribute<ITermAttribute>();
+ typeAtt = AddAttribute<ITypeAttribute>();
+ posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
+ }
+
+ /// <summary>The full precision token gets this token type assigned. </summary>
+ public const System.String TOKEN_TYPE_FULL_PREC = "fullPrecNumeric";
+
+ /// <summary>The lower precision tokens gets this token type assigned. </summary>
+ public const System.String TOKEN_TYPE_LOWER_PREC = "lowerPrecNumeric";
+
+ /// <summary> Creates a token stream for numeric values using the default <c>precisionStep</c>
+ /// <see cref="NumericUtils.PRECISION_STEP_DEFAULT" /> (4). The stream is not yet initialized,
+ /// before using set a value using the various set<em>???</em>Value() methods.
+ /// </summary>
+ public NumericTokenStream():this(NumericUtils.PRECISION_STEP_DEFAULT)
+ {
+ }
+
+ /// <summary> Creates a token stream for numeric values with the specified
+ /// <c>precisionStep</c>. The stream is not yet initialized;
+ /// before using it, set a value with one of the Set<em>???</em>Value() methods.
+ /// </summary>
+ public NumericTokenStream(int precisionStep):base()
+ {
+ InitBlock();
+ this.precisionStep = precisionStep;
+ if (precisionStep < 1)
+ throw new System.ArgumentException("precisionStep must be >=1");
+ }
+
+ /// <summary> Expert: Creates a token stream for numeric values with the specified
+ /// <c>precisionStep</c> using the given <see cref="AttributeSource" />.
+ /// The stream is not yet initialized;
+ /// before using it, set a value with one of the Set<em>???</em>Value() methods.
+ /// </summary>
+ public NumericTokenStream(AttributeSource source, int precisionStep):base(source)
+ {
+ InitBlock();
+ this.precisionStep = precisionStep;
+ if (precisionStep < 1)
+ throw new System.ArgumentException("precisionStep must be >=1");
+ }
+
+ /// <summary> Expert: Creates a token stream for numeric values with the specified
+ /// <c>precisionStep</c> using the given
+ /// <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory" />.
+ /// The stream is not yet initialized;
+ /// before using it, set a value with one of the Set<em>???</em>Value() methods.
+ /// </summary>
+ public NumericTokenStream(AttributeFactory factory, int precisionStep):base(factory)
+ {
+ InitBlock();
+ this.precisionStep = precisionStep;
+ if (precisionStep < 1)
+ throw new System.ArgumentException("precisionStep must be >=1");
+ }
+
+ /// <summary> Initializes the token stream with the supplied <c>long</c> value.</summary>
+ /// <param name="value_Renamed">the value, for which this TokenStream should enumerate tokens.
+ /// </param>
+ /// <returns> this instance, so it can be used like this:
+ /// <c>new Field(name, new NumericTokenStream(precisionStep).SetLongValue(value))</c>
+ /// </returns>
+ public NumericTokenStream SetLongValue(long value_Renamed)
+ {
+ this.value_Renamed = value_Renamed;
+ valSize = 64;
+ shift = 0;
+ return this;
+ }
+
+ /// <summary> Initializes the token stream with the supplied <c>int</c> value.</summary>
+ /// <param name="value_Renamed">the value, for which this TokenStream should enumerate tokens.
+ /// </param>
+ /// <returns> this instance, so it can be used like this:
+ /// <c>new Field(name, new NumericTokenStream(precisionStep).SetIntValue(value))</c>
+ /// </returns>
+ public NumericTokenStream SetIntValue(int value_Renamed)
+ {
+ this.value_Renamed = (long) value_Renamed;
+ valSize = 32;
+ shift = 0;
+ return this;
+ }
+
+ /// <summary> Initializes the token stream with the supplied <c>double</c> value.</summary>
+ /// <param name="value_Renamed">the value, for which this TokenStream should enumerate tokens.
+ /// </param>
+ /// <returns> this instance, so it can be used like this:
+ /// <c>new Field(name, new NumericTokenStream(precisionStep).SetDoubleValue(value))</c>
+ /// </returns>
+ public NumericTokenStream SetDoubleValue(double value_Renamed)
+ {
+ this.value_Renamed = NumericUtils.DoubleToSortableLong(value_Renamed);
+ valSize = 64;
+ shift = 0;
+ return this;
+ }
+
+ /// <summary> Initializes the token stream with the supplied <c>float</c> value.</summary>
+ /// <param name="value_Renamed">the value, for which this TokenStream should enumerate tokens.
+ /// </param>
+ /// <returns> this instance, so it can be used like this:
+ /// <c>new Field(name, new NumericTokenStream(precisionStep).SetFloatValue(value))</c>
+ /// </returns>
+ public NumericTokenStream SetFloatValue(float value_Renamed)
+ {
+ this.value_Renamed = (long) NumericUtils.FloatToSortableInt(value_Renamed);
+ valSize = 32;
+ shift = 0;
+ return this;
+ }
+
+ // @Override
+ public override void Reset()
+ {
+ if (valSize == 0)
+ throw new System.SystemException("call set???Value() before usage");
+ shift = 0;
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ // Do nothing.
+ }
+
+ // @Override
+ public override bool IncrementToken()
+ {
+ if (valSize == 0)
+ throw new System.SystemException("call set???Value() before usage");
+ if (shift >= valSize)
+ return false;
+
+ ClearAttributes();
+ char[] buffer;
+ switch (valSize)
+ {
+
+ case 64:
+ buffer = termAtt.ResizeTermBuffer(NumericUtils.BUF_SIZE_LONG);
+ termAtt.SetTermLength(NumericUtils.LongToPrefixCoded(value_Renamed, shift, buffer));
+ break;
+
+
+ case 32:
+ buffer = termAtt.ResizeTermBuffer(NumericUtils.BUF_SIZE_INT);
+ termAtt.SetTermLength(NumericUtils.IntToPrefixCoded((int) value_Renamed, shift, buffer));
+ break;
+
+
+ default:
+ // should not happen
+ throw new System.ArgumentException("valSize must be 32 or 64");
+
+ }
+
+ typeAtt.Type = (shift == 0)?TOKEN_TYPE_FULL_PREC:TOKEN_TYPE_LOWER_PREC;
+ posIncrAtt.PositionIncrement = (shift == 0)?1:0;
+ shift += precisionStep;
+ return true;
+ }
+
+ // @Override
+ public override System.String ToString()
+ {
+ System.Text.StringBuilder sb = new System.Text.StringBuilder("(numeric,valSize=").Append(valSize);
+ sb.Append(",precisionStep=").Append(precisionStep).Append(')');
+ return sb.ToString();
+ }
+
+ // members
+ private ITermAttribute termAtt;
+ private ITypeAttribute typeAtt;
+ private IPositionIncrementAttribute posIncrAtt;
+
+ private int shift = 0, valSize = 0; // valSize==0 means not initialized
+ private readonly int precisionStep;
+
+ private long value_Renamed = 0L;
+ }
+} \ No newline at end of file
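The reuse example in the summary above is written in Java style; against the C# surface of this port it would look roughly like the following (Field.OmitNorms and Field.OmitTermFreqAndPositions as settable properties, and an existing IndexWriter named writer, are assumptions):

    int precisionStep = 4;
    var values = new[] { 5, 19, 42 };          // stands in for "all documents"
    var stream = new NumericTokenStream(precisionStep);
    var field = new Field("price", stream);
    field.OmitNorms = true;                    // assumed property form of setOmitNorms(true)
    field.OmitTermFreqAndPositions = true;     // assumed property form of setOmitTermFreqAndPositions(true)
    var document = new Document();
    document.Add(field);
    foreach (int value in values)
    {
        stream.SetIntValue(value);
        writer.AddDocument(document);          // writer: an IndexWriter assumed to exist
    }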
diff --git a/src/core/Analysis/PerFieldAnalyzerWrapper.cs b/src/core/Analysis/PerFieldAnalyzerWrapper.cs
new file mode 100644
index 0000000..b1c43aa
--- /dev/null
+++ b/src/core/Analysis/PerFieldAnalyzerWrapper.cs
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.Collections.Generic;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// <summary> This analyzer is used to facilitate scenarios where different
+ /// fields require different analysis techniques. Use <see cref="AddAnalyzer" />
+ /// to add a non-default analyzer on a field name basis.
+ ///
+ /// <p/>Example usage:
+ ///
+ /// <code>
+ /// PerFieldAnalyzerWrapper aWrapper =
+ /// new PerFieldAnalyzerWrapper(new StandardAnalyzer());
+ /// aWrapper.addAnalyzer("firstname", new KeywordAnalyzer());
+ /// aWrapper.addAnalyzer("lastname", new KeywordAnalyzer());
+ /// </code>
+ ///
+ /// <p/>In this example, StandardAnalyzer will be used for all fields except "firstname"
+ /// and "lastname", for which KeywordAnalyzer will be used.
+ ///
+ /// <p/>A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing
+ /// and query parsing.
+ /// </summary>
+ public class PerFieldAnalyzerWrapper:Analyzer
+ {
+ private readonly Analyzer defaultAnalyzer;
+ private readonly IDictionary<string, Analyzer> analyzerMap = new HashMap<string, Analyzer>();
+
+
+ /// <summary> Constructs with default analyzer.
+ ///
+ /// </summary>
+ /// <param name="defaultAnalyzer">Any fields not specifically
+ /// defined to use a different analyzer will use the one provided here.
+ /// </param>
+ public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer)
+ : this(defaultAnalyzer, null)
+ {
+ }
+
+ /// <summary> Constructs with default analyzer and a map of analyzers to use for
+ /// specific fields.
+ ///
+ /// </summary>
+ /// <param name="defaultAnalyzer">Any fields not specifically
+ /// defined to use a different analyzer will use the one provided here.
+ /// </param>
+ /// <param name="fieldAnalyzers">a Map (String field name to the Analyzer) to be
+ /// used for those fields
+ /// </param>
+ public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer, IEnumerable<KeyValuePair<string, Analyzer>> fieldAnalyzers)
+ {
+ this.defaultAnalyzer = defaultAnalyzer;
+ if (fieldAnalyzers != null)
+ {
+ foreach(var entry in fieldAnalyzers)
+ analyzerMap[entry.Key] = entry.Value;
+ }
+ SetOverridesTokenStreamMethod<PerFieldAnalyzerWrapper>();
+ }
+
+
+ /// <summary> Defines an analyzer to use for the specified field.
+ ///
+ /// </summary>
+ /// <param name="fieldName">field name requiring a non-default analyzer
+ /// </param>
+ /// <param name="analyzer">non-default analyzer to use for field
+ /// </param>
+ public virtual void AddAnalyzer(System.String fieldName, Analyzer analyzer)
+ {
+ analyzerMap[fieldName] = analyzer;
+ }
+
+ public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
+ {
+ var analyzer = analyzerMap[fieldName] ?? defaultAnalyzer;
+
+ return analyzer.TokenStream(fieldName, reader);
+ }
+
+ public override TokenStream ReusableTokenStream(string fieldName, System.IO.TextReader reader)
+ {
+ if (overridesTokenStreamMethod)
+ {
+ // LUCENE-1678: force fallback to tokenStream() if we
+ // have been subclassed and that subclass overrides
+ // tokenStream but not reusableTokenStream
+ return TokenStream(fieldName, reader);
+ }
+ var analyzer = analyzerMap[fieldName] ?? defaultAnalyzer;
+
+ return analyzer.ReusableTokenStream(fieldName, reader);
+ }
+
+ /// <summary>Return the positionIncrementGap from the analyzer assigned to fieldName </summary>
+ public override int GetPositionIncrementGap(string fieldName)
+ {
+ var analyzer = analyzerMap[fieldName] ?? defaultAnalyzer;
+ return analyzer.GetPositionIncrementGap(fieldName);
+ }
+
+ /// <summary> Return the offsetGap from the analyzer assigned to field </summary>
+ public override int GetOffsetGap(Documents.IFieldable field)
+ {
+ Analyzer analyzer = analyzerMap[field.Name] ?? defaultAnalyzer;
+ return analyzer.GetOffsetGap(field);
+ }
+
+ public override System.String ToString()
+ {
+ // {{Aroush-2.9}} will 'analyzerMap.ToString()' work in the same way as Java's java.util.HashMap.toString()?
+ return "PerFieldAnalyzerWrapper(" + analyzerMap + ", default=" + defaultAnalyzer + ")";
+ }
+ }
+} \ No newline at end of file
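Rendered against this port's C# surface (the Version argument that StandardAnalyzer requires in this commit, the PascalCase AddAnalyzer method, and the bundled KeywordAnalyzer; Version.LUCENE_29 is assumed to be a member of the Version enum), the summary example becomes roughly:

    var aWrapper = new PerFieldAnalyzerWrapper(new StandardAnalyzer(Version.LUCENE_29));
    aWrapper.AddAnalyzer("firstname", new KeywordAnalyzer());
    aWrapper.AddAnalyzer("lastname", new KeywordAnalyzer());
    // StandardAnalyzer handles every field except "firstname" and "lastname".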
diff --git a/src/core/Analysis/PorterStemFilter.cs b/src/core/Analysis/PorterStemFilter.cs
new file mode 100644
index 0000000..b7f1dbf
--- /dev/null
+++ b/src/core/Analysis/PorterStemFilter.cs
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Lucene.Net.Analysis.Tokenattributes;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// <summary>Transforms the token stream as per the Porter stemming algorithm.
+ /// Note: the input to the stemming filter must already be in lower case,
+ /// so you will need to use LowerCaseFilter or LowerCaseTokenizer farther
+ /// down the Tokenizer chain in order for this to work properly!
+ /// <p/>
+ /// To use this filter with other analyzers, you'll want to write an
+ /// Analyzer class that sets up the TokenStream chain as you want it.
+ /// To use this with LowerCaseTokenizer, for example, you'd write an
+ /// analyzer like this:
+ /// <p/>
+ /// <code>
+ /// class MyAnalyzer extends Analyzer {
+ /// public final TokenStream tokenStream(String fieldName, Reader reader) {
+ /// return new PorterStemFilter(new LowerCaseTokenizer(reader));
+ /// }
+ /// }
+ /// </code>
+ /// </summary>
+ public sealed class PorterStemFilter:TokenFilter
+ {
+ private readonly PorterStemmer stemmer;
+ private readonly ITermAttribute termAtt;
+
+ public PorterStemFilter(TokenStream in_Renamed):base(in_Renamed)
+ {
+ stemmer = new PorterStemmer();
+ termAtt = AddAttribute<ITermAttribute>();
+ }
+
+ public override bool IncrementToken()
+ {
+ if (!input.IncrementToken())
+ return false;
+
+ if (stemmer.Stem(termAtt.TermBuffer(), 0, termAtt.TermLength()))
+ termAtt.SetTermBuffer(stemmer.ResultBuffer, 0, stemmer.ResultLength);
+ return true;
+ }
+ }
+} \ No newline at end of file
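The analyzer in the summary above is Java-flavored; a C# counterpart matching the Analyzer contract used elsewhere in this commit might be sketched as follows (MyAnalyzer is a hypothetical name):

    class MyAnalyzer : Analyzer
    {
        public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
        {
            // lower-case first, then stem, exactly as the note above requires
            return new PorterStemFilter(new LowerCaseTokenizer(reader));
        }
    }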
diff --git a/src/core/Analysis/PorterStemmer.cs b/src/core/Analysis/PorterStemmer.cs
new file mode 100644
index 0000000..f47c5a7
--- /dev/null
+++ b/src/core/Analysis/PorterStemmer.cs
@@ -0,0 +1,746 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+
+Porter stemmer in Java. The original paper is in
+
+Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
+no. 3, pp 130-137,
+
+See also http://www.tartarus.org/~martin/PorterStemmer/index.html
+
+Bug 1 (reported by Gonzalo Parra 16/10/99) fixed as marked below.
+Tthe words 'aed', 'eed', 'oed' leave k at 'a' for step 3, and b[k-1]
+is then out outside the bounds of b.
+
+Similarly,
+
+Bug 2 (reported by Steve Dyrdahl 22/2/00) fixed as marked below.
+'ion' by itself leaves j = -1 in the test for 'ion' in step 5, and
+b[j] is then outside the bounds of b.
+
+Release 3.
+
+[ This version is derived from Release 3, modified by Brian Goetz to
+optimize for fewer object creations. ]
+*/
+using System;
+namespace Lucene.Net.Analysis
+{
+
+ /// <summary>
+ /// Stemmer, implementing the Porter Stemming Algorithm
+ ///
+ /// The Stemmer class transforms a word into its root form. The input
+ /// word can be provided a character at a time (by calling add()), or at once
+ /// by calling one of the various stem(something) methods.
+ /// </summary>
+
+ class PorterStemmer
+ {
+ private char[] b;
+ private int i, j, k, k0;
+ private bool dirty = false;
+ private const int INC = 50; /* unit of size whereby b is increased */
+ private const int EXTRA = 1;
+
+ public PorterStemmer()
+ {
+ b = new char[INC];
+ i = 0;
+ }
+
+ /// <summary> reset() resets the stemmer so it can stem another word. If you invoke
+ /// the stemmer by calling add(char) and then stem(), you must call reset()
+ /// before starting another word.
+ /// </summary>
+ public virtual void Reset()
+ {
+ i = 0; dirty = false;
+ }
+
+ /// <summary> Add a character to the word being stemmed. When you are finished
+ /// adding characters, you can call stem(void) to process the word.
+ /// </summary>
+ public virtual void Add(char ch)
+ {
+ if (b.Length <= i + EXTRA)
+ {
+ var new_b = new char[b.Length + INC];
+ Array.Copy(b, 0, new_b, 0, b.Length);
+ b = new_b;
+ }
+ b[i++] = ch;
+ }
+
+ /// <summary> After a word has been stemmed, it can be retrieved by toString(),
+ /// or a reference to the internal buffer can be retrieved by getResultBuffer
+ /// and getResultLength (which is generally more efficient.)
+ /// </summary>
+ public override System.String ToString()
+ {
+ return new System.String(b, 0, i);
+ }
+
+ /// <summary> Returns the length of the word resulting from the stemming process.</summary>
+ public virtual int ResultLength
+ {
+ get { return i; }
+ }
+
+ /// <summary> Returns a reference to a character buffer containing the results of
+ /// the stemming process. You also need to consult getResultLength()
+ /// to determine the length of the result.
+ /// </summary>
+ public virtual char[] ResultBuffer
+ {
+ get { return b; }
+ }
+
+ /* cons(i) is true <=> b[i] is a consonant. */
+
+ private bool Cons(int i)
+ {
+ switch (b[i])
+ {
+
+ case 'a':
+ case 'e':
+ case 'i':
+ case 'o':
+ case 'u':
+ return false;
+
+ case 'y':
+ return (i == k0)?true:!Cons(i - 1);
+
+ default:
+ return true;
+
+ }
+ }
+
+ /* m() measures the number of consonant sequences between k0 and j. if c is
+ a consonant sequence and v a vowel sequence, and <..> indicates arbitrary
+ presence,
+
+ <c><v> gives 0
+ <c>vc<v> gives 1
+ <c>vcvc<v> gives 2
+ <c>vcvcvc<v> gives 3
+ ....
+ */
+
+ private int M()
+ {
+ int n = 0;
+ int i = k0;
+ while (true)
+ {
+ if (i > j)
+ return n;
+ if (!Cons(i))
+ break;
+ i++;
+ }
+ i++;
+ while (true)
+ {
+ while (true)
+ {
+ if (i > j)
+ return n;
+ if (Cons(i))
+ break;
+ i++;
+ }
+ i++;
+ n++;
+ while (true)
+ {
+ if (i > j)
+ return n;
+ if (!Cons(i))
+ break;
+ i++;
+ }
+ i++;
+ }
+ }
+
+ /* vowelinstem() is true <=> k0,...j contains a vowel */
+
+ private bool Vowelinstem()
+ {
+ int i;
+ for (i = k0; i <= j; i++)
+ if (!Cons(i))
+ return true;
+ return false;
+ }
+
+ /* doublec(j) is true <=> j,(j-1) contain a double consonant. */
+
+ private bool Doublec(int j)
+ {
+ if (j < k0 + 1)
+ return false;
+ if (b[j] != b[j - 1])
+ return false;
+ return Cons(j);
+ }
+
+ /* cvc(i) is true <=> i-2,i-1,i has the form consonant - vowel - consonant
+ and also if the second c is not w,x or y. this is used when trying to
+ restore an e at the end of a short word. e.g.
+
+ cav(e), lov(e), hop(e), crim(e), but
+ snow, box, tray.
+
+ */
+
+ private bool Cvc(int i)
+ {
+ if (i < k0 + 2 || !Cons(i) || Cons(i - 1) || !Cons(i - 2))
+ return false;
+ else
+ {
+ int ch = b[i];
+ if (ch == 'w' || ch == 'x' || ch == 'y')
+ return false;
+ }
+ return true;
+ }
+
+ private bool Ends(System.String s)
+ {
+ int l = s.Length;
+ int o = k - l + 1;
+ if (o < k0)
+ return false;
+ for (int i = 0; i < l; i++)
+ if (b[o + i] != s[i])
+ return false;
+ j = k - l;
+ return true;
+ }
+
+ /* setto(s) sets (j+1),...k to the characters in the string s, readjusting
+ k. */
+
+ internal virtual void Setto(System.String s)
+ {
+ int l = s.Length;
+ int o = j + 1;
+ for (int i = 0; i < l; i++)
+ b[o + i] = s[i];
+ k = j + l;
+ dirty = true;
+ }
+
+ /* r(s) is used further down. */
+
+ internal virtual void R(System.String s)
+ {
+ if (M() > 0)
+ Setto(s);
+ }
+
+ /* step1() gets rid of plurals and -ed or -ing. e.g.
+
+ caresses -> caress
+ ponies -> poni
+ ties -> ti
+ caress -> caress
+ cats -> cat
+
+ feed -> feed
+ agreed -> agree
+ disabled -> disable
+
+ matting -> mat
+ mating -> mate
+ meeting -> meet
+ milling -> mill
+ messing -> mess
+
+ meetings -> meet
+
+ */
+
+ private void Step1()
+ {
+ if (b[k] == 's')
+ {
+ if (Ends("sses"))
+ k -= 2;
+ else if (Ends("ies"))
+ Setto("i");
+ else if (b[k - 1] != 's')
+ k--;
+ }
+ if (Ends("eed"))
+ {
+ if (M() > 0)
+ k--;
+ }
+ else if ((Ends("ed") || Ends("ing")) && Vowelinstem())
+ {
+ k = j;
+ if (Ends("at"))
+ Setto("ate");
+ else if (Ends("bl"))
+ Setto("ble");
+ else if (Ends("iz"))
+ Setto("ize");
+ else if (Doublec(k))
+ {
+ int ch = b[k--];
+ if (ch == 'l' || ch == 's' || ch == 'z')
+ k++;
+ }
+ else if (M() == 1 && Cvc(k))
+ Setto("e");
+ }
+ }
+
+ /* step2() turns terminal y to i when there is another vowel in the stem. */
+
+ private void Step2()
+ {
+ if (Ends("y") && Vowelinstem())
+ {
+ b[k] = 'i';
+ dirty = true;
+ }
+ }
+
+ /* step3() maps double suffices to single ones. so -ization ( = -ize plus
+ -ation) maps to -ize etc. note that the string before the suffix must give
+ m() > 0. */
+
+ private void Step3()
+ {
+ if (k == k0)
+ return ; /* For Bug 1 */
+ switch (b[k - 1])
+ {
+
+ case 'a':
+ if (Ends("ational"))
+ {
+ R("ate"); break;
+ }
+ if (Ends("tional"))
+ {
+ R("tion"); break;
+ }
+ break;
+
+ case 'c':
+ if (Ends("enci"))
+ {
+ R("ence"); break;
+ }
+ if (Ends("anci"))
+ {
+ R("ance"); break;
+ }
+ break;
+
+ case 'e':
+ if (Ends("izer"))
+ {
+ R("ize"); break;
+ }
+ break;
+
+ case 'l':
+ if (Ends("bli"))
+ {
+ R("ble"); break;
+ }
+ if (Ends("alli"))
+ {
+ R("al"); break;
+ }
+ if (Ends("entli"))
+ {
+ R("ent"); break;
+ }
+ if (Ends("eli"))
+ {
+ R("e"); break;
+ }
+ if (Ends("ousli"))
+ {
+ R("ous"); break;
+ }
+ break;
+
+ case 'o':
+ if (Ends("ization"))
+ {
+ R("ize"); break;
+ }
+ if (Ends("ation"))
+ {
+ R("ate"); break;
+ }
+ if (Ends("ator"))
+ {
+ R("ate"); break;
+ }
+ break;
+
+ case 's':
+ if (Ends("alism"))
+ {
+ R("al"); break;
+ }
+ if (Ends("iveness"))
+ {
+ R("ive"); break;
+ }
+ if (Ends("fulness"))
+ {
+ R("ful"); break;
+ }
+ if (Ends("ousness"))
+ {
+ R("ous"); break;
+ }
+ break;
+
+ case 't':
+ if (Ends("aliti"))
+ {
+ R("al"); break;
+ }
+ if (Ends("iviti"))
+ {
+ R("ive"); break;
+ }
+ if (Ends("biliti"))
+ {
+ R("ble"); break;
+ }
+ break;
+
+ case 'g':
+ if (Ends("logi"))
+ {
+ R("log"); break;
+ }
+ break;
+ }
+ }
+
+ /* step4() deals with -ic-, -full, -ness etc. similar strategy to step3. */
+
+ private void Step4()
+ {
+ switch (b[k])
+ {
+
+ case 'e':
+ if (Ends("icate"))
+ {
+ R("ic"); break;
+ }
+ if (Ends("ative"))
+ {
+ R(""); break;
+ }
+ if (Ends("alize"))
+ {
+ R("al"); break;
+ }
+ break;
+
+ case 'i':
+ if (Ends("iciti"))
+ {
+ R("ic"); break;
+ }
+ break;
+
+ case 'l':
+ if (Ends("ical"))
+ {
+ R("ic"); break;
+ }
+ if (Ends("ful"))
+ {
+ R(""); break;
+ }
+ break;
+
+ case 's':
+ if (Ends("ness"))
+ {
+ R(""); break;
+ }
+ break;
+ }
+ }
+
+ /* step5() takes off -ant, -ence etc., in context <c>vcvc<v>. */
+
+ private void Step5()
+ {
+ if (k == k0)
+ return ; /* for Bug 1 */
+ switch (b[k - 1])
+ {
+
+ case 'a':
+ if (Ends("al"))
+ break;
+ return ;
+
+ case 'c':
+ if (Ends("ance"))
+ break;
+ if (Ends("ence"))
+ break;
+ return ;
+
+ case 'e':
+ if (Ends("er"))
+ break; return ;
+
+ case 'i':
+ if (Ends("ic"))
+ break; return ;
+
+ case 'l':
+ if (Ends("able"))
+ break;
+ if (Ends("ible"))
+ break; return ;
+
+ case 'n':
+ if (Ends("ant"))
+ break;
+ if (Ends("ement"))
+ break;
+ if (Ends("ment"))
+ break;
+ /* element etc. not stripped before the m */
+ if (Ends("ent"))
+ break;
+ return ;
+
+ case 'o':
+ if (Ends("ion") && j >= 0 && (b[j] == 's' || b[j] == 't'))
+ break;
+ /* j >= 0 fixes Bug 2 */
+ if (Ends("ou"))
+ break;
+ return ;
+ /* takes care of -ous */
+
+ case 's':
+ if (Ends("ism"))
+ break;
+ return ;
+
+ case 't':
+ if (Ends("ate"))
+ break;
+ if (Ends("iti"))
+ break;
+ return ;
+
+ case 'u':
+ if (Ends("ous"))
+ break;
+ return ;
+
+ case 'v':
+ if (Ends("ive"))
+ break;
+ return ;
+
+ case 'z':
+ if (Ends("ize"))
+ break;
+ return ;
+
+ default:
+ return ;
+
+ }
+ if (M() > 1)
+ k = j;
+ }
+
+ /* step6() removes a final -e if m() > 1. */
+
+ private void Step6()
+ {
+ j = k;
+ if (b[k] == 'e')
+ {
+ int a = M();
+ if (a > 1 || a == 1 && !Cvc(k - 1))
+ k--;
+ }
+ if (b[k] == 'l' && Doublec(k) && M() > 1)
+ k--;
+ }
+
+
+ /// <summary> Stem a word provided as a String. Returns the result as a String.</summary>
+ public virtual System.String Stem(System.String s)
+ {
+ if (Stem(s.ToCharArray(), s.Length))
+ {
+ return ToString();
+ }
+ else
+ return s;
+ }
+
+ /// <summary>Stem a word contained in a char[]. Returns true if the stemming process
+ /// resulted in a word different from the input. You can retrieve the
+ /// result with getResultLength()/getResultBuffer() or toString().
+ /// </summary>
+ public virtual bool Stem(char[] word)
+ {
+ return Stem(word, word.Length);
+ }
+
+ /// <summary>Stem a word contained in a portion of a char[] array. Returns
+ /// true if the stemming process resulted in a word different from
+ /// the input. You can retrieve the result with
+ /// getResultLength()/getResultBuffer() or toString().
+ /// </summary>
+ public virtual bool Stem(char[] wordBuffer, int offset, int wordLen)
+ {
+ Reset();
+ if (b.Length < wordLen)
+ {
+ var new_b = new char[wordLen + EXTRA];
+ b = new_b;
+ }
+ Array.Copy(wordBuffer, offset, b, 0, wordLen);
+ i = wordLen;
+ return Stem(0);
+ }
+
+ /// <summary>Stem a word contained in a leading portion of a char[] array.
+ /// Returns true if the stemming process resulted in a word different
+ /// from the input. You can retrieve the result with
+ /// getResultLength()/getResultBuffer() or toString().
+ /// </summary>
+ public virtual bool Stem(char[] word, int wordLen)
+ {
+ return Stem(word, 0, wordLen);
+ }
+
+ /// <summary>Stem the word placed into the Stemmer buffer through calls to add().
+ /// Returns true if the stemming process resulted in a word different
+ /// from the input. You can retrieve the result with
+ /// getResultLength()/getResultBuffer() or toString().
+ /// </summary>
+ public virtual bool Stem()
+ {
+ return Stem(0);
+ }
+
+ public virtual bool Stem(int i0)
+ {
+ k = i - 1;
+ k0 = i0;
+ if (k > k0 + 1)
+ {
+ Step1(); Step2(); Step3(); Step4(); Step5(); Step6();
+ }
+ // Also, a word is considered dirty if we lopped off letters
+ // Thanks to Ifigenia Vairelles for pointing this out.
+ if (i != k + 1)
+ dirty = true;
+ i = k + 1;
+ return dirty;
+ }
+
+ /// <summary>Test program for demonstrating the Stemmer. It reads a file and
+ /// stems each word, writing the result to standard out.
+ /// Usage: Stemmer file-name
+ /// </summary>
+ [STAThread]
+ public static void Main(System.String[] args)
+ {
+ var s = new PorterStemmer();
+
+ for (int i = 0; i < args.Length; i++)
+ {
+ try
+ {
+ System.IO.Stream in_Renamed = new System.IO.FileStream(args[i], System.IO.FileMode.Open, System.IO.FileAccess.Read);
+ var buffer = new byte[1024];
+
+ int bufferLen = in_Renamed.Read(buffer, 0, buffer.Length);
+ int offset = 0;
+ s.Reset();
+
+ while (true)
+ {
+ int ch;
+ if (offset < bufferLen)
+ ch = buffer[offset++];
+ else
+ {
+ bufferLen = in_Renamed.Read(buffer, 0, buffer.Length);
+ offset = 0;
+ if (bufferLen < 0)
+ ch = - 1;
+ else
+ ch = buffer[offset++];
+ }
+
+ if (Char.IsLetter((char) ch))
+ {
+ s.Add(Char.ToLower((char) ch));
+ }
+ else
+ {
+ s.Stem();
+ Console.Out.Write(s.ToString());
+ s.Reset();
+ if (ch < 0)
+ break;
+ else
+ {
+ System.Console.Out.Write((char) ch);
+ }
+ }
+ }
+
+ in_Renamed.Close();
+ }
+ catch (System.IO.IOException)
+ {
+ Console.Out.WriteLine("error reading " + args[i]);
+ }
+ }
+ }
+ }
+} \ No newline at end of file
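A short sketch of the two ways of driving the stemmer described in the class summary: a whole word at once, or character by character with an explicit Reset between words. The sample words and expected results come from the step1 comment above.

    var stemmer = new PorterStemmer();
    string root = stemmer.Stem("caresses");   // "caress", per the step1 examples

    stemmer.Reset();                          // required before starting another word
    foreach (char ch in "ponies")
        stemmer.Add(ch);
    stemmer.Stem();
    string root2 = stemmer.ToString();        // "poni", per the step1 examples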
diff --git a/src/core/Analysis/SimpleAnalyzer.cs b/src/core/Analysis/SimpleAnalyzer.cs
new file mode 100644
index 0000000..b84f470
--- /dev/null
+++ b/src/core/Analysis/SimpleAnalyzer.cs
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Lucene.Net.Analysis
+{
+
+ /// <summary>An <see cref="Analyzer" /> that filters <see cref="LetterTokenizer" />
+ /// with <see cref="LowerCaseFilter" />
+ /// </summary>
+
+ public sealed class SimpleAnalyzer : Analyzer
+ {
+ public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
+ {
+ return new LowerCaseTokenizer(reader);
+ }
+
+ public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
+ {
+ var tokenizer = (Tokenizer) PreviousTokenStream;
+ if (tokenizer == null)
+ {
+ tokenizer = new LowerCaseTokenizer(reader);
+ PreviousTokenStream = tokenizer;
+ }
+ else
+ tokenizer.Reset(reader);
+ return tokenizer;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Analysis/Standard/StandardAnalyzer.cs b/src/core/Analysis/Standard/StandardAnalyzer.cs
new file mode 100644
index 0000000..347d026
--- /dev/null
+++ b/src/core/Analysis/Standard/StandardAnalyzer.cs
@@ -0,0 +1,174 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using Lucene.Net.Analysis;
+using Lucene.Net.Util;
+using Version = Lucene.Net.Util.Version;
+
+namespace Lucene.Net.Analysis.Standard
+{
+
+ /// <summary> Filters <see cref="StandardTokenizer" /> with <see cref="StandardFilter" />,
+ /// <see cref="LowerCaseFilter" /> and <see cref="StopFilter" />, using a list of English stop
+ /// words.
+ ///
+ /// <a name="version"/>
+ /// <p/>
+ /// You must specify the required <see cref="Version" /> compatibility when creating
+ /// StandardAnalyzer:
+ /// <list type="bullet">
+ /// <item>As of 2.9, StopFilter preserves position increments</item>
+ /// <item>As of 2.4, Tokens incorrectly identified as acronyms are corrected (see
+ /// <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1608</a>)</item>
+ /// </list>
+ /// </summary>
+ public class StandardAnalyzer : Analyzer
+ {
+ private ISet<string> stopSet;
+
+ /// <summary> Specifies whether deprecated acronyms should be replaced with HOST type.
+ /// See <a href="https://issues.apache.org/jira/browse/LUCENE-1068">https://issues.apache.org/jira/browse/LUCENE-1068</a>
+ /// </summary>
+ private bool replaceInvalidAcronym, enableStopPositionIncrements;
+
+ /// <summary>An unmodifiable set containing some common English words that are usually not
+ /// useful for searching.
+ /// </summary>
+ public static readonly ISet<string> STOP_WORDS_SET;
+ private Version matchVersion;
+
+ /// <summary>Builds an analyzer with the default stop words (<see cref="STOP_WORDS_SET" />).
+ /// </summary>
+ /// <param name="matchVersion">Lucene version to match see <see cref="Version">above</see></param>
+ public StandardAnalyzer(Version matchVersion)
+ : this(matchVersion, STOP_WORDS_SET)
+ { }
+
+ /// <summary>Builds an analyzer with the given stop words.</summary>
+ /// <param name="matchVersion">Lucene version to match See <see cref="Version">above</see> />
+ ///
+ /// </param>
+ /// <param name="stopWords">stop words
+ /// </param>
+ public StandardAnalyzer(Version matchVersion, ISet<string> stopWords)
+ {
+ stopSet = stopWords;
+ SetOverridesTokenStreamMethod<StandardAnalyzer>();
+ enableStopPositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion);
+ replaceInvalidAcronym = matchVersion.OnOrAfter(Version.LUCENE_24);
+ this.matchVersion = matchVersion;
+ }
+
+ /// <summary>Builds an analyzer with the stop words from the given file.</summary>
+ /// <seealso cref="WordlistLoader.GetWordSet(System.IO.FileInfo)">
+ /// </seealso>
+ /// <param name="matchVersion">Lucene version to match See <see cref="Version">above</see> />
+ ///
+ /// </param>
+ /// <param name="stopwords">File to read stop words from
+ /// </param>
+ public StandardAnalyzer(Version matchVersion, System.IO.FileInfo stopwords)
+ : this (matchVersion, WordlistLoader.GetWordSet(stopwords))
+ {
+ }
+
+ /// <summary>Builds an analyzer with the stop words from the given reader.</summary>
+ /// <seealso cref="WordlistLoader.GetWordSet(System.IO.TextReader)">
+ /// </seealso>
+ /// <param name="matchVersion">Lucene version to match See <see cref="Version">above</see> />
+ ///
+ /// </param>
+ /// <param name="stopwords">Reader to read stop words from
+ /// </param>
+ public StandardAnalyzer(Version matchVersion, System.IO.TextReader stopwords)
+ : this(matchVersion, WordlistLoader.GetWordSet(stopwords))
+ { }
+
+ /// <summary>Constructs a <see cref="StandardTokenizer" /> filtered by a <see cref="StandardFilter" />
+ ///, a <see cref="LowerCaseFilter" /> and a <see cref="StopFilter" />.
+ /// </summary>
+ public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
+ {
+ StandardTokenizer tokenStream = new StandardTokenizer(matchVersion, reader);
+ tokenStream.MaxTokenLength = maxTokenLength;
+ TokenStream result = new StandardFilter(tokenStream);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(enableStopPositionIncrements, result, stopSet);
+ return result;
+ }
+
+ private sealed class SavedStreams
+ {
+ internal StandardTokenizer tokenStream;
+ internal TokenStream filteredTokenStream;
+ }
+
+ /// <summary>Default maximum allowed token length </summary>
+ public const int DEFAULT_MAX_TOKEN_LENGTH = 255;
+
+ private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
+
+ /// <summary> Set maximum allowed token length. If a token is seen
+ /// that exceeds this length then it is discarded. This
+ /// setting only takes effect the next time tokenStream or
+ /// reusableTokenStream is called.
+ /// </summary>
+ public virtual int MaxTokenLength
+ {
+ get { return maxTokenLength; }
+ set { maxTokenLength = value; }
+ }
+
+ public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
+ {
+ if (overridesTokenStreamMethod)
+ {
+ // LUCENE-1678: force fallback to tokenStream() if we
+ // have been subclassed and that subclass overrides
+ // tokenStream but not reusableTokenStream
+ return TokenStream(fieldName, reader);
+ }
+ SavedStreams streams = (SavedStreams) PreviousTokenStream;
+ if (streams == null)
+ {
+ streams = new SavedStreams();
+ PreviousTokenStream = streams;
+ streams.tokenStream = new StandardTokenizer(matchVersion, reader);
+ streams.filteredTokenStream = new StandardFilter(streams.tokenStream);
+ streams.filteredTokenStream = new LowerCaseFilter(streams.filteredTokenStream);
+ streams.filteredTokenStream = new StopFilter(enableStopPositionIncrements,
+ streams.filteredTokenStream, stopSet);
+ }
+ else
+ {
+ streams.tokenStream.Reset(reader);
+ }
+ streams.tokenStream.MaxTokenLength = maxTokenLength;
+
+ streams.tokenStream.SetReplaceInvalidAcronym(replaceInvalidAcronym);
+
+ return streams.filteredTokenStream;
+ }
+ static StandardAnalyzer()
+ {
+ STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
+ }
+ }
+} \ No newline at end of file
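A rough consumption sketch for the analyzer above (the field name and text are placeholders, and Version.LUCENE_29 is assumed to exist alongside the LUCENE_24 member referenced in this file):

    var analyzer = new StandardAnalyzer(Version.LUCENE_29);
    TokenStream stream = analyzer.TokenStream("body", new System.IO.StringReader("The quick brown fox."));
    var term = stream.AddAttribute<ITermAttribute>();
    while (stream.IncrementToken())
    {
        // stop words from STOP_WORDS_SET ("the", ...) are dropped; the rest arrive lowercased
        System.Console.WriteLine(new string(term.TermBuffer(), 0, term.TermLength()));
    }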
diff --git a/src/core/Analysis/Standard/StandardFilter.cs b/src/core/Analysis/Standard/StandardFilter.cs
new file mode 100644
index 0000000..fd13261
--- /dev/null
+++ b/src/core/Analysis/Standard/StandardFilter.cs
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Analysis.Tokenattributes;
+using Token = Lucene.Net.Analysis.Token;
+using TokenFilter = Lucene.Net.Analysis.TokenFilter;
+using TokenStream = Lucene.Net.Analysis.TokenStream;
+
+namespace Lucene.Net.Analysis.Standard
+{
+
+ /// <summary>Normalizes tokens extracted with <see cref="StandardTokenizer" />. </summary>
+
+ public sealed class StandardFilter:TokenFilter
+ {
+
+
+ /// <summary>Construct filtering <i>in</i>. </summary>
+ public StandardFilter(TokenStream in_Renamed):base(in_Renamed)
+ {
+ termAtt = AddAttribute<ITermAttribute>();
+ typeAtt = AddAttribute<ITypeAttribute>();
+ }
+
+ private static readonly System.String APOSTROPHE_TYPE;
+ private static readonly System.String ACRONYM_TYPE;
+
+ // this filter uses the type attribute
+ private ITypeAttribute typeAtt;
+ private ITermAttribute termAtt;
+
+ /// <summary>Returns the next token in the stream, or null at EOS.
+ /// <p/>Removes <tt>'s</tt> from the end of words.
+ /// <p/>Removes dots from acronyms.
+ /// </summary>
+ public override bool IncrementToken()
+ {
+ if (!input.IncrementToken())
+ {
+ return false;
+ }
+
+ char[] buffer = termAtt.TermBuffer();
+ int bufferLength = termAtt.TermLength();
+ System.String type = typeAtt.Type;
+
+ if ((System.Object) type == (System.Object) APOSTROPHE_TYPE && bufferLength >= 2 && buffer[bufferLength - 2] == '\'' && (buffer[bufferLength - 1] == 's' || buffer[bufferLength - 1] == 'S'))
+ {
+ // Strip last 2 characters off
+ termAtt.SetTermLength(bufferLength - 2);
+ }
+ else if ((System.Object) type == (System.Object) ACRONYM_TYPE)
+ {
+ // remove dots
+ int upto = 0;
+ for (int i = 0; i < bufferLength; i++)
+ {
+ char c = buffer[i];
+ if (c != '.')
+ buffer[upto++] = c;
+ }
+ termAtt.SetTermLength(upto);
+ }
+
+ return true;
+ }
+ static StandardFilter()
+ {
+ APOSTROPHE_TYPE = StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.APOSTROPHE];
+ ACRONYM_TYPE = StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.ACRONYM];
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Analysis/Standard/StandardTokenizer.cs b/src/core/Analysis/Standard/StandardTokenizer.cs
new file mode 100644
index 0000000..dca409d
--- /dev/null
+++ b/src/core/Analysis/Standard/StandardTokenizer.cs
@@ -0,0 +1,232 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+using CharReader = Lucene.Net.Analysis.CharReader;
+using Token = Lucene.Net.Analysis.Token;
+using Tokenizer = Lucene.Net.Analysis.Tokenizer;
+using AttributeSource = Lucene.Net.Util.AttributeSource;
+using Version = Lucene.Net.Util.Version;
+
+namespace Lucene.Net.Analysis.Standard
+{
+
+ /// <summary>A grammar-based tokenizer constructed with JFlex
+ ///
+ /// <p/> This should be a good tokenizer for most European-language documents:
+ ///
+ /// <list type="bullet">
+ /// <item>Splits words at punctuation characters, removing punctuation. However, a
+ /// dot that's not followed by whitespace is considered part of a token.</item>
+ /// <item>Splits words at hyphens, unless there's a number in the token, in which case
+ /// the whole token is interpreted as a product number and is not split.</item>
+ /// <item>Recognizes email addresses and internet hostnames as one token.</item>
+ /// </list>
+ ///
+ /// <p/>Many applications have specific tokenizer needs. If this tokenizer does
+ /// not suit your application, please consider copying this source code
+ /// directory to your project and maintaining your own grammar-based tokenizer.
+ ///
+ /// <a name="version"/>
+ /// <p/>
+ /// You must specify the required <see cref="Version" /> compatibility when creating
+ /// StandardAnalyzer:
+ /// <list type="bullet">
+ /// <item>As of 2.4, Tokens incorrectly identified as acronyms are corrected (see
+ /// <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)</item>
+ /// </list>
+ /// </summary>
+
+ public sealed class StandardTokenizer:Tokenizer
+ {
+ private void InitBlock()
+ {
+ maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
+ }
+ /// <summary>A private instance of the JFlex-constructed scanner </summary>
+ private StandardTokenizerImpl scanner;
+
+ public const int ALPHANUM = 0;
+ public const int APOSTROPHE = 1;
+ public const int ACRONYM = 2;
+ public const int COMPANY = 3;
+ public const int EMAIL = 4;
+ public const int HOST = 5;
+ public const int NUM = 6;
+ public const int CJ = 7;
+
+ /// <deprecated> this solves a bug where HOSTs that end with '.' are identified
+ /// as ACRONYMs.
+ /// </deprecated>
+ [Obsolete("this solves a bug where HOSTs that end with '.' are identified as ACRONYMs.")]
+ public const int ACRONYM_DEP = 8;
+
+ /// <summary>String token types that correspond to token type int constants </summary>
+ public static readonly System.String[] TOKEN_TYPES = new System.String[]{"<ALPHANUM>", "<APOSTROPHE>", "<ACRONYM>", "<COMPANY>", "<EMAIL>", "<HOST>", "<NUM>", "<CJ>", "<ACRONYM_DEP>"};
+
+ private bool replaceInvalidAcronym;
+
+ private int maxTokenLength;
+
+ /// <summary>Set the max allowed token length. Any token longer
+ /// than this is skipped.
+ /// </summary>
+ public int MaxTokenLength
+ {
+ get { return maxTokenLength; }
+ set { this.maxTokenLength = value; }
+ }
+
+ /// <summary> Creates a new instance of the
+ /// <see cref="Lucene.Net.Analysis.Standard.StandardTokenizer" />. Attaches
+ /// the <c>input</c> to the newly created JFlex scanner.
+ ///
+ /// </summary>
+ /// <param name="matchVersion">Lucene version compatibility; see <a href="#version">above</a></param>
+ /// <param name="input">The input reader
+ ///
+ /// See http://issues.apache.org/jira/browse/LUCENE-1068
+ /// </param>
+ public StandardTokenizer(Version matchVersion, System.IO.TextReader input):base()
+ {
+ InitBlock();
+ this.scanner = new StandardTokenizerImpl(input);
+ Init(input, matchVersion);
+ }
+
+ /// <summary> Creates a new StandardTokenizer with a given <see cref="AttributeSource" />.</summary>
+ public StandardTokenizer(Version matchVersion, AttributeSource source, System.IO.TextReader input):base(source)
+ {
+ InitBlock();
+ this.scanner = new StandardTokenizerImpl(input);
+ Init(input, matchVersion);
+ }
+
+ /// <summary> Creates a new StandardTokenizer with a given
+ /// <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory" />
+ /// </summary>
+ public StandardTokenizer(Version matchVersion, AttributeFactory factory, System.IO.TextReader input):base(factory)
+ {
+ InitBlock();
+ this.scanner = new StandardTokenizerImpl(input);
+ Init(input, matchVersion);
+ }
+
+ private void Init(System.IO.TextReader input, Version matchVersion)
+ {
+ if (matchVersion.OnOrAfter(Version.LUCENE_24))
+ {
+ replaceInvalidAcronym = true;
+ }
+ else
+ {
+ replaceInvalidAcronym = false;
+ }
+ this.input = input;
+ termAtt = AddAttribute<ITermAttribute>();
+ offsetAtt = AddAttribute<IOffsetAttribute>();
+ posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
+ typeAtt = AddAttribute<ITypeAttribute>();
+ }
+
+ // this tokenizer generates four attributes:
+ // term, offset, positionIncrement and type
+ private ITermAttribute termAtt;
+ private IOffsetAttribute offsetAtt;
+ private IPositionIncrementAttribute posIncrAtt;
+ private ITypeAttribute typeAtt;
+
+ ///<summary>
+ /// See <see cref="Lucene.Net.Analysis.TokenStream.IncrementToken()" />
+ ///</summary>
+ public override bool IncrementToken()
+ {
+ ClearAttributes();
+ int posIncr = 1;
+
+ while (true)
+ {
+ int tokenType = scanner.GetNextToken();
+
+ if (tokenType == StandardTokenizerImpl.YYEOF)
+ {
+ return false;
+ }
+
+ if (scanner.Yylength() <= maxTokenLength)
+ {
+ posIncrAtt.PositionIncrement = posIncr;
+ scanner.GetText(termAtt);
+ int start = scanner.Yychar();
+ offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(start + termAtt.TermLength()));
+ // This 'if' should be removed in the next release. For now, it converts
+ // invalid acronyms to HOST. When removed, only the 'else' part should
+ // remain.
+ if (tokenType == StandardTokenizerImpl.ACRONYM_DEP)
+ {
+ if (replaceInvalidAcronym)
+ {
+ typeAtt.Type = StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.HOST];
+ termAtt.SetTermLength(termAtt.TermLength() - 1); // remove extra '.'
+ }
+ else
+ {
+ typeAtt.Type = StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.ACRONYM];
+ }
+ }
+ else
+ {
+ typeAtt.Type = StandardTokenizerImpl.TOKEN_TYPES[tokenType];
+ }
+ return true;
+ }
+ // When we skip a too-long term, we still increment the
+ // position increment
+ else
+ posIncr++;
+ }
+ }
+
+ public override void End()
+ {
+ // set final offset
+ int finalOffset = CorrectOffset(scanner.Yychar() + scanner.Yylength());
+ offsetAtt.SetOffset(finalOffset, finalOffset);
+ }
+
+ public override void Reset(System.IO.TextReader reader)
+ {
+ base.Reset(reader);
+ scanner.Reset(reader);
+ }
+
+ /// <summary>
+ /// Remove in 3.X and make true the only valid value
+ /// See https://issues.apache.org/jira/browse/LUCENE-1068
+ /// </summary>
+ /// <param name="replaceInvalidAcronym">Set to true to replace mischaracterized acronyms with the HOST type.
+ /// </param>
+ [Obsolete("Remove in 3.X and make true the only valid value. See https://issues.apache.org/jira/browse/LUCENE-1068")]
+ public void SetReplaceInvalidAcronym(bool replaceInvalidAcronym)
+ {
+ this.replaceInvalidAcronym = replaceInvalidAcronym;
+ }
+ }
+} \ No newline at end of file
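A sketch of consuming the tokenizer above directly through the four attributes it registers in Init(). This assumes IOffsetAttribute exposes StartOffset/EndOffset properties (as in Lucene.Net 3.0.x); the input string and class name are illustrative only:

using System;
using System.IO;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Analysis.Tokenattributes;
using Version = Lucene.Net.Util.Version;

class StandardTokenizerSketch
{
    static void Main()
    {
        var tokenizer = new StandardTokenizer(Version.LUCENE_29,
            new StringReader("lucene.apache.org released Lucene 2.9"));
        var termAtt = tokenizer.AddAttribute<ITermAttribute>();
        var offsetAtt = tokenizer.AddAttribute<IOffsetAttribute>();
        var typeAtt = tokenizer.AddAttribute<ITypeAttribute>();

        while (tokenizer.IncrementToken())
        {
            string term = new string(termAtt.TermBuffer(), 0, termAtt.TermLength());
            // e.g. "lucene.apache.org" should come back as a single <HOST> token
            Console.WriteLine("{0} [{1},{2}] {3}",
                term, offsetAtt.StartOffset, offsetAtt.EndOffset, typeAtt.Type);
        }
        tokenizer.End(); // records the final offset, as implemented above
    }
}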
diff --git a/src/core/Analysis/Standard/StandardTokenizerImpl.cs b/src/core/Analysis/Standard/StandardTokenizerImpl.cs
new file mode 100644
index 0000000..cb4bf5f
--- /dev/null
+++ b/src/core/Analysis/Standard/StandardTokenizerImpl.cs
@@ -0,0 +1,707 @@
+/* The following code was generated by JFlex 1.4.1 on 9/4/08 6:49 PM */
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/*
+ NOTE: if you change StandardTokenizerImpl.jflex and need to regenerate
+ the tokenizer, only use Java 1.4 !!!
+ This grammar currently uses constructs (eg :digit:, :letter:) whose
+ meaning can vary according to the JRE used to run jflex. See
+ https://issues.apache.org/jira/browse/LUCENE-1126 for details.
+ For backwards compatibility it is currently necessary to support
+ only Java 1.4 - this will change in Lucene 3.1.
+*/
+
+using System;
+using Lucene.Net.Analysis.Tokenattributes;
+using Token = Lucene.Net.Analysis.Token;
+
+namespace Lucene.Net.Analysis.Standard
+{
+
+
+ /// <summary> This class is a scanner generated by
+ /// <a href="http://www.jflex.de/">JFlex</a> 1.4.1
+ /// on 9/4/08 6:49 PM from the specification file
+ /// <tt>/tango/mike/src/lucene.standarddigit/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex</tt>
+ /// </summary>
+ class StandardTokenizerImpl
+ {
+
+ /// <summary>This character denotes the end of file </summary>
+ public const int YYEOF = - 1;
+
+ /// <summary>initial size of the lookahead buffer </summary>
+ private const int ZZ_BUFFERSIZE = 16384;
+
+ /// <summary>lexical states </summary>
+ public const int YYINITIAL = 0;
+
+ /// <summary> Translates characters to character classes</summary>
+ private const System.String ZZ_CMAP_PACKED = "\x0009\x0000\x0001\x0000\x0001\x000D\x0001\x0000\x0001\x0000\x0001\x000C\x0012\x0000\x0001\x0000\x0005\x0000\x0001\x0005" + "\x0001\x0003\x0004\x0000\x0001\x0009\x0001\x0007\x0001\x0004\x0001\x0009\x000A\x0002\x0006\x0000\x0001\x0006\x001A\x000A" + "\x0004\x0000\x0001\x0008\x0001\x0000\x001A\x000A\x002F\x0000\x0001\x000A\x000A\x0000\x0001\x000A\x0004\x0000\x0001\x000A" + "\x0005\x0000\x0017\x000A\x0001\x0000\x001F\x000A\x0001\x0000\u0128\x000A\x0002\x0000\x0012\x000A\x001C\x0000\x005E\x000A" + "\x0002\x0000\x0009\x000A\x0002\x0000\x0007\x000A\x000E\x0000\x0002\x000A\x000E\x0000\x0005\x000A\x0009\x0000\x0001\x000A" + "\x008B\x0000\x0001\x000A\x000B\x0000\x0001\x000A\x0001\x0000\x0003\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0014\x000A" + "\x0001\x0000\x002C\x000A\x0001\x0000\x0008\x000A\x0002\x0000\x001A\x000A\x000C\x0000\x0082\x000A\x000A\x0000\x0039\x000A" + "\x0002\x0000\x0002\x000A\x0002\x0000\x0002\x000A\x0003\x0000\x0026\x000A\x0002\x0000\x0002\x000A\x0037\x0000\x0026\x000A" + "\x0002\x0000\x0001\x000A\x0007\x0000\x0027\x000A\x0048\x0000\x001B\x000A\x0005\x0000\x0003\x000A\x002E\x0000\x001A\x000A" + "\x0005\x0000\x000B\x000A\x0015\x0000\x000A\x0002\x0007\x0000\x0063\x000A\x0001\x0000\x0001\x000A\x000F\x0000\x0002\x000A" + "\x0009\x0000\x000A\x0002\x0003\x000A\x0013\x0000\x0001\x000A\x0001\x0000\x001B\x000A\x0053\x0000\x0026\x000A\u015f\x0000" + "\x0035\x000A\x0003\x0000\x0001\x000A\x0012\x0000\x0001\x000A\x0007\x0000\x000A\x000A\x0004\x0000\x000A\x0002\x0015\x0000" + "\x0008\x000A\x0002\x0000\x0002\x000A\x0002\x0000\x0016\x000A\x0001\x0000\x0007\x000A\x0001\x0000\x0001\x000A\x0003\x0000" + "\x0004\x000A\x0022\x0000\x0002\x000A\x0001\x0000\x0003\x000A\x0004\x0000\x000A\x0002\x0002\x000A\x0013\x0000\x0006\x000A" + "\x0004\x0000\x0002\x000A\x0002\x0000\x0016\x000A\x0001\x0000\x0007\x000A\x0001\x0000\x0002\x000A\x0001\x0000\x0002\x000A" +
+ "\x0001\x0000\x0002\x000A\x001F\x0000\x0004\x000A\x0001\x0000\x0001\x000A\x0007\x0000\x000A\x0002\x0002\x0000\x0003\x000A" + "\x0010\x0000\x0007\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0003\x000A\x0001\x0000\x0016\x000A\x0001\x0000\x0007\x000A" + "\x0001\x0000\x0002\x000A\x0001\x0000\x0005\x000A\x0003\x0000\x0001\x000A\x0012\x0000\x0001\x000A\x000F\x0000\x0001\x000A" + "\x0005\x0000\x000A\x0002\x0015\x0000\x0008\x000A\x0002\x0000\x0002\x000A\x0002\x0000\x0016\x000A\x0001\x0000\x0007\x000A" + "\x0001\x0000\x0002\x000A\x0002\x0000\x0004\x000A\x0003\x0000\x0001\x000A\x001E\x0000\x0002\x000A\x0001\x0000\x0003\x000A" + "\x0004\x0000\x000A\x0002\x0015\x0000\x0006\x000A\x0003\x0000\x0003\x000A\x0001\x0000\x0004\x000A\x0003\x0000\x0002\x000A" + "\x0001\x0000\x0001\x000A\x0001\x0000\x0002\x000A\x0003\x0000\x0002\x000A\x0003\x0000\x0003\x000A\x0003\x0000\x0008\x000A" + "\x0001\x0000\x0003\x000A\x002D\x0000\x0009\x0002\x0015\x0000\x0008\x000A\x0001\x0000\x0003\x000A\x0001\x0000\x0017\x000A" + "\x0001\x0000\x000A\x000A\x0001\x0000\x0005\x000A\x0026\x0000\x0002\x000A\x0004\x0000\x000A\x0002\x0015\x0000\x0008\x000A" + "\x0001\x0000\x0003\x000A\x0001\x0000\x0017\x000A\x0001\x0000\x000A\x000A\x0001\x0000\x0005\x000A\x0024\x0000\x0001\x000A" + "\x0001\x0000\x0002\x000A\x0004\x0000\x000A\x0002\x0015\x0000\x0008\x000A\x0001\x0000\x0003\x000A\x0001\x0000\x0017\x000A" + "\x0001\x0000\x0010\x000A\x0026\x0000\x0002\x000A\x0004\x0000\x000A\x0002\x0015\x0000\x0012\x000A\x0003\x0000\x0018\x000A" + "\x0001\x0000\x0009\x000A\x0001\x0000\x0001\x000A\x0002\x0000\x0007\x000A\x0039\x0000\x0001\x0001\x0030\x000A\x0001\x0001" + "\x0002\x000A\x000C\x0001\x0007\x000A\x0009\x0001\x000A\x0002\x0027\x0000\x0002\x000A\x0001\x0000\x0001\x000A\x0002\x0000" + "\x0002\x000A\x0001\x0000\x0001\x000A\x0002\x0000\x0001\x000A\x0006\x0000\x0004\x000A\x0001\x0000\x0007\x000A\x0001\x0000" + "\x0003\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0001\x000A\x0002\x0000\x0002\x000A\x0001\x0000\x0004\x000A\x0001\x0000" +
+ "\x0002\x000A\x0009\x0000\x0001\x000A\x0002\x0000\x0005\x000A\x0001\x0000\x0001\x000A\x0009\x0000\x000A\x0002\x0002\x0000" + "\x0002\x000A\x0022\x0000\x0001\x000A\x001F\x0000\x000A\x0002\x0016\x0000\x0008\x000A\x0001\x0000\x0022\x000A\x001D\x0000" + "\x0004\x000A\x0074\x0000\x0022\x000A\x0001\x0000\x0005\x000A\x0001\x0000\x0002\x000A\x0015\x0000\x000A\x0002\x0006\x0000" + "\x0006\x000A\x004A\x0000\x0026\x000A\x000A\x0000\x0027\x000A\x0009\x0000\x005A\x000A\x0005\x0000\x0044\x000A\x0005\x0000" + "\x0052\x000A\x0006\x0000\x0007\x000A\x0001\x0000\x003F\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0004\x000A\x0002\x0000" + "\x0007\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0004\x000A\x0002\x0000\x0027\x000A\x0001\x0000\x0001\x000A\x0001\x0000" + "\x0004\x000A\x0002\x0000\x001F\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0004\x000A\x0002\x0000\x0007\x000A\x0001\x0000" + "\x0001\x000A\x0001\x0000\x0004\x000A\x0002\x0000\x0007\x000A\x0001\x0000\x0007\x000A\x0001\x0000\x0017\x000A\x0001\x0000" + "\x001F\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0004\x000A\x0002\x0000\x0007\x000A\x0001\x0000\x0027\x000A\x0001\x0000" + "\x0013\x000A\x000E\x0000\x0009\x0002\x002E\x0000\x0055\x000A\x000C\x0000\u026c\x000A\x0002\x0000\x0008\x000A\x000A\x0000" + "\x001A\x000A\x0005\x0000\x004B\x000A\x0095\x0000\x0034\x000A\x002C\x0000\x000A\x0002\x0026\x0000\x000A\x0002\x0006\x0000" + "\x0058\x000A\x0008\x0000\x0029\x000A\u0557\x0000\x009C\x000A\x0004\x0000\x005A\x000A\x0006\x0000\x0016\x000A\x0002\x0000" + "\x0006\x000A\x0002\x0000\x0026\x000A\x0002\x0000\x0006\x000A\x0002\x0000\x0008\x000A\x0001\x0000\x0001\x000A\x0001\x0000" + "\x0001\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x001F\x000A\x0002\x0000\x0035\x000A\x0001\x0000\x0007\x000A\x0001\x0000" + "\x0001\x000A\x0003\x0000\x0003\x000A\x0001\x0000\x0007\x000A\x0003\x0000\x0004\x000A\x0002\x0000\x0006\x000A\x0004\x0000" + "\x000D\x000A\x0005\x0000\x0003\x000A\x0001\x0000\x0007\x000A\x0082\x0000\x0001\x000A\x0082\x0000\x0001\x000A\x0004\x0000" +
+ "\x0001\x000A\x0002\x0000\x000A\x000A\x0001\x0000\x0001\x000A\x0003\x0000\x0005\x000A\x0006\x0000\x0001\x000A\x0001\x0000" + "\x0001\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0004\x000A\x0001\x0000\x0003\x000A\x0001\x0000\x0007\x000A\u0ecb\x0000" + "\x0002\x000A\x002A\x0000\x0005\x000A\x000A\x0000\x0001\x000B\x0054\x000B\x0008\x000B\x0002\x000B\x0002\x000B\x005A\x000B" + "\x0001\x000B\x0003\x000B\x0006\x000B\x0028\x000B\x0003\x000B\x0001\x0000\x005E\x000A\x0011\x0000\x0018\x000A\x0038\x0000" + "\x0010\x000B\u0100\x0000\x0080\x000B\x0080\x0000\u19b6\x000B\x000A\x000B\x0040\x0000\u51a6\x000B\x005A\x000B\u048d\x000A" + "\u0773\x0000\u2ba4\x000A\u215c\x0000\u012e\x000B\x00D2\x000B\x0007\x000A\x000C\x0000\x0005\x000A\x0005\x0000\x0001\x000A" + "\x0001\x0000\x000A\x000A\x0001\x0000\x000D\x000A\x0001\x0000\x0005\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0002\x000A" + "\x0001\x0000\x0002\x000A\x0001\x0000\x006C\x000A\x0021\x0000\u016b\x000A\x0012\x0000\x0040\x000A\x0002\x0000\x0036\x000A" + "\x0028\x0000\x000C\x000A\x0074\x0000\x0003\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0087\x000A\x0013\x0000\x000A\x0002" + "\x0007\x0000\x001A\x000A\x0006\x0000\x001A\x000A\x000A\x0000\x0001\x000B\x003A\x000B\x001F\x000A\x0003\x0000\x0006\x000A" + "\x0002\x0000\x0006\x000A\x0002\x0000\x0006\x000A\x0002\x0000\x0003\x000A\x0023\x0000";
+
+ /// <summary> Translates characters to character classes</summary>
+ private static readonly char[] ZZ_CMAP = ZzUnpackCMap(ZZ_CMAP_PACKED);
+
+ /// <summary> Translates DFA states to action switch labels.</summary>
+ private static readonly int[] ZZ_ACTION = ZzUnpackAction();
+
+ private const System.String ZZ_ACTION_PACKED_0 = "\x0001\x0000\x0001\x0001\x0003\x0002\x0001\x0003\x0001\x0001\x000B\x0000\x0001\x0002\x0003\x0004" + "\x0002\x0000\x0001\x0005\x0001\x0000\x0001\x0005\x0003\x0004\x0006\x0005\x0001\x0006\x0001\x0004" + "\x0002\x0007\x0001\x0008\x0001\x0000\x0001\x0008\x0003\x0000\x0002\x0008\x0001\x0009\x0001\x000A" + "\x0001\x0004";
+
+ private static int[] ZzUnpackAction()
+ {
+ int[] result = new int[51];
+ int offset = 0;
+ offset = ZzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
+ return result;
+ }
+
+ private static int ZzUnpackAction(System.String packed, int offset, int[] result)
+ {
+ int i = 0; /* index in packed string */
+ int j = offset; /* index in unpacked array */
+ int l = packed.Length;
+ while (i < l)
+ {
+ int count = packed[i++];
+ int value_Renamed = packed[i++];
+ do
+ result[j++] = value_Renamed;
+ while (--count > 0);
+ }
+ return j;
+ }
+
+
+ /// <summary> Translates a state to a row index in the transition table</summary>
+ private static readonly int[] ZZ_ROWMAP = ZzUnpackRowMap();
+
+ private const System.String ZZ_ROWMAP_PACKED_0 = "\x0000\x0000\x0000\x000E\x0000\x001C\x0000\x002A\x0000\x0038\x0000\x000E\x0000\x0046\x0000\x0054" + "\x0000\x0062\x0000\x0070\x0000\x007E\x0000\x008C\x0000\x009A\x0000\x00A8\x0000\x00B6\x0000\x00C4" + "\x0000\x00D2\x0000\x00E0\x0000\x00EE\x0000\x00FC\x0000\u010a\x0000\u0118\x0000\u0126\x0000\u0134" + "\x0000\u0142\x0000\u0150\x0000\u015e\x0000\u016c\x0000\u017a\x0000\u0188\x0000\u0196\x0000\u01a4" + "\x0000\u01b2\x0000\u01c0\x0000\u01ce\x0000\u01dc\x0000\u01ea\x0000\u01f8\x0000\x00D2\x0000\u0206" + "\x0000\u0214\x0000\u0222\x0000\u0230\x0000\u023e\x0000\u024c\x0000\u025a\x0000\x0054\x0000\x008C" + "\x0000\u0268\x0000\u0276\x0000\u0284";
+
+ private static int[] ZzUnpackRowMap()
+ {
+ int[] result = new int[51];
+ int offset = 0;
+ offset = ZzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
+ return result;
+ }
+
+ private static int ZzUnpackRowMap(System.String packed, int offset, int[] result)
+ {
+ int i = 0; /* index in packed string */
+ int j = offset; /* index in unpacked array */
+ int l = packed.Length;
+ while (i < l)
+ {
+ int high = packed[i++] << 16;
+ result[j++] = high | packed[i++];
+ }
+ return j;
+ }
+
+ /// <summary> The transition table of the DFA</summary>
+ private static readonly int[] ZZ_TRANS = ZzUnpackTrans();
+
+ private const System.String ZZ_TRANS_PACKED_0 = "\x0001\x0002\x0001\x0003\x0001\x0004\x0007\x0002\x0001\x0005\x0001\x0006\x0001\x0007\x0001\x0002" + "\x000F\x0000\x0002\x0003\x0001\x0000\x0001\x0008\x0001\x0000\x0001\x0009\x0002\x000A\x0001\x000B" + "\x0001\x0003\x0004\x0000\x0001\x0003\x0001\x0004\x0001\x0000\x0001\x000C\x0001\x0000\x0001\x0009" + "\x0002\x000D\x0001\x000E\x0001\x0004\x0004\x0000\x0001\x0003\x0001\x0004\x0001\x000F\x0001\x0010" + "\x0001\x0011\x0001\x0012\x0002\x000A\x0001\x000B\x0001\x0013\x0010\x0000\x0001\x0002\x0001\x0000" + "\x0001\x0014\x0001\x0015\x0007\x0000\x0001\x0016\x0004\x0000\x0002\x0017\x0007\x0000\x0001\x0017" + "\x0004\x0000\x0001\x0018\x0001\x0019\x0007\x0000\x0001\x001A\x0005\x0000\x0001\x001B\x0007\x0000" + "\x0001\x000B\x0004\x0000\x0001\x001C\x0001\x001D\x0007\x0000\x0001\x001E\x0004\x0000\x0001\x001F" + "\x0001\x0020\x0007\x0000\x0001\x0021\x0004\x0000\x0001\x0022\x0001\x0023\x0007\x0000\x0001\x0024" + "\x000D\x0000\x0001\x0025\x0004\x0000\x0001\x0014\x0001\x0015\x0007\x0000\x0001\x0026\x000D\x0000" + "\x0001\x0027\x0004\x0000\x0002\x0017\x0007\x0000\x0001\x0028\x0004\x0000\x0001\x0003\x0001\x0004" + "\x0001\x000F\x0001\x0008\x0001\x0011\x0001\x0012\x0002\x000A\x0001\x000B\x0001\x0013\x0004\x0000" + "\x0002\x0014\x0001\x0000\x0001\x0029\x0001\x0000\x0001\x0009\x0002\x002A\x0001\x0000\x0001\x0014" + "\x0004\x0000\x0001\x0014\x0001\x0015\x0001\x0000\x0001\x002B\x0001\x0000\x0001\x0009\x0002\x002C" + "\x0001\x002D\x0001\x0015\x0004\x0000\x0001\x0014\x0001\x0015\x0001\x0000\x0001\x0029\x0001\x0000" + "\x0001\x0009\x0002\x002A\x0001\x0000\x0001\x0016\x0004\x0000\x0002\x0017\x0001\x0000\x0001\x002E" + "\x0002\x0000\x0001\x002E\x0002\x0000\x0001\x0017\x0004\x0000\x0002\x0018\x0001\x0000\x0001\x002A" + "\x0001\x0000\x0001\x0009\x0002\x002A\x0001\x0000\x0001\x0018\x0004\x0000\x0001\x0018\x0001\x0019" + "\x0001\x0000\x0001\x002C\x0001\x0000\x0001\x0009\x0002\x002C\x0001\x002D\x0001\x0019\x0004\x0000" +
+ "\x0001\x0018\x0001\x0019\x0001\x0000\x0001\x002A\x0001\x0000\x0001\x0009\x0002\x002A\x0001\x0000" + "\x0001\x001A\x0005\x0000\x0001\x001B\x0001\x0000\x0001\x002D\x0002\x0000\x0003\x002D\x0001\x001B" + "\x0004\x0000\x0002\x001C\x0001\x0000\x0001\x002F\x0001\x0000\x0001\x0009\x0002\x000A\x0001\x000B" + "\x0001\x001C\x0004\x0000\x0001\x001C\x0001\x001D\x0001\x0000\x0001\x0030\x0001\x0000\x0001\x0009" + "\x0002\x000D\x0001\x000E\x0001\x001D\x0004\x0000\x0001\x001C\x0001\x001D\x0001\x0000\x0001\x002F" + "\x0001\x0000\x0001\x0009\x0002\x000A\x0001\x000B\x0001\x001E\x0004\x0000\x0002\x001F\x0001\x0000" + "\x0001\x000A\x0001\x0000\x0001\x0009\x0002\x000A\x0001\x000B\x0001\x001F\x0004\x0000\x0001\x001F" + "\x0001\x0020\x0001\x0000\x0001\x000D\x0001\x0000\x0001\x0009\x0002\x000D\x0001\x000E\x0001\x0020" + "\x0004\x0000\x0001\x001F\x0001\x0020\x0001\x0000\x0001\x000A\x0001\x0000\x0001\x0009\x0002\x000A" + "\x0001\x000B\x0001\x0021\x0004\x0000\x0002\x0022\x0001\x0000\x0001\x000B\x0002\x0000\x0003\x000B" + "\x0001\x0022\x0004\x0000\x0001\x0022\x0001\x0023\x0001\x0000\x0001\x000E\x0002\x0000\x0003\x000E" + "\x0001\x0023\x0004\x0000\x0001\x0022\x0001\x0023\x0001\x0000\x0001\x000B\x0002\x0000\x0003\x000B" + "\x0001\x0024\x0006\x0000\x0001\x000F\x0006\x0000\x0001\x0025\x0004\x0000\x0001\x0014\x0001\x0015" + "\x0001\x0000\x0001\x0031\x0001\x0000\x0001\x0009\x0002\x002A\x0001\x0000\x0001\x0016\x0004\x0000" + "\x0002\x0017\x0001\x0000\x0001\x002E\x0002\x0000\x0001\x002E\x0002\x0000\x0001\x0028\x0004\x0000" + "\x0002\x0014\x0007\x0000\x0001\x0014\x0004\x0000\x0002\x0018\x0007\x0000\x0001\x0018\x0004\x0000" + "\x0002\x001C\x0007\x0000\x0001\x001C\x0004\x0000\x0002\x001F\x0007\x0000\x0001\x001F\x0004\x0000" + "\x0002\x0022\x0007\x0000\x0001\x0022\x0004\x0000\x0002\x0032\x0007\x0000\x0001\x0032\x0004\x0000" + "\x0002\x0014\x0007\x0000\x0001\x0033\x0004\x0000\x0002\x0032\x0001\x0000\x0001\x002E\x0002\x0000" + "\x0001\x002E\x0002\x0000\x0001\x0032\x0004\x0000\x0002\x0014\x0001\x0000\x0001\x0031\x0001\x0000" +
+ "\x0001\x0009\x0002\x002A\x0001\x0000\x0001\x0014\x0003\x0000";
+
+ private static int[] ZzUnpackTrans()
+ {
+ int[] result = new int[658];
+ int offset = 0;
+ offset = ZzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
+ return result;
+ }
+
+ private static int ZzUnpackTrans(System.String packed, int offset, int[] result)
+ {
+ int i = 0; /* index in packed string */
+ int j = offset; /* index in unpacked array */
+ int l = packed.Length;
+ while (i < l)
+ {
+ int count = packed[i++];
+ int value_Renamed = packed[i++];
+ value_Renamed--;
+ do
+ result[j++] = value_Renamed;
+ while (--count > 0);
+ }
+ return j;
+ }
+
+
+ /* error codes */
+ private const int ZZ_UNKNOWN_ERROR = 0;
+ private const int ZZ_NO_MATCH = 1;
+ private const int ZZ_PUSHBACK_2BIG = 2;
+
+ /* error messages for the codes above */
+ private static readonly System.String[] ZZ_ERROR_MSG = new System.String[]{"Unknown internal scanner error", "Error: could not match input", "Error: pushback value was too large"};
+
+ /// <summary> ZZ_ATTRIBUTE[aState] contains the attributes of state <c>aState</c></summary>
+ private static readonly int[] ZZ_ATTRIBUTE = ZzUnpackAttribute();
+
+ private const System.String ZZ_ATTRIBUTE_PACKED_0 = "\x0001\x0000\x0001\x0009\x0003\x0001\x0001\x0009\x0001\x0001\x000B\x0000\x0004\x0001\x0002\x0000" + "\x0001\x0001\x0001\x0000\x000F\x0001\x0001\x0000\x0001\x0001\x0003\x0000\x0005\x0001";
+
+ private static int[] ZzUnpackAttribute()
+ {
+ int[] result = new int[51];
+ int offset = 0;
+ offset = ZzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
+ return result;
+ }
+
+ private static int ZzUnpackAttribute(System.String packed, int offset, int[] result)
+ {
+ int i = 0; /* index in packed string */
+ int j = offset; /* index in unpacked array */
+ int l = packed.Length;
+ while (i < l)
+ {
+ int count = packed[i++];
+ int value_Renamed = packed[i++];
+ do
+ result[j++] = value_Renamed;
+ while (--count > 0);
+ }
+ return j;
+ }
+
+ /// <summary>the input device </summary>
+ private System.IO.TextReader zzReader;
+
+ /// <summary>the current state of the DFA </summary>
+ private int zzState;
+
+ /// <summary>the current lexical state </summary>
+ private int zzLexicalState = YYINITIAL;
+
+ /// <summary>this buffer contains the current text to be matched and is
+ /// the source of the yytext() string
+ /// </summary>
+ private char[] zzBuffer = new char[ZZ_BUFFERSIZE];
+
+ /// <summary>the text position at the last accepting state </summary>
+ private int zzMarkedPos;
+
+ /// <summary>the text position at the last state to be included in yytext </summary>
+ private int zzPushbackPos;
+
+ /// <summary>the current text position in the buffer </summary>
+ private int zzCurrentPos;
+
+ /// <summary>startRead marks the beginning of the yytext() string in the buffer </summary>
+ private int zzStartRead;
+
+ /// <summary>endRead marks the last character in the buffer that has been read
+ /// from input
+ /// </summary>
+ private int zzEndRead;
+
+ /// <summary>number of newlines encountered up to the start of the matched text </summary>
+ private int yyline;
+
+ /// <summary>the number of characters up to the start of the matched text </summary>
+ private int yychar;
+
+ /// <summary> the number of characters from the last newline up to the start of the
+ /// matched text
+ /// </summary>
+ private int yycolumn;
+
+ /// <summary> zzAtBOL == true &lt;=&gt; the scanner is currently at the beginning of a line</summary>
+ private bool zzAtBOL = true;
+
+ /// <summary>zzAtEOF == true &lt;=&gt; the scanner is at the EOF </summary>
+ private bool zzAtEOF;
+
+ /* user code: */
+
+ public static readonly int ALPHANUM;
+ public static readonly int APOSTROPHE;
+ public static readonly int ACRONYM;
+ public static readonly int COMPANY;
+ public static readonly int EMAIL;
+ public static readonly int HOST;
+ public static readonly int NUM;
+ public static readonly int CJ;
+ /// <deprecated> this solves a bug where HOSTs that end with '.' are identified
+ /// as ACRONYMs.
+ /// </deprecated>
+ [Obsolete("this solves a bug where HOSTs that end with '.' are identified as ACRONYMs")]
+ public static readonly int ACRONYM_DEP;
+
+ public static readonly System.String[] TOKEN_TYPES;
+
+ public int Yychar()
+ {
+ return yychar;
+ }
+
+ /*
+ * Resets the Tokenizer to a new Reader.
+ */
+ internal void Reset(System.IO.TextReader r)
+ {
+ // reset to default buffer size, if buffer has grown
+ if (zzBuffer.Length > ZZ_BUFFERSIZE)
+ {
+ zzBuffer = new char[ZZ_BUFFERSIZE];
+ }
+ Yyreset(r);
+ }
+
+ /// <summary> Fills Lucene token with the current token text.</summary>
+ internal void GetText(Token t)
+ {
+ t.SetTermBuffer(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead);
+ }
+
+ /// <summary> Fills TermAttribute with the current token text.</summary>
+ internal void GetText(ITermAttribute t)
+ {
+ t.SetTermBuffer(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead);
+ }
+
+
+ /// <summary> Creates a new scanner.
+ /// There is also a System.IO.Stream version of this constructor.
+ ///
+ /// </summary>
+ /// <param name="in_Renamed"> the System.IO.TextReader to read input from.
+ /// </param>
+ internal StandardTokenizerImpl(System.IO.TextReader in_Renamed)
+ {
+ this.zzReader = in_Renamed;
+ }
+
+ /// <summary> Creates a new scanner.
+ /// There is also a System.IO.TextReader version of this constructor.
+ ///
+ /// </summary>
+ /// <param name="in_Renamed"> the System.IO.Stream to read input from.
+ /// </param>
+ internal StandardTokenizerImpl(System.IO.Stream in_Renamed):this(new System.IO.StreamReader(in_Renamed, System.Text.Encoding.Default))
+ {
+ }
+
+ /// <summary> Unpacks the compressed character translation table.
+ ///
+ /// </summary>
+ /// <param name="packed"> the packed character translation table
+ /// </param>
+ /// <returns> the unpacked character translation table
+ /// </returns>
+ private static char[] ZzUnpackCMap(System.String packed)
+ {
+ char[] map = new char[0x10000];
+ int i = 0; /* index in packed string */
+ int j = 0; /* index in unpacked array */
+ while (i < 1154)
+ {
+ int count = packed[i++];
+ char value_Renamed = packed[i++];
+ do
+ map[j++] = value_Renamed;
+ while (--count > 0);
+ }
+ return map;
+ }
+
+
+ /// <summary> Refills the input buffer.
+ /// </summary>
+ /// <returns><c>false</c>, iff there was new input.
+ ///
+ /// </returns>
+ /// <exception cref="System.IO.IOException"> if any I/O-Error occurs
+ /// </exception>
+ private bool ZzRefill()
+ {
+
+ /* first: make room (if you can) */
+ if (zzStartRead > 0)
+ {
+ Array.Copy(zzBuffer, zzStartRead, zzBuffer, 0, zzEndRead - zzStartRead);
+
+ /* translate stored positions */
+ zzEndRead -= zzStartRead;
+ zzCurrentPos -= zzStartRead;
+ zzMarkedPos -= zzStartRead;
+ zzPushbackPos -= zzStartRead;
+ zzStartRead = 0;
+ }
+
+ /* is the buffer big enough? */
+ if (zzCurrentPos >= zzBuffer.Length)
+ {
+ /* if not: blow it up */
+ char[] newBuffer = new char[zzCurrentPos * 2];
+ Array.Copy(zzBuffer, 0, newBuffer, 0, zzBuffer.Length);
+ zzBuffer = newBuffer;
+ }
+
+ /* finally: fill the buffer with new input */
+ int numRead = zzReader.Read(zzBuffer, zzEndRead, zzBuffer.Length - zzEndRead);
+
+ if (numRead <= 0)
+ {
+ return true;
+ }
+ else
+ {
+ zzEndRead += numRead;
+ return false;
+ }
+ }
+
+
+ /// <summary> Closes the input stream.</summary>
+ public void Yyclose()
+ {
+ zzAtEOF = true; /* indicate end of file */
+ zzEndRead = zzStartRead; /* invalidate buffer */
+
+ if (zzReader != null)
+ zzReader.Close();
+ }
+
+
+ /// <summary> Resets the scanner to read from a new input stream.
+ /// Does not close the old reader.
+ ///
+ /// All internal variables are reset, the old input stream
+ /// <b>cannot</b> be reused (internal buffer is discarded and lost).
+ /// Lexical state is set to <tt>YYINITIAL</tt>.
+ ///
+ /// </summary>
+ /// <param name="reader"> the new input stream
+ /// </param>
+ public void Yyreset(System.IO.TextReader reader)
+ {
+ zzReader = reader;
+ zzAtBOL = true;
+ zzAtEOF = false;
+ zzEndRead = zzStartRead = 0;
+ zzCurrentPos = zzMarkedPos = zzPushbackPos = 0;
+ yyline = yychar = yycolumn = 0;
+ zzLexicalState = YYINITIAL;
+ }
+
+
+ /// <summary> Returns the current lexical state.</summary>
+ public int Yystate()
+ {
+ return zzLexicalState;
+ }
+
+
+ /// <summary> Enters a new lexical state
+ ///
+ /// </summary>
+ /// <param name="newState">the new lexical state
+ /// </param>
+ public void Yybegin(int newState)
+ {
+ zzLexicalState = newState;
+ }
+
+
+ /// <summary> Returns the text matched by the current regular expression.</summary>
+ public System.String Yytext()
+ {
+ return new System.String(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead);
+ }
+
+
+ /// <summary> Returns the character at position <tt>pos</tt> from the
+ /// matched text.
+ ///
+ /// It is equivalent to Yytext()[pos], but faster
+ ///
+ /// </summary>
+ /// <param name="pos">the position of the character to fetch.
+ /// A value from 0 to yylength()-1.
+ ///
+ /// </param>
+ /// <returns> the character at position pos
+ /// </returns>
+ public char Yycharat(int pos)
+ {
+ return zzBuffer[zzStartRead + pos];
+ }
+
+
+ /// <summary> Returns the length of the matched text region.</summary>
+ public int Yylength()
+ {
+ return zzMarkedPos - zzStartRead;
+ }
+
+
+ /// <summary> Reports an error that occurred while scanning.
+ ///
+ /// In a well-formed scanner (no or only correct usage of
+ /// yypushback(int) and a match-all fallback rule) this method
+ /// will only be called with things that "Can't Possibly Happen".
+ /// If this method is called, something is seriously wrong
+ /// (e.g. a JFlex bug producing a faulty scanner etc.).
+ ///
+ /// Usual syntax/scanner level error handling should be done
+ /// in error fallback rules.
+ ///
+ /// </summary>
+ /// <param name="errorCode"> the code of the error message to display
+ /// </param>
+ private void ZzScanError(int errorCode)
+ {
+ System.String message;
+ try
+ {
+ message = ZZ_ERROR_MSG[errorCode];
+ }
+ catch (System.IndexOutOfRangeException)
+ {
+ message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR];
+ }
+
+ throw new System.ApplicationException(message);
+ }
+
+
+ /// <summary> Pushes the specified amount of characters back into the input stream.
+ ///
+ /// They will be read again by the next call of the scanning method
+ ///
+ /// </summary>
+ /// <param name="number"> the number of characters to be read again.
+ /// This number must not be greater than yylength()!
+ /// </param>
+ public virtual void Yypushback(int number)
+ {
+ if (number > Yylength())
+ ZzScanError(ZZ_PUSHBACK_2BIG);
+
+ zzMarkedPos -= number;
+ }
+
+
+ /// <summary> Resumes scanning until the next regular expression is matched,
+ /// the end of input is encountered or an I/O-Error occurs.
+ ///
+ /// </summary>
+ /// <returns> the next token
+ /// </returns>
+ /// <exception cref="System.IO.IOException"> if any I/O-Error occurs
+ /// </exception>
+ public virtual int GetNextToken()
+ {
+ int zzInput;
+ int zzAction;
+
+ // cached fields:
+ int zzCurrentPosL;
+ int zzMarkedPosL;
+ int zzEndReadL = zzEndRead;
+ char[] zzBufferL = zzBuffer;
+ char[] zzCMapL = ZZ_CMAP;
+
+ int[] zzTransL = ZZ_TRANS;
+ int[] zzRowMapL = ZZ_ROWMAP;
+ int[] zzAttrL = ZZ_ATTRIBUTE;
+
+ while (true)
+ {
+ zzMarkedPosL = zzMarkedPos;
+
+ yychar += zzMarkedPosL - zzStartRead;
+
+ zzAction = - 1;
+
+ zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;
+
+ zzState = zzLexicalState;
+
+
+ {
+ while (true)
+ {
+
+ if (zzCurrentPosL < zzEndReadL)
+ zzInput = zzBufferL[zzCurrentPosL++];
+ else if (zzAtEOF)
+ {
+ zzInput = YYEOF;
+ goto zzForAction_brk; // {{Aroush-2.9}} this 'goto' may be in the wrong place
+ }
+ else
+ {
+ // store back cached positions
+ zzCurrentPos = zzCurrentPosL;
+ zzMarkedPos = zzMarkedPosL;
+ bool eof = ZzRefill();
+ // get translated positions and possibly new buffer
+ zzCurrentPosL = zzCurrentPos;
+ zzMarkedPosL = zzMarkedPos;
+ zzBufferL = zzBuffer;
+ zzEndReadL = zzEndRead;
+ if (eof)
+ {
+ zzInput = YYEOF;
+ goto zzForAction_brk; // {{Aroush-2.9}} this 'goto' may be in the wrong place
+ }
+ else
+ {
+ zzInput = zzBufferL[zzCurrentPosL++];
+ }
+ }
+ int zzNext = zzTransL[zzRowMapL[zzState] + zzCMapL[zzInput]];
+ if (zzNext == - 1)
+ {
+ goto zzForAction_brk; // {{Aroush-2.9}} this 'goto' may be in the wrong place
+ }
+ zzState = zzNext;
+
+ int zzAttributes = zzAttrL[zzState];
+ if ((zzAttributes & 1) == 1)
+ {
+ zzAction = zzState;
+ zzMarkedPosL = zzCurrentPosL;
+ if ((zzAttributes & 8) == 8)
+ {
+ goto zzForAction_brk; // {{Aroush-2.9}} this 'goto' may be in the wrong place
+ }
+ }
+ }
+ }
+
+zzForAction_brk: ; // {{Aroush-2.9}} this 'label' may be in the wrong place
+
+
+ // store back cached position
+ zzMarkedPos = zzMarkedPosL;
+
+ switch (zzAction < 0?zzAction:ZZ_ACTION[zzAction])
+ {
+
+ case 4:
+ {
+ return HOST;
+ }
+
+ case 11: break;
+
+ case 9:
+ {
+ return ACRONYM;
+ }
+
+ case 12: break;
+
+ case 8:
+ {
+ return ACRONYM_DEP;
+ }
+
+ case 13: break;
+
+ case 1:
+ {
+ /* ignore */
+ }
+ goto case 14;
+
+ case 14: break;
+
+ case 5:
+ {
+ return NUM;
+ }
+
+ case 15: break;
+
+ case 3:
+ {
+ return CJ;
+ }
+
+ case 16: break;
+
+ case 2:
+ {
+ return ALPHANUM;
+ }
+
+ case 17: break;
+
+ case 7:
+ {
+ return COMPANY;
+ }
+
+ case 18: break;
+
+ case 6:
+ {
+ return APOSTROPHE;
+ }
+
+ case 19: break;
+
+ case 10:
+ {
+ return EMAIL;
+ }
+
+ case 20: break;
+
+ default:
+ if (zzInput == YYEOF && zzStartRead == zzCurrentPos)
+ {
+ zzAtEOF = true;
+ return YYEOF;
+ }
+ else
+ {
+ ZzScanError(ZZ_NO_MATCH);
+ }
+ break;
+
+ }
+ }
+ }
+ static StandardTokenizerImpl()
+ {
+ ALPHANUM = StandardTokenizer.ALPHANUM;
+ APOSTROPHE = StandardTokenizer.APOSTROPHE;
+ ACRONYM = StandardTokenizer.ACRONYM;
+ COMPANY = StandardTokenizer.COMPANY;
+ EMAIL = StandardTokenizer.EMAIL;
+ HOST = StandardTokenizer.HOST;
+ NUM = StandardTokenizer.NUM;
+ CJ = StandardTokenizer.CJ;
+ ACRONYM_DEP = StandardTokenizer.ACRONYM_DEP;
+ TOKEN_TYPES = StandardTokenizer.TOKEN_TYPES;
+ }
+ }
+} \ No newline at end of file
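The ZZ_ACTION and ZZ_ATTRIBUTE tables above are stored as run-length encoded strings: each pair of characters is a (count, value) run that expands into count copies of value (ZZ_ROWMAP and ZZ_TRANS use slight variants of the same idea). A standalone sketch of that decoding, separate from the generated code and using a made-up packed string:

using System;

class ZzUnpackSketch
{
    // Expands a packed string of (count, value) character pairs into an int table.
    static int[] Unpack(string packed, int unpackedLength)
    {
        var result = new int[unpackedLength];
        int i = 0, j = 0;
        while (i < packed.Length)
        {
            int count = packed[i++];   // how many times to repeat the next value
            int value = packed[i++];   // the value itself
            do { result[j++] = value; } while (--count > 0);
        }
        return result;
    }

    static void Main()
    {
        // "\x0003\x0007\x0002\x0001" means: three 7s, then two 1s
        int[] table = Unpack("\x0003\x0007\x0002\x0001", 5);
        Console.WriteLine(string.Join(",", table)); // prints 7,7,7,1,1
    }
}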
diff --git a/src/core/Analysis/StopAnalyzer.cs b/src/core/Analysis/StopAnalyzer.cs
new file mode 100644
index 0000000..aabe197
--- /dev/null
+++ b/src/core/Analysis/StopAnalyzer.cs
@@ -0,0 +1,141 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.Collections.Generic;
+using Version = Lucene.Net.Util.Version;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// <summary> Filters <see cref="LetterTokenizer" /> with <see cref="LowerCaseFilter" /> and
+ /// <see cref="StopFilter" />.
+ ///
+ /// <a name="version"/>
+ /// <p/>
+ /// You must specify the required <see cref="Version" /> compatibility when creating
+ /// StopAnalyzer:
+ /// <list type="bullet">
+ /// <item>As of 2.9, position increments are preserved</item>
+ /// </list>
+ /// </summary>
+
+ public sealed class StopAnalyzer:Analyzer
+ {
+ private readonly ISet<string> stopWords;
+ private readonly bool enablePositionIncrements;
+
+ /// <summary>An unmodifiable set containing some common English words that are not usually useful
+ /// for searching.
+ /// </summary>
+ public static ISet<string> ENGLISH_STOP_WORDS_SET;
+
+ /// <summary> Builds an analyzer which removes words in ENGLISH_STOP_WORDS_SET.</summary>
+ public StopAnalyzer(Version matchVersion)
+ {
+ stopWords = ENGLISH_STOP_WORDS_SET;
+ enablePositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion);
+ }
+
+ /// <summary>Builds an analyzer with the stop words from the given set.</summary>
+ public StopAnalyzer(Version matchVersion, ISet<string> stopWords)
+ {
+ this.stopWords = stopWords;
+ enablePositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion);
+ }
+
+ /// <summary> Builds an analyzer with the stop words from the given file.
+ ///
+ /// </summary>
+ /// <seealso cref="WordlistLoader.GetWordSet(System.IO.FileInfo)">
+ /// </seealso>
+ /// <param name="matchVersion">See <a href="#version">above</a>
+ /// </param>
+ /// <param name="stopwordsFile">File to load stop words from
+ /// </param>
+ public StopAnalyzer(Version matchVersion, System.IO.FileInfo stopwordsFile)
+ {
+ stopWords = WordlistLoader.GetWordSet(stopwordsFile);
+ enablePositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion);
+ }
+
+ /// <summary>Builds an analyzer with the stop words from the given reader. </summary>
+ /// <seealso cref="WordlistLoader.GetWordSet(System.IO.TextReader)">
+ /// </seealso>
+ /// <param name="matchVersion">See <a href="#version">above</a>
+ /// </param>
+ /// <param name="stopwords">Reader to load stop words from
+ /// </param>
+ public StopAnalyzer(Version matchVersion, System.IO.TextReader stopwords)
+ {
+ stopWords = WordlistLoader.GetWordSet(stopwords);
+ enablePositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion);
+ }
+
+ /// <summary>Filters LowerCaseTokenizer with StopFilter. </summary>
+ public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
+ {
+ return new StopFilter(enablePositionIncrements, new LowerCaseTokenizer(reader), stopWords);
+ }
+
+ /// <summary>Holds the reusable LowerCaseTokenizer/StopFilter chain used by <see cref="ReusableTokenStream"/>. </summary>
+ private class SavedStreams
+ {
+ public SavedStreams(StopAnalyzer enclosingInstance)
+ {
+ InitBlock(enclosingInstance);
+ }
+ private void InitBlock(StopAnalyzer enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private StopAnalyzer enclosingInstance;
+ public StopAnalyzer Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ internal Tokenizer source;
+ internal TokenStream result;
+ }
+
+ public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
+ {
+ var streams = (SavedStreams) PreviousTokenStream;
+ if (streams == null)
+ {
+ streams = new SavedStreams(this) {source = new LowerCaseTokenizer(reader)};
+ streams.result = new StopFilter(enablePositionIncrements, streams.source, stopWords);
+ PreviousTokenStream = streams;
+ }
+ else
+ streams.source.Reset(reader);
+ return streams.result;
+ }
+ static StopAnalyzer()
+ {
+ {
+ var stopWords = new System.String[]{"a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"};
+ var stopSet = new CharArraySet(stopWords.Length, false);
+ stopSet.AddAll(stopWords);
+ ENGLISH_STOP_WORDS_SET = CharArraySet.UnmodifiableSet(stopSet);
+ }
+ }
+ }
+} \ No newline at end of file
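A minimal usage sketch for StopAnalyzer, relying only on the constructors and TokenStream method defined above; the field name "body", the sample sentence and the class name are illustrative:

using System;
using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;
using Version = Lucene.Net.Util.Version;

class StopAnalyzerSketch
{
    static void Main()
    {
        Analyzer analyzer = new StopAnalyzer(Version.LUCENE_29); // uses ENGLISH_STOP_WORDS_SET
        TokenStream ts = analyzer.TokenStream("body",
            new StringReader("The quick brown fox and the lazy dog"));
        ITermAttribute termAtt = ts.AddAttribute<ITermAttribute>();
        while (ts.IncrementToken())
        {
            // "The", "and" and "the" are stop words, so roughly
            // quick, brown, fox, lazy, dog should remain (lower-cased)
            Console.WriteLine(new string(termAtt.TermBuffer(), 0, termAtt.TermLength()));
        }
    }
}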
diff --git a/src/core/Analysis/StopFilter.cs b/src/core/Analysis/StopFilter.cs
new file mode 100644
index 0000000..81b7dd0
--- /dev/null
+++ b/src/core/Analysis/StopFilter.cs
@@ -0,0 +1,178 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+using QueryParser = Lucene.Net.QueryParsers.QueryParser;
+using Version = Lucene.Net.Util.Version;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// <summary> Removes stop words from a token stream.</summary>
+
+ public sealed class StopFilter:TokenFilter
+ {
+ private readonly CharArraySet stopWords;
+ private bool enablePositionIncrements = false;
+
+ private readonly ITermAttribute termAtt;
+ private readonly IPositionIncrementAttribute posIncrAtt;
+
+ /// <summary> Construct a token stream filtering the given input.
+ /// If <c>stopWords</c> is an instance of <see cref="CharArraySet" /> (true if
+ /// <c>makeStopSet()</c> was used to construct the set) it will be directly used
+ /// and <c>ignoreCase</c> will be ignored since <c>CharArraySet</c>
+ /// directly controls case sensitivity.
+ /// <p/>
+ /// If <c>stopWords</c> is not an instance of <see cref="CharArraySet" />,
+ /// a new CharArraySet will be constructed and <c>ignoreCase</c> will be
+ /// used to specify the case sensitivity of that set.
+ /// </summary>
+ /// <param name="enablePositionIncrements">true if token positions should record the removed stop words</param>
+ /// <param name="input">Input TokenStream</param>
+ /// <param name="stopWords">A Set of strings or char[] or any other ToString()-able set representing the stopwords</param>
+ /// <param name="ignoreCase">if true, all words are lower cased first</param>
+ public StopFilter(bool enablePositionIncrements, TokenStream input, ISet<string> stopWords, bool ignoreCase)
+ : base(input)
+ {
+ if (stopWords is CharArraySet)
+ {
+ this.stopWords = (CharArraySet) stopWords;
+ }
+ else
+ {
+ this.stopWords = new CharArraySet(stopWords.Count, ignoreCase);
+ this.stopWords.AddAll(stopWords);
+ }
+ this.enablePositionIncrements = enablePositionIncrements;
+ termAtt = AddAttribute<ITermAttribute>();
+ posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
+ }
+
+ /// <summary> Constructs a filter which removes words from the input
+ /// TokenStream that are named in the Set.
+ /// </summary>
+ /// <param name="enablePositionIncrements">true if token positions should record the removed stop words</param>
+ /// <param name="in">Input stream</param>
+ /// <param name="stopWords">A Set of strings or char[] or any other ToString()-able set representing the stopwords</param>
+ /// <seealso cref="MakeStopSet(String[])"/>
+ public StopFilter(bool enablePositionIncrements, TokenStream @in, ISet<string> stopWords)
+ : this(enablePositionIncrements, @in, stopWords, false)
+ { }
+
+ /// <summary> Builds a Set from an array of stop words,
+ /// appropriate for passing into the StopFilter constructor.
+ /// This permits this stopWords construction to be cached once when
+ /// an Analyzer is constructed.
+ ///
+ /// </summary>
+ /// <seealso cref="MakeStopSet(String[], bool)">passing false to ignoreCase</seealso>
+ public static ISet<string> MakeStopSet(params string[] stopWords)
+ {
+ return MakeStopSet(stopWords, false);
+ }
+
+ /// <summary> Builds a Set from an array of stop words,
+ /// appropriate for passing into the StopFilter constructor.
+ /// This permits this stopWords construction to be cached once when
+ /// an Analyzer is constructed.
+ /// </summary>
+ /// <param name="stopWords">A list of strings or char[] or any other ToString()-able list representing the stop words</param>
+ /// <seealso cref="MakeStopSet(String[], bool)">passing false to ignoreCase</seealso>
+ public static ISet<string> MakeStopSet(IList<object> stopWords)
+ {
+ return MakeStopSet(stopWords, false);
+ }
+
+ /// <summary>Builds a Set from an array of stop words, appropriate for passing into the StopFilter constructor.</summary>
+ /// <param name="stopWords">An array of stopwords</param>
+ /// <param name="ignoreCase">If true, all words are lower cased first.</param>
+ /// <returns> a Set containing the words</returns>
+ public static ISet<string> MakeStopSet(string[] stopWords, bool ignoreCase)
+ {
+ var stopSet = new CharArraySet(stopWords.Length, ignoreCase);
+ stopSet.AddAll(stopWords);
+ return stopSet;
+ }
+
+ /// <summary>Builds a Set from a list of stop words, appropriate for passing into the StopFilter constructor.</summary>
+ /// <param name="stopWords">A List of Strings or char[] or any other toString()-able list representing the stopwords </param>
+ /// <param name="ignoreCase">if true, all words are lower cased first</param>
+ /// <returns>A Set (<see cref="CharArraySet"/>) containing the words</returns>
+ public static ISet<string> MakeStopSet(IList<object> stopWords, bool ignoreCase)
+ {
+ var stopSet = new CharArraySet(stopWords.Count, ignoreCase);
+ foreach(var word in stopWords)
+ stopSet.Add(word.ToString());
+ return stopSet;
+ }
+
+ /// <summary> Returns the next input Token whose term() is not a stop word.</summary>
+ public override bool IncrementToken()
+ {
+ // return the first non-stop word found
+ int skippedPositions = 0;
+ while (input.IncrementToken())
+ {
+ if (!stopWords.Contains(termAtt.TermBuffer(), 0, termAtt.TermLength()))
+ {
+ if (enablePositionIncrements)
+ {
+ posIncrAtt.PositionIncrement = posIncrAtt.PositionIncrement + skippedPositions;
+ }
+ return true;
+ }
+ skippedPositions += posIncrAtt.PositionIncrement;
+ }
+ // reached EOS -- return false
+ return false;
+ }
+
+ /// <summary> Returns version-dependent default for enablePositionIncrements. Analyzers
+ /// that embed StopFilter use this method when creating the StopFilter. Prior
+ /// to 2.9, this returns false. On 2.9 or later, it returns true.
+ /// </summary>
+ public static bool GetEnablePositionIncrementsVersionDefault(Version matchVersion)
+ {
+ return matchVersion.OnOrAfter(Version.LUCENE_29);
+ }
+
+ /// <summary> If <c>true</c>, this StopFilter will preserve
+ /// positions of the incoming tokens (i.e., accumulate and
+ /// set position increments of the removed stop tokens).
+ /// Generally, <c>true</c> is best as it does not
+ /// lose information (positions of the original tokens)
+ /// during indexing.
+ ///
+ /// <p/> When set, when a token is stopped
+ /// (omitted), the position increment of the following
+ /// token is incremented.
+ ///
+ /// <p/> <b>NOTE</b>: be sure to also
+ /// set <see cref="QueryParser.EnablePositionIncrements" /> if
+ /// you use QueryParser to create queries.
+ /// </summary>
+ public bool EnablePositionIncrements
+ {
+ get { return enablePositionIncrements; }
+ set { enablePositionIncrements = value; }
+ }
+ }
+} \ No newline at end of file
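A sketch of the position-increment behaviour described above: with enablePositionIncrements set to true, the increment of the token following removed stop words accumulates the skipped positions. The stop list, tokenizer choice and sample text are illustrative only:

using System;
using System.Collections.Generic;
using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;

class StopFilterSketch
{
    static void Main()
    {
        ISet<string> stops = StopFilter.MakeStopSet("over", "the");
        TokenStream ts = new StopFilter(true,
            new LowerCaseTokenizer(new StringReader("jumps over the moon")), stops);
        var termAtt = ts.AddAttribute<ITermAttribute>();
        var posIncrAtt = ts.AddAttribute<IPositionIncrementAttribute>();
        while (ts.IncrementToken())
        {
            // expected: "jumps" with increment 1, then "moon" with increment 3,
            // because the removed "over" and "the" are accumulated into it
            Console.WriteLine("{0} (+{1})",
                new string(termAtt.TermBuffer(), 0, termAtt.TermLength()),
                posIncrAtt.PositionIncrement);
        }
    }
}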
diff --git a/src/core/Analysis/TeeSinkTokenFilter.cs b/src/core/Analysis/TeeSinkTokenFilter.cs
new file mode 100644
index 0000000..bec605e
--- /dev/null
+++ b/src/core/Analysis/TeeSinkTokenFilter.cs
@@ -0,0 +1,266 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using Attribute = Lucene.Net.Util.Attribute;
+using AttributeSource = Lucene.Net.Util.AttributeSource;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// <summary> This TokenFilter provides the ability to set aside attribute states
+ /// that have already been analyzed. This is useful in situations where multiple fields share
+ /// many common analysis steps and then go their separate ways.
+ /// <p/>
+ /// It is also useful for doing things like entity extraction or proper noun analysis as
+ /// part of the analysis workflow and saving off those tokens for use in another field.
+ ///
+ /// <code>
+ /// TeeSinkTokenFilter source1 = new TeeSinkTokenFilter(new WhitespaceTokenizer(reader1));
+ /// TeeSinkTokenFilter.SinkTokenStream sink1 = source1.newSinkTokenStream();
+ /// TeeSinkTokenFilter.SinkTokenStream sink2 = source1.newSinkTokenStream();
+ /// TeeSinkTokenFilter source2 = new TeeSinkTokenFilter(new WhitespaceTokenizer(reader2));
+ /// source2.addSinkTokenStream(sink1);
+ /// source2.addSinkTokenStream(sink2);
+ /// TokenStream final1 = new LowerCaseFilter(source1);
+ /// TokenStream final2 = source2;
+ /// TokenStream final3 = new EntityDetect(sink1);
+ /// TokenStream final4 = new URLDetect(sink2);
+ /// d.add(new Field("f1", final1));
+ /// d.add(new Field("f2", final2));
+ /// d.add(new Field("f3", final3));
+ /// d.add(new Field("f4", final4));
+ /// </code>
+ /// In this example, <c>sink1</c> and <c>sink2</c> will both get tokens from both
+ /// <c>reader1</c> and <c>reader2</c> after the whitespace tokenizer,
+ /// and we can further wrap any of these in extra analysis; more "sources" can be inserted if desired.
+ /// It is important that tees are consumed before sinks (in the above example, the tee field names must be
+ /// less than the sink's field names). If you are not sure which stream is consumed first, you can simply
+ /// add another sink and then pass all tokens to the sinks at once using <see cref="ConsumeAllTokens" />.
+ /// This TokenFilter is exhausted after that. In that case, change the example above to:
+ /// <code>
+ /// ...
+ /// TokenStream final1 = new LowerCaseFilter(source1.newSinkTokenStream());
+ /// TokenStream final2 = source2.newSinkTokenStream();
+ /// sink1.consumeAllTokens();
+ /// sink2.consumeAllTokens();
+ /// ...
+ /// </code>
+ /// In this case, the fields can be added in any order, because the sources are not used anymore and all sinks are ready.
+ /// <p/>Note, the EntityDetect and URLDetect TokenStreams are for the example and do not currently exist in Lucene.
+ /// </summary>
+ public sealed class TeeSinkTokenFilter:TokenFilter
+ {
+ public class AnonymousClassSinkFilter:SinkFilter
+ {
+ public override bool Accept(AttributeSource source)
+ {
+ return true;
+ }
+ }
+ private readonly LinkedList<WeakReference> sinks = new LinkedList<WeakReference>();
+
+ /// <summary> Instantiates a new TeeSinkTokenFilter.</summary>
+ public TeeSinkTokenFilter(TokenStream input):base(input)
+ {
+ }
+
+ /// <summary> Returns a new <see cref="SinkTokenStream" /> that receives all tokens consumed by this stream.</summary>
+ public SinkTokenStream NewSinkTokenStream()
+ {
+ return NewSinkTokenStream(ACCEPT_ALL_FILTER);
+ }
+
+ /// <summary> Returns a new <see cref="SinkTokenStream" /> that receives all tokens consumed by this stream
+ /// that pass the supplied filter.
+ /// </summary>
+ /// <seealso cref="SinkFilter">
+ /// </seealso>
+ public SinkTokenStream NewSinkTokenStream(SinkFilter filter)
+ {
+ var sink = new SinkTokenStream(this.CloneAttributes(), filter);
+ sinks.AddLast(new WeakReference(sink));
+ return sink;
+ }
+
+ /// <summary> Adds a <see cref="SinkTokenStream" /> created by another <c>TeeSinkTokenFilter</c>
+ /// to this one. The supplied stream will also receive all consumed tokens.
+ /// This method can be used to pass tokens from two different tees to one sink.
+ /// </summary>
+ public void AddSinkTokenStream(SinkTokenStream sink)
+ {
+ // check that sink has correct factory
+ if (!this.Factory.Equals(sink.Factory))
+ {
+ throw new System.ArgumentException("The supplied sink is not compatible to this tee");
+ }
+ // add any attribute impls missing from the existing sink
+ foreach (var impl in this.CloneAttributes().GetAttributeImplsIterator())
+ {
+ sink.AddAttributeImpl(impl);
+ }
+ sinks.AddLast(new WeakReference(sink));
+ }
+
+ /// <summary> <c>TeeSinkTokenFilter</c> passes all tokens to the added sinks
+ /// as it is itself consumed. To be sure that all tokens from the input
+ /// stream are passed to the sinks, you can call this method.
+ /// This instance is exhausted afterwards, but all sinks are immediately available.
+ /// </summary>
+ public void ConsumeAllTokens()
+ {
+ while (IncrementToken())
+ {
+ }
+ }
+
+ public override bool IncrementToken()
+ {
+ if (input.IncrementToken())
+ {
+ // capture state lazily - maybe no SinkFilter accepts this state
+ State state = null;
+ foreach(WeakReference wr in sinks)
+ {
+ var sink = (SinkTokenStream)wr.Target;
+ if (sink != null)
+ {
+ if (sink.Accept(this))
+ {
+ if (state == null)
+ {
+ state = this.CaptureState();
+ }
+ sink.AddState(state);
+ }
+ }
+ }
+ return true;
+ }
+
+ return false;
+ }
+
+ public override void End()
+ {
+ base.End();
+ State finalState = CaptureState();
+ foreach(WeakReference wr in sinks)
+ {
+ var sink = (SinkTokenStream)wr.Target;
+ if (sink != null)
+ {
+ sink.SetFinalState(finalState);
+ }
+ }
+ }
+
+ /// <summary> A filter that decides which <see cref="AttributeSource" /> states to store in the sink.</summary>
+ public abstract class SinkFilter
+ {
+ /// <summary> Returns true, iff the current state of the passed-in <see cref="AttributeSource" /> shall be stored
+ /// in the sink.
+ /// </summary>
+ public abstract bool Accept(AttributeSource source);
+
+ /// <summary> Called by <see cref="SinkTokenStream.Reset()" />. This method does nothing by default
+ /// and can optionally be overridden.
+ /// </summary>
+ public virtual void Reset()
+ {
+ // nothing to do; can be overridden
+ }
+ }
+
+ public sealed class SinkTokenStream : TokenStream
+ {
+ private readonly LinkedList<State> cachedStates = new LinkedList<State>();
+ private State finalState;
+ private IEnumerator<AttributeSource.State> it = null;
+ private readonly SinkFilter filter;
+
+ internal SinkTokenStream(AttributeSource source, SinkFilter filter)
+ : base(source)
+ {
+ this.filter = filter;
+ }
+
+ internal /*private*/ bool Accept(AttributeSource source)
+ {
+ return filter.Accept(source);
+ }
+
+ internal /*private*/ void AddState(AttributeSource.State state)
+ {
+ if (it != null)
+ {
+ throw new System.SystemException("The tee must be consumed before sinks are consumed.");
+ }
+ cachedStates.AddLast(state);
+ }
+
+ internal /*private*/ void SetFinalState(AttributeSource.State finalState)
+ {
+ this.finalState = finalState;
+ }
+
+ public override bool IncrementToken()
+ {
+ // lazy init the iterator
+ if (it == null)
+ {
+ it = cachedStates.GetEnumerator();
+ }
+
+ if (!it.MoveNext())
+ {
+ return false;
+ }
+
+ State state = it.Current;
+ RestoreState(state);
+ return true;
+ }
+
+ public override void End()
+ {
+ if (finalState != null)
+ {
+ RestoreState(finalState);
+ }
+ }
+
+ public override void Reset()
+ {
+ it = cachedStates.GetEnumerator();
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ // Do nothing.
+ }
+ }
+
+ private static readonly SinkFilter ACCEPT_ALL_FILTER;
+ static TeeSinkTokenFilter()
+ {
+ ACCEPT_ALL_FILTER = new AnonymousClassSinkFilter();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Analysis/Token.cs b/src/core/Analysis/Token.cs
new file mode 100644
index 0000000..3357f34
--- /dev/null
+++ b/src/core/Analysis/Token.cs
@@ -0,0 +1,852 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using Payload = Lucene.Net.Index.Payload;
+using TermPositions = Lucene.Net.Index.TermPositions;
+using ArrayUtil = Lucene.Net.Util.ArrayUtil;
+using Attribute = Lucene.Net.Util.Attribute;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// <summary>A Token is an occurrence of a term from the text of a field. It consists of
+ /// a term's text, the start and end offset of the term in the text of the field,
+ /// and a type string.
+ /// <p/>
+ /// The start and end offsets permit applications to re-associate a token with
+ /// its source text, e.g., to display highlighted query terms in a document
+ /// browser, or to show matching text fragments in a <abbr
+ /// title="KeyWord In Context">KWIC</abbr> display, etc.
+ /// <p/>
+ /// The type is a string, assigned by a lexical analyzer
+ /// (a.k.a. tokenizer), naming the lexical or syntactic class that the token
+ /// belongs to. For example an end of sentence marker token might be implemented
+ /// with type "eos". The default token type is "word".
+ /// <p/>
+ /// A Token can optionally have metadata (a.k.a. Payload) in the form of a variable
+ /// length byte array. Use <see cref="TermPositions.PayloadLength" /> and
+ /// <see cref="TermPositions.GetPayload(byte[], int)" /> to retrieve the payloads from the index.
+ /// </summary>
+ /// <summary><br/><br/>
+ /// </summary>
+ /// <summary><p/><b>NOTE:</b> As of 2.9, Token implements all <see cref="IAttribute" /> interfaces
+ /// that are part of core Lucene and can be found in the <see cref="Lucene.Net.Analysis.Tokenattributes"/> namespace.
+ /// Even though it is not necessary to use Token anymore, with the new TokenStream API it can
+ /// be used as a convenience class that implements all <see cref="IAttribute" />s, which is especially useful
+ /// to easily switch from the old to the new TokenStream API.
+ /// <br/><br/>
+ /// <p/>Tokenizers and TokenFilters should try to re-use a Token instance when
+ /// possible for best performance, by implementing the
+ /// <see cref="TokenStream.IncrementToken()" /> API.
+ /// Failing that, to create a new Token you should first use
+ /// one of the constructors that starts with null text. To load
+ /// the token from a char[] use <see cref="SetTermBuffer(char[], int, int)" />.
+ /// To load from a String use <see cref="SetTermBuffer(String)" /> or <see cref="SetTermBuffer(String, int, int)" />.
+ /// Alternatively you can get the Token's termBuffer by calling either <see cref="TermBuffer()" />,
+ /// if you know that your text is shorter than the capacity of the termBuffer
+ /// or <see cref="ResizeTermBuffer(int)" />, if there is any possibility
+ /// that you may need to grow the buffer. Fill in the characters of your term into this
+ /// buffer, with <see cref="string.ToCharArray(int, int)" /> if loading from a string,
+ /// or with <see cref="Array.Copy(Array, long, Array, long, long)" />, and finally call <see cref="SetTermLength(int)" /> to
+ /// set the length of the term text. See <a target="_top"
+ /// href="https://issues.apache.org/jira/browse/LUCENE-969">LUCENE-969</a>
+ /// for details.<p/>
+ /// <p/>Typical Token reuse patterns:
+ /// <list type="bullet">
+ /// <item> Copying text from a string (type is reset to <see cref="DEFAULT_TYPE" /> if not
+ /// specified):<br/>
+ /// <code>
+ /// return reusableToken.reinit(string, startOffset, endOffset[, type]);
+ /// </code>
+ /// </item>
+ /// <item> Copying some text from a string (type is reset to <see cref="DEFAULT_TYPE" />
+ /// if not specified):<br/>
+ /// <code>
+ /// return reusableToken.reinit(string, 0, string.length(), startOffset, endOffset[, type]);
+ /// </code>
+ /// </item>
+ /// <item> Copying text from char[] buffer (type is reset to <see cref="DEFAULT_TYPE" />
+ /// if not specified):<br/>
+ /// <code>
+ /// return reusableToken.reinit(buffer, 0, buffer.length, startOffset, endOffset[, type]);
+ /// </code>
+ /// </item>
+ /// <item> Copying some text from a char[] buffer (type is reset to
+ /// <see cref="DEFAULT_TYPE" /> if not specified):<br/>
+ /// <code>
+ /// return reusableToken.reinit(buffer, start, end - start, startOffset, endOffset[, type]);
+ /// </code>
+ /// </item>
+ /// <item> Copying from one Token to another (type is reset to
+ /// <see cref="DEFAULT_TYPE" /> if not specified):<br/>
+ /// <code>
+ /// return reusableToken.reinit(source.termBuffer(), 0, source.termLength(), source.startOffset(), source.endOffset()[, source.type()]);
+ /// </code>
+ /// </item>
+ /// </list>
+ /// A few things to note:
+ /// <list type="bullet">
+ /// <item>clear() initializes all of the fields to default values. This was changed in contrast to Lucene 2.4, but should affect no one.</item>
+ /// <item>Because <c>TokenStreams</c> can be chained, one cannot assume that the <c>Token's</c> current type is correct.</item>
+ /// <item>The startOffset and endOffset represent the start and offset in the
+ /// source text, so be careful in adjusting them.</item>
+ /// <item>When caching a reusable token, clone it. When injecting a cached token into a stream that can be reset, clone it again.</item>
+ /// </list>
+ /// <p/>
+ /// </summary>
+ /// <seealso cref="Lucene.Net.Index.Payload">
+ /// </seealso>
+ [Serializable]
+ public class Token : Attribute, ITermAttribute, ITypeAttribute, IPositionIncrementAttribute, IFlagsAttribute, IOffsetAttribute, IPayloadAttribute
+ {
+ public const String DEFAULT_TYPE = "word";
+
+ private const int MIN_BUFFER_SIZE = 10;
+
+ private char[] termBuffer;
+ private int termLength;
+ private int startOffset, endOffset;
+ private string type = DEFAULT_TYPE;
+ private int flags;
+ private Payload payload;
+ private int positionIncrement = 1;
+
+ /// <summary>Constructs a Token with null text. </summary>
+ public Token()
+ {
+ }
+
+ /// <summary>Constructs a Token with null text and start &amp; end
+ /// offsets.
+ /// </summary>
+ /// <param name="start">start offset in the source text</param>
+ /// <param name="end">end offset in the source text</param>
+ public Token(int start, int end)
+ {
+ startOffset = start;
+ endOffset = end;
+ }
+
+ /// <summary>Constructs a Token with null text and start &amp; end
+ /// offsets plus the Token type.
+ /// </summary>
+ /// <param name="start">start offset in the source text</param>
+ /// <param name="end">end offset in the source text</param>
+ /// <param name="typ">the lexical type of this Token</param>
+ public Token(int start, int end, String typ)
+ {
+ startOffset = start;
+ endOffset = end;
+ type = typ;
+ }
+
+ /// <summary> Constructs a Token with null text and start &amp; end
+ /// offsets plus flags. NOTE: flags is EXPERIMENTAL.
+ /// </summary>
+ /// <param name="start">start offset in the source text</param>
+ /// <param name="end">end offset in the source text</param>
+ /// <param name="flags">The bits to set for this token</param>
+ public Token(int start, int end, int flags)
+ {
+ startOffset = start;
+ endOffset = end;
+ this.flags = flags;
+ }
+
+ /// <summary>Constructs a Token with the given term text, and start
+ /// &amp; end offsets. The type defaults to "word."
+ /// <b>NOTE:</b> for better indexing speed you should
+ /// instead use the char[] termBuffer methods to set the
+ /// term text.
+ /// </summary>
+ /// <param name="text">term text</param>
+ /// <param name="start">start offset</param>
+ /// <param name="end">end offset</param>
+ public Token(String text, int start, int end)
+ {
+ SetTermBuffer(text);
+ startOffset = start;
+ endOffset = end;
+ }
+
+ /// <summary>Constructs a Token with the given text, start and end
+ /// offsets, &amp; type. <b>NOTE:</b> for better indexing
+ /// speed you should instead use the char[] termBuffer
+ /// methods to set the term text.
+ /// </summary>
+ /// <param name="text">term text</param>
+ /// <param name="start">start offset</param>
+ /// <param name="end">end offset</param>
+ /// <param name="typ">token type</param>
+ public Token(System.String text, int start, int end, System.String typ)
+ {
+ SetTermBuffer(text);
+ startOffset = start;
+ endOffset = end;
+ type = typ;
+ }
+
+ /// <summary> Constructs a Token with the given text, start and end
+ /// offsets, &amp; type. <b>NOTE:</b> for better indexing
+ /// speed you should instead use the char[] termBuffer
+ /// methods to set the term text.
+ /// </summary>
+ /// <param name="text"></param>
+ /// <param name="start"></param>
+ /// <param name="end"></param>
+ /// <param name="flags">token type bits</param>
+ public Token(System.String text, int start, int end, int flags)
+ {
+ SetTermBuffer(text);
+ startOffset = start;
+ endOffset = end;
+ this.flags = flags;
+ }
+
+ /// <summary> Constructs a Token with the given term buffer (offset
+ /// &amp; length), start and end
+ /// offsets
+ /// </summary>
+ /// <param name="startTermBuffer"></param>
+ /// <param name="termBufferOffset"></param>
+ /// <param name="termBufferLength"></param>
+ /// <param name="start"></param>
+ /// <param name="end"></param>
+ public Token(char[] startTermBuffer, int termBufferOffset, int termBufferLength, int start, int end)
+ {
+ SetTermBuffer(startTermBuffer, termBufferOffset, termBufferLength);
+ startOffset = start;
+ endOffset = end;
+ }
+
+ /// <summary>Set the position increment. This determines the position of this token
+ /// relative to the previous Token in a <see cref="TokenStream" />, used in phrase
+ /// searching.
+ ///
+ /// <p/>The default value is one.
+ ///
+ /// <p/>Some common uses for this are:<list>
+ ///
+ /// <item>Set it to zero to put multiple terms in the same position. This is
+ /// useful if, e.g., a word has multiple stems. Searches for phrases
+ /// including either stem will match. In this case, all but the first stem's
+ /// increment should be set to zero: the increment of the first instance
+ /// should be one. Repeating a token with an increment of zero can also be
+ /// used to boost the scores of matches on that token.</item>
+ ///
+ /// <item>Set it to values greater than one to inhibit exact phrase matches.
+ /// If, for example, one does not want phrases to match across removed stop
+ /// words, then one could build a stop word filter that removes stop words and
+ /// also sets the increment to the number of stop words removed before each
+ /// non-stop word. Then exact phrase queries will only match when the terms
+ /// occur with no intervening stop words.</item>
+ ///
+ /// </list>
+ /// </summary>
+ /// <value> the distance from the prior term </value>
+ /// <seealso cref="Lucene.Net.Index.TermPositions">
+ /// </seealso>
+ public virtual int PositionIncrement
+ {
+ set
+ {
+ if (value < 0)
+ throw new System.ArgumentException("Increment must be zero or greater: " + value);
+ this.positionIncrement = value;
+ }
+ get { return positionIncrement; }
+ }
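+
+ // Hedged sketch (not part of the original source): stacking a synonym at the same
+ // position as the current term, as described above. `synonymText` and `current` are
+ // illustrative names for a synonym string and the token being augmented.
+ //
+ //   var synonym = new Token(synonymText, current.StartOffset, current.EndOffset);
+ //   synonym.PositionIncrement = 0;   // zero increment => same position as `current`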
+
+ /// <summary>Returns the Token's term text.
+ ///
+ /// This method has a performance penalty
+ /// because the text is stored internally in a char[]. If
+ /// possible, use <see cref="TermBuffer()" /> and <see cref="TermLength()"/>
+ /// directly instead. If you really need a
+ /// String, use this method, which is nothing more than
+ /// a convenience call to <b>new String(token.termBuffer(), 0, token.termLength())</b>
+ /// </summary>
+ public string Term
+ {
+ get
+ {
+ InitTermBuffer();
+ return new System.String(termBuffer, 0, termLength);
+ }
+ }
+
+ /// <summary>Copies the contents of buffer, starting at offset for
+ /// length characters, into the termBuffer array.
+ /// </summary>
+ /// <param name="buffer">the buffer to copy</param>
+ /// <param name="offset">the index in the buffer of the first character to copy</param>
+ /// <param name="length">the number of characters to copy</param>
+ public void SetTermBuffer(char[] buffer, int offset, int length)
+ {
+ GrowTermBuffer(length);
+ Array.Copy(buffer, offset, termBuffer, 0, length);
+ termLength = length;
+ }
+
+ /// <summary>Copies the contents of buffer into the termBuffer array.</summary>
+ /// <param name="buffer">the buffer to copy
+ /// </param>
+ public void SetTermBuffer(System.String buffer)
+ {
+ int length = buffer.Length;
+ GrowTermBuffer(length);
+ TextSupport.GetCharsFromString(buffer, 0, length, termBuffer, 0);
+ termLength = length;
+ }
+
+ /// <summary>Copies the contents of buffer, starting at offset and continuing
+ /// for length characters, into the termBuffer array.
+ /// </summary>
+ /// <param name="buffer">the buffer to copy
+ /// </param>
+ /// <param name="offset">the index in the buffer of the first character to copy
+ /// </param>
+ /// <param name="length">the number of characters to copy
+ /// </param>
+ public void SetTermBuffer(System.String buffer, int offset, int length)
+ {
+ System.Diagnostics.Debug.Assert(offset <= buffer.Length);
+ System.Diagnostics.Debug.Assert(offset + length <= buffer.Length);
+ GrowTermBuffer(length);
+ TextSupport.GetCharsFromString(buffer, offset, offset + length, termBuffer, 0);
+ termLength = length;
+ }
+
+ /// <summary>Returns the internal termBuffer character array which
+ /// you can then directly alter. If the array is too
+ /// small for your token, use <see cref="ResizeTermBuffer(int)" />
+ /// to increase it. After
+ /// altering the buffer be sure to call <see cref="SetTermLength" />
+ /// to record the number of valid
+ /// characters that were placed into the termBuffer.
+ /// </summary>
+ public char[] TermBuffer()
+ {
+ InitTermBuffer();
+ return termBuffer;
+ }
+
+ /// <summary>Grows the termBuffer to at least size newSize, preserving the
+ /// existing content. Note: If the next operation is to change
+ /// the contents of the term buffer use
+ /// <see cref="SetTermBuffer(char[], int, int)" />,
+ /// <see cref="SetTermBuffer(String)" />, or
+ /// <see cref="SetTermBuffer(String, int, int)" />
+ /// to optimally combine the resize with the setting of the termBuffer.
+ /// </summary>
+ /// <param name="newSize">minimum size of the new termBuffer
+ /// </param>
+ /// <returns> newly created termBuffer with length >= newSize
+ /// </returns>
+ public virtual char[] ResizeTermBuffer(int newSize)
+ {
+ if (termBuffer == null)
+ {
+ termBuffer = new char[ArrayUtil.GetNextSize(newSize < MIN_BUFFER_SIZE ? MIN_BUFFER_SIZE : newSize)];
+ }
+ else
+ {
+ if (termBuffer.Length < newSize)
+ {
+ // Not big enough; create a new array with slight
+ // over allocation and preserve content
+ var newCharBuffer = new char[ArrayUtil.GetNextSize(newSize)];
+ Array.Copy(termBuffer, 0, newCharBuffer, 0, termBuffer.Length);
+ termBuffer = newCharBuffer;
+ }
+ }
+ return termBuffer;
+ }
+
+ /// <summary>Allocates a buffer char[] of at least newSize, without preserving the existing content.
+ /// It is always used in places that set the content afterwards.
+ /// </summary>
+ /// <param name="newSize">minimum size of the buffer
+ /// </param>
+ private void GrowTermBuffer(int newSize)
+ {
+ if (termBuffer == null)
+ {
+ // The buffer is always at least MIN_BUFFER_SIZE
+ termBuffer = new char[ArrayUtil.GetNextSize(newSize < MIN_BUFFER_SIZE?MIN_BUFFER_SIZE:newSize)];
+ }
+ else
+ {
+ if (termBuffer.Length < newSize)
+ {
+ // Not big enough; create a new array with slight
+ // over allocation:
+ termBuffer = new char[ArrayUtil.GetNextSize(newSize)];
+ }
+ }
+ }
+
+ private void InitTermBuffer()
+ {
+ if (termBuffer == null)
+ {
+ termBuffer = new char[ArrayUtil.GetNextSize(MIN_BUFFER_SIZE)];
+ termLength = 0;
+ }
+ }
+
+ /// <summary>Return number of valid characters (length of the term)
+ /// in the termBuffer array.
+ /// </summary>
+ public int TermLength()
+ {
+ InitTermBuffer();
+ return termLength;
+ }
+
+ /// <summary>Set number of valid characters (length of the term) in
+ /// the termBuffer array. Use this to truncate the termBuffer
+ /// or to synchronize with external manipulation of the termBuffer.
+ /// Note: to grow the size of the array,
+ /// use <see cref="ResizeTermBuffer(int)" /> first.
+ /// </summary>
+ /// <param name="length">the truncated length
+ /// </param>
+ public void SetTermLength(int length)
+ {
+ InitTermBuffer();
+ if (length > termBuffer.Length)
+ throw new System.ArgumentException("length " + length + " exceeds the size of the termBuffer (" + termBuffer.Length + ")");
+ termLength = length;
+ }
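+
+ // Hedged sketch (not part of the original source) of the buffer re-use pattern from the
+ // class comment: write directly into the term buffer and record the valid length.
+ // `token` and `text` are illustrative.
+ //
+ //   char[] buffer = token.ResizeTermBuffer(text.Length); // grow if needed, keep content
+ //   text.CopyTo(0, buffer, 0, text.Length);              // fill in the characters
+ //   token.SetTermLength(text.Length);                    // record the valid length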
+
+ /// <summary>Gets or sets this Token's starting offset, the position of the first character
+ /// corresponding to this token in the source text.
+ /// Note that the difference between endOffset() and startOffset() may not be
+ /// equal to <see cref="TermLength"/>, as the term text may have been altered by a
+ /// stemmer or some other filter.
+ /// </summary>
+ public virtual int StartOffset
+ {
+ get { return startOffset; }
+ set { this.startOffset = value; }
+ }
+
+ /// <summary>Gets or sets this Token's ending offset, one greater than the position of the
+ /// last character corresponding to this token in the source text. The length
+ /// of the token in the source text is (endOffset - startOffset).
+ /// </summary>
+ public virtual int EndOffset
+ {
+ get { return endOffset; }
+ set { this.endOffset = value; }
+ }
+
+ /// <summary>Set the starting and ending offset.
+ /// See StartOffset() and EndOffset()
+ /// </summary>
+ public virtual void SetOffset(int startOffset, int endOffset)
+ {
+ this.startOffset = startOffset;
+ this.endOffset = endOffset;
+ }
+
+ /// <summary>Returns this Token's lexical type. Defaults to "word". </summary>
+ public string Type
+ {
+ get { return type; }
+ set { this.type = value; }
+ }
+
+ /// <summary> EXPERIMENTAL: While we think this is here to stay, we may want to change it to be a long.
+ /// <p/>
+ ///
+ /// Get the bitset for any bits that have been set. This is completely distinct from <see cref="Type()" />, although they do share similar purposes.
+ /// The flags can be used to encode information about the token for use by other <see cref="TokenFilter"/>s.
+ ///
+ ///
+ /// </summary>
+ /// <value> The bits </value>
+ public virtual int Flags
+ {
+ get { return flags; }
+ set { flags = value; }
+ }
+
+ /// <summary> Returns this Token's payload.</summary>
+ public virtual Payload Payload
+ {
+ get { return payload; }
+ set { payload = value; }
+ }
+
+ public override String ToString()
+ {
+ var sb = new System.Text.StringBuilder();
+ sb.Append('(');
+ InitTermBuffer();
+ if (termBuffer == null)
+ sb.Append("null");
+ else
+ sb.Append(termBuffer, 0, termLength);
+ sb.Append(',').Append(startOffset).Append(',').Append(endOffset);
+ if (!type.Equals("word"))
+ sb.Append(",type=").Append(type);
+ if (positionIncrement != 1)
+ sb.Append(",posIncr=").Append(positionIncrement);
+ sb.Append(')');
+ return sb.ToString();
+ }
+
+ /// <summary>Resets the term text, payload, flags, and positionIncrement,
+ /// startOffset, endOffset and token type to default.
+ /// </summary>
+ public override void Clear()
+ {
+ payload = null;
+ // Leave termBuffer to allow re-use
+ termLength = 0;
+ positionIncrement = 1;
+ flags = 0;
+ startOffset = endOffset = 0;
+ type = DEFAULT_TYPE;
+ }
+
+ public override System.Object Clone()
+ {
+ var t = (Token) base.Clone();
+ // Do a deep clone
+ if (termBuffer != null)
+ {
+ t.termBuffer = new char[termBuffer.Length];
+ termBuffer.CopyTo(t.termBuffer, 0);
+ }
+ if (payload != null)
+ {
+ t.payload = (Payload) payload.Clone();
+ }
+ return t;
+ }
+
+ /// <summary>Makes a clone, but replaces the term buffer &amp;
+ /// start/end offset in the process. This is more
+ /// efficient than doing a full clone (and then calling
+ /// setTermBuffer) because it saves a wasted copy of the old
+ /// termBuffer.
+ /// </summary>
+ public virtual Token Clone(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset)
+ {
+ var t = new Token(newTermBuffer, newTermOffset, newTermLength, newStartOffset, newEndOffset)
+ {positionIncrement = positionIncrement, flags = flags, type = type};
+ if (payload != null)
+ t.payload = (Payload) payload.Clone();
+ return t;
+ }
+
+ public override bool Equals(Object obj)
+ {
+ if (obj == this)
+ return true;
+
+ var other = obj as Token;
+ if (other == null)
+ return false;
+
+ InitTermBuffer();
+ other.InitTermBuffer();
+
+ if (termLength == other.termLength && startOffset == other.startOffset && endOffset == other.endOffset &&
+ flags == other.flags && positionIncrement == other.positionIncrement && SubEqual(type, other.type) &&
+ SubEqual(payload, other.payload))
+ {
+ for (int i = 0; i < termLength; i++)
+ if (termBuffer[i] != other.termBuffer[i])
+ return false;
+ return true;
+ }
+ return false;
+ }
+
+ private bool SubEqual(System.Object o1, System.Object o2)
+ {
+ if (o1 == null)
+ return o2 == null;
+ return o1.Equals(o2);
+ }
+
+ public override int GetHashCode()
+ {
+ InitTermBuffer();
+ int code = termLength;
+ code = code * 31 + startOffset;
+ code = code * 31 + endOffset;
+ code = code * 31 + flags;
+ code = code * 31 + positionIncrement;
+ code = code * 31 + type.GetHashCode();
+ code = (payload == null?code:code * 31 + payload.GetHashCode());
+ code = code * 31 + ArrayUtil.HashCode(termBuffer, 0, termLength);
+ return code;
+ }
+
+ // like clear() but doesn't clear termBuffer/text
+ private void ClearNoTermBuffer()
+ {
+ payload = null;
+ positionIncrement = 1;
+ flags = 0;
+ startOffset = endOffset = 0;
+ type = DEFAULT_TYPE;
+ }
+
+ /// <summary>Shorthand for calling <see cref="Clear" />,
+ /// <see cref="SetTermBuffer(char[], int, int)" />,
+ /// <see cref="StartOffset" />,
+ /// <see cref="EndOffset" />,
+ /// <see cref="Type" />
+ /// </summary>
+ /// <returns> this Token instance
+ /// </returns>
+ public virtual Token Reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, System.String newType)
+ {
+ ClearNoTermBuffer();
+ payload = null;
+ positionIncrement = 1;
+ SetTermBuffer(newTermBuffer, newTermOffset, newTermLength);
+ startOffset = newStartOffset;
+ endOffset = newEndOffset;
+ type = newType;
+ return this;
+ }
+
+ /// <summary>Shorthand for calling <see cref="Clear" />,
+ /// <see cref="SetTermBuffer(char[], int, int)" />,
+ /// <see cref="StartOffset" />,
+ /// <see cref="EndOffset" />
+ /// <see cref="Type" /> on Token.DEFAULT_TYPE
+ /// </summary>
+ /// <returns> this Token instance
+ /// </returns>
+ public virtual Token Reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset)
+ {
+ ClearNoTermBuffer();
+ SetTermBuffer(newTermBuffer, newTermOffset, newTermLength);
+ startOffset = newStartOffset;
+ endOffset = newEndOffset;
+ type = DEFAULT_TYPE;
+ return this;
+ }
+
+ /// <summary>Shorthand for calling <see cref="Clear" />,
+ /// <see cref="SetTermBuffer(String)" />,
+ /// <see cref="StartOffset" />,
+ /// <see cref="EndOffset" />
+ /// <see cref="Type" />
+ /// </summary>
+ /// <returns> this Token instance
+ /// </returns>
+ public virtual Token Reinit(System.String newTerm, int newStartOffset, int newEndOffset, System.String newType)
+ {
+ ClearNoTermBuffer();
+ SetTermBuffer(newTerm);
+ startOffset = newStartOffset;
+ endOffset = newEndOffset;
+ type = newType;
+ return this;
+ }
+
+ /// <summary>Shorthand for calling <see cref="Clear" />,
+ /// <see cref="SetTermBuffer(String, int, int)" />,
+ /// <see cref="StartOffset" />,
+ /// <see cref="EndOffset" />
+ /// <see cref="Type" />
+ /// </summary>
+ /// <returns> this Token instance
+ /// </returns>
+ public virtual Token Reinit(System.String newTerm, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, System.String newType)
+ {
+ ClearNoTermBuffer();
+ SetTermBuffer(newTerm, newTermOffset, newTermLength);
+ startOffset = newStartOffset;
+ endOffset = newEndOffset;
+ type = newType;
+ return this;
+ }
+
+ /// <summary>Shorthand for calling <see cref="Clear" />,
+ /// <see cref="SetTermBuffer(String)" />,
+ /// <see cref="StartOffset" />,
+ /// <see cref="EndOffset" />
+ /// <see cref="Type" /> on Token.DEFAULT_TYPE
+ /// </summary>
+ /// <returns> this Token instance
+ /// </returns>
+ public virtual Token Reinit(System.String newTerm, int newStartOffset, int newEndOffset)
+ {
+ ClearNoTermBuffer();
+ SetTermBuffer(newTerm);
+ startOffset = newStartOffset;
+ endOffset = newEndOffset;
+ type = DEFAULT_TYPE;
+ return this;
+ }
+
+ /// <summary>Shorthand for calling <see cref="Clear" />,
+ /// <see cref="SetTermBuffer(String, int, int)" />,
+ /// <see cref="StartOffset" />,
+ /// <see cref="EndOffset" />
+ /// <see cref="Type" /> on Token.DEFAULT_TYPE
+ /// </summary>
+ /// <returns> this Token instance
+ /// </returns>
+ public virtual Token Reinit(System.String newTerm, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset)
+ {
+ ClearNoTermBuffer();
+ SetTermBuffer(newTerm, newTermOffset, newTermLength);
+ startOffset = newStartOffset;
+ endOffset = newEndOffset;
+ type = DEFAULT_TYPE;
+ return this;
+ }
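+
+ // Hedged sketch (not part of the original source): the typical re-use pattern from the
+ // class comment, written with the C# method names. `reusableToken`, `text`, `start` and
+ // `end` are illustrative.
+ //
+ //   // copy text from a string, resetting offsets (and optionally the type) in one call:
+ //   return reusableToken.Reinit(text, start, end, "synonym");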
+
+ /// <summary> Copy the prototype token's fields into this one. Note: Payloads are shared.</summary>
+ /// <param name="prototype">
+ /// </param>
+ public virtual void Reinit(Token prototype)
+ {
+ prototype.InitTermBuffer();
+ SetTermBuffer(prototype.termBuffer, 0, prototype.termLength);
+ positionIncrement = prototype.positionIncrement;
+ flags = prototype.flags;
+ startOffset = prototype.startOffset;
+ endOffset = prototype.endOffset;
+ type = prototype.type;
+ payload = prototype.payload;
+ }
+
+ /// <summary> Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared.</summary>
+ /// <param name="prototype">
+ /// </param>
+ /// <param name="newTerm">
+ /// </param>
+ public virtual void Reinit(Token prototype, System.String newTerm)
+ {
+ SetTermBuffer(newTerm);
+ positionIncrement = prototype.positionIncrement;
+ flags = prototype.flags;
+ startOffset = prototype.startOffset;
+ endOffset = prototype.endOffset;
+ type = prototype.type;
+ payload = prototype.payload;
+ }
+
+ /// <summary> Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared.</summary>
+ /// <param name="prototype">
+ /// </param>
+ /// <param name="newTermBuffer">
+ /// </param>
+ /// <param name="offset">
+ /// </param>
+ /// <param name="length">
+ /// </param>
+ public virtual void Reinit(Token prototype, char[] newTermBuffer, int offset, int length)
+ {
+ SetTermBuffer(newTermBuffer, offset, length);
+ positionIncrement = prototype.positionIncrement;
+ flags = prototype.flags;
+ startOffset = prototype.startOffset;
+ endOffset = prototype.endOffset;
+ type = prototype.type;
+ payload = prototype.payload;
+ }
+
+ public override void CopyTo(Attribute target)
+ {
+ if (target is Token)
+ {
+ var to = (Token) target;
+ to.Reinit(this);
+ // reinit shares the payload, so clone it:
+ if (payload != null)
+ {
+ to.payload = (Payload) payload.Clone();
+ }
+ }
+ else
+ {
+ InitTermBuffer();
+ ((ITermAttribute) target).SetTermBuffer(termBuffer, 0, termLength);
+ ((IOffsetAttribute) target).SetOffset(startOffset, endOffset);
+ ((IPositionIncrementAttribute) target).PositionIncrement = positionIncrement;
+ ((IPayloadAttribute) target).Payload = (payload == null)?null:(Payload) payload.Clone();
+ ((IFlagsAttribute) target).Flags = flags;
+ ((ITypeAttribute) target).Type = type;
+ }
+ }
+
+ ///<summary>
+ /// Convenience factory that returns <c>Token</c> as implementation for the basic
+ /// attributes and returns the default impl (with &quot;Impl&quot; appended) for all other
+ /// attributes.
+ /// @since 3.0
+ /// </summary>
+ public static AttributeSource.AttributeFactory TOKEN_ATTRIBUTE_FACTORY =
+ new TokenAttributeFactory(AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
+
+ /// <summary>
+ /// <b>Expert</b>: Creates an AttributeFactory returning <see cref="Token" /> as the instance for the basic attributes
+ /// and for all other attributes calls the given delegate factory.
+ /// </summary>
+ public class TokenAttributeFactory : AttributeSource.AttributeFactory
+ {
+
+ private readonly AttributeSource.AttributeFactory _delegateFactory;
+
+ /// <summary>
+ /// <b>Expert</b>: Creates an AttributeFactory returning <see cref="Token" /> as the instance for the basic attributes
+ /// and for all other attributes calls the given delegate factory.
+ /// </summary>
+ public TokenAttributeFactory(AttributeSource.AttributeFactory delegateFactory)
+ {
+ this._delegateFactory = delegateFactory;
+ }
+
+ public override Attribute CreateAttributeInstance<T>()
+ {
+ return typeof(T).IsAssignableFrom(typeof(Token))
+ ? new Token()
+ : _delegateFactory.CreateAttributeInstance<T>();
+ }
+
+ public override bool Equals(Object other)
+ {
+ if (this == other) return true;
+
+ var af = other as TokenAttributeFactory;
+ return af != null && _delegateFactory.Equals(af._delegateFactory);
+ }
+
+ public override int GetHashCode()
+ {
+ return _delegateFactory.GetHashCode() ^ 0x0a45aa31;
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Analysis/TokenFilter.cs b/src/core/Analysis/TokenFilter.cs
new file mode 100644
index 0000000..7483c82
--- /dev/null
+++ b/src/core/Analysis/TokenFilter.cs
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Lucene.Net.Analysis
+{
+
+ /// <summary> A TokenFilter is a TokenStream whose input is another TokenStream.
+ /// <p/>
+ /// This is an abstract class; subclasses must override <see cref="TokenStream.IncrementToken()" />.
+ ///
+ /// </summary>
+ /// <seealso cref="TokenStream">
+ /// </seealso>
+ public abstract class TokenFilter:TokenStream
+ {
+ /// <summary>The source of tokens for this filter. </summary>
+ protected internal TokenStream input;
+
+ private bool isDisposed;
+
+ /// <summary>Construct a token stream filtering the given input. </summary>
+ protected internal TokenFilter(TokenStream input):base(input)
+ {
+ this.input = input;
+ }
+
+ /// <summary>Performs end-of-stream operations, if any, and then calls <c>End()</c> on the
+ /// input TokenStream.<p/>
+ /// <b>NOTE:</b> Be sure to call <c>base.End()</c> first when overriding this method.
+ /// </summary>
+ public override void End()
+ {
+ input.End();
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ if (isDisposed) return;
+
+ if (disposing)
+ {
+ if (input != null)
+ {
+ input.Close();
+ }
+ }
+
+ //input = null;
+ isDisposed = true;
+ }
+
+ /// <summary>Reset the filter as well as the input TokenStream. </summary>
+ public override void Reset()
+ {
+ input.Reset();
+ }
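+
+ // Hedged sketch (not part of the original source): the shape of a typical filter built on
+ // this class. The lower-casing logic, the class name and the ITermAttribute usage are
+ // illustrative only; ITermAttribute comes from Lucene.Net.Analysis.Tokenattributes.
+ //
+ //   public sealed class MyLowerCaseFilter : TokenFilter
+ //   {
+ //       private readonly ITermAttribute termAtt;
+ //
+ //       public MyLowerCaseFilter(TokenStream input) : base(input)
+ //       {
+ //           termAtt = AddAttribute<ITermAttribute>();
+ //       }
+ //
+ //       public override bool IncrementToken()
+ //       {
+ //           if (!input.IncrementToken()) return false;
+ //           char[] buffer = termAtt.TermBuffer();
+ //           for (int i = 0; i < termAtt.TermLength(); i++)
+ //               buffer[i] = char.ToLowerInvariant(buffer[i]);
+ //           return true;
+ //       }
+ //   }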
+ }
+} \ No newline at end of file
diff --git a/src/core/Analysis/TokenStream.cs b/src/core/Analysis/TokenStream.cs
new file mode 100644
index 0000000..c624696
--- /dev/null
+++ b/src/core/Analysis/TokenStream.cs
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Util;
+using Document = Lucene.Net.Documents.Document;
+using Field = Lucene.Net.Documents.Field;
+using IndexWriter = Lucene.Net.Index.IndexWriter;
+using AttributeSource = Lucene.Net.Util.AttributeSource;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// <summary> A <c>TokenStream</c> enumerates the sequence of tokens, either from
+ /// <see cref="Field" />s of a <see cref="Document" /> or from query text.
+ /// <p/>
+ /// This is an abstract class. Concrete subclasses are:
+ /// <list type="bullet">
+ /// <item><see cref="Tokenizer" />, a <c>TokenStream</c> whose input is a Reader; and</item>
+ /// <item><see cref="TokenFilter" />, a <c>TokenStream</c> whose input is another
+ /// <c>TokenStream</c>.</item>
+ /// </list>
+ /// A new <c>TokenStream</c> API has been introduced with Lucene 2.9. This API
+ /// has moved from being <see cref="Token" /> based to <see cref="IAttribute" /> based. While
+ /// <see cref="Token" /> still exists in 2.9 as a convenience class, the preferred way
+ /// to store the information of a <see cref="Token" /> is to use <see cref="Util.Attribute" />s.
+ /// <p/>
+ /// <c>TokenStream</c> now extends <see cref="AttributeSource" />, which provides
+ /// access to all of the token <see cref="IAttribute" />s for the <c>TokenStream</c>.
+ /// Note that only one instance per <see cref="Util.Attribute" /> is created and reused
+ /// for every token. This approach reduces object creation and allows local
+ /// caching of references to the <see cref="Util.Attribute" />s. See
+ /// <see cref="IncrementToken()" /> for further details.
+ /// <p/>
+ /// <b>The workflow of the new <c>TokenStream</c> API is as follows:</b>
+ /// <list type="bullet">
+ /// <item>Instantiation of <c>TokenStream</c>/<see cref="TokenFilter" />s which add/get
+ /// attributes to/from the <see cref="AttributeSource" />.</item>
+ /// <item>The consumer calls <see cref="TokenStream.Reset()" />.</item>
+ /// <item>The consumer retrieves attributes from the stream and stores local
+ /// references to all attributes it wants to access</item>
+ /// <item>The consumer calls <see cref="IncrementToken()" /> until it returns false and
+ /// consumes the attributes after each call.</item>
+ /// <item>The consumer calls <see cref="End()" /> so that any end-of-stream operations
+ /// can be performed.</item>
+ /// <item>The consumer calls <see cref="Close()" /> to release any resource when finished
+ /// using the <c>TokenStream</c></item>
+ /// </list>
+ /// To make sure that filters and consumers know which attributes are available,
+ /// the attributes must be added during instantiation. Filters and consumers are
+ /// not required to check for availability of attributes in
+ /// <see cref="IncrementToken()" />.
+ /// <p/>
+ /// You can find some example code for the new API in the analysis package level
+ /// Javadoc.
+ /// <p/>
+ /// Sometimes it is desirable to capture the current state of a <c>TokenStream</c>,
+ /// e.g. for buffering purposes (see <see cref="CachingTokenFilter" />,
+ /// <see cref="TeeSinkTokenFilter" />). For this use case
+ /// <see cref="AttributeSource.CaptureState" /> and <see cref="AttributeSource.RestoreState" />
+ /// can be used.
+ /// </summary>
+ public abstract class TokenStream : AttributeSource, IDisposable
+ {
+ /// <summary> A TokenStream using the default attribute factory.</summary>
+ protected internal TokenStream()
+ { }
+
+ /// <summary> A TokenStream that uses the same attributes as the supplied one.</summary>
+ protected internal TokenStream(AttributeSource input)
+ : base(input)
+ { }
+
+ /// <summary> A TokenStream using the supplied AttributeFactory for creating new <see cref="IAttribute" /> instances.</summary>
+ protected internal TokenStream(AttributeFactory factory)
+ : base(factory)
+ { }
+
+ /// <summary> Consumers (i.e., <see cref="IndexWriter" />) use this method to advance the stream to
+ /// the next token. Implementing classes must implement this method and update
+ /// the appropriate <see cref="Util.Attribute" />s with the attributes of the next
+ /// token.
+ ///
+ /// The producer must make no assumptions about the attributes after the
+ /// method has returned: the caller may change them arbitrarily. If the
+ /// producer needs to preserve the state for subsequent calls, it can use
+ /// <see cref="AttributeSource.CaptureState" /> to create a copy of the current attribute state.
+ ///
+ /// This method is called for every token of a document, so an efficient
+ /// implementation is crucial for good performance. To avoid calls to
+ /// <see cref="AttributeSource.AddAttribute{T}()" /> and <see cref="AttributeSource.GetAttribute{T}()" />,
+ /// references to all <see cref="Util.Attribute" />s that this stream uses should be
+ /// retrieved during instantiation.
+ ///
+ /// To ensure that filters and consumers know which attributes are available,
+ /// the attributes must be added during instantiation. Filters and consumers
+ /// are not required to check for availability of attributes in
+ /// <see cref="IncrementToken()" />.
+ ///
+ /// </summary>
+ /// <returns> false for end of stream; true otherwise</returns>
+ public abstract bool IncrementToken();
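+
+ // Hedged sketch (not part of the original source) of the consumer workflow described in
+ // the class comment. The `stream` variable is illustrative; the attribute interfaces are
+ // the ones defined in Lucene.Net.Analysis.Tokenattributes.
+ //
+ //   ITermAttribute term = stream.AddAttribute<ITermAttribute>();
+ //   IOffsetAttribute offset = stream.AddAttribute<IOffsetAttribute>();
+ //   stream.Reset();
+ //   while (stream.IncrementToken())
+ //   {
+ //       Console.WriteLine(term.Term + " [" + offset.StartOffset + "," + offset.EndOffset + ")");
+ //   }
+ //   stream.End();
+ //   stream.Dispose();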
+
+ /// <summary> This method is called by the consumer after the last token has been
+ /// consumed, after <see cref="IncrementToken" /> returned <c>false</c>
+ /// (using the new <c>TokenStream</c> API). Streams implementing the old API
+ /// should upgrade to use this feature.
+ /// <p/>
+ /// This method can be used to perform any end-of-stream operations, such as
+ /// setting the final offset of a stream. The final offset of a stream might
+ /// differ from the offset of the last token, e.g. when one or more whitespace
+ /// characters followed the last token and a <see cref="WhitespaceTokenizer" /> was used.
+ ///
+ /// </summary>
+ /// <throws> IOException </throws>
+ public virtual void End()
+ {
+ // do nothing by default
+ }
+
+ /// <summary> Resets this stream to the beginning. This is an optional operation, so
+ /// subclasses may or may not implement this method. <see cref="Reset()" /> is not needed for
+ /// the standard indexing process. However, if the tokens of a
+ /// <c>TokenStream</c> are intended to be consumed more than once, it is
+ /// necessary to implement <see cref="Reset()" />. Note that if your TokenStream
+ /// caches tokens and feeds them back again after a reset, it is imperative
+ /// that you clone the tokens when you store them away (on the first pass) as
+ /// well as when you return them (on future passes after <see cref="Reset()" />).
+ /// </summary>
+ public virtual void Reset()
+ {
+ }
+
+ /// <summary>Releases resources associated with this stream. </summary>
+ [Obsolete("Use Dispose() instead")]
+ public void Close()
+ {
+ Dispose();
+ }
+
+ public void Dispose()
+ {
+ Dispose(true);
+ }
+
+ protected abstract void Dispose(bool disposing);
+ }
+} \ No newline at end of file
diff --git a/src/core/Analysis/Tokenattributes/FlagsAttribute.cs b/src/core/Analysis/Tokenattributes/FlagsAttribute.cs
new file mode 100644
index 0000000..b5c4b7b
--- /dev/null
+++ b/src/core/Analysis/Tokenattributes/FlagsAttribute.cs
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Attribute = Lucene.Net.Util.Attribute;
+
+namespace Lucene.Net.Analysis.Tokenattributes
+{
+
+ /// <summary> This attribute can be used to pass different flags down the tokenizer chain,
+ /// e.g. from one TokenFilter to another.
+ /// </summary>
+ [Serializable]
+ public class FlagsAttribute:Util.Attribute, IFlagsAttribute, System.ICloneable
+ {
+ private int flags = 0;
+
+ /// <summary> EXPERIMENTAL: While we think this is here to stay, we may want to change it to be a long.
+ /// <p/>
+ ///
+ /// Get the bitset for any bits that have been set. This is completely distinct from <see cref="ITypeAttribute.Type()" />, although they do share similar purposes.
+ /// The flags can be used to encode information about the token for use by other <see cref="Lucene.Net.Analysis.TokenFilter" />s.
+ ///
+ ///
+ /// </summary>
+ /// <value> The bits </value>
+ public virtual int Flags
+ {
+ get { return flags; }
+ set { this.flags = value; }
+ }
+
+ public override void Clear()
+ {
+ flags = 0;
+ }
+
+ public override bool Equals(System.Object other)
+ {
+ if (this == other)
+ {
+ return true;
+ }
+
+ if (other is FlagsAttribute)
+ {
+ return ((FlagsAttribute) other).flags == flags;
+ }
+
+ return false;
+ }
+
+ public override int GetHashCode()
+ {
+ return flags;
+ }
+
+ public override void CopyTo(Attribute target)
+ {
+ IFlagsAttribute t = (IFlagsAttribute) target;
+ t.Flags = flags;
+ }
+
+ override public System.Object Clone()
+ {
+ FlagsAttribute impl = new FlagsAttribute();
+ impl.flags = this.flags;
+ return impl;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Analysis/Tokenattributes/IFlagsAttribute.cs b/src/core/Analysis/Tokenattributes/IFlagsAttribute.cs
new file mode 100644
index 0000000..24b2bea
--- /dev/null
+++ b/src/core/Analysis/Tokenattributes/IFlagsAttribute.cs
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Util;
+using Tokenizer = Lucene.Net.Analysis.Tokenizer;
+
+namespace Lucene.Net.Analysis.Tokenattributes
+{
+
+ /// <summary> This attribute can be used to pass different flags down the <see cref="Tokenizer" /> chain,
+ /// e.g. from one TokenFilter to another.
+ /// </summary>
+ public interface IFlagsAttribute:IAttribute
+ {
+ /// <summary> EXPERIMENTAL: While we think this is here to stay, we may want to change it to be a long.
+ /// <p/>
+ ///
+ /// Get the bitset for any bits that have been set. This is completely distinct from <see cref="ITypeAttribute.Type()" />, although they do share similar purposes.
+ /// The flags can be used to encode information about the token for use by other <see cref="Lucene.Net.Analysis.TokenFilter" />s.
+ ///
+ ///
+ /// </summary>
+ /// <value> The bits </value>
+ int Flags { get; set; }
+ }
+} \ No newline at end of file
diff --git a/src/core/Analysis/Tokenattributes/IOffsetAttribute.cs b/src/core/Analysis/Tokenattributes/IOffsetAttribute.cs
new file mode 100644
index 0000000..ffbbe02
--- /dev/null
+++ b/src/core/Analysis/Tokenattributes/IOffsetAttribute.cs
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.Tokenattributes
+{
+
+ /// <summary> The start and end character offset of a Token. </summary>
+ public interface IOffsetAttribute : IAttribute
+ {
+ /// <summary>Returns this Token's starting offset, the position of the first character
+ /// corresponding to this token in the source text.
+ /// Note that the difference between endOffset() and startOffset() may not be
+ /// equal to termText.length(), as the term text may have been altered by a
+ /// stemmer or some other filter.
+ /// </summary>
+ int StartOffset { get; }
+
+
+ /// <summary>Set the starting and ending offset.
+ /// See StartOffset() and EndOffset()
+ /// </summary>
+ void SetOffset(int startOffset, int endOffset);
+
+
+ /// <summary>Returns this Token's ending offset, one greater than the position of the
+ /// last character corresponding to this token in the source text. The length
+ /// of the token in the source text is (endOffset - startOffset).
+ /// </summary>
+ int EndOffset { get; }
+ }
+} \ No newline at end of file
diff --git a/src/core/Analysis/Tokenattributes/IPayloadAttribute.cs b/src/core/Analysis/Tokenattributes/IPayloadAttribute.cs
new file mode 100644
index 0000000..7e313ce
--- /dev/null
+++ b/src/core/Analysis/Tokenattributes/IPayloadAttribute.cs
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Util;
+using Payload = Lucene.Net.Index.Payload;
+
+namespace Lucene.Net.Analysis.Tokenattributes
+{
+
+ /// <summary> The payload of a Token. See also <see cref="Payload" />.</summary>
+ public interface IPayloadAttribute:IAttribute
+ {
+ /// <summary> Returns this Token's payload.</summary>
+ Payload Payload { get; set; }
+ }
+} \ No newline at end of file
diff --git a/src/core/Analysis/Tokenattributes/IPositionIncrementAttribute.cs b/src/core/Analysis/Tokenattributes/IPositionIncrementAttribute.cs
new file mode 100644
index 0000000..6c2a131
--- /dev/null
+++ b/src/core/Analysis/Tokenattributes/IPositionIncrementAttribute.cs
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.Tokenattributes
+{
+
+ /// <summary>The positionIncrement determines the position of this token
+ /// relative to the previous Token in a TokenStream, used in phrase
+ /// searching.
+ ///
+ /// <p/>The default value is one.
+ ///
+ /// <p/>Some common uses for this are:<list>
+ ///
+ /// <item>Set it to zero to put multiple terms in the same position. This is
+ /// useful if, e.g., a word has multiple stems. Searches for phrases
+ /// including either stem will match. In this case, all but the first stem's
+ /// increment should be set to zero: the increment of the first instance
+ /// should be one. Repeating a token with an increment of zero can also be
+ /// used to boost the scores of matches on that token.</item>
+ ///
+ /// <item>Set it to values greater than one to inhibit exact phrase matches.
+ /// If, for example, one does not want phrases to match across removed stop
+ /// words, then one could build a stop word filter that removes stop words and
+ /// also sets the increment to the number of stop words removed before each
+ /// non-stop word. Then exact phrase queries will only match when the terms
+ /// occur with no intervening stop words.</item>
+ ///
+ /// </list>
+ ///
+ /// </summary>
+ /// <seealso cref="Lucene.Net.Index.TermPositions">
+ /// </seealso>
+ public interface IPositionIncrementAttribute:IAttribute
+ {
+ /// <summary>Gets or sets the position increment. The default value is one.
+ ///
+ /// </summary>
+ /// <value> the distance from the prior term </value>
+ int PositionIncrement { set; get; }
+ }
+} \ No newline at end of file
diff --git a/src/core/Analysis/Tokenattributes/ITermAttribute.cs b/src/core/Analysis/Tokenattributes/ITermAttribute.cs
new file mode 100644
index 0000000..8f9b030
--- /dev/null
+++ b/src/core/Analysis/Tokenattributes/ITermAttribute.cs
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.Tokenattributes
+{
+
+ /// <summary> The term text of a Token.</summary>
+ public interface ITermAttribute:IAttribute
+ {
+ /// <summary>Returns the Token's term text.
+ ///
+ /// This method has a performance penalty
+ /// because the text is stored internally in a char[]. If
+ /// possible, use <see cref="TermBuffer()" /> and <see cref="TermLength()" />
+ /// directly instead. If you really need a
+ /// String, use this method, which is nothing more than
+ /// a convenience call to <b>new String(token.termBuffer(), 0, token.termLength())</b>
+ /// </summary>
+ string Term { get; }
+
+ /// <summary>Copies the contents of buffer, starting at offset for
+ /// length characters, into the termBuffer array.
+ /// </summary>
+ /// <param name="buffer">the buffer to copy
+ /// </param>
+ /// <param name="offset">the index in the buffer of the first character to copy
+ /// </param>
+ /// <param name="length">the number of characters to copy
+ /// </param>
+ void SetTermBuffer(char[] buffer, int offset, int length);
+
+ /// <summary>Copies the contents of buffer into the termBuffer array.</summary>
+ /// <param name="buffer">the buffer to copy
+ /// </param>
+ void SetTermBuffer(System.String buffer);
+
+ /// <summary>Copies the contents of buffer, starting at offset and continuing
+ /// for length characters, into the termBuffer array.
+ /// </summary>
+ /// <param name="buffer">the buffer to copy
+ /// </param>
+ /// <param name="offset">the index in the buffer of the first character to copy
+ /// </param>
+ /// <param name="length">the number of characters to copy
+ /// </param>
+ void SetTermBuffer(System.String buffer, int offset, int length);
+
+ /// <summary>Returns the internal termBuffer character array which
+ /// you can then directly alter. If the array is too
+ /// small for your token, use <see cref="ResizeTermBuffer(int)" />
+ /// to increase it. After
+ /// altering the buffer be sure to call <see cref="SetTermLength" />
+ /// to record the number of valid
+ /// characters that were placed into the termBuffer.
+ /// </summary>
+ char[] TermBuffer();
+
+ /// <summary>Grows the termBuffer to at least size newSize, preserving the
+ /// existing content. Note: If the next operation is to change
+ /// the contents of the term buffer use
+ /// <see cref="SetTermBuffer(char[], int, int)" />,
+ /// <see cref="SetTermBuffer(String)" />, or
+ /// <see cref="SetTermBuffer(String, int, int)" />
+ /// to optimally combine the resize with the setting of the termBuffer.
+ /// </summary>
+ /// <param name="newSize">minimum size of the new termBuffer
+ /// </param>
+ /// <returns> newly created termBuffer with length >= newSize
+ /// </returns>
+ char[] ResizeTermBuffer(int newSize);
+
+ /// <summary>Return number of valid characters (length of the term)
+ /// in the termBuffer array.
+ /// </summary>
+ int TermLength();
+
+ /// <summary>Set number of valid characters (length of the term) in
+ /// the termBuffer array. Use this to truncate the termBuffer
+ /// or to synchronize with external manipulation of the termBuffer.
+ /// Note: to grow the size of the array,
+ /// use <see cref="ResizeTermBuffer(int)" /> first.
+ /// </summary>
+ /// <param name="length">the truncated length
+ /// </param>
+ void SetTermLength(int length);
+ }
+} \ No newline at end of file
diff --git a/src/core/Analysis/Tokenattributes/ITypeAttribute.cs b/src/core/Analysis/Tokenattributes/ITypeAttribute.cs
new file mode 100644
index 0000000..48bcc10
--- /dev/null
+++ b/src/core/Analysis/Tokenattributes/ITypeAttribute.cs
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.Tokenattributes
+{
+
+ /// <summary> A Token's lexical type. The default value is "word". </summary>
+ public interface ITypeAttribute:IAttribute
+ {
+ /// <summary>Gets or sets this Token's lexical type. Defaults to "word". </summary>
+ string Type { get; set; }
+ }
+} \ No newline at end of file
diff --git a/src/core/Analysis/Tokenattributes/OffsetAttribute.cs b/src/core/Analysis/Tokenattributes/OffsetAttribute.cs
new file mode 100644
index 0000000..5149559
--- /dev/null
+++ b/src/core/Analysis/Tokenattributes/OffsetAttribute.cs
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Attribute = Lucene.Net.Util.Attribute;
+
+namespace Lucene.Net.Analysis.Tokenattributes
+{
+
+ /// <summary> The start and end character offset of a Token. </summary>
+ [Serializable]
+ public class OffsetAttribute:Attribute, IOffsetAttribute, System.ICloneable
+ {
+ private int startOffset;
+ private int endOffset;
+
+ /// <summary>Returns this Token's starting offset, the position of the first character
+ /// corresponding to this token in the source text.
+ /// Note that the difference between endOffset() and startOffset() may not be
+ /// equal to termText.length(), as the term text may have been altered by a
+ /// stemmer or some other filter.
+ /// </summary>
+ public virtual int StartOffset
+ {
+ get { return startOffset; }
+ }
+
+
+ /// <summary>Set the starting and ending offset.
+ /// See <see cref="StartOffset" /> and <see cref="EndOffset" />.
+ /// </summary>
+ public virtual void SetOffset(int startOffset, int endOffset)
+ {
+ this.startOffset = startOffset;
+ this.endOffset = endOffset;
+ }
+
+
+ /// <summary>Returns this Token's ending offset, one greater than the position of the
+ /// last character corresponding to this token in the source text. The length
+ /// of the token in the source text is (endOffset - startOffset).
+ /// </summary>
+ public virtual int EndOffset
+ {
+ get { return endOffset; }
+ }
+
+
+ public override void Clear()
+ {
+ startOffset = 0;
+ endOffset = 0;
+ }
+
+ public override bool Equals(System.Object other)
+ {
+ if (other == this)
+ {
+ return true;
+ }
+
+ if (other is OffsetAttribute)
+ {
+ OffsetAttribute o = (OffsetAttribute) other;
+ return o.startOffset == startOffset && o.endOffset == endOffset;
+ }
+
+ return false;
+ }
+
+ public override int GetHashCode()
+ {
+ int code = startOffset;
+ code = code * 31 + endOffset;
+ return code;
+ }
+
+ public override void CopyTo(Attribute target)
+ {
+ IOffsetAttribute t = (IOffsetAttribute) target;
+ t.SetOffset(startOffset, endOffset);
+ }
+
+ override public System.Object Clone()
+ {
+ OffsetAttribute impl = new OffsetAttribute();
+ impl.endOffset = endOffset;
+ impl.startOffset = startOffset;
+ return impl;
+ }
+ }
+} \ No newline at end of file
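
For illustration, a minimal sketch (not part of this commit) of the offset semantics documented above: the end offset is exclusive, so the token's length in the source text is EndOffset - StartOffset, which need not equal the term length once a stemmer or filter has altered the text.

    using System;
    using Lucene.Net.Analysis.Tokenattributes;

    public static class OffsetAttributeDemo
    {
        public static void Main()
        {
            var offsets = new OffsetAttribute();
            offsets.SetOffset(10, 17);   // the token covered characters [10, 17) of the source text

            // Length in the source text; may differ from the term length after stemming.
            Console.WriteLine(offsets.EndOffset - offsets.StartOffset);   // 7
        }
    }
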
diff --git a/src/core/Analysis/Tokenattributes/PayloadAttribute.cs b/src/core/Analysis/Tokenattributes/PayloadAttribute.cs
new file mode 100644
index 0000000..ae1c4d9
--- /dev/null
+++ b/src/core/Analysis/Tokenattributes/PayloadAttribute.cs
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Attribute = Lucene.Net.Util.Attribute;
+using Payload = Lucene.Net.Index.Payload;
+
+namespace Lucene.Net.Analysis.Tokenattributes
+{
+
+ /// <summary> The payload of a Token. See also <see cref="Payload" />.</summary>
+ [Serializable]
+ public class PayloadAttribute:Attribute, IPayloadAttribute, System.ICloneable
+ {
+ private Payload payload;
+
+ /// <summary> Initialize this attribute with no payload.</summary>
+ public PayloadAttribute()
+ {
+ }
+
+ /// <summary> Initialize this attribute with the given payload. </summary>
+ public PayloadAttribute(Payload payload)
+ {
+ this.payload = payload;
+ }
+
+ /// <summary> Returns this Token's payload.</summary>
+ public virtual Payload Payload
+ {
+ get { return this.payload; }
+ set { this.payload = value; }
+ }
+
+ public override void Clear()
+ {
+ payload = null;
+ }
+
+ public override System.Object Clone()
+ {
+ var clone = (PayloadAttribute) base.Clone();
+ if (payload != null)
+ {
+ clone.payload = (Payload) payload.Clone();
+ }
+ return clone;
+ // TODO: This code used to be as below. Any reason why? The if (payload != null) check was missing...
+ //PayloadAttributeImpl impl = new PayloadAttributeImpl();
+ //impl.payload = new Payload(this.payload.data, this.payload.offset, this.payload.length);
+ //return impl;
+ }
+
+ public override bool Equals(System.Object other)
+ {
+ if (other == this)
+ {
+ return true;
+ }
+
+ if (other is IPayloadAttribute)
+ {
+ PayloadAttribute o = (PayloadAttribute) other;
+ if (o.payload == null || payload == null)
+ {
+ return o.payload == null && payload == null;
+ }
+
+ return o.payload.Equals(payload);
+ }
+
+ return false;
+ }
+
+ public override int GetHashCode()
+ {
+ return (payload == null)?0:payload.GetHashCode();
+ }
+
+ public override void CopyTo(Attribute target)
+ {
+ IPayloadAttribute t = (IPayloadAttribute) target;
+ t.Payload = (payload == null)?null:(Payload) payload.Clone();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Analysis/Tokenattributes/PositionIncrementAttribute.cs b/src/core/Analysis/Tokenattributes/PositionIncrementAttribute.cs
new file mode 100644
index 0000000..4f7a04f
--- /dev/null
+++ b/src/core/Analysis/Tokenattributes/PositionIncrementAttribute.cs
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Attribute = Lucene.Net.Util.Attribute;
+using TokenStream = Lucene.Net.Analysis.TokenStream;
+
+namespace Lucene.Net.Analysis.Tokenattributes
+{
+
+ /// <summary>The positionIncrement determines the position of this token
+ /// relative to the previous Token in a <see cref="TokenStream" />, used in phrase
+ /// searching.
+ ///
+ /// <p/>The default value is one.
+ ///
+ /// <p/>Some common uses for this are:<list>
+ ///
+ /// <item>Set it to zero to put multiple terms in the same position. This is
+ /// useful if, e.g., a word has multiple stems. Searches for phrases
+ /// including either stem will match. In this case, all but the first stem's
+ /// increment should be set to zero: the increment of the first instance
+ /// should be one. Repeating a token with an increment of zero can also be
+ /// used to boost the scores of matches on that token.</item>
+ ///
+ /// <item>Set it to values greater than one to inhibit exact phrase matches.
+ /// If, for example, one does not want phrases to match across removed stop
+ /// words, then one could build a stop word filter that removes stop words and
+ /// also sets the increment to the number of stop words removed before each
+ /// non-stop word. Then exact phrase queries will only match when the terms
+ /// occur with no intervening stop words.</item>
+ ///
+ /// </list>
+ /// </summary>
+ [Serializable]
+ public class PositionIncrementAttribute:Attribute, IPositionIncrementAttribute, System.ICloneable
+ {
+ private int positionIncrement = 1;
+
+ /// <summary>Set the position increment. The default value is one.
+ ///
+ /// </summary>
+ /// <value> the distance from the prior term </value>
+ public virtual int PositionIncrement
+ {
+ set
+ {
+ if (value < 0)
+ throw new System.ArgumentException("Increment must be zero or greater: " + value);
+ this.positionIncrement = value;
+ }
+ get { return positionIncrement; }
+ }
+
+ public override void Clear()
+ {
+ this.positionIncrement = 1;
+ }
+
+ public override bool Equals(System.Object other)
+ {
+ if (other == this)
+ {
+ return true;
+ }
+
+ if (other is PositionIncrementAttribute)
+ {
+ return positionIncrement == ((PositionIncrementAttribute) other).positionIncrement;
+ }
+
+ return false;
+ }
+
+ public override int GetHashCode()
+ {
+ return positionIncrement;
+ }
+
+ public override void CopyTo(Attribute target)
+ {
+ IPositionIncrementAttribute t = (IPositionIncrementAttribute) target;
+ t.PositionIncrement = positionIncrement;
+ }
+
+ override public System.Object Clone()
+ {
+ PositionIncrementAttribute impl = new PositionIncrementAttribute();
+ impl.positionIncrement = positionIncrement;
+ return impl;
+ }
+ }
+} \ No newline at end of file
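
For illustration, a sketch (not part of this commit) of the zero-increment use case described above, written as a small TokenFilter that stacks one synonym on the original token's position. It assumes the AttributeSource helpers (AddAttribute<T>(), CaptureState()/RestoreState()) available elsewhere in Lucene.Net.Util, and GetSynonym is a toy placeholder lookup.

    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Tokenattributes;

    // Emits one synonym (if any) at the same position as the original token.
    public sealed class SingleSynonymFilter : TokenFilter
    {
        private readonly ITermAttribute termAtt;
        private readonly IPositionIncrementAttribute posIncrAtt;
        private State pendingState;      // saved state of the original token
        private string pendingSynonym;

        public SingleSynonymFilter(TokenStream input) : base(input)
        {
            termAtt = AddAttribute<ITermAttribute>();
            posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
        }

        public override bool IncrementToken()
        {
            if (pendingSynonym != null)
            {
                RestoreState(pendingState);          // same offsets/type as the original token
                termAtt.SetTermBuffer(pendingSynonym);
                posIncrAtt.PositionIncrement = 0;    // stack on the previous position
                pendingSynonym = null;
                return true;
            }
            if (!input.IncrementToken())
                return false;
            pendingSynonym = GetSynonym(termAtt.Term);   // hypothetical dictionary lookup
            if (pendingSynonym != null)
                pendingState = CaptureState();
            return true;
        }

        private static string GetSynonym(string term)
        {
            return term == "quick" ? "fast" : null;      // toy dictionary for illustration
        }
    }
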
diff --git a/src/core/Analysis/Tokenattributes/TermAttribute.cs b/src/core/Analysis/Tokenattributes/TermAttribute.cs
new file mode 100644
index 0000000..f95402c
--- /dev/null
+++ b/src/core/Analysis/Tokenattributes/TermAttribute.cs
@@ -0,0 +1,268 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Support;
+using ArrayUtil = Lucene.Net.Util.ArrayUtil;
+using Attribute = Lucene.Net.Util.Attribute;
+
+namespace Lucene.Net.Analysis.Tokenattributes
+{
+
+ /// <summary> The term text of a Token.</summary>
+ [Serializable]
+ public class TermAttribute:Attribute, ITermAttribute, System.ICloneable
+ {
+ private static int MIN_BUFFER_SIZE = 10;
+
+ private char[] termBuffer;
+ private int termLength;
+
+ /// <summary>Returns the Token's term text.
+ ///
+ /// This method has a performance penalty
+ /// because the text is stored internally in a char[]. If
+ /// possible, use <see cref="TermBuffer()" /> and
+ /// <see cref="TermLength()" /> directly instead. If you
+ /// really need a String, use this method, which is nothing more than
+ /// a convenience call to <b>new String(token.termBuffer(), 0, token.termLength())</b>
+ /// </summary>
+ public virtual string Term
+ {
+ get
+ {
+ InitTermBuffer();
+ return new System.String(termBuffer, 0, termLength);
+ }
+ }
+
+ /// <summary>Copies the contents of buffer, starting at offset for
+ /// length characters, into the termBuffer array.
+ /// </summary>
+ /// <param name="buffer">the buffer to copy
+ /// </param>
+ /// <param name="offset">the index in the buffer of the first character to copy
+ /// </param>
+ /// <param name="length">the number of characters to copy
+ /// </param>
+ public virtual void SetTermBuffer(char[] buffer, int offset, int length)
+ {
+ GrowTermBuffer(length);
+ Array.Copy(buffer, offset, termBuffer, 0, length);
+ termLength = length;
+ }
+
+ /// <summary>Copies the contents of buffer into the termBuffer array.</summary>
+ /// <param name="buffer">the buffer to copy
+ /// </param>
+ public virtual void SetTermBuffer(System.String buffer)
+ {
+ int length = buffer.Length;
+ GrowTermBuffer(length);
+ TextSupport.GetCharsFromString(buffer, 0, length, termBuffer, 0);
+ termLength = length;
+ }
+
+ /// <summary>Copies the contents of buffer, starting at offset and continuing
+ /// for length characters, into the termBuffer array.
+ /// </summary>
+ /// <param name="buffer">the buffer to copy
+ /// </param>
+ /// <param name="offset">the index in the buffer of the first character to copy
+ /// </param>
+ /// <param name="length">the number of characters to copy
+ /// </param>
+ public virtual void SetTermBuffer(System.String buffer, int offset, int length)
+ {
+ System.Diagnostics.Debug.Assert(offset <= buffer.Length);
+ System.Diagnostics.Debug.Assert(offset + length <= buffer.Length);
+ GrowTermBuffer(length);
+ TextSupport.GetCharsFromString(buffer, offset, offset + length, termBuffer, 0);
+ termLength = length;
+ }
+
+ /// <summary>Returns the internal termBuffer character array which
+ /// you can then directly alter. If the array is too
+ /// small for your token, use <see cref="ResizeTermBuffer(int)" />
+ /// to increase it. After
+ /// altering the buffer be sure to call <see cref="SetTermLength" />
+ /// to record the number of valid
+ /// characters that were placed into the termBuffer.
+ /// </summary>
+ public virtual char[] TermBuffer()
+ {
+ InitTermBuffer();
+ return termBuffer;
+ }
+
+ /// <summary>Grows the termBuffer to at least size newSize, preserving the
+ /// existing content. Note: If the next operation is to change
+ /// the contents of the term buffer use
+ /// <see cref="SetTermBuffer(char[], int, int)" />,
+ /// <see cref="SetTermBuffer(String)" />, or
+ /// <see cref="SetTermBuffer(String, int, int)" />
+ /// to optimally combine the resize with the setting of the termBuffer.
+ /// </summary>
+ /// <param name="newSize">minimum size of the new termBuffer
+ /// </param>
+ /// <returns> newly created termBuffer with length >= newSize
+ /// </returns>
+ public virtual char[] ResizeTermBuffer(int newSize)
+ {
+ if (termBuffer == null)
+ {
+ // The buffer is always at least MIN_BUFFER_SIZE
+ termBuffer = new char[ArrayUtil.GetNextSize(newSize < MIN_BUFFER_SIZE?MIN_BUFFER_SIZE:newSize)];
+ }
+ else
+ {
+ if (termBuffer.Length < newSize)
+ {
+ // Not big enough; create a new array with slight
+ // over allocation and preserve content
+ char[] newCharBuffer = new char[ArrayUtil.GetNextSize(newSize)];
+ Array.Copy(termBuffer, 0, newCharBuffer, 0, termBuffer.Length);
+ termBuffer = newCharBuffer;
+ }
+ }
+ return termBuffer;
+ }
+
+
+ /// <summary>Allocates a buffer char[] of at least newSize, without preserving the existing content.
+ /// It is always used in places that set the content.
+ /// </summary>
+ /// <param name="newSize">minimum size of the buffer
+ /// </param>
+ private void GrowTermBuffer(int newSize)
+ {
+ if (termBuffer == null)
+ {
+ // The buffer is always at least MIN_BUFFER_SIZE
+ termBuffer = new char[ArrayUtil.GetNextSize(newSize < MIN_BUFFER_SIZE?MIN_BUFFER_SIZE:newSize)];
+ }
+ else
+ {
+ if (termBuffer.Length < newSize)
+ {
+ // Not big enough; create a new array with slight
+ // over allocation:
+ termBuffer = new char[ArrayUtil.GetNextSize(newSize)];
+ }
+ }
+ }
+
+ private void InitTermBuffer()
+ {
+ if (termBuffer == null)
+ {
+ termBuffer = new char[ArrayUtil.GetNextSize(MIN_BUFFER_SIZE)];
+ termLength = 0;
+ }
+ }
+
+ /// <summary>Return number of valid characters (length of the term)
+ /// in the termBuffer array.
+ /// </summary>
+ public virtual int TermLength()
+ {
+ return termLength;
+ }
+
+ /// <summary>Set number of valid characters (length of the term) in
+ /// the termBuffer array. Use this to truncate the termBuffer
+ /// or to synchronize with external manipulation of the termBuffer.
+ /// Note: to grow the size of the array,
+ /// use <see cref="ResizeTermBuffer(int)" /> first.
+ /// </summary>
+ /// <param name="length">the truncated length
+ /// </param>
+ public virtual void SetTermLength(int length)
+ {
+ InitTermBuffer();
+ if (length > termBuffer.Length)
+ throw new System.ArgumentException("length " + length + " exceeds the size of the termBuffer (" + termBuffer.Length + ")");
+ termLength = length;
+ }
+
+ public override int GetHashCode()
+ {
+ InitTermBuffer();
+ int code = termLength;
+ code = code * 31 + ArrayUtil.HashCode(termBuffer, 0, termLength);
+ return code;
+ }
+
+ public override void Clear()
+ {
+ termLength = 0;
+ }
+
+ public override System.Object Clone()
+ {
+ TermAttribute t = (TermAttribute) base.Clone();
+ // Do a deep clone
+ if (termBuffer != null)
+ {
+ t.termBuffer = new char[termBuffer.Length];
+ termBuffer.CopyTo(t.termBuffer, 0);
+ }
+ return t;
+ }
+
+ public override bool Equals(System.Object other)
+ {
+ if (other == this)
+ {
+ return true;
+ }
+
+ if (other is ITermAttribute)
+ {
+ InitTermBuffer();
+ TermAttribute o = ((TermAttribute) other);
+ o.InitTermBuffer();
+
+ if (termLength != o.termLength)
+ return false;
+ for (int i = 0; i < termLength; i++)
+ {
+ if (termBuffer[i] != o.termBuffer[i])
+ {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ return false;
+ }
+
+ public override System.String ToString()
+ {
+ InitTermBuffer();
+ return "term=" + new System.String(termBuffer, 0, termLength);
+ }
+
+ public override void CopyTo(Attribute target)
+ {
+ InitTermBuffer();
+ ITermAttribute t = (ITermAttribute) target;
+ t.SetTermBuffer(termBuffer, 0, termLength);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Analysis/Tokenattributes/TypeAttribute.cs b/src/core/Analysis/Tokenattributes/TypeAttribute.cs
new file mode 100644
index 0000000..1da1c50
--- /dev/null
+++ b/src/core/Analysis/Tokenattributes/TypeAttribute.cs
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Attribute = Lucene.Net.Util.Attribute;
+
+namespace Lucene.Net.Analysis.Tokenattributes
+{
+
+ /// <summary> A Token's lexical type. The default value is "word". </summary>
+ [Serializable]
+ public class TypeAttribute:Attribute, ITypeAttribute, System.ICloneable
+ {
+ private System.String type;
+ public const System.String DEFAULT_TYPE = "word";
+
+ public TypeAttribute():this(DEFAULT_TYPE)
+ {
+ }
+
+ public TypeAttribute(System.String type)
+ {
+ this.type = type;
+ }
+
+ /// <summary>Returns this Token's lexical type. Defaults to "word". </summary>
+ public virtual string Type
+ {
+ get { return type; }
+ set { this.type = value; }
+ }
+
+ public override void Clear()
+ {
+ type = DEFAULT_TYPE;
+ }
+
+ public override bool Equals(System.Object other)
+ {
+ if (other == this)
+ {
+ return true;
+ }
+
+ if (other is TypeAttribute)
+ {
+ return type.Equals(((TypeAttribute) other).type);
+ }
+
+ return false;
+ }
+
+ public override int GetHashCode()
+ {
+ return type.GetHashCode();
+ }
+
+ public override void CopyTo(Attribute target)
+ {
+ ITypeAttribute t = (ITypeAttribute) target;
+ t.Type = type;
+ }
+
+ override public System.Object Clone()
+ {
+ TypeAttribute impl = new TypeAttribute();
+ impl.type = type;
+ return impl;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Analysis/Tokenizer.cs b/src/core/Analysis/Tokenizer.cs
new file mode 100644
index 0000000..5ab741e
--- /dev/null
+++ b/src/core/Analysis/Tokenizer.cs
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using AttributeSource = Lucene.Net.Util.AttributeSource;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// <summary> A Tokenizer is a TokenStream whose input is a Reader.
+ /// <p/>
+ /// This is an abstract class; subclasses must override <see cref="TokenStream.IncrementToken()" />
+ /// <p/>
+ /// NOTE: Subclasses overriding <see cref="TokenStream.IncrementToken()" /> must call
+ /// <see cref="AttributeSource.ClearAttributes()" /> before setting attributes.
+ /// </summary>
+
+ public abstract class Tokenizer:TokenStream
+ {
+ /// <summary>The text source for this Tokenizer. </summary>
+ protected internal System.IO.TextReader input;
+
+ private bool isDisposed;
+
+ /// <summary>Construct a tokenizer with null input. </summary>
+ protected internal Tokenizer()
+ {
+ }
+
+ /// <summary>Construct a token stream processing the given input. </summary>
+ protected internal Tokenizer(System.IO.TextReader input)
+ {
+ this.input = CharReader.Get(input);
+ }
+
+ /// <summary>Construct a tokenizer with null input using the given AttributeFactory. </summary>
+ protected internal Tokenizer(AttributeFactory factory):base(factory)
+ {
+ }
+
+ /// <summary>Construct a token stream processing the given input using the given AttributeFactory. </summary>
+ protected internal Tokenizer(AttributeFactory factory, System.IO.TextReader input):base(factory)
+ {
+ this.input = CharReader.Get(input);
+ }
+
+ /// <summary>Construct a token stream processing the given input using the given AttributeSource. </summary>
+ protected internal Tokenizer(AttributeSource source):base(source)
+ {
+ }
+
+ /// <summary>Construct a token stream processing the given input using the given AttributeSource. </summary>
+ protected internal Tokenizer(AttributeSource source, System.IO.TextReader input):base(source)
+ {
+ this.input = CharReader.Get(input);
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ if (isDisposed) return;
+
+ if (disposing)
+ {
+ if (input != null)
+ {
+ input.Close();
+ }
+ }
+
+ // LUCENE-2387: don't hold onto Reader after close, so
+ // GC can reclaim
+ input = null;
+ isDisposed = true;
+ }
+
+ /// <summary>Return the corrected offset. If <see cref="input" /> is a <see cref="CharStream" /> subclass
+ /// this method calls <see cref="CharStream.CorrectOffset" />, else returns <c>currentOff</c>.
+ /// </summary>
+ /// <param name="currentOff">offset as seen in the output
+ /// </param>
+ /// <returns> corrected offset based on the input
+ /// </returns>
+ /// <seealso cref="CharStream.CorrectOffset">
+ /// </seealso>
+ protected internal int CorrectOffset(int currentOff)
+ {
+ return (input is CharStream)?((CharStream) input).CorrectOffset(currentOff):currentOff;
+ }
+
+ /// <summary>Expert: Reset the tokenizer to a new reader. Typically, an
+ /// analyzer (in its reusableTokenStream method) will use
+ /// this to re-use a previously created tokenizer.
+ /// </summary>
+ public virtual void Reset(System.IO.TextReader input)
+ {
+ this.input = input;
+ }
+ }
+} \ No newline at end of file
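
For illustration, a minimal concrete Tokenizer (a sketch, not part of this commit) that emits its whole input as a single token, following the contract above: ClearAttributes() before setting attributes, CorrectOffset() when reporting offsets, and Reset(TextReader) support for reuse. It assumes the AttributeSource.AddAttribute<T>() helper from Lucene.Net.Util.

    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Tokenattributes;

    // Emits the entire reader contents as one token (similar in spirit to KeywordTokenizer).
    public sealed class WholeInputTokenizer : Tokenizer
    {
        private readonly ITermAttribute termAtt;
        private readonly IOffsetAttribute offsetAtt;
        private bool done;

        public WholeInputTokenizer(System.IO.TextReader input) : base(input)
        {
            termAtt = AddAttribute<ITermAttribute>();
            offsetAtt = AddAttribute<IOffsetAttribute>();
        }

        public override bool IncrementToken()
        {
            if (done)
                return false;
            ClearAttributes();                      // required before setting attributes
            string text = input.ReadToEnd();
            termAtt.SetTermBuffer(text);
            offsetAtt.SetOffset(CorrectOffset(0), CorrectOffset(text.Length));
            done = true;
            return true;
        }

        public override void Reset(System.IO.TextReader input)
        {
            base.Reset(input);
            done = false;                           // allow the tokenizer to be reused
        }
    }
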
diff --git a/src/core/Analysis/WhitespaceAnalyzer.cs b/src/core/Analysis/WhitespaceAnalyzer.cs
new file mode 100644
index 0000000..77dbaa3
--- /dev/null
+++ b/src/core/Analysis/WhitespaceAnalyzer.cs
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Lucene.Net.Analysis
+{
+
+ /// <summary>An Analyzer that uses <see cref="WhitespaceTokenizer" />. </summary>
+
+ public sealed class WhitespaceAnalyzer:Analyzer
+ {
+ public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
+ {
+ return new WhitespaceTokenizer(reader);
+ }
+
+ public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
+ {
+ var tokenizer = (Tokenizer) PreviousTokenStream;
+ if (tokenizer == null)
+ {
+ tokenizer = new WhitespaceTokenizer(reader);
+ PreviousTokenStream = tokenizer;
+ }
+ else
+ tokenizer.Reset(reader);
+ return tokenizer;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Analysis/WhitespaceTokenizer.cs b/src/core/Analysis/WhitespaceTokenizer.cs
new file mode 100644
index 0000000..c96ad50
--- /dev/null
+++ b/src/core/Analysis/WhitespaceTokenizer.cs
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using AttributeSource = Lucene.Net.Util.AttributeSource;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// <summary>A WhitespaceTokenizer is a tokenizer that divides text at whitespace.
+ /// Adjacent sequences of non-whitespace characters form tokens.
+ /// </summary>
+
+ public class WhitespaceTokenizer:CharTokenizer
+ {
+ /// <summary>Construct a new WhitespaceTokenizer. </summary>
+ public WhitespaceTokenizer(System.IO.TextReader @in)
+ : base(@in)
+ {
+ }
+
+ /// <summary>Construct a new WhitespaceTokenizer using a given <see cref="AttributeSource" />. </summary>
+ public WhitespaceTokenizer(AttributeSource source, System.IO.TextReader @in)
+ : base(source, @in)
+ {
+ }
+
+ /// <summary>Construct a new WhitespaceTokenizer using a given <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory" />. </summary>
+ public WhitespaceTokenizer(AttributeFactory factory, System.IO.TextReader @in)
+ : base(factory, @in)
+ {
+ }
+
+ /// <summary>Collects only characters which do not satisfy
+ /// <see cref="char.IsWhiteSpace(char)" />.
+ /// </summary>
+ protected internal override bool IsTokenChar(char c)
+ {
+ return !System.Char.IsWhiteSpace(c);
+ }
+ }
+} \ No newline at end of file
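
For illustration, a short end-to-end sketch (not part of this commit) consuming the whitespace analyzer above with the usual IncrementToken() loop; the printed term and offset values assume the attribute accessors added elsewhere in this commit.

    using System;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Tokenattributes;

    public static class WhitespaceDemo
    {
        public static void Main()
        {
            var analyzer = new WhitespaceAnalyzer();
            TokenStream ts = analyzer.TokenStream("body", new System.IO.StringReader("The quick  brown fox"));

            var term = ts.AddAttribute<ITermAttribute>();
            var offsets = ts.AddAttribute<IOffsetAttribute>();

            while (ts.IncrementToken())
            {
                // Expected output: The [0,3], quick [4,9], brown [11,16], fox [17,20]
                Console.WriteLine("{0} [{1},{2}]", term.Term, offsets.StartOffset, offsets.EndOffset);
            }
        }
    }
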
diff --git a/src/core/Analysis/WordlistLoader.cs b/src/core/Analysis/WordlistLoader.cs
new file mode 100644
index 0000000..bfd1b07
--- /dev/null
+++ b/src/core/Analysis/WordlistLoader.cs
@@ -0,0 +1,146 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// <summary> Loader for text files that represent a list of stopwords.</summary>
+ public class WordlistLoader
+ {
+
+ /// <summary> Loads a text file and adds every line as an entry to a HashSet (omitting
+ /// leading and trailing whitespace). Every line of the file should contain only
+ /// one word. The words need to be in lowercase if you make use of an
+ /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
+ /// </summary>
+ /// <param name="wordfile">File containing the wordlist</param>
+ /// <returns> A HashSet with the file's words</returns>
+ public static ISet<string> GetWordSet(System.IO.FileInfo wordfile)
+ {
+ using (var reader = new System.IO.StreamReader(wordfile.FullName, System.Text.Encoding.Default))
+ {
+ return GetWordSet(reader);
+ }
+ }
+
+ /// <summary> Loads a text file and adds every non-comment line as an entry to a HashSet (omitting
+ /// leading and trailing whitespace). Every line of the file should contain only
+ /// one word. The words need to be in lowercase if you make use of an
+ /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
+ /// </summary>
+ /// <param name="wordfile">File containing the wordlist</param>
+ /// <param name="comment">The comment string to ignore</param>
+ /// <returns> A HashSet with the file's words</returns>
+ public static ISet<string> GetWordSet(System.IO.FileInfo wordfile, System.String comment)
+ {
+ using (var reader = new System.IO.StreamReader(wordfile.FullName, System.Text.Encoding.Default))
+ {
+ return GetWordSet(reader, comment);
+ }
+ }
+
+
+ /// <summary> Reads lines from a Reader and adds every line as an entry to a HashSet (omitting
+ /// leading and trailing whitespace). Every line of the Reader should contain only
+ /// one word. The words need to be in lowercase if you make use of an
+ /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
+ /// </summary>
+ /// <param name="reader">Reader containing the wordlist</param>
+ /// <returns>A HashSet with the reader's words</returns>
+ public static ISet<string> GetWordSet(System.IO.TextReader reader)
+ {
+ var result = Support.Compatibility.SetFactory.CreateHashSet<string>();
+
+ System.String word;
+ while ((word = reader.ReadLine()) != null)
+ {
+ result.Add(word.Trim());
+ }
+
+ return result;
+ }
+
+ /// <summary> Reads lines from a Reader and adds every non-comment line as an entry to a HashSet (omitting
+ /// leading and trailing whitespace). Every line of the Reader should contain only
+ /// one word. The words need to be in lowercase if you make use of an
+ /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
+ ///
+ /// </summary>
+ /// <param name="reader">Reader containing the wordlist
+ /// </param>
+ /// <param name="comment">The string representing a comment.
+ /// </param>
+ /// <returns> A HashSet with the reader's words
+ /// </returns>
+ public static ISet<string> GetWordSet(System.IO.TextReader reader, System.String comment)
+ {
+ var result = Support.Compatibility.SetFactory.CreateHashSet<string>();
+
+ System.String word = null;
+ while ((word = reader.ReadLine()) != null)
+ {
+ if (word.StartsWith(comment) == false)
+ {
+ result.Add(word.Trim());
+ }
+ }
+
+ return result;
+ }
+
+
+
+ /// <summary> Reads a stem dictionary. Each line contains:
+ /// <c>word<b>\t</b>stem</c>
+ /// (i.e. two tab-separated words)
+ ///
+ /// </summary>
+ /// <returns> stem dictionary that overrules the stemming algorithm
+ /// </returns>
+ /// <throws> IOException </throws>
+ public static Dictionary<string, string> GetStemDict(System.IO.FileInfo wordstemfile)
+ {
+ if (wordstemfile == null)
+ throw new System.NullReferenceException("wordstemfile may not be null");
+ var result = new Dictionary<string, string>();
+ System.IO.StreamReader br = null;
+ System.IO.StreamReader fr = null;
+ try
+ {
+ fr = new System.IO.StreamReader(wordstemfile.FullName, System.Text.Encoding.Default);
+ br = new System.IO.StreamReader(fr.BaseStream, fr.CurrentEncoding);
+ System.String line;
+ char[] tab = {'\t'};
+ while ((line = br.ReadLine()) != null)
+ {
+ System.String[] wordstem = line.Split(tab, 2);
+ result[wordstem[0]] = wordstem[1];
+ }
+ }
+ finally
+ {
+ if (fr != null)
+ fr.Close();
+ if (br != null)
+ br.Close();
+ }
+ return result;
+ }
+ }
+} \ No newline at end of file
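
For illustration, a short usage sketch (not part of this commit) feeding the loader above an in-memory reader; the '#' comment marker is just an example argument, not something the loader requires.

    using System;
    using System.IO;
    using Lucene.Net.Analysis;

    public static class WordlistDemo
    {
        public static void Main()
        {
            const string listing = "# common english stopwords\nthe\nand\n of \n";

            var words = WordlistLoader.GetWordSet(new StringReader(listing), "#");

            Console.WriteLine(words.Count);              // 3 (the comment line is skipped)
            Console.WriteLine(words.Contains("of"));     // True - lines are trimmed before adding
        }
    }
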
diff --git a/src/core/Document/AbstractField.cs b/src/core/Document/AbstractField.cs
new file mode 100644
index 0000000..a526f1d
--- /dev/null
+++ b/src/core/Document/AbstractField.cs
@@ -0,0 +1,312 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.IO;
+using TokenStream = Lucene.Net.Analysis.TokenStream;
+using StringHelper = Lucene.Net.Util.StringHelper;
+using PhraseQuery = Lucene.Net.Search.PhraseQuery;
+using SpanQuery = Lucene.Net.Search.Spans.SpanQuery;
+
+namespace Lucene.Net.Documents
+{
+ /// <summary>
+ /// Base class for <see cref="Field" /> implementations, holding the common
+ /// flags (stored, indexed, tokenized, binary, ...) and value data shared by concrete fields.
+ /// </summary>
+ [Serializable]
+ public abstract class AbstractField : IFieldable
+ {
+
+ protected internal System.String internalName = "body";
+ protected internal bool storeTermVector = false;
+ protected internal bool storeOffsetWithTermVector = false;
+ protected internal bool storePositionWithTermVector = false;
+ protected internal bool internalOmitNorms = false;
+ protected internal bool internalIsStored = false;
+ protected internal bool internalIsIndexed = true;
+ protected internal bool internalIsTokenized = true;
+ protected internal bool internalIsBinary = false;
+ protected internal bool lazy = false;
+ protected internal bool internalOmitTermFreqAndPositions = false;
+ protected internal float internalBoost = 1.0f;
+ // the data object for all different kind of field values
+ protected internal System.Object fieldsData = null;
+ // pre-analyzed tokenStream for indexed fields
+ protected internal TokenStream tokenStream;
+ // length/offset for all primitive types
+ protected internal int internalBinaryLength;
+ protected internal int internalbinaryOffset;
+
+ protected internal AbstractField()
+ {
+ }
+
+ protected internal AbstractField(System.String name, Field.Store store, Field.Index index, Field.TermVector termVector)
+ {
+ if (name == null)
+ throw new System.NullReferenceException("name cannot be null");
+ this.internalName = StringHelper.Intern(name); // field names are interned
+
+ this.internalIsStored = store.IsStored();
+ this.internalIsIndexed = index.IsIndexed();
+ this.internalIsTokenized = index.IsAnalyzed();
+ this.internalOmitNorms = index.OmitNorms();
+
+ this.internalIsBinary = false;
+
+ SetStoreTermVector(termVector);
+ }
+
+ /// <summary>Gets or sets the boost factor for hits for this field.
+ ///
+ /// <p/>The default value is 1.0.
+ ///
+ /// <p/>Note: this value is not stored directly with the document in the index.
+ /// Documents returned from <see cref="Lucene.Net.Index.IndexReader.Document(int)" /> and
+ /// <see cref="Lucene.Net.Search.Searcher.Doc(int)" /> may thus not have the same value present as when
+ /// this field was indexed.
+ /// </summary>
+ public virtual float Boost
+ {
+ get { return internalBoost; }
+ set { this.internalBoost = value; }
+ }
+
+ /// <summary>Returns the name of the field as an interned string.
+ /// For example "date", "title", "body", ...
+ /// </summary>
+ public virtual string Name
+ {
+ get { return internalName; }
+ }
+
+ protected internal virtual void SetStoreTermVector(Field.TermVector termVector)
+ {
+ this.storeTermVector = termVector.IsStored();
+ this.storePositionWithTermVector = termVector.WithPositions();
+ this.storeOffsetWithTermVector = termVector.WithOffsets();
+ }
+
+ /// <summary>True iff the value of the field is to be stored in the index for return
+ /// with search hits. It is an error for this to be true if a field is
+ /// Reader-valued.
+ /// </summary>
+ public bool IsStored
+ {
+ get { return internalIsStored; }
+ }
+
+ /// <summary>True iff the value of the field is to be indexed, so that it may be
+ /// searched on.
+ /// </summary>
+ public bool IsIndexed
+ {
+ get { return internalIsIndexed; }
+ }
+
+ /// <summary>True iff the value of the field should be tokenized as text prior to
+ /// indexing. Un-tokenized fields are indexed as a single word and may not be
+ /// Reader-valued.
+ /// </summary>
+ public bool IsTokenized
+ {
+ get { return internalIsTokenized; }
+ }
+
+ /// <summary>True iff the term or terms used to index this field are stored as a term
+ /// vector, available from <see cref="Lucene.Net.Index.IndexReader.GetTermFreqVector(int,String)" />.
+ /// These methods do not provide access to the original content of the field,
+ /// only to terms used to index it. If the original content must be
+ /// preserved, use the <c>stored</c> attribute instead.
+ ///
+ /// </summary>
+ /// <seealso cref="Lucene.Net.Index.IndexReader.GetTermFreqVector(int, String)">
+ /// </seealso>
+ public bool IsTermVectorStored
+ {
+ get { return storeTermVector; }
+ }
+
+ /// <summary> True iff terms are stored as term vector together with their offsets
+ /// (start and end position in source text).
+ /// </summary>
+ public virtual bool IsStoreOffsetWithTermVector
+ {
+ get { return storeOffsetWithTermVector; }
+ }
+
+ /// <summary> True iff terms are stored as term vector together with their token positions.</summary>
+ public virtual bool IsStorePositionWithTermVector
+ {
+ get { return storePositionWithTermVector; }
+ }
+
+ /// <summary>True iff the value of the field is stored as binary </summary>
+ public bool IsBinary
+ {
+ get { return internalIsBinary; }
+ }
+
+
+ /// <summary> Return the raw byte[] for the binary field. Note that
+ /// you must also call <see cref="BinaryLength" /> and <see cref="BinaryOffset" />
+ /// to know which range of bytes in this
+ /// returned array belong to the field.
+ /// </summary>
+ /// <returns> reference to the Field value as byte[]. </returns>
+ public virtual byte[] GetBinaryValue()
+ {
+ return GetBinaryValue(null);
+ }
+
+ public virtual byte[] GetBinaryValue(byte[] result)
+ {
+ if (internalIsBinary || fieldsData is byte[])
+ return (byte[]) fieldsData;
+ else
+ return null;
+ }
+
+ /// <summary> Returns the length of the byte[] segment that is used as the value.
+ /// If the Field is not binary, the returned value is undefined.
+ /// </summary>
+ /// <value> length of byte[] segment that represents this Field value </value>
+ public virtual int BinaryLength
+ {
+ get
+ {
+ if (internalIsBinary)
+ {
+ return internalBinaryLength;
+ }
+ return fieldsData is byte[] ? ((byte[]) fieldsData).Length : 0;
+ }
+ }
+
+ /// <summary> Returns the offset into the byte[] segment that is used as the value.
+ /// If the Field is not binary, the returned value is undefined.
+ /// </summary>
+ /// <value> index of the first character in byte[] segment that represents this Field value </value>
+ public virtual int BinaryOffset
+ {
+ get { return internalbinaryOffset; }
+ }
+
+ /// <summary>True if norms are omitted for this indexed field </summary>
+ public virtual bool OmitNorms
+ {
+ get { return internalOmitNorms; }
+ set { this.internalOmitNorms = value; }
+ }
+
+ /// <summary>Expert:
+ ///
+ /// If set, omit term freq, positions and payloads from
+ /// postings for this field.
+ ///
+ /// <p/><b>NOTE</b>: While this option reduces storage space
+ /// required in the index, it also means any query
+ /// requiring positional information, such as <see cref="PhraseQuery" />
+ /// or <see cref="SpanQuery" /> subclasses will
+ /// silently fail to find results.
+ /// </summary>
+ public virtual bool OmitTermFreqAndPositions
+ {
+ set { this.internalOmitTermFreqAndPositions = value; }
+ get { return internalOmitTermFreqAndPositions; }
+ }
+
+ public virtual bool IsLazy
+ {
+ get { return lazy; }
+ }
+
+ /// <summary>Prints a Field for human consumption. </summary>
+ public override System.String ToString()
+ {
+ System.Text.StringBuilder result = new System.Text.StringBuilder();
+ if (internalIsStored)
+ {
+ result.Append("stored");
+ }
+ if (internalIsIndexed)
+ {
+ if (result.Length > 0)
+ result.Append(",");
+ result.Append("indexed");
+ }
+ if (internalIsTokenized)
+ {
+ if (result.Length > 0)
+ result.Append(",");
+ result.Append("tokenized");
+ }
+ if (storeTermVector)
+ {
+ if (result.Length > 0)
+ result.Append(",");
+ result.Append("termVector");
+ }
+ if (storeOffsetWithTermVector)
+ {
+ if (result.Length > 0)
+ result.Append(",");
+ result.Append("termVectorOffsets");
+ }
+ if (storePositionWithTermVector)
+ {
+ if (result.Length > 0)
+ result.Append(",");
+ result.Append("termVectorPosition");
+ }
+ if (internalIsBinary)
+ {
+ if (result.Length > 0)
+ result.Append(",");
+ result.Append("binary");
+ }
+ if (internalOmitNorms)
+ {
+ result.Append(",omitNorms");
+ }
+ if (internalOmitTermFreqAndPositions)
+ {
+ result.Append(",omitTermFreqAndPositions");
+ }
+ if (lazy)
+ {
+ result.Append(",lazy");
+ }
+ result.Append('<');
+ result.Append(internalName);
+ result.Append(':');
+
+ if (fieldsData != null && lazy == false)
+ {
+ result.Append(fieldsData);
+ }
+
+ result.Append('>');
+ return result.ToString();
+ }
+
+ public abstract TokenStream TokenStreamValue { get; }
+ public abstract TextReader ReaderValue { get; }
+ public abstract string StringValue { get; }
+ }
+} \ No newline at end of file
diff --git a/src/core/Document/CompressionTools.cs b/src/core/Document/CompressionTools.cs
new file mode 100644
index 0000000..400633f
--- /dev/null
+++ b/src/core/Document/CompressionTools.cs
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+// To enable compression support in Lucene.Net ,
+// you will need to define 'SHARP_ZIP_LIB' and reference the SharpLibZip
+// library. The SharpLibZip library can be downloaded from:
+// http://www.icsharpcode.net/OpenSource/SharpZipLib/
+
+using System;
+using Lucene.Net.Support;
+using UnicodeUtil = Lucene.Net.Util.UnicodeUtil;
+
+namespace Lucene.Net.Documents
+{
+
+ /// <summary>Simple utility class providing static methods to
+ /// compress and decompress binary data for stored fields.
+ /// This class uses the SharpZipLib Deflater and Inflater
+ /// classes to compress and decompress.
+ /// </summary>
+
+ public class CompressionTools
+ {
+
+ // Export only static methods
+ private CompressionTools()
+ {
+ }
+
+ /// <summary>Compresses the specified byte range using the
+ /// specified compressionLevel (constants such as
+ /// Deflater.BEST_COMPRESSION are defined on the Deflater class).
+ /// </summary>
+ public static byte[] Compress(byte[] value_Renamed, int offset, int length, int compressionLevel)
+ {
+ /* Create an expandable byte array to hold the compressed data.
+ * You cannot use an array that's the same size as the original because
+ * there is no guarantee that the compressed data will be smaller than
+ * the uncompressed data. */
+ System.IO.MemoryStream bos = new System.IO.MemoryStream(length);
+
+ Deflater compressor = SharpZipLib.CreateDeflater();
+
+ try
+ {
+ compressor.SetLevel(compressionLevel);
+ compressor.SetInput(value_Renamed, offset, length);
+ compressor.Finish();
+
+ // Compress the data
+ byte[] buf = new byte[1024];
+ while (!compressor.IsFinished)
+ {
+ int count = compressor.Deflate(buf);
+ bos.Write(buf, 0, count);
+ }
+ }
+ finally
+ {
+ }
+
+ return bos.ToArray();
+ }
+
+ /// <summary>Compresses the specified byte range, with default BEST_COMPRESSION level </summary>
+ public static byte[] Compress(byte[] value_Renamed, int offset, int length)
+ {
+ return Compress(value_Renamed, offset, length, Deflater.BEST_COMPRESSION);
+ }
+
+ /// <summary>Compresses all bytes in the array, with default BEST_COMPRESSION level </summary>
+ public static byte[] Compress(byte[] value_Renamed)
+ {
+ return Compress(value_Renamed, 0, value_Renamed.Length, Deflater.BEST_COMPRESSION);
+ }
+
+ /// <summary>Compresses the String value, with default BEST_COMPRESSION level </summary>
+ public static byte[] CompressString(System.String value_Renamed)
+ {
+ return CompressString(value_Renamed, Deflater.BEST_COMPRESSION);
+ }
+
+ /// <summary>Compresses the String value using the specified
+ /// compressionLevel (constants such as
+ /// Deflater.BEST_COMPRESSION are defined on the Deflater class).
+ /// </summary>
+ public static byte[] CompressString(System.String value_Renamed, int compressionLevel)
+ {
+ UnicodeUtil.UTF8Result result = new UnicodeUtil.UTF8Result();
+ UnicodeUtil.UTF16toUTF8(value_Renamed, 0, value_Renamed.Length, result);
+ return Compress(result.result, 0, result.length, compressionLevel);
+ }
+
+ /// <summary>Decompress the byte array previously returned by
+ /// compress
+ /// </summary>
+ public static byte[] Decompress(byte[] value_Renamed)
+ {
+ // Create an expandable byte array to hold the decompressed data
+ System.IO.MemoryStream bos = new System.IO.MemoryStream(value_Renamed.Length);
+
+ Inflater decompressor = SharpZipLib.CreateInflater();
+
+ try
+ {
+ decompressor.SetInput(value_Renamed);
+
+ // Decompress the data
+ byte[] buf = new byte[1024];
+ while (!decompressor.IsFinished)
+ {
+ int count = decompressor.Inflate(buf);
+ bos.Write(buf, 0, count);
+ }
+ }
+ finally
+ {
+ }
+
+ return bos.ToArray();
+ }
+
+ /// <summary>Decompress the byte array previously returned by
+ /// compressString back into a String
+ /// </summary>
+ public static System.String DecompressString(byte[] value_Renamed)
+ {
+ UnicodeUtil.UTF16Result result = new UnicodeUtil.UTF16Result();
+ byte[] bytes = Decompress(value_Renamed);
+ UnicodeUtil.UTF8toUTF16(bytes, 0, bytes.Length, result);
+ return new System.String(result.result, 0, result.length);
+ }
+ }
+}
+
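
For illustration, a round-trip sketch (not part of this commit) of the string helpers above; it assumes a build with SharpZipLib support enabled, and the sample payload is deliberately repetitive so the compressed form is smaller.

    using System;
    using Lucene.Net.Documents;

    public static class CompressionDemo
    {
        public static void Main()
        {
            string original = new string('a', 10000);    // highly compressible payload

            byte[] packed = CompressionTools.CompressString(original);
            string unpacked = CompressionTools.DecompressString(packed);

            Console.WriteLine(packed.Length < original.Length);    // True for repetitive input
            Console.WriteLine(unpacked == original);               // True
        }
    }
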
diff --git a/src/core/Document/DateField.cs b/src/core/Document/DateField.cs
new file mode 100644
index 0000000..6179f4c
--- /dev/null
+++ b/src/core/Document/DateField.cs
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Search;
+using Lucene.Net.Support;
+using NumericUtils = Lucene.Net.Util.NumericUtils;
+using PrefixQuery = Lucene.Net.Search.PrefixQuery;
+using TermRangeQuery = Lucene.Net.Search.TermRangeQuery;
+// for javadoc
+
+namespace Lucene.Net.Documents
+{
+ // for javadoc
+
+ // do not remove in 3.0, needed for reading old indexes!
+
+ /// <summary> Provides support for converting dates to strings and vice-versa.
+ /// The strings are structured so that lexicographic sorting orders by date,
+ /// which makes them suitable for use as field values and search terms.
+ ///
+ /// <p/>Note that this class saves dates with millisecond granularity,
+ /// which is bad for <see cref="TermRangeQuery" /> and <see cref="PrefixQuery" />, as those
+ /// queries are expanded to a BooleanQuery with a potentially large number
+ /// of terms when searching. Thus you might want to use
+ /// <see cref="DateTools" /> instead.
+ ///
+ /// <p/>
+ /// Note: dates before 1970 cannot be used, and therefore cannot be
+ /// indexed when using this class. See <see cref="DateTools" /> for an
+ /// alternative without such a limitation.
+ ///
+ /// <p/>
+ /// Another approach is <see cref="NumericUtils" />, which provides
+ /// a sortable binary representation (prefix encoded) of numeric values, which
+ /// date/time are.
+ /// For indexing a <see cref="DateTime" />, convert it to unix timestamp as
+ /// <c>long</c> and
+ /// index this as a numeric value with <see cref="NumericField" />
+ /// and use <see cref="NumericRangeQuery{T}" /> to query it.
+ ///
+ /// </summary>
+ /// <deprecated> If you build a new index, use <see cref="DateTools" /> or
+ /// <see cref="NumericField" /> instead.
+ /// This class is included for use with existing
+ /// indices and will be removed in a future release (possibly Lucene 4.0)
+ /// </deprecated>
+ [Obsolete("If you build a new index, use DateTools or NumericField instead.This class is included for use with existing indices and will be removed in a future release (possibly Lucene 4.0).")]
+ public class DateField
+ {
+
+ private DateField()
+ {
+ }
+
+ // make date strings long enough to last a millennium
+ private static int DATE_LEN = Number.ToString(1000L * 365 * 24 * 60 * 60 * 1000, Number.MAX_RADIX).Length;
+
+ public static System.String MIN_DATE_STRING()
+ {
+ return TimeToString(0);
+ }
+
+ public static System.String MAX_DATE_STRING()
+ {
+ char[] buffer = new char[DATE_LEN];
+ char c = Character.ForDigit(Character.MAX_RADIX - 1, Character.MAX_RADIX);
+ for (int i = 0; i < DATE_LEN; i++)
+ buffer[i] = c;
+ return new System.String(buffer);
+ }
+
+ /// <summary> Converts a Date to a string suitable for indexing.</summary>
+ /// <throws> RuntimeException if the date specified in the </throws>
+ /// <summary> method argument is before 1970
+ /// </summary>
+ public static System.String DateToString(System.DateTime date)
+ {
+ TimeSpan ts = date.Subtract(new DateTime(1970, 1, 1));
+ ts = ts.Subtract(TimeZone.CurrentTimeZone.GetUtcOffset(date));
+ return TimeToString(ts.Ticks / TimeSpan.TicksPerMillisecond);
+ }
+ /// <summary> Converts a millisecond time to a string suitable for indexing.</summary>
+ /// <throws> RuntimeException if the time specified in the </throws>
+ /// <summary> method argument is negative, that is, before 1970
+ /// </summary>
+ public static System.String TimeToString(long time)
+ {
+ if (time < 0)
+ throw new System.SystemException("time '" + time + "' is too early, must be >= 0");
+
+ System.String s = Number.ToString(time, Character.MAX_RADIX);
+
+ if (s.Length > DATE_LEN)
+ throw new System.SystemException("time '" + time + "' is too late, length of string " + "representation must be <= " + DATE_LEN);
+
+ // Pad with leading zeros
+ if (s.Length < DATE_LEN)
+ {
+ System.Text.StringBuilder sb = new System.Text.StringBuilder(s);
+ while (sb.Length < DATE_LEN)
+ sb.Insert(0, 0);
+ s = sb.ToString();
+ }
+
+ return s;
+ }
+
+ /// <summary>Converts a string-encoded date into a millisecond time. </summary>
+ public static long StringToTime(System.String s)
+ {
+ return Number.Parse(s, Number.MAX_RADIX);
+ }
+ /// <summary>Converts a string-encoded date into a Date object. </summary>
+ public static System.DateTime StringToDate(System.String s)
+ {
+ long ticks = StringToTime(s) * TimeSpan.TicksPerMillisecond;
+ System.DateTime date = new System.DateTime(1970, 1, 1);
+ date = date.AddTicks(ticks);
+ date = date.Add(TimeZone.CurrentTimeZone.GetUtcOffset(date));
+ return date;
+ }
+ }
+} \ No newline at end of file
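
For illustration, a round-trip sketch (not part of this commit) of the deprecated helpers above, relevant only when reading or writing legacy indexes; note the millisecond granularity and the local-time offset handling in the conversion.

    using System;
    using Lucene.Net.Documents;

    public static class DateFieldDemo
    {
        public static void Main()
        {
            DateTime indexed = new DateTime(2015, 6, 11, 19, 34, 9);

            string key = DateField.DateToString(indexed);       // lexicographically sortable string
            DateTime roundTripped = DateField.StringToDate(key);

            Console.WriteLine(key.Length);                      // padded to a fixed width (DATE_LEN)
            Console.WriteLine(roundTripped == indexed);         // True (millisecond granularity, stable UTC offset)
        }
    }
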
diff --git a/src/core/Document/DateTools.cs b/src/core/Document/DateTools.cs
new file mode 100644
index 0000000..8263df1
--- /dev/null
+++ b/src/core/Document/DateTools.cs
@@ -0,0 +1,350 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Search;
+using NumericUtils = Lucene.Net.Util.NumericUtils;
+
+namespace Lucene.Net.Documents
+{
+
+ /// <summary> Provides support for converting dates to strings and vice-versa.
+ /// The strings are structured so that lexicographic sorting orders
+ /// them by date, which makes them suitable for use as field values
+ /// and search terms.
+ ///
+ /// <p/>This class also helps you to limit the resolution of your dates. Do not
+ /// save dates with a finer resolution than you really need, as then
+ /// RangeQuery and PrefixQuery will require more memory and become slower.
+ ///
+ /// <p/>Compared to <see cref="DateField" /> the strings generated by the methods
+ /// in this class take slightly more space, unless your selected resolution
+ /// is set to <c>Resolution.DAY</c> or lower.
+ ///
+ /// <p/>
+ /// Another approach is <see cref="NumericUtils" />, which provides
+ /// a sortable binary representation (prefix encoded) of numeric values, which
+ /// date/time are.
+ /// For indexing a <see cref="DateTime" />, convert it to a Unix timestamp as
+ /// <c>long</c> and
+ /// index this as a numeric value with <see cref="NumericField" />
+ /// and use <see cref="NumericRangeQuery{T}" /> to query it.
+ /// </summary>
+ public class DateTools
+ {
+
+ private static readonly System.String YEAR_FORMAT = "yyyy";
+ private static readonly System.String MONTH_FORMAT = "yyyyMM";
+ private static readonly System.String DAY_FORMAT = "yyyyMMdd";
+ private static readonly System.String HOUR_FORMAT = "yyyyMMddHH";
+ private static readonly System.String MINUTE_FORMAT = "yyyyMMddHHmm";
+ private static readonly System.String SECOND_FORMAT = "yyyyMMddHHmmss";
+ private static readonly System.String MILLISECOND_FORMAT = "yyyyMMddHHmmssfff";
+
+ private static readonly System.Globalization.Calendar calInstance = new System.Globalization.GregorianCalendar();
+
+ // cannot create, the class has static methods only
+ private DateTools()
+ {
+ }
+
+ /// <summary> Converts a Date to a string suitable for indexing.
+ ///
+ /// </summary>
+ /// <param name="date">the date to be converted
+ /// </param>
+ /// <param name="resolution">the desired resolution, see
+ /// <see cref="Round(DateTime, DateTools.Resolution)" />
+ /// </param>
+ /// <returns> a string in format <c>yyyyMMddHHmmssfff</c> or shorter,
+ /// depending on <c>resolution</c>; using GMT as timezone
+ /// </returns>
+ public static System.String DateToString(System.DateTime date, Resolution resolution)
+ {
+ return TimeToString(date.Ticks / TimeSpan.TicksPerMillisecond, resolution);
+ }
+
+ /// <summary> Converts a millisecond time to a string suitable for indexing.
+ ///
+ /// </summary>
+ /// <param name="time">the date expressed as milliseconds since January 1, 1970, 00:00:00 GMT
+ /// </param>
+ /// <param name="resolution">the desired resolution, see
+ /// <see cref="Round(long, DateTools.Resolution)" />
+ /// </param>
+ /// <returns> a string in format <c>yyyyMMddHHmmssfff</c> or shorter,
+ /// depending on <c>resolution</c>; using GMT as timezone
+ /// </returns>
+ public static System.String TimeToString(long time, Resolution resolution)
+ {
+ System.DateTime date = new System.DateTime(Round(time, resolution));
+
+ if (resolution == Resolution.YEAR)
+ {
+ return date.ToString(YEAR_FORMAT, System.Globalization.CultureInfo.InvariantCulture);
+ }
+ else if (resolution == Resolution.MONTH)
+ {
+ return date.ToString(MONTH_FORMAT, System.Globalization.CultureInfo.InvariantCulture);
+ }
+ else if (resolution == Resolution.DAY)
+ {
+ return date.ToString(DAY_FORMAT, System.Globalization.CultureInfo.InvariantCulture);
+ }
+ else if (resolution == Resolution.HOUR)
+ {
+ return date.ToString(HOUR_FORMAT, System.Globalization.CultureInfo.InvariantCulture);
+ }
+ else if (resolution == Resolution.MINUTE)
+ {
+ return date.ToString(MINUTE_FORMAT, System.Globalization.CultureInfo.InvariantCulture);
+ }
+ else if (resolution == Resolution.SECOND)
+ {
+ return date.ToString(SECOND_FORMAT, System.Globalization.CultureInfo.InvariantCulture);
+ }
+ else if (resolution == Resolution.MILLISECOND)
+ {
+ return date.ToString(MILLISECOND_FORMAT, System.Globalization.CultureInfo.InvariantCulture);
+ }
+
+ throw new System.ArgumentException("unknown resolution " + resolution);
+ }
+
+ /// <summary> Converts a string produced by <c>TimeToString</c> or
+ /// <c>DateToString</c> back to a time, represented as the
+ /// <see cref="DateTime.Ticks" /> value of the parsed date.
+ ///
+ /// </summary>
+ /// <param name="dateString">the date string to be converted
+ /// </param>
+ /// <returns> the <see cref="DateTime.Ticks" /> value of the parsed date
+ /// </returns>
+ /// <throws> FormatException if <c>dateString</c> is not in the
+ /// expected format
+ /// </throws>
+ public static long StringToTime(System.String dateString)
+ {
+ return StringToDate(dateString).Ticks;
+ }
+
+ /// <summary> Converts a string produced by <c>TimeToString</c> or
+ /// <c>DateToString</c> back to a time, represented as a
+ /// <see cref="DateTime" /> object.
+ ///
+ /// </summary>
+ /// <param name="dateString">the date string to be converted
+ /// </param>
+ /// <returns> the parsed time as a <see cref="DateTime" /> object
+ /// </returns>
+ /// <throws> FormatException if <c>dateString</c> is not in the
+ /// expected format
+ /// </throws>
+ public static System.DateTime StringToDate(System.String dateString)
+ {
+ System.DateTime date;
+ if (dateString.Length == 4)
+ {
+ date = new System.DateTime(Convert.ToInt16(dateString.Substring(0, 4)),
+ 1, 1, 0, 0, 0, 0);
+ }
+ else if (dateString.Length == 6)
+ {
+ date = new System.DateTime(Convert.ToInt16(dateString.Substring(0, 4)),
+ Convert.ToInt16(dateString.Substring(4, 2)),
+ 1, 0, 0, 0, 0);
+ }
+ else if (dateString.Length == 8)
+ {
+ date = new System.DateTime(Convert.ToInt16(dateString.Substring(0, 4)),
+ Convert.ToInt16(dateString.Substring(4, 2)),
+ Convert.ToInt16(dateString.Substring(6, 2)),
+ 0, 0, 0, 0);
+ }
+ else if (dateString.Length == 10)
+ {
+ date = new System.DateTime(Convert.ToInt16(dateString.Substring(0, 4)),
+ Convert.ToInt16(dateString.Substring(4, 2)),
+ Convert.ToInt16(dateString.Substring(6, 2)),
+ Convert.ToInt16(dateString.Substring(8, 2)),
+ 0, 0, 0);
+ }
+ else if (dateString.Length == 12)
+ {
+ date = new System.DateTime(Convert.ToInt16(dateString.Substring(0, 4)),
+ Convert.ToInt16(dateString.Substring(4, 2)),
+ Convert.ToInt16(dateString.Substring(6, 2)),
+ Convert.ToInt16(dateString.Substring(8, 2)),
+ Convert.ToInt16(dateString.Substring(10, 2)),
+ 0, 0);
+ }
+ else if (dateString.Length == 14)
+ {
+ date = new System.DateTime(Convert.ToInt16(dateString.Substring(0, 4)),
+ Convert.ToInt16(dateString.Substring(4, 2)),
+ Convert.ToInt16(dateString.Substring(6, 2)),
+ Convert.ToInt16(dateString.Substring(8, 2)),
+ Convert.ToInt16(dateString.Substring(10, 2)),
+ Convert.ToInt16(dateString.Substring(12, 2)),
+ 0);
+ }
+ else if (dateString.Length == 17)
+ {
+ date = new System.DateTime(Convert.ToInt16(dateString.Substring(0, 4)),
+ Convert.ToInt16(dateString.Substring(4, 2)),
+ Convert.ToInt16(dateString.Substring(6, 2)),
+ Convert.ToInt16(dateString.Substring(8, 2)),
+ Convert.ToInt16(dateString.Substring(10, 2)),
+ Convert.ToInt16(dateString.Substring(12, 2)),
+ Convert.ToInt16(dateString.Substring(14, 3)));
+ }
+ else
+ {
+ throw new System.FormatException("Input is not a valid date string: " + dateString);
+ }
+ return date;
+ }
+
+ /// <summary> Limit a date's resolution. For example, the date <c>2004-09-21 13:50:11</c>
+ /// will be changed to <c>2004-09-01 00:00:00</c> when using
+ /// <c>Resolution.MONTH</c>.
+ ///
+ /// </summary>
+ /// <param name="date"></param>
+ /// <param name="resolution">The desired resolution of the date to be returned
+ /// </param>
+ /// <returns> the date with all values more precise than <c>resolution</c>
+ /// set to 0 or 1
+ /// </returns>
+ public static System.DateTime Round(System.DateTime date, Resolution resolution)
+ {
+ return new System.DateTime(Round(date.Ticks / TimeSpan.TicksPerMillisecond, resolution));
+ }
+
+ /// <summary> Limit a date's resolution. For example, the date <c>1095767411000</c>
+ /// (which represents 2004-09-21 13:50:11) will be changed to
+ /// <c>1093989600000</c> (2004-09-01 00:00:00) when using
+ /// <c>Resolution.MONTH</c>.
+ ///
+ /// </summary>
+ /// <param name="time">The time in milliseconds (not ticks).</param>
+ /// <param name="resolution">The desired resolution of the date to be returned
+ /// </param>
+ /// <returns> the date with all values more precise than <c>resolution</c>
+ /// set to 0 or 1, expressed as ticks for use with the <see cref="DateTime" /> constructor
+ /// </returns>
+ public static long Round(long time, Resolution resolution)
+ {
+ System.DateTime dt = new System.DateTime(time * TimeSpan.TicksPerMillisecond);
+
+ if (resolution == Resolution.YEAR)
+ {
+ dt = dt.AddMonths(1 - dt.Month);
+ dt = dt.AddDays(1 - dt.Day);
+ dt = dt.AddHours(0 - dt.Hour);
+ dt = dt.AddMinutes(0 - dt.Minute);
+ dt = dt.AddSeconds(0 - dt.Second);
+ dt = dt.AddMilliseconds(0 - dt.Millisecond);
+ }
+ else if (resolution == Resolution.MONTH)
+ {
+ dt = dt.AddDays(1 - dt.Day);
+ dt = dt.AddHours(0 - dt.Hour);
+ dt = dt.AddMinutes(0 - dt.Minute);
+ dt = dt.AddSeconds(0 - dt.Second);
+ dt = dt.AddMilliseconds(0 - dt.Millisecond);
+ }
+ else if (resolution == Resolution.DAY)
+ {
+ dt = dt.AddHours(0 - dt.Hour);
+ dt = dt.AddMinutes(0 - dt.Minute);
+ dt = dt.AddSeconds(0 - dt.Second);
+ dt = dt.AddMilliseconds(0 - dt.Millisecond);
+ }
+ else if (resolution == Resolution.HOUR)
+ {
+ dt = dt.AddMinutes(0 - dt.Minute);
+ dt = dt.AddSeconds(0 - dt.Second);
+ dt = dt.AddMilliseconds(0 - dt.Millisecond);
+ }
+ else if (resolution == Resolution.MINUTE)
+ {
+ dt = dt.AddSeconds(0 - dt.Second);
+ dt = dt.AddMilliseconds(0 - dt.Millisecond);
+ }
+ else if (resolution == Resolution.SECOND)
+ {
+ dt = dt.AddMilliseconds(0 - dt.Millisecond);
+ }
+ else if (resolution == Resolution.MILLISECOND)
+ {
+ // don't cut off anything
+ }
+ else
+ {
+ throw new System.ArgumentException("unknown resolution " + resolution);
+ }
+ return dt.Ticks;
+ }
+
+ /// <summary>Specifies the time granularity. </summary>
+ public class Resolution
+ {
+
+ public static readonly Resolution YEAR = new Resolution("year");
+ public static readonly Resolution MONTH = new Resolution("month");
+ public static readonly Resolution DAY = new Resolution("day");
+ public static readonly Resolution HOUR = new Resolution("hour");
+ public static readonly Resolution MINUTE = new Resolution("minute");
+ public static readonly Resolution SECOND = new Resolution("second");
+ public static readonly Resolution MILLISECOND = new Resolution("millisecond");
+
+ private System.String resolution;
+
+ internal Resolution()
+ {
+ }
+
+ internal Resolution(System.String resolution)
+ {
+ this.resolution = resolution;
+ }
+
+ public override System.String ToString()
+ {
+ return resolution;
+ }
+ }
+ static DateTools()
+ {
+ {
+ // times need to be normalized so the value doesn't depend on the
+ // location the index is created/used:
+ // {{Aroush-2.1}}
+ /*
+ YEAR_FORMAT.setTimeZone(GMT);
+ MONTH_FORMAT.setTimeZone(GMT);
+ DAY_FORMAT.setTimeZone(GMT);
+ HOUR_FORMAT.setTimeZone(GMT);
+ MINUTE_FORMAT.setTimeZone(GMT);
+ SECOND_FORMAT.setTimeZone(GMT);
+ MILLISECOND_FORMAT.setTimeZone(GMT);
+ */
+ }
+ }
+ }
+} \ No newline at end of file
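
A short sketch of the DateTools API defined above, using the same 2004-09-21 13:50:11 date as the Round documentation:

using System;
using Lucene.Net.Documents;

class DateToolsExample
{
    static void Main()
    {
        DateTime when = new DateTime(2004, 9, 21, 13, 50, 11);

        // Day resolution yields the 8-character form "20040921"; shorter terms keep
        // range and prefix queries cheaper.
        string day = DateTools.DateToString(when, DateTools.Resolution.DAY);

        // Parsing the 8-character form gives back midnight of that day.
        DateTime parsed = DateTools.StringToDate(day);

        // Rounding to month resolution: 2004-09-21 13:50:11 -> 2004-09-01 00:00:00.
        DateTime firstOfMonth = DateTools.Round(when, DateTools.Resolution.MONTH);

        Console.WriteLine("{0} {1} {2}", day, parsed, firstOfMonth);
    }
}
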
diff --git a/src/core/Document/Document.cs b/src/core/Document/Document.cs
new file mode 100644
index 0000000..f24a46a
--- /dev/null
+++ b/src/core/Document/Document.cs
@@ -0,0 +1,382 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+// for javadoc
+using IndexReader = Lucene.Net.Index.IndexReader;
+using ScoreDoc = Lucene.Net.Search.ScoreDoc;
+using Searcher = Lucene.Net.Search.Searcher;
+
+namespace Lucene.Net.Documents
+{
+
+ /// <summary>Documents are the unit of indexing and search.
+ ///
+ /// A Document is a set of fields. Each field has a name and a textual value.
+ /// A field may be <see cref="IFieldable.IsStored()">stored</see> with the document, in which
+ /// case it is returned with search hits on the document. Thus each document
+ /// should typically contain one or more stored fields which uniquely identify
+ /// it.
+ ///
+ /// <p/>Note that fields which are <i>not</i> <see cref="IFieldable.IsStored()">stored</see> are
+ /// <i>not</i> available in documents retrieved from the index, e.g. with <see cref="ScoreDoc.Doc" />,
+ /// <see cref="Searcher.Doc(int)" /> or <see cref="IndexReader.Document(int)" />.
+ /// </summary>
+
+ [Serializable]
+ public sealed class Document
+ {
+ private class AnonymousClassEnumeration : System.Collections.IEnumerator
+ {
+ public AnonymousClassEnumeration(Document enclosingInstance)
+ {
+ InitBlock(enclosingInstance);
+ }
+ private void InitBlock(Document enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ iter = Enclosing_Instance.fields.GetEnumerator();
+ }
+ private System.Object tempAuxObj;
+ public bool MoveNext()
+ {
+ bool result = HasMoreElements();
+ if (result)
+ {
+ tempAuxObj = NextElement();
+ }
+ return result;
+ }
+ public void Reset()
+ {
+ tempAuxObj = null;
+ }
+ public System.Object Current
+ {
+ get
+ {
+ return tempAuxObj;
+ }
+
+ }
+ private Document enclosingInstance;
+ public Document Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ internal System.Collections.IEnumerator iter;
+ public bool HasMoreElements()
+ {
+ return iter.MoveNext();
+ }
+ public System.Object NextElement()
+ {
+ return iter.Current;
+ }
+ }
+ internal System.Collections.Generic.IList<IFieldable> fields = new System.Collections.Generic.List<IFieldable>();
+ private float boost = 1.0f;
+
+ /// <summary>Constructs a new document with no fields. </summary>
+ public Document()
+ {
+ }
+
+
+ /// <summary>Gets or sets, at indexing time, the boost factor.
+ /// <para>
+ /// The default is 1.0
+ /// </para>
+ /// <p/>Note that once a document is indexed this value is no longer available
+ /// from the index. At search time, for retrieved documents, this method always
+ /// returns 1. This however does not mean that the boost value set at indexing
+ /// time was ignored - it was just combined with other indexing time factors and
+ /// stored elsewhere, for better indexing and search performance. (For more
+ /// information see the "norm(t,d)" part of the scoring formula in
+ /// <see cref="Lucene.Net.Search.Similarity">Similarity</see>.)
+ /// </summary>
+ public float Boost
+ {
+ get { return boost; }
+ set { this.boost = value; }
+ }
+
+ /// <summary> <p/>Adds a field to a document. Several fields may be added with
+ /// the same name. In this case, if the fields are indexed, their text is
+ /// treated as though appended for the purposes of search.<p/>
+ /// <p/> Note that Add, like the RemoveField(s) methods, only makes sense
+ /// prior to adding a document to an index. These methods cannot
+ /// be used to change the content of an existing index! In order to achieve this,
+ /// a document has to be deleted from an index and a new changed version of that
+ /// document has to be added.<p/>
+ /// </summary>
+ public void Add(IFieldable field)
+ {
+ fields.Add(field);
+ }
+
+ /// <summary> <p/>Removes field with the specified name from the document.
+ /// If multiple fields exist with this name, this method removes the first field that has been added.
+ /// If there is no field with the specified name, the document remains unchanged.<p/>
+ /// <p/> Note that the RemoveField(s) methods, like the Add method, only make sense
+ /// prior to adding a document to an index. These methods cannot
+ /// be used to change the content of an existing index! In order to achieve this,
+ /// a document has to be deleted from an index and a new changed version of that
+ /// document has to be added.<p/>
+ /// </summary>
+ public void RemoveField(System.String name)
+ {
+ System.Collections.Generic.IEnumerator<IFieldable> it = fields.GetEnumerator();
+ while (it.MoveNext())
+ {
+ IFieldable field = it.Current;
+ if (field.Name.Equals(name))
+ {
+ fields.Remove(field);
+ return;
+ }
+ }
+ }
+
+ /// <summary> <p/>Removes all fields with the given name from the document.
+ /// If there is no field with the specified name, the document remains unchanged.<p/>
+ /// <p/> Note that the RemoveField(s) methods, like the Add method, only make sense
+ /// prior to adding a document to an index. These methods cannot
+ /// be used to change the content of an existing index! In order to achieve this,
+ /// a document has to be deleted from an index and a new changed version of that
+ /// document has to be added.<p/>
+ /// </summary>
+ public void RemoveFields(System.String name)
+ {
+ for (int i = fields.Count - 1; i >= 0; i--)
+ {
+ IFieldable field = fields[i];
+ if (field.Name.Equals(name))
+ {
+ fields.RemoveAt(i);
+ }
+ }
+ }
+
+ /// <summary>Returns a field with the given name if any exist in this document, or
+ /// null. If multiple fields exist with this name, this method returns the
+ /// first value added.
+ /// Do not use this method with lazy loaded fields.
+ /// </summary>
+ public Field GetField(System.String name)
+ {
+ return (Field) GetFieldable(name);
+ }
+
+
+ /// <summary>Returns a field with the given name if any exist in this document, or
+ /// null. If multiple fields exist with this name, this method returns the
+ /// first value added.
+ /// </summary>
+ public IFieldable GetFieldable(System.String name)
+ {
+ foreach(IFieldable field in fields)
+ {
+ if (field.Name.Equals(name))
+ return field;
+ }
+ return null;
+ }
+
+ /// <summary>Returns the string value of the field with the given name if any exist in
+ /// this document, or null. If multiple fields exist with this name, this
+ /// method returns the first value added. If only binary fields with this name
+ /// exist, returns null.
+ /// </summary>
+ public System.String Get(System.String name)
+ {
+ foreach(IFieldable field in fields)
+ {
+ if (field.Name.Equals(name) && (!field.IsBinary))
+ return field.StringValue;
+ }
+ return null;
+ }
+
+ /// <summary>Returns a List of all the fields in a document.
+ /// <p/>Note that fields which are <i>not</i> <see cref="IFieldable.IsStored()">stored</see> are
+ /// <i>not</i> available in documents retrieved from the
+ /// index, e.g. <see cref="Searcher.Doc(int)" /> or <see cref="IndexReader.Document(int)" />.
+ /// </summary>
+ public System.Collections.Generic.IList<IFieldable> GetFields()
+ {
+ return fields;
+ }
+
+ private static readonly Field[] NO_FIELDS = new Field[0];
+
+ /// <summary> Returns an array of <see cref="Field" />s with the given name.
+ /// Do not use with lazy loaded fields.
+ /// This method returns an empty array when there are no
+ /// matching fields. It never returns null.
+ ///
+ /// </summary>
+ /// <param name="name">the name of the field
+ /// </param>
+ /// <returns> a <c>Field[]</c> array
+ /// </returns>
+ public Field[] GetFields(System.String name)
+ {
+ var result = new System.Collections.Generic.List<Field>();
+ foreach(IFieldable field in fields)
+ {
+ if (field.Name.Equals(name))
+ {
+ result.Add((Field)field);
+ }
+ }
+
+ if (result.Count == 0)
+ return NO_FIELDS;
+
+ return result.ToArray();
+ }
+
+
+ private static readonly IFieldable[] NO_FIELDABLES = new IFieldable[0];
+
+ /// <summary> Returns an array of <see cref="IFieldable" />s with the given name.
+ /// This method returns an empty array when there are no
+ /// matching fields. It never returns null.
+ ///
+ /// </summary>
+ /// <param name="name">the name of the field
+ /// </param>
+ /// <returns> a <c>Fieldable[]</c> array
+ /// </returns>
+ public IFieldable[] GetFieldables(System.String name)
+ {
+ var result = new System.Collections.Generic.List<IFieldable>();
+ foreach(IFieldable field in fields)
+ {
+ if (field.Name.Equals(name))
+ {
+ result.Add(field);
+ }
+ }
+
+ if (result.Count == 0)
+ return NO_FIELDABLES;
+
+ return result.ToArray();
+ }
+
+
+ private static readonly System.String[] NO_STRINGS = new System.String[0];
+
+ /// <summary> Returns an array of values of the field specified as the method parameter.
+ /// This method returns an empty array when there are no
+ /// matching fields. It never returns null.
+ /// </summary>
+ /// <param name="name">the name of the field
+ /// </param>
+ /// <returns> a <c>String[]</c> of field values
+ /// </returns>
+ public System.String[] GetValues(System.String name)
+ {
+ var result = new System.Collections.Generic.List<string>();
+ foreach(IFieldable field in fields)
+ {
+ if (field.Name.Equals(name) && (!field.IsBinary))
+ result.Add(field.StringValue);
+ }
+
+ if (result.Count == 0)
+ return NO_STRINGS;
+
+ return result.ToArray();
+ }
+
+ private static readonly byte[][] NO_BYTES = new byte[0][];
+
+ /// <summary> Returns an array of byte arrays for all of the fields that have the name specified
+ /// as the method parameter. This method returns an empty
+ /// array when there are no matching fields. It never
+ /// returns null.
+ ///
+ /// </summary>
+ /// <param name="name">the name of the field
+ /// </param>
+ /// <returns> a <c>byte[][]</c> of binary field values
+ /// </returns>
+ public byte[][] GetBinaryValues(System.String name)
+ {
+ var result = new System.Collections.Generic.List<byte[]>();
+ foreach(IFieldable field in fields)
+ {
+ if (field.Name.Equals(name) && (field.IsBinary))
+ result.Add(field.GetBinaryValue());
+ }
+
+ if (result.Count == 0)
+ return NO_BYTES;
+
+ return result.ToArray();
+ }
+
+ /// <summary> Returns an array of bytes for the first (or only) field that has the name
+ /// specified as the method parameter. This method will return <c>null</c>
+ /// if no binary fields with the specified name are available.
+ /// There may be non-binary fields with the same name.
+ ///
+ /// </summary>
+ /// <param name="name">the name of the field.
+ /// </param>
+ /// <returns> a <c>byte[]</c> containing the binary field value or <c>null</c>
+ /// </returns>
+ public byte[] GetBinaryValue(System.String name)
+ {
+ foreach(IFieldable field in fields)
+ {
+ if (field.Name.Equals(name) && (field.IsBinary))
+ return field.GetBinaryValue();
+ }
+ return null;
+ }
+
+ /// <summary>Prints the fields of a document for human consumption. </summary>
+ public override System.String ToString()
+ {
+ System.Text.StringBuilder buffer = new System.Text.StringBuilder();
+ buffer.Append("Document<");
+ for (int i = 0; i < fields.Count; i++)
+ {
+ IFieldable field = fields[i];
+ buffer.Append(field.ToString());
+ if (i != fields.Count - 1)
+ buffer.Append(" ");
+ }
+ buffer.Append(">");
+ return buffer.ToString();
+ }
+
+ public System.Collections.Generic.IList<IFieldable> fields_ForNUnit
+ {
+ get { return fields; }
+ }
+ }
+} \ No newline at end of file
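
A short sketch of the Document API above, combined with the Field class from src/core/Document/Field.cs; the field names and values are made up:

using System;
using Lucene.Net.Documents;

class DocumentExample
{
    static void Main()
    {
        var doc = new Document();

        // One stored, un-analyzed id plus two analyzed "tag" fields sharing a name.
        doc.Add(new Field("id", "42", Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("tag", "lucene", Field.Store.YES, Field.Index.ANALYZED));
        doc.Add(new Field("tag", "search", Field.Store.YES, Field.Index.ANALYZED));

        // Get returns the first non-binary value; GetValues returns all of them.
        Console.WriteLine(doc.Get("id"));                           // 42
        Console.WriteLine(string.Join(",", doc.GetValues("tag")));  // lucene,search

        // RemoveField drops only the first field with the given name.
        doc.RemoveField("tag");
        Console.WriteLine(doc.GetFields("tag").Length);             // 1
    }
}
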
diff --git a/src/core/Document/Field.cs b/src/core/Document/Field.cs
new file mode 100644
index 0000000..d39d9f4
--- /dev/null
+++ b/src/core/Document/Field.cs
@@ -0,0 +1,667 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.IO;
+using TokenStream = Lucene.Net.Analysis.TokenStream;
+using IndexWriter = Lucene.Net.Index.IndexWriter;
+using StringHelper = Lucene.Net.Util.StringHelper;
+
+namespace Lucene.Net.Documents
+{
+
+ /// <summary>A field is a section of a Document. Each field has two parts, a name and a
+ /// value. Values may be free text, provided as a String or as a Reader, or they
+ /// may be atomic keywords, which are not further processed. Such keywords may
+ /// be used to represent dates, urls, etc. Fields are optionally stored in the
+ /// index, so that they may be returned with hits on the document.
+ /// </summary>
+
+ [Serializable]
+ public sealed class Field:AbstractField, IFieldable
+ {
+ /// <summary>Specifies whether and how a field should be stored. </summary>
+ public enum Store
+ {
+ /// <summary>Store the original field value in the index. This is useful for short texts
+ /// like a document's title which should be displayed with the results. The
+ /// value is stored in its original form, i.e. no analyzer is used before it is
+ /// stored.
+ /// </summary>
+ YES,
+
+ /// <summary>Do not store the field value in the index. </summary>
+ NO
+ }
+
+ /// <summary>Specifies whether and how a field should be indexed. </summary>
+
+ public enum Index
+ {
+ /// <summary>Do not index the field value. This field can thus not be searched,
+ /// but one can still access its contents provided it is
+ /// <see cref="Field.Store">stored</see>.
+ /// </summary>
+ NO,
+
+ /// <summary>Index the tokens produced by running the field's
+ /// value through an Analyzer. This is useful for
+ /// common text.
+ /// </summary>
+ ANALYZED,
+
+ /// <summary>Index the field's value without using an Analyzer, so it can be searched.
+ /// As no analyzer is used the value will be stored as a single term. This is
+ /// useful for unique Ids like product numbers.
+ /// </summary>
+ NOT_ANALYZED,
+
+ /// <summary>Expert: Index the field's value without an Analyzer,
+ /// and also disable the storing of norms. Note that you
+ /// can also separately enable/disable norms by setting
+ /// <see cref="AbstractField.OmitNorms" />. No norms means that
+ /// index-time field and document boosting and field
+ /// length normalization are disabled. The benefit is
+ /// less memory usage as norms take up one byte of RAM
+ /// per indexed field for every document in the index,
+ /// during searching. Note that once you index a given
+ /// field <i>with</i> norms enabled, disabling norms will
+ /// have no effect. In other words, for this to have the
+ /// above described effect on a field, all instances of
+ /// that field must be indexed with NOT_ANALYZED_NO_NORMS
+ /// from the beginning.
+ /// </summary>
+ NOT_ANALYZED_NO_NORMS,
+
+ /// <summary>Expert: Index the tokens produced by running the
+ /// field's value through an Analyzer, and also
+ /// separately disable the storing of norms. See
+ /// <see cref="NOT_ANALYZED_NO_NORMS" /> for what norms are
+ /// and why you may want to disable them.
+ /// </summary>
+ ANALYZED_NO_NORMS,
+ }
+
+ /// <summary>Specifies whether and how a field should have term vectors. </summary>
+ public enum TermVector
+ {
+ /// <summary>Do not store term vectors. </summary>
+ NO,
+
+ /// <summary>Store the term vectors of each document. A term vector is a list
+ /// of the document's terms and their number of occurrences in that document.
+ /// </summary>
+ YES,
+
+ /// <summary> Store the term vector + token position information
+ ///
+ /// </summary>
+ /// <seealso cref="YES">
+ /// </seealso>
+ WITH_POSITIONS,
+
+ /// <summary> Store the term vector + Token offset information
+ ///
+ /// </summary>
+ /// <seealso cref="YES">
+ /// </seealso>
+ WITH_OFFSETS,
+
+ /// <summary> Store the term vector + Token position and offset information
+ ///
+ /// </summary>
+ /// <seealso cref="YES">
+ /// </seealso>
+ /// <seealso cref="WITH_POSITIONS">
+ /// </seealso>
+ /// <seealso cref="WITH_OFFSETS">
+ /// </seealso>
+ WITH_POSITIONS_OFFSETS,
+ }
+
+
+ /// <summary>The value of the field as a String, or null. If null, the Reader value or
+ /// binary value is used. Exactly one of stringValue(),
+ /// readerValue(), and getBinaryValue() must be set.
+ /// </summary>
+ public override string StringValue
+ {
+ get { return fieldsData is System.String ? (System.String) fieldsData : null; }
+ }
+
+ /// <summary>The value of the field as a Reader, or null. If null, the String value or
+ /// binary value is used. Exactly one of stringValue(),
+ /// readerValue(), and getBinaryValue() must be set.
+ /// </summary>
+ public override TextReader ReaderValue
+ {
+ get { return fieldsData is System.IO.TextReader ? (System.IO.TextReader) fieldsData : null; }
+ }
+
+ /// <summary>The TokenStream for this field to be used when indexing, or null. If null, the Reader value
+ /// or String value is analyzed to produce the indexed tokens.
+ /// </summary>
+ public override TokenStream TokenStreamValue
+ {
+ get { return tokenStream; }
+ }
+
+
+ /// <summary><p/>Expert: change the value of this field. This can
+ /// be used during indexing to re-use a single Field
+ /// instance to improve indexing speed by avoiding GC cost
+ /// of new'ing and reclaiming Field instances. Typically
+ /// a single <see cref="Document" /> instance is re-used as
+ /// well. This helps most on small documents.<p/>
+ ///
+ /// <p/>Each Field instance should only be used once
+ /// within a single <see cref="Document" /> instance. See <a
+ /// href="http://wiki.apache.org/lucene-java/ImproveIndexingSpeed">ImproveIndexingSpeed</a>
+ /// for details.<p/>
+ /// </summary>
+ public void SetValue(System.String value)
+ {
+ if (internalIsBinary)
+ {
+ throw new System.ArgumentException("cannot set a String value on a binary field");
+ }
+ fieldsData = value;
+ }
+
+ /// <summary>Expert: change the value of this field. See <see cref="SetValue(String)" />. </summary>
+ public void SetValue(System.IO.TextReader value)
+ {
+ if (internalIsBinary)
+ {
+ throw new System.ArgumentException("cannot set a Reader value on a binary field");
+ }
+ if (internalIsStored)
+ {
+ throw new System.ArgumentException("cannot set a Reader value on a stored field");
+ }
+ fieldsData = value;
+ }
+
+ /// <summary>Expert: change the value of this field. See <see cref="SetValue(String)" />. </summary>
+ public void SetValue(byte[] value)
+ {
+ if (!internalIsBinary)
+ {
+ throw new System.ArgumentException("cannot set a byte[] value on a non-binary field");
+ }
+ fieldsData = value;
+ internalBinaryLength = value.Length;
+ internalbinaryOffset = 0;
+ }
+
+ /// <summary>Expert: change the value of this field. See <see cref="SetValue(String)" />. </summary>
+ public void SetValue(byte[] value, int offset, int length)
+ {
+ if (!internalIsBinary)
+ {
+ throw new System.ArgumentException("cannot set a byte[] value on a non-binary field");
+ }
+ fieldsData = value;
+ internalBinaryLength = length;
+ internalbinaryOffset = offset;
+ }
+
+ /// <summary>Expert: sets the token stream to be used for indexing and causes isIndexed() and isTokenized() to return true.
+ /// May be combined with stored values from stringValue() or GetBinaryValue()
+ /// </summary>
+ public void SetTokenStream(TokenStream tokenStream)
+ {
+ this.internalIsIndexed = true;
+ this.internalIsTokenized = true;
+ this.tokenStream = tokenStream;
+ }
+
+ /// <summary> Create a field by specifying its name, value and how it will
+ /// be saved in the index. Term vectors will not be stored in the index.
+ ///
+ /// </summary>
+ /// <param name="name">The name of the field
+ /// </param>
+ /// <param name="value">The string to process
+ /// </param>
+ /// <param name="store">Whether <c>value</c> should be stored in the index
+ /// </param>
+ /// <param name="index">Whether the field should be indexed, and if so, if it should
+ /// be tokenized before indexing
+ /// </param>
+ /// <throws> NullReferenceException if name or value is <c>null</c> </throws>
+ /// <throws> ArgumentException if the field is neither stored nor indexed </throws>
+ public Field(System.String name, System.String value, Store store, Index index)
+ : this(name, value, store, index, TermVector.NO)
+ {
+ }
+
+ /// <summary> Create a field by specifying its name, value and how it will
+ /// be saved in the index.
+ ///
+ /// </summary>
+ /// <param name="name">The name of the field
+ /// </param>
+ /// <param name="value">The string to process
+ /// </param>
+ /// <param name="store">Whether <c>value</c> should be stored in the index
+ /// </param>
+ /// <param name="index">Whether the field should be indexed, and if so, if it should
+ /// be tokenized before indexing
+ /// </param>
+ /// <param name="termVector">Whether term vector should be stored
+ /// </param>
+ /// <throws> NullReferenceException if name or value is <c>null</c> </throws>
+ /// <throws> ArgumentException in any of the following situations:
+ /// <list>
+ /// <item>the field is neither stored nor indexed</item>
+ /// <item>the field is not indexed but termVector is <c>TermVector.YES</c></item>
+ /// </list>
+ /// </throws>
+ public Field(System.String name, System.String value, Store store, Index index, TermVector termVector)
+ : this(name, true, value, store, index, termVector)
+ {
+ }
+
+ /// <summary> Create a field by specifying its name, value and how it will
+ /// be saved in the index.
+ ///
+ /// </summary>
+ /// <param name="name">The name of the field
+ /// </param>
+ /// <param name="internName">Whether to .intern() name or not
+ /// </param>
+ /// <param name="value">The string to process
+ /// </param>
+ /// <param name="store">Whether <c>value</c> should be stored in the index
+ /// </param>
+ /// <param name="index">Whether the field should be indexed, and if so, if it should
+ /// be tokenized before indexing
+ /// </param>
+ /// <param name="termVector">Whether term vector should be stored
+ /// </param>
+ /// <throws> NullReferenceException if name or value is <c>null</c> </throws>
+ /// <throws> ArgumentException in any of the following situations:
+ /// <list>
+ /// <item>the field is neither stored nor indexed</item>
+ /// <item>the field is not indexed but termVector is <c>TermVector.YES</c></item>
+ /// </list>
+ /// </throws>
+ public Field(System.String name, bool internName, System.String value, Store store, Index index, TermVector termVector)
+ {
+ if (name == null)
+ throw new System.NullReferenceException("name cannot be null");
+ if (value == null)
+ throw new System.NullReferenceException("value cannot be null");
+ if (name.Length == 0 && value.Length == 0)
+ throw new System.ArgumentException("name and value cannot both be empty");
+ if (index == Index.NO && store == Store.NO)
+ throw new System.ArgumentException("it doesn't make sense to have a field that " + "is neither indexed nor stored");
+ if (index == Index.NO && termVector != TermVector.NO)
+ throw new System.ArgumentException("cannot store term vector information " + "for a field that is not indexed");
+
+ if (internName)
+ // field names are optionally interned
+ name = StringHelper.Intern(name);
+
+ this.internalName = name;
+
+ this.fieldsData = value;
+
+ this.internalIsStored = store.IsStored();
+
+ this.internalIsIndexed = index.IsIndexed();
+ this.internalIsTokenized = index.IsAnalyzed();
+ this.internalOmitNorms = index.OmitNorms();
+
+ if (index == Index.NO)
+ {
+ this.internalOmitTermFreqAndPositions = false;
+ }
+
+ this.internalIsBinary = false;
+
+ SetStoreTermVector(termVector);
+ }
+
+ /// <summary> Create a tokenized and indexed field that is not stored. Term vectors will
+ /// not be stored. The Reader is read only when the Document is added to the index,
+ /// i.e. you may not close the Reader until <see cref="IndexWriter.AddDocument(Document)" />
+ /// has been called.
+ ///
+ /// </summary>
+ /// <param name="name">The name of the field
+ /// </param>
+ /// <param name="reader">The reader with the content
+ /// </param>
+ /// <throws> NullReferenceException if name or reader is <c>null</c> </throws>
+ public Field(System.String name, System.IO.TextReader reader):this(name, reader, TermVector.NO)
+ {
+ }
+
+ /// <summary> Create a tokenized and indexed field that is not stored, optionally with
+ /// storing term vectors. The Reader is read only when the Document is added to the index,
+ /// i.e. you may not close the Reader until <see cref="IndexWriter.AddDocument(Document)" />
+ /// has been called.
+ ///
+ /// </summary>
+ /// <param name="name">The name of the field
+ /// </param>
+ /// <param name="reader">The reader with the content
+ /// </param>
+ /// <param name="termVector">Whether term vector should be stored
+ /// </param>
+ /// <throws> NullReferenceException if name or reader is <c>null</c> </throws>
+ public Field(System.String name, System.IO.TextReader reader, TermVector termVector)
+ {
+ if (name == null)
+ throw new System.NullReferenceException("name cannot be null");
+ if (reader == null)
+ throw new System.NullReferenceException("reader cannot be null");
+
+ this.internalName = StringHelper.Intern(name); // field names are interned
+ this.fieldsData = reader;
+
+ this.internalIsStored = false;
+
+ this.internalIsIndexed = true;
+ this.internalIsTokenized = true;
+
+ this.internalIsBinary = false;
+
+ SetStoreTermVector(termVector);
+ }
+
+ /// <summary> Create a tokenized and indexed field that is not stored. Term vectors will
+ /// not be stored. This is useful for pre-analyzed fields.
+ /// The TokenStream is read only when the Document is added to the index,
+ /// i.e. you may not close the TokenStream until <see cref="IndexWriter.AddDocument(Document)" />
+ /// has been called.
+ ///
+ /// </summary>
+ /// <param name="name">The name of the field
+ /// </param>
+ /// <param name="tokenStream">The TokenStream with the content
+ /// </param>
+ /// <throws> NullReferenceException if name or tokenStream is <c>null</c> </throws>
+ public Field(System.String name, TokenStream tokenStream):this(name, tokenStream, TermVector.NO)
+ {
+ }
+
+ /// <summary> Create a tokenized and indexed field that is not stored, optionally with
+ /// storing term vectors. This is useful for pre-analyzed fields.
+ /// The TokenStream is read only when the Document is added to the index,
+ /// i.e. you may not close the TokenStream until <see cref="IndexWriter.AddDocument(Document)" />
+ /// has been called.
+ ///
+ /// </summary>
+ /// <param name="name">The name of the field
+ /// </param>
+ /// <param name="tokenStream">The TokenStream with the content
+ /// </param>
+ /// <param name="termVector">Whether term vector should be stored
+ /// </param>
+ /// <throws> NullReferenceException if name or tokenStream is <c>null</c> </throws>
+ public Field(System.String name, TokenStream tokenStream, TermVector termVector)
+ {
+ if (name == null)
+ throw new System.NullReferenceException("name cannot be null");
+ if (tokenStream == null)
+ throw new System.NullReferenceException("tokenStream cannot be null");
+
+ this.internalName = StringHelper.Intern(name); // field names are interned
+ this.fieldsData = null;
+ this.tokenStream = tokenStream;
+
+ this.internalIsStored = false;
+
+ this.internalIsIndexed = true;
+ this.internalIsTokenized = true;
+
+ this.internalIsBinary = false;
+
+ SetStoreTermVector(termVector);
+ }
+
+
+ /// <summary> Create a stored field with binary value. Optionally the value may be compressed.
+ ///
+ /// </summary>
+ /// <param name="name">The name of the field
+ /// </param>
+ /// <param name="value_Renamed">The binary value
+ /// </param>
+ /// <param name="store">How <c>value</c> should be stored (compressed or not)
+ /// </param>
+ /// <throws> ArgumentException if store is <c>Store.NO</c> </throws>
+ public Field(System.String name, byte[] value_Renamed, Store store):this(name, value_Renamed, 0, value_Renamed.Length, store)
+ {
+ }
+
+ /// <summary> Create a stored field with binary value. Optionally the value may be compressed.
+ ///
+ /// </summary>
+ /// <param name="name">The name of the field
+ /// </param>
+ /// <param name="value_Renamed">The binary value
+ /// </param>
+ /// <param name="offset">Starting offset in value where this Field's bytes are
+ /// </param>
+ /// <param name="length">Number of bytes to use for this Field, starting at offset
+ /// </param>
+ /// <param name="store">How <c>value</c> should be stored (compressed or not)
+ /// </param>
+ /// <throws> ArgumentException if store is <c>Store.NO</c> </throws>
+ public Field(System.String name, byte[] value_Renamed, int offset, int length, Store store)
+ {
+
+ if (name == null)
+ throw new System.ArgumentException("name cannot be null");
+ if (value_Renamed == null)
+ throw new System.ArgumentException("value cannot be null");
+
+ this.internalName = StringHelper.Intern(name); // field names are interned
+ fieldsData = value_Renamed;
+
+ if (store == Store.NO)
+ throw new System.ArgumentException("binary values can't be unstored");
+
+ internalIsStored = store.IsStored();
+ internalIsIndexed = false;
+ internalIsTokenized = false;
+ internalOmitTermFreqAndPositions = false;
+ internalOmitNorms = true;
+
+ internalIsBinary = true;
+ internalBinaryLength = length;
+ internalbinaryOffset = offset;
+
+ SetStoreTermVector(TermVector.NO);
+ }
+ }
+
+ public static class FieldExtensions
+ {
+ public static bool IsStored(this Field.Store store)
+ {
+ switch(store)
+ {
+ case Field.Store.YES:
+ return true;
+ case Field.Store.NO:
+ return false;
+ default:
+ throw new ArgumentOutOfRangeException("store", "Invalid value for Field.Store");
+ }
+ }
+
+ public static bool IsIndexed(this Field.Index index)
+ {
+ switch(index)
+ {
+ case Field.Index.NO:
+ return false;
+ case Field.Index.ANALYZED:
+ case Field.Index.NOT_ANALYZED:
+ case Field.Index.NOT_ANALYZED_NO_NORMS:
+ case Field.Index.ANALYZED_NO_NORMS:
+ return true;
+ default:
+ throw new ArgumentOutOfRangeException("index", "Invalid value for Field.Index");
+ }
+ }
+
+ public static bool IsAnalyzed(this Field.Index index)
+ {
+ switch (index)
+ {
+ case Field.Index.NO:
+ case Field.Index.NOT_ANALYZED:
+ case Field.Index.NOT_ANALYZED_NO_NORMS:
+ return false;
+ case Field.Index.ANALYZED:
+ case Field.Index.ANALYZED_NO_NORMS:
+ return true;
+ default:
+ throw new ArgumentOutOfRangeException("index", "Invalid value for Field.Index");
+ }
+ }
+
+ public static bool OmitNorms(this Field.Index index)
+ {
+ switch (index)
+ {
+ case Field.Index.ANALYZED:
+ case Field.Index.NOT_ANALYZED:
+ return false;
+ case Field.Index.NO:
+ case Field.Index.NOT_ANALYZED_NO_NORMS:
+ case Field.Index.ANALYZED_NO_NORMS:
+ return true;
+ default:
+ throw new ArgumentOutOfRangeException("index", "Invalid value for Field.Index");
+ }
+ }
+
+ public static bool IsStored(this Field.TermVector tv)
+ {
+ switch(tv)
+ {
+ case Field.TermVector.NO:
+ return false;
+ case Field.TermVector.YES:
+ case Field.TermVector.WITH_OFFSETS:
+ case Field.TermVector.WITH_POSITIONS:
+ case Field.TermVector.WITH_POSITIONS_OFFSETS:
+ return true;
+ default:
+ throw new ArgumentOutOfRangeException("tv", "Invalid value for Field.TermVector");
+ }
+ }
+
+ public static bool WithPositions(this Field.TermVector tv)
+ {
+ switch (tv)
+ {
+ case Field.TermVector.NO:
+ case Field.TermVector.YES:
+ case Field.TermVector.WITH_OFFSETS:
+ return false;
+ case Field.TermVector.WITH_POSITIONS:
+ case Field.TermVector.WITH_POSITIONS_OFFSETS:
+ return true;
+ default:
+ throw new ArgumentOutOfRangeException("tv", "Invalid value for Field.TermVector");
+ }
+ }
+
+ public static bool WithOffsets(this Field.TermVector tv)
+ {
+ switch (tv)
+ {
+ case Field.TermVector.NO:
+ case Field.TermVector.YES:
+ case Field.TermVector.WITH_POSITIONS:
+ return false;
+ case Field.TermVector.WITH_OFFSETS:
+ case Field.TermVector.WITH_POSITIONS_OFFSETS:
+ return true;
+ default:
+ throw new ArgumentOutOfRangeException("tv", "Invalid value for Field.TermVector");
+ }
+ }
+
+ public static Field.Index ToIndex(bool indexed, bool analyzed)
+ {
+ return ToIndex(indexed, analyzed, false);
+ }
+
+ public static Field.Index ToIndex(bool indexed, bool analyzed, bool omitNorms)
+ {
+
+ // If it is not indexed nothing else matters
+ if (!indexed)
+ {
+ return Field.Index.NO;
+ }
+
+ // typical, non-expert
+ if (!omitNorms)
+ {
+ if (analyzed)
+ {
+ return Field.Index.ANALYZED;
+ }
+ return Field.Index.NOT_ANALYZED;
+ }
+
+ // Expert: Norms omitted
+ if (analyzed)
+ {
+ return Field.Index.ANALYZED_NO_NORMS;
+ }
+ return Field.Index.NOT_ANALYZED_NO_NORMS;
+ }
+
+ /// <summary>
+ /// Get the best representation of a TermVector given the flags.
+ /// </summary>
+ public static Field.TermVector ToTermVector(bool stored, bool withOffsets, bool withPositions)
+ {
+ // If it is not stored, nothing else matters.
+ if (!stored)
+ {
+ return Field.TermVector.NO;
+ }
+
+ if (withOffsets)
+ {
+ if (withPositions)
+ {
+ return Field.TermVector.WITH_POSITIONS_OFFSETS;
+ }
+ return Field.TermVector.WITH_OFFSETS;
+ }
+
+ if (withPositions)
+ {
+ return Field.TermVector.WITH_POSITIONS;
+ }
+ return Field.TermVector.YES;
+ }
+ }
+} \ No newline at end of file
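
A brief sketch of the Field constructors and the FieldExtensions helpers above; the field names and byte values are made up:

using System;
using Lucene.Net.Documents;

class FieldExample
{
    static void Main()
    {
        // An analyzed, stored text field that also records positions and offsets.
        var body = new Field("body", "the quick brown fox",
                             Field.Store.YES, Field.Index.ANALYZED,
                             Field.TermVector.WITH_POSITIONS_OFFSETS);

        // Binary values must be stored, so Store.YES is the only legal choice here.
        var thumb = new Field("thumb", new byte[] { 1, 2, 3 }, Field.Store.YES);

        // The FieldExtensions helpers translate the enum choices into boolean flags
        // and back again.
        Console.WriteLine(Field.Index.ANALYZED_NO_NORMS.IsAnalyzed());      // True
        Console.WriteLine(Field.Index.ANALYZED_NO_NORMS.OmitNorms());       // True
        Console.WriteLine(Field.TermVector.WITH_OFFSETS.WithPositions());   // False
        Console.WriteLine(FieldExtensions.ToIndex(true, true));             // ANALYZED

        Console.WriteLine("{0} {1}", body.Name, thumb.IsBinary);            // body True
    }
}
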
diff --git a/src/core/Document/FieldSelector.cs b/src/core/Document/FieldSelector.cs
new file mode 100644
index 0000000..f940f08
--- /dev/null
+++ b/src/core/Document/FieldSelector.cs
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Documents
+{
+ /// <summary> Similar to a <a href="http://download.oracle.com/javase/1.5.0/docs/api/java/io/FileFilter.html">
+ /// java.io.FileFilter</a>, the FieldSelector allows one to make decisions about
+ /// what Fields get loaded on a <see cref="Document" /> by <see cref="Lucene.Net.Index.IndexReader.Document(int,Lucene.Net.Documents.FieldSelector)" />
+ /// </summary>
+ public interface FieldSelector
+ {
+
+ /// <summary> </summary>
+ /// <param name="fieldName">the field to accept or reject
+ /// </param>
+ /// <returns> an instance of <see cref="FieldSelectorResult" />
+ /// if the <see cref="Field" /> named <c>fieldName</c> should be loaded.
+ /// </returns>
+ FieldSelectorResult Accept(System.String fieldName);
+ }
+} \ No newline at end of file
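
A minimal FieldSelector implementation sketch; the class name and the field names it matches are hypothetical:

using Lucene.Net.Documents;

// Loads "title" eagerly, defers "body" until it is actually read, skips the rest.
class TitleSelector : FieldSelector
{
    public FieldSelectorResult Accept(string fieldName)
    {
        if (fieldName == "title")
            return FieldSelectorResult.LOAD;
        if (fieldName == "body")
            return FieldSelectorResult.LAZY_LOAD;
        return FieldSelectorResult.NO_LOAD;
    }
}
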
diff --git a/src/core/Document/FieldSelectorResult.cs b/src/core/Document/FieldSelectorResult.cs
new file mode 100644
index 0000000..7d3a889
--- /dev/null
+++ b/src/core/Document/FieldSelectorResult.cs
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using System.Runtime.InteropServices;
+
+namespace Lucene.Net.Documents
+{
+ /// <summary>Provides information about what should be done with this Field</summary>
+ public enum FieldSelectorResult
+ {
+ /// <summary>
+ /// A placeholder result used where null would otherwise be needed, since an enum value cannot be null.
+ /// </summary>
+ INVALID, // TODO: This is kind of a kludgy workaround for the fact enums can't be null
+
+ /// <summary> Load this <see cref="Field" /> every time the <see cref="Document" /> is loaded, reading in the data as it is encountered.
+ /// <see cref="Document.GetField(String)" /> and <see cref="Document.GetFieldable(String)" /> should not return null.
+ /// <p/>
+ /// <see cref="Document.Add(IFieldable)" /> should be called by the Reader.
+ /// </summary>
+ LOAD,
+
+ /// <summary> Lazily load this <see cref="Field" />. This means the <see cref="Field" /> is valid, but it may not actually contain its data until
+ /// invoked. <see cref="Document.GetField(String)" /> SHOULD NOT BE USED. <see cref="Document.GetFieldable(String)" /> is safe to use and should
+ /// return a valid instance of a <see cref="IFieldable" />.
+ /// <p/>
+ /// <see cref="Document.Add(IFieldable)" /> should be called by the Reader.
+ /// </summary>
+ LAZY_LOAD,
+
+ /// <summary> Do not load the <see cref="Field" />. <see cref="Document.GetField(String)" /> and <see cref="Document.GetFieldable(String)" /> should return null.
+ /// <see cref="Document.Add(IFieldable)" /> is not called.
+ /// <p/>
+ /// <see cref="Document.Add(IFieldable)" /> should not be called by the Reader.
+ /// </summary>
+ NO_LOAD,
+
+ /// <summary> Load this field as in the <see cref="LOAD" /> case, but immediately return from <see cref="Field" /> loading for the <see cref="Document" />. Thus, the
+ /// Document may not have its complete set of Fields. <see cref="Document.GetField(String)" /> and <see cref="Document.GetFieldable(String)" /> should
+ /// both be valid for this <see cref="Field" />
+ /// <p/>
+ /// <see cref="Document.Add(IFieldable)" /> should be called by the Reader.
+ /// </summary>
+ LOAD_AND_BREAK,
+
+ /// <summary>Expert: Load the size of this <see cref="Field" /> rather than its value.
+ /// Size is measured as the number of bytes required to store the field: the raw byte count for a binary or compressed value, and 2 * the character count for a String value.
+ /// The size is stored as a binary value, represented as an int in a byte[], with the higher order byte first in [0]
+ /// </summary>
+ SIZE,
+
+ /// <summary>Expert: Like <see cref="SIZE" /> but immediately break from the field loading loop, i.e., stop loading further fields, after the size is loaded </summary>
+ SIZE_AND_BREAK
+ }
+} \ No newline at end of file
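
A sketch of how these results steer field loading through the IndexReader.Document(int, FieldSelector) overload mentioned above; the selector class and the "id" field name are hypothetical:

using Lucene.Net.Documents;

// Takes the "id" field and then stops reading any further fields of the document.
class IdOnlySelector : FieldSelector
{
    public FieldSelectorResult Accept(string fieldName)
    {
        return fieldName == "id"
            ? FieldSelectorResult.LOAD_AND_BREAK   // load this field, then stop the loop
            : FieldSelectorResult.NO_LOAD;         // every other field is skipped
    }
}
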
diff --git a/src/core/Document/Fieldable.cs b/src/core/Document/Fieldable.cs
new file mode 100644
index 0000000..89d37d1
--- /dev/null
+++ b/src/core/Document/Fieldable.cs
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.IO;
+using TokenStream = Lucene.Net.Analysis.TokenStream;
+using FieldInvertState = Lucene.Net.Index.FieldInvertState;
+
+namespace Lucene.Net.Documents
+{
+ /// <summary> Synonymous with <see cref="Field" />.
+ ///
+ /// <p/><b>WARNING</b>: This interface may change within minor versions, despite Lucene's backward compatibility requirements.
+ /// This means new methods may be added from version to version. This change only affects the Fieldable API; other backwards
+ /// compatibility promises remain intact. For example, Lucene can still
+ /// read and write indices created within the same major version.
+ /// <p/>
+ ///
+ ///
+ /// </summary>
+ public interface IFieldable
+ {
+ /// <summary>Gets or sets the boost factor for hits for this field. This value will be
+ /// multiplied into the score of all hits on this this field of this
+ /// document.
+ ///
+ /// <p/>The boost is multiplied by <see cref="Lucene.Net.Documents.Document.Boost" /> of the document
+ /// containing this field. If a document has multiple fields with the same
+ /// name, all such values are multiplied together. This product is then
+ /// used to compute the norm factor for the field. By
+ /// default, in the <see cref="Lucene.Net.Search.Similarity.ComputeNorm(String,Lucene.Net.Index.FieldInvertState)"/>
+ /// method, the boost value is multiplied
+ /// by the <see cref="Lucene.Net.Search.Similarity.LengthNorm(String,int)"/>
+ /// and then rounded by <see cref="Lucene.Net.Search.Similarity.EncodeNorm(float)" /> before it is stored in the
+ /// index. One should attempt to ensure that this product does not overflow
+ /// the range of that encoding.
+ ///
+ /// <p/>The default value is 1.0.
+ ///
+ /// <p/>Note: this value is not stored directly with the document in the index.
+ /// Documents returned from <see cref="Lucene.Net.Index.IndexReader.Document(int)" /> and
+ /// <see cref="Lucene.Net.Search.Searcher.Doc(int)" /> may thus not have the same value present as when
+ /// this field was indexed.
+ ///
+ /// </summary>
+ /// <seealso cref="Lucene.Net.Documents.Document.Boost">
+ /// </seealso>
+ /// <seealso cref="Lucene.Net.Search.Similarity.ComputeNorm(String, FieldInvertState)">
+ /// </seealso>
+ /// <seealso cref="Lucene.Net.Search.Similarity.EncodeNorm(float)">
+ /// </seealso>
+ float Boost { get; set; }
+
+ /// <summary>Returns the name of the field as an interned string.
+ /// For example "date", "title", "body", ...
+ /// </summary>
+ string Name { get; }
+
+ /// <summary>The value of the field as a String, or null.
+ /// <p/>
+ /// For indexing, if isStored()==true, the stringValue() will be used as the stored field value
+ /// unless isBinary()==true, in which case GetBinaryValue() will be used.
+ ///
+ /// If isIndexed()==true and isTokenized()==false, this String value will be indexed as a single token.
+ /// If isIndexed()==true and isTokenized()==true, then tokenStreamValue() will be used to generate indexed tokens if not null,
+ /// else readerValue() will be used to generate indexed tokens if not null, else stringValue() will be used to generate tokens.
+ /// </summary>
+ string StringValue { get; }
+
+ /// <summary>The value of the field as a Reader, which can be used at index time to generate indexed tokens.</summary>
+ /// <seealso cref="StringValue()">
+ /// </seealso>
+ TextReader ReaderValue { get; }
+
+ /// <summary>The TokenStream for this field to be used when indexing, or null.</summary>
+ /// <seealso cref="StringValue()">
+ /// </seealso>
+ TokenStream TokenStreamValue { get; }
+
+ /// <summary>True if the value of the field is to be stored in the index for return
+ /// with search hits.
+ /// </summary>
+ bool IsStored { get; }
+
+ /// <summary>True if the value of the field is to be indexed, so that it may be
+ /// searched on.
+ /// </summary>
+ bool IsIndexed { get; }
+
+ /// <summary>True if the value of the field should be tokenized as text prior to
+ /// indexing. Un-tokenized fields are indexed as a single word and may not be
+ /// Reader-valued.
+ /// </summary>
+ bool IsTokenized { get; }
+
+ /// <summary>True if the term or terms used to index this field are stored as a term
+ /// vector, available from <see cref="Lucene.Net.Index.IndexReader.GetTermFreqVector(int,String)" />.
+ /// These methods do not provide access to the original content of the field,
+ /// only to terms used to index it. If the original content must be
+ /// preserved, use the <c>stored</c> attribute instead.
+ ///
+ /// </summary>
+ /// <seealso cref="Lucene.Net.Index.IndexReader.GetTermFreqVector(int, String)">
+ /// </seealso>
+ bool IsTermVectorStored { get; }
+
+ /// <summary> True if terms are stored as term vector together with their offsets
+        /// (start and end position in source text).
+ /// </summary>
+ bool IsStoreOffsetWithTermVector { get; }
+
+ /// <summary> True if terms are stored as term vector together with their token positions.</summary>
+ bool IsStorePositionWithTermVector { get; }
+
+ /// <summary>True if the value of the field is stored as binary </summary>
+ bool IsBinary { get; }
+
+ /// <summary>
+ /// True if norms are omitted for this indexed field.
+ /// <para>
+ /// Expert:
+ /// If set, omit normalization factors associated with this indexed field.
+ /// This effectively disables indexing boosts and length normalization for this field.
+ /// </para>
+ /// </summary>
+ bool OmitNorms { get; set; }
+
+
+ /// <summary> Indicates whether a Field is Lazy or not. The semantics of Lazy loading are such that if a Field is lazily loaded, retrieving
+        /// its values via <see cref="StringValue()" /> or <see cref="GetBinaryValue()" /> is only valid as long as the <see cref="Lucene.Net.Index.IndexReader" /> that
+ /// retrieved the <see cref="Document" /> is still open.
+ ///
+ /// </summary>
+ /// <value> true if this field can be loaded lazily </value>
+ bool IsLazy { get; }
+
+        /// <summary> Returns the offset into the byte[] segment that is used as the value; if the Field is not binary,
+        /// the returned value is undefined.
+ /// </summary>
+ /// <value> index of the first character in byte[] segment that represents this Field value </value>
+ int BinaryOffset { get; }
+
+        /// <summary> Returns the length of the byte[] segment that is used as the value; if the Field is not binary,
+        /// the returned value is undefined.
+ /// </summary>
+ /// <value> length of byte[] segment that represents this Field value </value>
+ int BinaryLength { get; }
+
+ /// <summary> Return the raw byte[] for the binary field. Note that
+ /// you must also call <see cref="BinaryLength" /> and <see cref="BinaryOffset" />
+ /// to know which range of bytes in this
+        /// returned array belongs to the field.
+ /// </summary>
+ /// <returns> reference to the Field value as byte[]. </returns>
+ byte[] GetBinaryValue();
+
+ /// <summary> Return the raw byte[] for the binary field. Note that
+ /// you must also call <see cref="BinaryLength" /> and <see cref="BinaryOffset" />
+ /// to know which range of bytes in this
+        /// returned array belongs to the field.<p/>
+ /// About reuse: if you pass in the result byte[] and it is
+ /// used, likely the underlying implementation will hold
+ /// onto this byte[] and return it in future calls to
+        /// <see cref="GetBinaryValue()" /> or <see cref="GetBinaryValue(byte[])" />.
+ /// So if you subsequently re-use the same byte[] elsewhere
+ /// it will alter this Fieldable's value.
+ /// </summary>
+ /// <param name="result"> User defined buffer that will be used if
+ /// possible. If this is null or not large enough, a new
+ /// buffer is allocated
+ /// </param>
+ /// <returns> reference to the Field value as byte[].
+ /// </returns>
+ byte[] GetBinaryValue(byte[] result);
+
+        /// <summary>
+        /// Expert:
+        /// <para>
+        /// If set, omit term freq, positions and payloads from
+        /// postings for this field.
+        /// </para>
+        /// <para>
+        /// <b>NOTE</b>: While this option reduces storage space
+        /// required in the index, it also means any query
+        /// requiring positional information, such as
+        /// <see cref="Lucene.Net.Search.PhraseQuery"/> or
+        /// <see cref="Lucene.Net.Search.Spans.SpanQuery"/>
+        /// subclasses, will silently fail to find results.
+        /// </para>
+        /// </summary>
+ bool OmitTermFreqAndPositions { set; get; }
+ }
+} \ No newline at end of file
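A quick illustration of the binary-value contract described above (a hedged sketch, not part of this patch): GetBinaryValue() may hand back a shared or reused buffer, so BinaryOffset and BinaryLength must be consulted to find the bytes that actually belong to the field. The document instance and the field name "payload" are assumptions for the example.

    // Namespace: Lucene.Net.Documents. 'doc' is an assumed Document retrieved from a reader
    // that contains a stored binary field named "payload".
    IFieldable field = doc.GetFieldable("payload");
    if (field != null && field.IsBinary)
    {
        byte[] raw = field.GetBinaryValue();               // possibly a shared/reused buffer
        int offset = field.BinaryOffset;
        int length = field.BinaryLength;
        byte[] copy = new byte[length];
        System.Array.Copy(raw, offset, copy, 0, length);   // only this slice is the field's value
    }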
diff --git a/src/core/Document/LoadFirstFieldSelector.cs b/src/core/Document/LoadFirstFieldSelector.cs
new file mode 100644
index 0000000..4f353f6
--- /dev/null
+++ b/src/core/Document/LoadFirstFieldSelector.cs
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+namespace Lucene.Net.Documents
+{
+
+ /// <summary> Load the First field and break.
+ /// <p/>
+ /// See <see cref="FieldSelectorResult.LOAD_AND_BREAK" />
+ /// </summary>
+ [Serializable]
+ public class LoadFirstFieldSelector : FieldSelector
+ {
+
+ public virtual FieldSelectorResult Accept(System.String fieldName)
+ {
+ return FieldSelectorResult.LOAD_AND_BREAK;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Document/MapFieldSelector.cs b/src/core/Document/MapFieldSelector.cs
new file mode 100644
index 0000000..92a8959
--- /dev/null
+++ b/src/core/Document/MapFieldSelector.cs
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Linq;
+using System.Collections.Generic;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Documents
+{
+ /// <summary>A <see cref="FieldSelector" /> based on a Map of field names to <see cref="FieldSelectorResult" />s</summary>
+ [Serializable]
+ public class MapFieldSelector : FieldSelector
+ {
+ internal IDictionary<string, FieldSelectorResult> fieldSelections;
+
+        /// <summary>Create a MapFieldSelector</summary>
+ /// <param name="fieldSelections">maps from field names (String) to <see cref="FieldSelectorResult" />s
+ /// </param>
+ public MapFieldSelector(IDictionary<string, FieldSelectorResult> fieldSelections)
+ {
+ this.fieldSelections = fieldSelections;
+ }
+
+        /// <summary>Create a MapFieldSelector</summary>
+ /// <param name="fields">fields to LOAD. List of Strings. All other fields are NO_LOAD.
+ /// </param>
+ public MapFieldSelector(IList<string> fields)
+ {
+ fieldSelections = new HashMap<string, FieldSelectorResult>(fields.Count * 5 / 3);
+ foreach(var field in fields)
+ fieldSelections[field] = FieldSelectorResult.LOAD;
+ }
+
+        /// <summary>Create a MapFieldSelector</summary>
+ /// <param name="fields">fields to LOAD. All other fields are NO_LOAD.
+ /// </param>
+ public MapFieldSelector(params System.String[] fields)
+ : this(fields.ToList()) // TODO: this is slow
+ {
+ }
+
+ /// <summary>Load field according to its associated value in fieldSelections</summary>
+ /// <param name="field">a field name
+ /// </param>
+ /// <returns> the fieldSelections value that field maps to or NO_LOAD if none.
+ /// </returns>
+ public virtual FieldSelectorResult Accept(System.String field)
+ {
+ FieldSelectorResult selection = fieldSelections[field];
+ return selection != FieldSelectorResult.INVALID ? selection : FieldSelectorResult.NO_LOAD; // TODO: See FieldSelectorResult
+ }
+ }
+} \ No newline at end of file
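As a hedged usage sketch (not part of this patch): a FieldSelector is typically passed to the IndexReader.Document(int, FieldSelector) overload so that only the selected stored fields are materialized. The reader, document number, and field names below are assumptions for the example.

    // Namespaces: Lucene.Net.Documents, Lucene.Net.Index.
    // 'reader' is an assumed, already-open IndexReader; 'docId' is a valid document number.
    FieldSelector selector = new MapFieldSelector("title", "date");
    Document doc = reader.Document(docId, selector);   // "title"/"date" -> LOAD, all other fields -> NO_LOAD
    string title = doc.Get("title");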
diff --git a/src/core/Document/NumberTools.cs b/src/core/Document/NumberTools.cs
new file mode 100644
index 0000000..f877120
--- /dev/null
+++ b/src/core/Document/NumberTools.cs
@@ -0,0 +1,221 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Search;
+using NumericUtils = Lucene.Net.Util.NumericUtils;
+
+namespace Lucene.Net.Documents
+{
+
+ // do not remove this class in 3.0, it may be needed to decode old indexes!
+
+ /// <summary> Provides support for converting longs to Strings, and back again. The strings
+ /// are structured so that lexicographic sorting order is preserved.
+ ///
+ /// <p/>
+ /// That is, if l1 is less than l2 for any two longs l1 and l2, then
+ /// NumberTools.longToString(l1) is lexicographically less than
+ /// NumberTools.longToString(l2). (Similarly for "greater than" and "equals".)
+ ///
+ /// <p/>
+ /// This class handles <b>all</b> long values (unlike
+ /// <see cref="Lucene.Net.Documents.DateField" />).
+ ///
+ /// </summary>
+ /// <deprecated> For new indexes use <see cref="NumericUtils" /> instead, which
+ /// provides a sortable binary representation (prefix encoded) of numeric
+ /// values.
+ /// To index and efficiently query numeric values use <see cref="NumericField" />
+ /// and <see cref="NumericRangeQuery{T}" />.
+ /// This class is included for use with existing
+ /// indices and will be removed in a future release (possibly Lucene 4.0).
+ /// </deprecated>
+ [Obsolete("For new indexes use NumericUtils instead, which provides a sortable binary representation (prefix encoded) of numeric values. To index and efficiently query numeric values use NumericField and NumericRangeQuery. This class is included for use with existing indices and will be removed in a future release (possibly Lucene 4.0).")]
+ public class NumberTools
+ {
+
+ private const int RADIX = 36;
+
+ private const char NEGATIVE_PREFIX = '-';
+
+ // NB: NEGATIVE_PREFIX must be < POSITIVE_PREFIX
+ private const char POSITIVE_PREFIX = '0';
+
+ //NB: this must be less than
+ /// <summary> Equivalent to longToString(Long.MIN_VALUE)</summary>
+#if !PRE_LUCENE_NET_2_0_0_COMPATIBLE
+ public static readonly System.String MIN_STRING_VALUE = NEGATIVE_PREFIX + "0000000000000";
+#else
+ public static readonly System.String MIN_STRING_VALUE = NEGATIVE_PREFIX + "0000000000000000";
+#endif
+
+ /// <summary> Equivalent to longToString(Long.MAX_VALUE)</summary>
+#if !PRE_LUCENE_NET_2_0_0_COMPATIBLE
+ public static readonly System.String MAX_STRING_VALUE = POSITIVE_PREFIX + "1y2p0ij32e8e7";
+#else
+ public static readonly System.String MAX_STRING_VALUE = POSITIVE_PREFIX + "7fffffffffffffff";
+#endif
+
+ /// <summary> The length of (all) strings returned by <see cref="LongToString" /></summary>
+ public static readonly int STR_SIZE = MIN_STRING_VALUE.Length;
+
+ /// <summary> Converts a long to a String suitable for indexing.</summary>
+ public static System.String LongToString(long l)
+ {
+
+ if (l == System.Int64.MinValue)
+ {
+ // special case, because long is not symmetric around zero
+ return MIN_STRING_VALUE;
+ }
+
+ System.Text.StringBuilder buf = new System.Text.StringBuilder(STR_SIZE);
+
+ if (l < 0)
+ {
+ buf.Append(NEGATIVE_PREFIX);
+ l = System.Int64.MaxValue + l + 1;
+ }
+ else
+ {
+ buf.Append(POSITIVE_PREFIX);
+ }
+#if !PRE_LUCENE_NET_2_0_0_COMPATIBLE
+ System.String num = ToString(l);
+#else
+ System.String num = System.Convert.ToString(l, RADIX);
+#endif
+
+ int padLen = STR_SIZE - num.Length - buf.Length;
+ while (padLen-- > 0)
+ {
+ buf.Append('0');
+ }
+ buf.Append(num);
+
+ return buf.ToString();
+ }
+
+ /// <summary> Converts a String that was returned by <see cref="LongToString" /> back to a
+ /// long.
+ ///
+ /// </summary>
+        /// <exception cref="System.NullReferenceException">if the input is null</exception>
+        /// <exception cref="System.FormatException">if the input does not parse (it was not a String returned by
+        /// LongToString()).</exception>
+ public static long StringToLong(System.String str)
+ {
+ if (str == null)
+ {
+ throw new System.NullReferenceException("string cannot be null");
+ }
+ if (str.Length != STR_SIZE)
+ {
+ throw new System.FormatException("string is the wrong size");
+ }
+
+ if (str.Equals(MIN_STRING_VALUE))
+ {
+ return System.Int64.MinValue;
+ }
+
+ char prefix = str[0];
+#if !PRE_LUCENE_NET_2_0_0_COMPATIBLE
+ long l = ToLong(str.Substring(1));
+#else
+ long l = System.Convert.ToInt64(str.Substring(1), RADIX);
+#endif
+
+ if (prefix == POSITIVE_PREFIX)
+ {
+ // nop
+ }
+ else if (prefix == NEGATIVE_PREFIX)
+ {
+ l = l - System.Int64.MaxValue - 1;
+ }
+ else
+ {
+ throw new System.FormatException("string does not begin with the correct prefix");
+ }
+
+ return l;
+ }
+
+#if !PRE_LUCENE_NET_2_0_0_COMPATIBLE
+ #region BASE36 OPS
+ static System.String digits = "0123456789abcdefghijklmnopqrstuvwxyz";
+ static long[] powersOf36 =
+ {
+ 1L,
+ 36L,
+ 36L*36L,
+ 36L*36L*36L,
+ 36L*36L*36L*36L,
+ 36L*36L*36L*36L*36L,
+ 36L*36L*36L*36L*36L*36L,
+ 36L*36L*36L*36L*36L*36L*36L,
+ 36L*36L*36L*36L*36L*36L*36L*36L,
+ 36L*36L*36L*36L*36L*36L*36L*36L*36L,
+ 36L*36L*36L*36L*36L*36L*36L*36L*36L*36L,
+ 36L*36L*36L*36L*36L*36L*36L*36L*36L*36L*36L,
+ 36L*36L*36L*36L*36L*36L*36L*36L*36L*36L*36L*36L
+ };
+
+ public static System.String ToString(long lval)
+ {
+ if (lval == 0)
+ {
+ return "0";
+ }
+
+ int maxStrLen = powersOf36.Length;
+ long curval = lval;
+
+ char[] tb = new char[maxStrLen];
+ int outpos = 0;
+ for (int i = 0; i < maxStrLen; i++)
+ {
+ long pval = powersOf36[maxStrLen - i - 1];
+ int pos = (int)(curval / pval);
+ tb[outpos++] = digits.Substring(pos, 1).ToCharArray()[0];
+ curval = curval % pval;
+ }
+ if (outpos == 0)
+ tb[outpos++] = '0';
+ return new System.String(tb, 0, outpos).TrimStart('0');
+ }
+
+ public static long ToLong(System.String t)
+ {
+ long ival = 0;
+ char[] tb = t.ToCharArray();
+ for (int i = 0; i < tb.Length; i++)
+ {
+ ival += powersOf36[i] * digits.IndexOf(tb[tb.Length - i - 1]);
+ }
+ return ival;
+ }
+ #endregion
+#endif
+ }
+} \ No newline at end of file
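A small sketch (not part of this patch) of the ordering guarantee documented above: encoded strings compare ordinally in the same order as the underlying longs, and the encoding round-trips.

    string a = NumberTools.LongToString(-42L);
    string b = NumberTools.LongToString(7L);

    // -42 < 7, so the encoded forms sort the same way as the original numbers.
    System.Diagnostics.Debug.Assert(string.CompareOrdinal(a, b) < 0);
    System.Diagnostics.Debug.Assert(NumberTools.StringToLong(a) == -42L);
    System.Diagnostics.Debug.Assert(NumberTools.StringToLong(b) == 7L);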
diff --git a/src/core/Document/NumericField.cs b/src/core/Document/NumericField.cs
new file mode 100644
index 0000000..e77dee4
--- /dev/null
+++ b/src/core/Document/NumericField.cs
@@ -0,0 +1,294 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.IO;
+using Lucene.Net.Search;
+using NumericTokenStream = Lucene.Net.Analysis.NumericTokenStream;
+using TokenStream = Lucene.Net.Analysis.TokenStream;
+using NumericUtils = Lucene.Net.Util.NumericUtils;
+using FieldCache = Lucene.Net.Search.FieldCache;
+using SortField = Lucene.Net.Search.SortField;
+
+namespace Lucene.Net.Documents
+{
+
+ /// <summary> <p/>This class provides a <see cref="Field" /> that enables indexing
+ /// of numeric values for efficient range filtering and
+ /// sorting. Here's an example usage, adding an int value:
+ /// <code>
+ /// document.add(new NumericField(name).setIntValue(value));
+ /// </code>
+ ///
+ /// For optimal performance, re-use the
+ /// <c>NumericField</c> and <see cref="Document" /> instance for more than
+ /// one document:
+ ///
+ /// <code>
+ /// NumericField field = new NumericField(name);
+ /// Document document = new Document();
+ /// document.add(field);
+ ///
+ /// for(all documents) {
+ /// ...
+ /// field.setIntValue(value)
+ /// writer.addDocument(document);
+ /// ...
+ /// }
+ /// </code>
+ ///
+ /// <p/>The .Net native types <c>int</c>, <c>long</c>,
+ /// <c>float</c> and <c>double</c> are
+ /// directly supported. However, any value that can be
+ /// converted into these native types can also be indexed.
+ /// For example, date/time values represented by a
+ /// <see cref="System.DateTime" /> can be translated into a long
+ /// value using the <c>java.util.Date.getTime</c> method. If you
+ /// don't need millisecond precision, you can quantize the
+ /// value, either by dividing the result of
+ /// <c>java.util.Date.getTime</c> or using the separate getters
+ /// (for year, month, etc.) to construct an <c>int</c> or
+ /// <c>long</c> value.<p/>
+ ///
+ /// <p/>To perform range querying or filtering against a
+    /// <c>NumericField</c>, use <see cref="NumericRangeQuery{T}" /> or <see cref="NumericRangeFilter{T}" />.
+    /// To sort according to a
+    /// <c>NumericField</c>, use the normal numeric sort types, eg
+    /// <see cref="SortField.INT" />. <c>NumericField</c> values
+ /// can also be loaded directly from <see cref="FieldCache" />.<p/>
+ ///
+ /// <p/>By default, a <c>NumericField</c>'s value is not stored but
+ /// is indexed for range filtering and sorting. You can use
+ /// the <see cref="NumericField(String,Field.Store,bool)" />
+ /// constructor if you need to change these defaults.<p/>
+ ///
+ /// <p/>You may add the same field name as a <c>NumericField</c> to
+ /// the same document more than once. Range querying and
+ /// filtering will be the logical OR of all values; so a range query
+ /// will hit all documents that have at least one value in
+ /// the range. However sort behavior is not defined. If you need to sort,
+ /// you should separately index a single-valued <c>NumericField</c>.<p/>
+ ///
+ /// <p/>A <c>NumericField</c> will consume somewhat more disk space
+ /// in the index than an ordinary single-valued field.
+ /// However, for a typical index that includes substantial
+ /// textual content per document, this increase will likely
+ /// be in the noise. <p/>
+ ///
+ /// <p/>Within Lucene, each numeric value is indexed as a
+ /// <em>trie</em> structure, where each term is logically
+ /// assigned to larger and larger pre-defined brackets (which
+ /// are simply lower-precision representations of the value).
+ /// The step size between each successive bracket is called the
+ /// <c>precisionStep</c>, measured in bits. Smaller
+ /// <c>precisionStep</c> values result in larger number
+ /// of brackets, which consumes more disk space in the index
+ /// but may result in faster range search performance. The
+ /// default value, 4, was selected for a reasonable tradeoff
+ /// of disk space consumption versus performance. You can
+ /// use the expert constructor <see cref="NumericField(String,int,Field.Store,bool)" />
+ /// if you'd
+ /// like to change the value. Note that you must also
+ /// specify a congruent value when creating <see cref="NumericRangeQuery{T}" />
+ /// or <see cref="NumericRangeFilter{T}" />.
+ /// For low cardinality fields larger precision steps are good.
+ /// If the cardinality is &lt; 100, it is fair
+ /// to use <see cref="int.MaxValue" />, which produces one
+ /// term per value.
+ ///
+ /// <p/>For more information on the internals of numeric trie
+ /// indexing, including the <a
+ /// href="../search/NumericRangeQuery.html#precisionStepDesc"><c>precisionStep</c></a>
+ /// configuration, see <see cref="NumericRangeQuery{T}" />. The format of
+ /// indexed values is described in <see cref="NumericUtils" />.
+ ///
+ /// <p/>If you only need to sort by numeric value, and never
+ /// run range querying/filtering, you can index using a
+ /// <c>precisionStep</c> of <see cref="int.MaxValue" />.
+ /// This will minimize disk space consumed. <p/>
+ ///
+ /// <p/>More advanced users can instead use <see cref="NumericTokenStream" />
+ /// directly, when indexing numbers. This
+ /// class is a wrapper around this token stream type for
+ /// easier, more intuitive usage.<p/>
+ ///
+ /// <p/><b>NOTE:</b> This class is only used during
+ /// indexing. When retrieving the stored field value from a
+ /// <see cref="Document" /> instance after search, you will get a
+ /// conventional <see cref="IFieldable" /> instance where the numeric
+ /// values are returned as <see cref="String" />s (according to
+ /// <c>toString(value)</c> of the used data type).
+ ///
+ /// <p/><font color="red"><b>NOTE:</b> This API is
+ /// experimental and might change in incompatible ways in the
+ /// next release.</font>
+ ///
+ /// </summary>
+ /// <since> 2.9
+ /// </since>
+ [Serializable]
+ public sealed class NumericField:AbstractField
+ {
+
+ new private readonly NumericTokenStream tokenStream;
+
+ /// <summary> Creates a field for numeric values using the default <c>precisionStep</c>
+ /// <see cref="NumericUtils.PRECISION_STEP_DEFAULT" /> (4). The instance is not yet initialized with
+        /// a numeric value; before indexing a document containing this field,
+ /// set a value using the various set<em>???</em>Value() methods.
+ /// This constructor creates an indexed, but not stored field.
+ /// </summary>
+ /// <param name="name">the field name
+ /// </param>
+ public NumericField(System.String name):this(name, NumericUtils.PRECISION_STEP_DEFAULT, Field.Store.NO, true)
+ {
+ }
+
+ /// <summary> Creates a field for numeric values using the default <c>precisionStep</c>
+ /// <see cref="NumericUtils.PRECISION_STEP_DEFAULT" /> (4). The instance is not yet initialized with
+        /// a numeric value; before indexing a document containing this field,
+ /// set a value using the various set<em>???</em>Value() methods.
+ /// </summary>
+ /// <param name="name">the field name
+ /// </param>
+ /// <param name="store">if the field should be stored in plain text form
+ /// (according to <c>toString(value)</c> of the used data type)
+ /// </param>
+ /// <param name="index">if the field should be indexed using <see cref="NumericTokenStream" />
+ /// </param>
+ public NumericField(System.String name, Field.Store store, bool index):this(name, NumericUtils.PRECISION_STEP_DEFAULT, store, index)
+ {
+ }
+
+ /// <summary> Creates a field for numeric values with the specified
+ /// <c>precisionStep</c>. The instance is not yet initialized with
+        /// a numeric value; before indexing a document containing this field,
+ /// set a value using the various set<em>???</em>Value() methods.
+ /// This constructor creates an indexed, but not stored field.
+ /// </summary>
+ /// <param name="name">the field name
+ /// </param>
+ /// <param name="precisionStep">the used <a href="../search/NumericRangeQuery.html#precisionStepDesc">precision step</a>
+ /// </param>
+ public NumericField(System.String name, int precisionStep):this(name, precisionStep, Field.Store.NO, true)
+ {
+ }
+
+ /// <summary> Creates a field for numeric values with the specified
+ /// <c>precisionStep</c>. The instance is not yet initialized with
+        /// a numeric value; before indexing a document containing this field,
+ /// set a value using the various set<em>???</em>Value() methods.
+ /// </summary>
+ /// <param name="name">the field name
+ /// </param>
+ /// <param name="precisionStep">the used <a href="../search/NumericRangeQuery.html#precisionStepDesc">precision step</a>
+ /// </param>
+ /// <param name="store">if the field should be stored in plain text form
+ /// (according to <c>toString(value)</c> of the used data type)
+ /// </param>
+ /// <param name="index">if the field should be indexed using <see cref="NumericTokenStream" />
+ /// </param>
+ public NumericField(System.String name, int precisionStep, Field.Store store, bool index):base(name, store, index?Field.Index.ANALYZED_NO_NORMS:Field.Index.NO, Field.TermVector.NO)
+ {
+ OmitTermFreqAndPositions = true;
+ tokenStream = new NumericTokenStream(precisionStep);
+ }
+
+ /// <summary>Returns a <see cref="NumericTokenStream" /> for indexing the numeric value. </summary>
+ public override TokenStream TokenStreamValue
+ {
+ get { return IsIndexed ? tokenStream : null; }
+ }
+
+        /// <summary>Always returns <c>null</c> for numeric fields. </summary>
+ public override byte[] GetBinaryValue(byte[] result)
+ {
+ return null;
+ }
+
+        /// <summary>Always returns <c>null</c> for numeric fields. </summary>
+ public override TextReader ReaderValue
+ {
+ get { return null; }
+ }
+
+ /// <summary>Returns the numeric value as a string (how it is stored, when <see cref="Field.Store.YES" /> is chosen). </summary>
+ public override string StringValue
+ {
+ get { return (fieldsData == null) ? null : fieldsData.ToString(); }
+ }
+
+        /// <summary>Returns the current numeric value as a boxed <see cref="System.ValueType" /> (int, long, float or double), or <c>null</c> if not yet initialized. </summary>
+ public ValueType NumericValue
+ {
+ get { return (System.ValueType) fieldsData; }
+ }
+
+ /// <summary> Initializes the field with the supplied <c>long</c> value.</summary>
+ /// <param name="value_Renamed">the numeric value
+ /// </param>
+ /// <returns> this instance, because of this you can use it the following way:
+ /// <c>document.add(new NumericField(name, precisionStep).SetLongValue(value))</c>
+ /// </returns>
+ public NumericField SetLongValue(long value_Renamed)
+ {
+ tokenStream.SetLongValue(value_Renamed);
+ fieldsData = value_Renamed;
+ return this;
+ }
+
+ /// <summary> Initializes the field with the supplied <c>int</c> value.</summary>
+ /// <param name="value_Renamed">the numeric value
+ /// </param>
+ /// <returns> this instance, because of this you can use it the following way:
+ /// <c>document.add(new NumericField(name, precisionStep).setIntValue(value))</c>
+ /// </returns>
+ public NumericField SetIntValue(int value_Renamed)
+ {
+ tokenStream.SetIntValue(value_Renamed);
+ fieldsData = value_Renamed;
+ return this;
+ }
+
+ /// <summary> Initializes the field with the supplied <c>double</c> value.</summary>
+ /// <param name="value_Renamed">the numeric value
+ /// </param>
+ /// <returns> this instance, because of this you can use it the following way:
+ /// <c>document.add(new NumericField(name, precisionStep).setDoubleValue(value))</c>
+ /// </returns>
+ public NumericField SetDoubleValue(double value_Renamed)
+ {
+ tokenStream.SetDoubleValue(value_Renamed);
+ fieldsData = value_Renamed;
+ return this;
+ }
+
+ /// <summary> Initializes the field with the supplied <c>float</c> value.</summary>
+ /// <param name="value_Renamed">the numeric value
+ /// </param>
+ /// <returns> this instance, because of this you can use it the following way:
+ /// <c>document.add(new NumericField(name, precisionStep).setFloatValue(value))</c>
+ /// </returns>
+ public NumericField SetFloatValue(float value_Renamed)
+ {
+ tokenStream.SetFloatValue(value_Renamed);
+ fieldsData = value_Renamed;
+ return this;
+ }
+ }
+} \ No newline at end of file
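A hedged sketch (not part of this patch) of the "congruent precisionStep" requirement noted in the class comment: the step used when indexing must be repeated when building the range query. The field name, writer, and bounds are assumptions for the example.

    // Namespaces: Lucene.Net.Documents, Lucene.Net.Search. 'writer' is an assumed open IndexWriter.
    var field = new NumericField("price", 4, Field.Store.YES, true);   // precisionStep = 4 (the default)
    var doc = new Document();
    doc.Add(field);

    field.SetIntValue(1299);
    writer.AddDocument(doc);

    // The query must use the same precisionStep that was used at index time.
    var query = NumericRangeQuery.NewIntRange("price", 4, 1000, 2000, true, true);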
diff --git a/src/core/Document/SetBasedFieldSelector.cs b/src/core/Document/SetBasedFieldSelector.cs
new file mode 100644
index 0000000..14e3e02
--- /dev/null
+++ b/src/core/Document/SetBasedFieldSelector.cs
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Documents
+{
+    /// <summary> Declare what fields to load normally and what fields to load lazily.</summary>
+ [Serializable]
+ public class SetBasedFieldSelector : FieldSelector
+ {
+ private ISet<string> fieldsToLoad;
+ private ISet<string> lazyFieldsToLoad;
+
+ /// <summary> Pass in the Set of <see cref="Field" /> names to load and the Set of <see cref="Field" /> names to load lazily. If both are null, the
+ /// Document will not have any <see cref="Field" /> on it.
+ /// </summary>
+ /// <param name="fieldsToLoad">A Set of <see cref="String" /> field names to load. May be empty, but not null
+ /// </param>
+ /// <param name="lazyFieldsToLoad">A Set of <see cref="String" /> field names to load lazily. May be empty, but not null
+ /// </param>
+ public SetBasedFieldSelector(ISet<string> fieldsToLoad, ISet<string> lazyFieldsToLoad)
+ {
+ this.fieldsToLoad = fieldsToLoad;
+ this.lazyFieldsToLoad = lazyFieldsToLoad;
+ }
+
+ /// <summary> Indicate whether to load the field with the given name or not. If the <see cref="AbstractField.Name()" /> is not in either of the
+ /// initializing Sets, then <see cref="Lucene.Net.Documents.FieldSelectorResult.NO_LOAD" /> is returned. If a Field name
+ /// is in both <c>fieldsToLoad</c> and <c>lazyFieldsToLoad</c>, lazy has precedence.
+ ///
+ /// </summary>
+ /// <param name="fieldName">The <see cref="Field" /> name to check
+ /// </param>
+ /// <returns> The <see cref="FieldSelectorResult" />
+ /// </returns>
+ public virtual FieldSelectorResult Accept(System.String fieldName)
+ {
+ FieldSelectorResult result = FieldSelectorResult.NO_LOAD;
+            if (fieldsToLoad.Contains(fieldName))
+ {
+ result = FieldSelectorResult.LOAD;
+ }
+            if (lazyFieldsToLoad.Contains(fieldName))
+ {
+ result = FieldSelectorResult.LAZY_LOAD;
+ }
+ return result;
+ }
+ }
+} \ No newline at end of file
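A brief sketch (not part of this patch) of the eager/lazy split, using hypothetical field names:

    ISet<string> eager = new HashSet<string> { "title" };
    ISet<string> lazy = new HashSet<string> { "body" };
    var selector = new SetBasedFieldSelector(eager, lazy);
    // Accept("title") -> LOAD, Accept("body") -> LAZY_LOAD (lazy wins if a name is in both sets),
    // any other field -> NO_LOAD.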
diff --git a/src/core/Index/AbstractAllTermDocs.cs b/src/core/Index/AbstractAllTermDocs.cs
new file mode 100644
index 0000000..935b7fa
--- /dev/null
+++ b/src/core/Index/AbstractAllTermDocs.cs
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+ /// <summary>
+ /// Base class for enumerating all but deleted docs.
+ ///
+ /// <p/>NOTE: this class is meant only to be used internally
+ /// by Lucene; it's only public so it can be shared across
+ /// packages. This means the API is freely subject to
+    /// change, and the class could be removed entirely, in any
+    /// Lucene release. Use directly at your own risk!
+ /// </summary>
+ public abstract class AbstractAllTermDocs : TermDocs
+ {
+ protected int maxDoc;
+ protected int internalDoc = -1;
+
+ protected AbstractAllTermDocs(int maxDoc)
+ {
+ this.maxDoc = maxDoc;
+ }
+
+ public void Seek(Term term)
+ {
+ if (term == null)
+ {
+ internalDoc = -1;
+ }
+ else
+ {
+ throw new NotSupportedException();
+ }
+ }
+
+ public void Seek(TermEnum termEnum)
+ {
+ throw new NotSupportedException();
+ }
+
+ public int Doc
+ {
+ get { return internalDoc; }
+ }
+
+ public int Freq
+ {
+ get { return 1; }
+ }
+
+ public bool Next()
+ {
+ return SkipTo(internalDoc + 1);
+ }
+
+ public int Read(int[] docs, int[] freqs)
+ {
+ int length = docs.Length;
+ int i = 0;
+ while (i < length && internalDoc < maxDoc)
+ {
+ if (!IsDeleted(internalDoc))
+ {
+ docs[i] = internalDoc;
+ freqs[i] = 1;
+ ++i;
+ }
+ internalDoc++;
+ }
+ return i;
+ }
+
+ public bool SkipTo(int target)
+ {
+ internalDoc = target;
+ while (internalDoc < maxDoc)
+ {
+ if (!IsDeleted(internalDoc))
+ {
+ return true;
+ }
+ internalDoc++;
+ }
+ return false;
+ }
+
+ public void Close()
+ {
+ Dispose();
+ }
+
+ public void Dispose()
+ {
+ Dispose(true);
+ }
+
+ protected abstract void Dispose(bool disposing);
+
+ public abstract bool IsDeleted(int doc);
+ }
+}
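To make the extension point concrete, here is a minimal hypothetical subclass (not part of this patch) that reports no deletions, so Next()/Doc simply walk every document id from 0 to maxDoc-1:

    class NoDeletionsAllTermDocs : Lucene.Net.Index.AbstractAllTermDocs
    {
        public NoDeletionsAllTermDocs(int maxDoc) : base(maxDoc) { }

        public override bool IsDeleted(int doc) { return false; }

        protected override void Dispose(bool disposing) { }
    }

    // Usage sketch:
    // var docs = new NoDeletionsAllTermDocs(reader.MaxDoc);
    // while (docs.Next()) { int id = docs.Doc; /* Freq is always 1 */ }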
diff --git a/src/core/Index/AllTermDocs.cs b/src/core/Index/AllTermDocs.cs
new file mode 100644
index 0000000..da5f16d
--- /dev/null
+++ b/src/core/Index/AllTermDocs.cs
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using BitVector = Lucene.Net.Util.BitVector;
+
+namespace Lucene.Net.Index
+{
+
+ class AllTermDocs : AbstractAllTermDocs
+ {
+ protected internal BitVector deletedDocs;
+
+ protected internal AllTermDocs(SegmentReader parent) : base(parent.MaxDoc)
+ {
+ lock (parent)
+ {
+ this.deletedDocs = parent.deletedDocs;
+ }
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ // Do nothing.
+ }
+
+ public override bool IsDeleted(int doc)
+ {
+ return deletedDocs != null && deletedDocs.Get(doc);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/BufferedDeletes.cs b/src/core/Index/BufferedDeletes.cs
new file mode 100644
index 0000000..52ef1df
--- /dev/null
+++ b/src/core/Index/BufferedDeletes.cs
@@ -0,0 +1,196 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.Collections.Generic;
+using Lucene.Net.Search;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary>Holds buffered deletes, by docID, term or query. We
+ /// hold two instances of this class: one for the deletes
+ /// prior to the last flush, the other for deletes after
+ /// the last flush. This is so if we need to abort
+ /// (discard all buffered docs) we can also discard the
+ /// buffered deletes yet keep the deletes done during
+ /// previously flushed segments.
+ /// </summary>
+ class BufferedDeletes
+ {
+ internal int numTerms;
+ internal IDictionary<Term,Num> terms = null;
+ internal IDictionary<Query, int> queries = new HashMap<Query, int>();
+ internal List<int> docIDs = new List<int>();
+ internal long bytesUsed;
+ internal bool doTermSort;
+
+ public BufferedDeletes(bool doTermSort)
+ {
+ this.doTermSort = doTermSort;
+ if (doTermSort)
+ {
+ //TODO: Used in place of TreeMap
+ terms = new SortedDictionary<Term, Num>();
+ }
+ else
+ {
+ terms = new HashMap<Term, Num>();
+ }
+ }
+
+
+ // Number of documents a delete term applies to.
+ internal sealed class Num
+ {
+ internal int num;
+
+ internal Num(int num)
+ {
+ this.num = num;
+ }
+
+ internal int GetNum()
+ {
+ return num;
+ }
+
+ internal void SetNum(int num)
+ {
+ // Only record the new number if it's greater than the
+ // current one. This is important because if multiple
+ // threads are replacing the same doc at nearly the
+ // same time, it's possible that one thread that got a
+ // higher docID is scheduled before the other
+ // threads.
+ if (num > this.num)
+ this.num = num;
+ }
+ }
+
+ internal virtual int Size()
+ {
+ // We use numTerms not terms.size() intentionally, so
+ // that deletes by the same term multiple times "count",
+ // ie if you ask to flush every 1000 deletes then even
+ // dup'd terms are counted towards that 1000
+ return numTerms + queries.Count + docIDs.Count;
+ }
+
+ internal virtual void Update(BufferedDeletes @in)
+ {
+ numTerms += @in.numTerms;
+ bytesUsed += @in.bytesUsed;
+ foreach (KeyValuePair<Term, Num> term in @in.terms)
+ {
+ terms[term.Key] = term.Value;
+ }
+ foreach (KeyValuePair<Query, int> term in @in.queries)
+ {
+ queries[term.Key] = term.Value;
+ }
+
+ docIDs.AddRange(@in.docIDs);
+ @in.Clear();
+ }
+
+ internal virtual void Clear()
+ {
+ terms.Clear();
+ queries.Clear();
+ docIDs.Clear();
+ numTerms = 0;
+ bytesUsed = 0;
+ }
+
+ internal virtual void AddBytesUsed(long b)
+ {
+ bytesUsed += b;
+ }
+
+ internal virtual bool Any()
+ {
+ return terms.Count > 0 || docIDs.Count > 0 || queries.Count > 0;
+ }
+
+ // Remaps all buffered deletes based on a completed
+ // merge
+ internal virtual void Remap(MergeDocIDRemapper mapper, SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergeDocCount)
+ {
+ lock (this)
+ {
+ IDictionary<Term, Num> newDeleteTerms;
+
+ // Remap delete-by-term
+ if (terms.Count > 0)
+ {
+ if (doTermSort)
+ {
+ newDeleteTerms = new SortedDictionary<Term, Num>();
+ }
+ else
+ {
+ newDeleteTerms = new HashMap<Term, Num>();
+ }
+ foreach(var entry in terms)
+ {
+ Num num = entry.Value;
+ newDeleteTerms[entry.Key] = new Num(mapper.Remap(num.GetNum()));
+ }
+ }
+ else
+ newDeleteTerms = null;
+
+ // Remap delete-by-docID
+ List<int> newDeleteDocIDs;
+
+ if (docIDs.Count > 0)
+ {
+ newDeleteDocIDs = new List<int>(docIDs.Count);
+ foreach(int num in docIDs)
+ {
+ newDeleteDocIDs.Add(mapper.Remap(num));
+ }
+ }
+ else
+ newDeleteDocIDs = null;
+
+ // Remap delete-by-query
+ HashMap<Query, int> newDeleteQueries;
+
+ if (queries.Count > 0)
+ {
+ newDeleteQueries = new HashMap<Query, int>(queries.Count);
+ foreach(var entry in queries)
+ {
+ int num = entry.Value;
+ newDeleteQueries[entry.Key] = mapper.Remap(num);
+ }
+ }
+ else
+ newDeleteQueries = null;
+
+ if (newDeleteTerms != null)
+ terms = newDeleteTerms;
+ if (newDeleteDocIDs != null)
+ docIDs = newDeleteDocIDs;
+ if (newDeleteQueries != null)
+ queries = newDeleteQueries;
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/ByteBlockPool.cs b/src/core/Index/ByteBlockPool.cs
new file mode 100644
index 0000000..041c756
--- /dev/null
+++ b/src/core/Index/ByteBlockPool.cs
@@ -0,0 +1,172 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/* Class that Posting and PostingVector use to write byte
+* streams into shared fixed-size byte[] arrays. The idea
+* is to allocate slices of increasing lengths For
+* example, the first slice is 5 bytes, the next slice is
+* 14, etc. We start by writing our bytes into the first
+* 5 bytes. When we hit the end of the slice, we allocate
+* the next slice and then write the address of the new
+* slice into the last 4 bytes of the previous slice (the
+* "forwarding address").
+*
+* Each slice is filled with 0's initially, and we mark
+* the end with a non-zero byte. This way the methods
+* that are writing into the slice don't need to record
+* its length and instead allocate a new slice once they
+* hit a non-zero byte. */
+
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Index
+{
+
+ sealed public class ByteBlockPool
+ {
+ private void InitBlock()
+ {
+ byteUpto = DocumentsWriter.BYTE_BLOCK_SIZE;
+ }
+
+ public /*internal*/ abstract class Allocator
+ {
+ public /*internal*/ abstract void RecycleByteBlocks(byte[][] blocks, int start, int end);
+ public /*internal*/ abstract void RecycleByteBlocks(IList<byte[]> blocks);
+ public /*internal*/ abstract byte[] GetByteBlock(bool trackAllocations);
+ }
+
+ public byte[][] buffers = new byte[10][];
+
+ internal int bufferUpto = - 1; // Which buffer we are upto
+ public int byteUpto; // Where we are in head buffer
+
+ public byte[] buffer; // Current head buffer
+ public int byteOffset = - DocumentsWriter.BYTE_BLOCK_SIZE; // Current head offset
+
+ private readonly bool trackAllocations;
+ private readonly Allocator allocator;
+
+ public ByteBlockPool(Allocator allocator, bool trackAllocations)
+ {
+ InitBlock();
+ this.allocator = allocator;
+ this.trackAllocations = trackAllocations;
+ }
+
+ public void Reset()
+ {
+ if (bufferUpto != - 1)
+ {
+ // We allocated at least one buffer
+
+ for (int i = 0; i < bufferUpto; i++)
+ // Fully zero fill buffers that we fully used
+ System.Array.Clear(buffers[i], 0, buffers[i].Length);
+
+ // Partial zero fill the final buffer
+ System.Array.Clear(buffers[bufferUpto], 0, byteUpto);
+
+ if (bufferUpto > 0)
+ // Recycle all but the first buffer
+ allocator.RecycleByteBlocks(buffers, 1, 1 + bufferUpto);
+
+ // Re-use the first buffer
+ bufferUpto = 0;
+ byteUpto = 0;
+ byteOffset = 0;
+ buffer = buffers[0];
+ }
+ }
+
+ public void NextBuffer()
+ {
+ if (1 + bufferUpto == buffers.Length)
+ {
+ var newBuffers = new byte[(int) (buffers.Length * 1.5)][];
+ Array.Copy(buffers, 0, newBuffers, 0, buffers.Length);
+ buffers = newBuffers;
+ }
+ buffer = buffers[1 + bufferUpto] = allocator.GetByteBlock(trackAllocations);
+ bufferUpto++;
+
+ byteUpto = 0;
+ byteOffset += DocumentsWriter.BYTE_BLOCK_SIZE;
+ }
+
+ public int NewSlice(int size)
+ {
+ if (byteUpto > DocumentsWriter.BYTE_BLOCK_SIZE - size)
+ NextBuffer();
+ int upto = byteUpto;
+ byteUpto += size;
+ buffer[byteUpto - 1] = 16;
+ return upto;
+ }
+
+ // Size of each slice. These arrays should be at most 16
+ // elements (index is encoded with 4 bits). First array
+ // is just a compact way to encode X+1 with a max. Second
+ // array is the length of each slice, ie first slice is 5
+ // bytes, next slice is 14 bytes, etc.
+ internal static readonly int[] nextLevelArray = new int[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 9};
+ internal static readonly int[] levelSizeArray = new int[]{5, 14, 20, 30, 40, 40, 80, 80, 120, 200};
+ internal static readonly int FIRST_LEVEL_SIZE = levelSizeArray[0];
+ public readonly static int FIRST_LEVEL_SIZE_For_NUnit_Test = levelSizeArray[0];
+
+ public int AllocSlice(byte[] slice, int upto)
+ {
+
+ int level = slice[upto] & 15;
+ int newLevel = nextLevelArray[level];
+ int newSize = levelSizeArray[newLevel];
+
+ // Maybe allocate another block
+ if (byteUpto > DocumentsWriter.BYTE_BLOCK_SIZE - newSize)
+ NextBuffer();
+
+ int newUpto = byteUpto;
+ int offset = newUpto + byteOffset;
+ byteUpto += newSize;
+
+ // Copy forward the past 3 bytes (which we are about
+ // to overwrite with the forwarding address):
+ buffer[newUpto] = slice[upto - 3];
+ buffer[newUpto + 1] = slice[upto - 2];
+ buffer[newUpto + 2] = slice[upto - 1];
+
+ // Write forwarding address at end of last slice:
+ slice[upto - 3] = (byte) (Number.URShift(offset, 24));
+ slice[upto - 2] = (byte) (Number.URShift(offset, 16));
+ slice[upto - 1] = (byte) (Number.URShift(offset, 8));
+ slice[upto] = (byte) offset;
+
+ // Write new level:
+ buffer[byteUpto - 1] = (byte) (16 | newLevel);
+
+ return newUpto + 3;
+ }
+
+ public static int FIRST_LEVEL_SIZE_ForNUnit
+ {
+ get { return FIRST_LEVEL_SIZE; }
+ }
+ }
+} \ No newline at end of file
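A hedged illustration (not part of this patch) of the 4-byte big-endian "forwarding address" that AllocSlice writes into the last bytes of a full slice, and that ByteSliceReader.NextSlice (in the next file) reassembles; the offset value is an assumption for the example.

    int offset = 123456;                     // assumed global offset of the newly allocated slice
    byte[] forward =
    {
        (byte)((uint)offset >> 24),
        (byte)((uint)offset >> 16),
        (byte)((uint)offset >> 8),
        (byte)offset,
    };
    // A reader reassembles it the same way NextSlice does:
    int decoded = ((forward[0] & 0xff) << 24) | ((forward[1] & 0xff) << 16)
                | ((forward[2] & 0xff) << 8)  |  (forward[3] & 0xff);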
diff --git a/src/core/Index/ByteSliceReader.cs b/src/core/Index/ByteSliceReader.cs
new file mode 100644
index 0000000..8b672fe
--- /dev/null
+++ b/src/core/Index/ByteSliceReader.cs
@@ -0,0 +1,185 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using IndexInput = Lucene.Net.Store.IndexInput;
+using IndexOutput = Lucene.Net.Store.IndexOutput;
+
+namespace Lucene.Net.Index
+{
+
+ /* IndexInput that knows how to read the byte slices written
+ * by Posting and PostingVector. We read the bytes in
+ * each slice until we hit the end of that slice at which
+ * point we read the forwarding address of the next slice
+ * and then jump to it.*/
+ public sealed class ByteSliceReader : IndexInput
+ {
+ internal ByteBlockPool pool;
+ internal int bufferUpto;
+ internal byte[] buffer;
+ public int upto;
+ internal int limit;
+ internal int level;
+ public int bufferOffset;
+
+ public int endIndex;
+
+ public void Init(ByteBlockPool pool, int startIndex, int endIndex)
+ {
+
+ System.Diagnostics.Debug.Assert(endIndex - startIndex >= 0);
+ System.Diagnostics.Debug.Assert(startIndex >= 0);
+ System.Diagnostics.Debug.Assert(endIndex >= 0);
+
+ this.pool = pool;
+ this.endIndex = endIndex;
+
+ level = 0;
+ bufferUpto = startIndex / DocumentsWriter.BYTE_BLOCK_SIZE;
+ bufferOffset = bufferUpto * DocumentsWriter.BYTE_BLOCK_SIZE;
+ buffer = pool.buffers[bufferUpto];
+ upto = startIndex & DocumentsWriter.BYTE_BLOCK_MASK;
+
+ int firstSize = ByteBlockPool.levelSizeArray[0];
+
+ if (startIndex + firstSize >= endIndex)
+ {
+ // There is only this one slice to read
+ limit = endIndex & DocumentsWriter.BYTE_BLOCK_MASK;
+ }
+ else
+ limit = upto + firstSize - 4;
+ }
+
+ public bool Eof()
+ {
+ System.Diagnostics.Debug.Assert(upto + bufferOffset <= endIndex);
+ return upto + bufferOffset == endIndex;
+ }
+
+ public override byte ReadByte()
+ {
+ System.Diagnostics.Debug.Assert(!Eof());
+ System.Diagnostics.Debug.Assert(upto <= limit);
+ if (upto == limit)
+ NextSlice();
+ return buffer[upto++];
+ }
+
+ public long WriteTo(IndexOutput @out)
+ {
+ long size = 0;
+ while (true)
+ {
+ if (limit + bufferOffset == endIndex)
+ {
+ System.Diagnostics.Debug.Assert(endIndex - bufferOffset >= upto);
+ @out.WriteBytes(buffer, upto, limit - upto);
+ size += limit - upto;
+ break;
+ }
+ else
+ {
+ @out.WriteBytes(buffer, upto, limit - upto);
+ size += limit - upto;
+ NextSlice();
+ }
+ }
+
+ return size;
+ }
+
+ public void NextSlice()
+ {
+
+ // Skip to our next slice
+ int nextIndex = ((buffer[limit] & 0xff) << 24) + ((buffer[1 + limit] & 0xff) << 16) + ((buffer[2 + limit] & 0xff) << 8) + (buffer[3 + limit] & 0xff);
+
+ level = ByteBlockPool.nextLevelArray[level];
+ int newSize = ByteBlockPool.levelSizeArray[level];
+
+ bufferUpto = nextIndex / DocumentsWriter.BYTE_BLOCK_SIZE;
+ bufferOffset = bufferUpto * DocumentsWriter.BYTE_BLOCK_SIZE;
+
+ buffer = pool.buffers[bufferUpto];
+ upto = nextIndex & DocumentsWriter.BYTE_BLOCK_MASK;
+
+ if (nextIndex + newSize >= endIndex)
+ {
+ // We are advancing to the final slice
+ System.Diagnostics.Debug.Assert(endIndex - nextIndex > 0);
+ limit = endIndex - bufferOffset;
+ }
+ else
+ {
+ // This is not the final slice (subtract 4 for the
+ // forwarding address at the end of this new slice)
+ limit = upto + newSize - 4;
+ }
+ }
+
+ public override void ReadBytes(byte[] b, int offset, int len)
+ {
+ while (len > 0)
+ {
+ int numLeft = limit - upto;
+ if (numLeft < len)
+ {
+ // Read entire slice
+ Array.Copy(buffer, upto, b, offset, numLeft);
+ offset += numLeft;
+ len -= numLeft;
+ NextSlice();
+ }
+ else
+ {
+ // This slice is the last one
+ Array.Copy(buffer, upto, b, offset, len);
+ upto += len;
+ break;
+ }
+ }
+ }
+
+ public override long FilePointer
+ {
+ get { throw new NotImplementedException(); }
+ }
+
+ public override long Length()
+ {
+ throw new NotImplementedException();
+ }
+ public override void Seek(long pos)
+ {
+ throw new NotImplementedException();
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ // Do nothing...
+ }
+
+ override public Object Clone()
+ {
+ System.Diagnostics.Debug.Fail("Port issue:", "Let see if we need this ByteSliceReader.Clone()"); // {{Aroush-2.9}}
+ return null;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/ByteSliceWriter.cs b/src/core/Index/ByteSliceWriter.cs
new file mode 100644
index 0000000..86bbca0
--- /dev/null
+++ b/src/core/Index/ByteSliceWriter.cs
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Index
+{
+ /// <summary> Class to write byte streams into slices of shared
+ /// byte[]. This is used by DocumentsWriter to hold the
+ /// posting list for many terms in RAM.
+ /// </summary>
+ public sealed class ByteSliceWriter
+ {
+ private byte[] slice;
+ private int upto;
+ private readonly ByteBlockPool pool;
+
+ internal int offset0;
+
+ public ByteSliceWriter(ByteBlockPool pool)
+ {
+ this.pool = pool;
+ }
+
+ /// <summary> Set up the writer to write at address.</summary>
+ public void Init(int address)
+ {
+ slice = pool.buffers[address >> DocumentsWriter.BYTE_BLOCK_SHIFT];
+ System.Diagnostics.Debug.Assert(slice != null);
+ upto = address & DocumentsWriter.BYTE_BLOCK_MASK;
+ offset0 = address;
+ System.Diagnostics.Debug.Assert(upto < slice.Length);
+ }
+
+ /// <summary>Write byte into byte slice stream </summary>
+ public void WriteByte(byte b)
+ {
+ System.Diagnostics.Debug.Assert(slice != null);
+ if (slice[upto] != 0)
+ {
+ upto = pool.AllocSlice(slice, upto);
+ slice = pool.buffer;
+ offset0 = pool.byteOffset;
+ System.Diagnostics.Debug.Assert(slice != null);
+ }
+ slice[upto++] = b;
+ System.Diagnostics.Debug.Assert(upto != slice.Length);
+ }
+
+ public void WriteBytes(byte[] b, int offset, int len)
+ {
+ int offsetEnd = offset + len;
+ while (offset < offsetEnd)
+ {
+ if (slice[upto] != 0)
+ {
+ // End marker
+ upto = pool.AllocSlice(slice, upto);
+ slice = pool.buffer;
+ offset0 = pool.byteOffset;
+ }
+
+ slice[upto++] = b[offset++];
+ System.Diagnostics.Debug.Assert(upto != slice.Length);
+ }
+ }
+
+ public int Address
+ {
+ get { return upto + (offset0 & DocumentsWriter.BYTE_BLOCK_NOT_MASK); }
+ }
+
+ public void WriteVInt(int i)
+ {
+ while ((i & ~ 0x7F) != 0)
+ {
+ WriteByte((byte) ((i & 0x7f) | 0x80));
+ i = Number.URShift(i, 7);
+ }
+ WriteByte((byte) i);
+ }
+ }
+} \ No newline at end of file
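WriteVInt above uses the usual 7-bits-per-byte variable-length encoding, with the high bit marking that another byte follows. A standalone sketch (not part of this patch) of the same encoding and its inverse, written against a plain Stream for illustration:

    static void WriteVInt(System.IO.Stream outStream, int i)
    {
        while ((i & ~0x7F) != 0)
        {
            outStream.WriteByte((byte)((i & 0x7F) | 0x80));   // low 7 bits plus continuation bit
            i = (int)((uint)i >> 7);                          // unsigned shift, like Number.URShift
        }
        outStream.WriteByte((byte)i);                         // final byte, high bit clear
    }

    static int ReadVInt(System.IO.Stream inStream)
    {
        int b = inStream.ReadByte();
        int value = b & 0x7F;
        for (int shift = 7; (b & 0x80) != 0; shift += 7)
        {
            b = inStream.ReadByte();
            value |= (b & 0x7F) << shift;
        }
        return value;
    }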
diff --git a/src/core/Index/CharBlockPool.cs b/src/core/Index/CharBlockPool.cs
new file mode 100644
index 0000000..0631fe0
--- /dev/null
+++ b/src/core/Index/CharBlockPool.cs
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ sealed class CharBlockPool
+ {
+ private void InitBlock()
+ {
+ charUpto = DocumentsWriter.CHAR_BLOCK_SIZE;
+ }
+
+ public char[][] buffers = new char[10][];
+ internal int numBuffer;
+
+ internal int bufferUpto = - 1; // Which buffer we are upto
+ public int charUpto; // Where we are in head buffer
+
+ public char[] buffer; // Current head buffer
+ public int charOffset = - DocumentsWriter.CHAR_BLOCK_SIZE; // Current head offset
+ private readonly DocumentsWriter docWriter;
+
+ public CharBlockPool(DocumentsWriter docWriter)
+ {
+ InitBlock();
+ this.docWriter = docWriter;
+ }
+
+ public void Reset()
+ {
+ docWriter.RecycleCharBlocks(buffers, 1 + bufferUpto);
+ bufferUpto = - 1;
+ charUpto = DocumentsWriter.CHAR_BLOCK_SIZE;
+ charOffset = - DocumentsWriter.CHAR_BLOCK_SIZE;
+ }
+
+ public void NextBuffer()
+ {
+ if (1 + bufferUpto == buffers.Length)
+ {
+ var newBuffers = new char[(int) (buffers.Length * 1.5)][];
+ Array.Copy(buffers, 0, newBuffers, 0, buffers.Length);
+ buffers = newBuffers;
+ }
+ buffer = buffers[1 + bufferUpto] = docWriter.GetCharBlock();
+ bufferUpto++;
+
+ charUpto = 0;
+ charOffset += DocumentsWriter.CHAR_BLOCK_SIZE;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/CheckIndex.cs b/src/core/Index/CheckIndex.cs
new file mode 100644
index 0000000..8917903
--- /dev/null
+++ b/src/core/Index/CheckIndex.cs
@@ -0,0 +1,1017 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+using Lucene.Net.Support;
+using AbstractField = Lucene.Net.Documents.AbstractField;
+using Document = Lucene.Net.Documents.Document;
+using Directory = Lucene.Net.Store.Directory;
+using FSDirectory = Lucene.Net.Store.FSDirectory;
+using IndexInput = Lucene.Net.Store.IndexInput;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> Basic tool and API to check the health of an index and
+ /// write a new segments file that removes reference to
+ /// problematic segments.
+ ///
+ /// <p/>As this tool checks every byte in the index, on a large
+ /// index it can take quite a long time to run.
+ ///
+ /// <p/><b>WARNING</b>: this tool and API are new and
+ /// experimental and are subject to sudden change in the
+ /// next release. Please make a complete backup of your
+ /// index before using this to fix your index!
+ /// </summary>
+ public class CheckIndex
+ {
+ private StreamWriter infoStream;
+ private readonly Directory dir;
+
+ /// <summary> Returned from <see cref="CheckIndex_Renamed_Method()" /> detailing the health and status of the index.
+ ///
+ /// <p/><b>WARNING</b>: this API is new and experimental and is
+ /// subject to sudden change in the next release.
+ ///
+ /// </summary>
+
+ public class Status
+ {
+
+ /// <summary>True if no problems were found with the index. </summary>
+ public bool clean;
+
+ /// <summary>True if we were unable to locate and load the segments_N file. </summary>
+ public bool missingSegments;
+
+ /// <summary>True if we were unable to open the segments_N file. </summary>
+ public bool cantOpenSegments;
+
+ /// <summary>True if we were unable to read the version number from segments_N file. </summary>
+ public bool missingSegmentVersion;
+
+ /// <summary>Name of latest segments_N file in the index. </summary>
+ public System.String segmentsFileName;
+
+ /// <summary>Number of segments in the index. </summary>
+ public int numSegments;
+
+ /// <summary>String description of the version of the index. </summary>
+ public System.String segmentFormat;
+
+ /// <summary>Empty unless you passed specific segments list to check as optional 3rd argument.</summary>
+ /// <seealso>
+ /// <cref>CheckIndex.CheckIndex_Renamed_Method(System.Collections.IList)</cref>
+ /// </seealso>
+ public List<string> segmentsChecked = new List<string>();
+
+ /// <summary>True if the index was created with a newer version of Lucene than the CheckIndex tool. </summary>
+ public bool toolOutOfDate;
+
+ /// <summary>List of <see cref="SegmentInfoStatus" /> instances, detailing status of each segment. </summary>
+ public IList<SegmentInfoStatus> segmentInfos = new List<SegmentInfoStatus>();
+
+ /// <summary>Directory index is in. </summary>
+ public Directory dir;
+
+ /// <summary> SegmentInfos instance containing only segments that
+ /// had no problems (this is used with the <see cref="CheckIndex.FixIndex" />
+ /// method to repair the index).
+ /// </summary>
+ internal SegmentInfos newSegments;
+
+ /// <summary>How many documents will be lost to bad segments. </summary>
+ public int totLoseDocCount;
+
+ /// <summary>How many bad segments were found. </summary>
+ public int numBadSegments;
+
+ /// <summary>True if we checked only specific segments (<see cref="CheckIndex.CheckIndex_Renamed_Method(List{string})" />
+ /// was called with a non-null
+ /// argument).
+ /// </summary>
+ public bool partial;
+
+ /// <summary>Holds the userData of the last commit in the index </summary>
+ public IDictionary<string, string> userData;
+
+ /// <summary>Holds the status of each segment in the index.
+ /// See <see cref="SegmentInfos" />.
+ ///
+ /// <p/><b>WARNING</b>: this API is new and experimental and is
+ /// subject to sudden change in the next release.
+ /// </summary>
+ public class SegmentInfoStatus
+ {
+ /// <summary>Name of the segment. </summary>
+ public System.String name;
+
+ /// <summary>Document count (does not take deletions into account). </summary>
+ public int docCount;
+
+ /// <summary>True if segment is compound file format. </summary>
+ public bool compound;
+
+ /// <summary>Number of files referenced by this segment. </summary>
+ public int numFiles;
+
+ /// <summary>Net size (MB) of the files referenced by this
+ /// segment.
+ /// </summary>
+ public double sizeMB;
+
+ /// <summary>Doc store offset, if this segment shares the doc
+ /// store files (stored fields and term vectors) with
+ /// other segments. This is -1 if it does not share.
+ /// </summary>
+ public int docStoreOffset = - 1;
+
+ /// <summary>String of the shared doc store segment, or null if
+ /// this segment does not share the doc store files.
+ /// </summary>
+ public System.String docStoreSegment;
+
+ /// <summary>True if the shared doc store files are compound file
+ /// format.
+ /// </summary>
+ public bool docStoreCompoundFile;
+
+ /// <summary>True if this segment has pending deletions. </summary>
+ public bool hasDeletions;
+
+ /// <summary>Name of the current deletions file. </summary>
+ public System.String deletionsFileName;
+
+ /// <summary>Number of deleted documents. </summary>
+ public int numDeleted;
+
+ /// <summary>True if we were able to open a SegmentReader on this
+ /// segment.
+ /// </summary>
+ public bool openReaderPassed;
+
+ /// <summary>Number of fields in this segment. </summary>
+ internal int numFields;
+
+ /// <summary>True if at least one of the fields in this segment
+ /// does not omitTermFreqAndPositions.
+ /// </summary>
+ /// <seealso cref="AbstractField.OmitTermFreqAndPositions">
+ /// </seealso>
+ public bool hasProx;
+
+ /// <summary>Map&lt;String, String&gt; that includes certain
+ /// debugging details that IndexWriter records into
+ /// each segment it creates
+ /// </summary>
+ public IDictionary<string, string> diagnostics;
+
+ /// <summary>Status for testing of field norms (null if field norms could not be tested). </summary>
+ public FieldNormStatus fieldNormStatus;
+
+ /// <summary>Status for testing of indexed terms (null if indexed terms could not be tested). </summary>
+ public TermIndexStatus termIndexStatus;
+
+ /// <summary>Status for testing of stored fields (null if stored fields could not be tested). </summary>
+ public StoredFieldStatus storedFieldStatus;
+
+ /// <summary>Status for testing of term vectors (null if term vectors could not be tested). </summary>
+ public TermVectorStatus termVectorStatus;
+ }
+
+ /// <summary> Status from testing field norms.</summary>
+ public sealed class FieldNormStatus
+ {
+ /// <summary>Number of fields successfully tested </summary>
+ public long totFields = 0L;
+
+ /// <summary>Exception thrown during field norm test (null on success) </summary>
+ public System.Exception error = null;
+ }
+
+ /// <summary> Status from testing term index.</summary>
+ public sealed class TermIndexStatus
+ {
+ /// <summary>Total term count </summary>
+ public long termCount = 0L;
+
+ /// <summary>Total frequency across all terms. </summary>
+ public long totFreq = 0L;
+
+ /// <summary>Total number of positions. </summary>
+ public long totPos = 0L;
+
+ /// <summary>Exception thrown during term index test (null on success) </summary>
+ public System.Exception error = null;
+ }
+
+ /// <summary> Status from testing stored fields.</summary>
+ public sealed class StoredFieldStatus
+ {
+
+ /// <summary>Number of documents tested. </summary>
+ public int docCount = 0;
+
+ /// <summary>Total number of stored fields tested. </summary>
+ public long totFields = 0;
+
+ /// <summary>Exception thrown during stored fields test (null on success) </summary>
+ public System.Exception error = null;
+ }
+
+ /// <summary> Status from testing term vectors.</summary>
+ public sealed class TermVectorStatus
+ {
+
+ /// <summary>Number of documents tested. </summary>
+ public int docCount = 0;
+
+ /// <summary>Total number of term vectors tested. </summary>
+ public long totVectors = 0;
+
+ /// <summary>Exception thrown during term vector test (null on success) </summary>
+ public System.Exception error = null;
+ }
+ }
+
+ /// <summary>Create a new CheckIndex on the directory. </summary>
+ public CheckIndex(Directory dir)
+ {
+ this.dir = dir;
+ infoStream = null;
+ }
+
+ /// <summary>Set infoStream where messages should go. If null, no
+ /// messages are printed
+ /// </summary>
+ public virtual void SetInfoStream(StreamWriter @out)
+ {
+ infoStream = @out;
+ }
+
+ private void Msg(System.String msg)
+ {
+ if (infoStream != null)
+ infoStream.WriteLine(msg);
+ }
+
+ private class MySegmentTermDocs:SegmentTermDocs
+ {
+
+ internal int delCount;
+
+ internal MySegmentTermDocs(SegmentReader p):base(p)
+ {
+ }
+
+ public override void Seek(Term term)
+ {
+ base.Seek(term);
+ delCount = 0;
+ }
+
+ protected internal override void SkippingDoc()
+ {
+ delCount++;
+ }
+ }
+
+ /// <summary>Returns a <see cref="Status" /> instance detailing
+ /// the state of the index.
+ ///
+ /// <p/>As this method checks every byte in the index, on a large
+ /// index it can take quite a long time to run.
+ ///
+ /// <p/><b>WARNING</b>: make sure
+ /// you only call this when the index is not opened by any
+ /// writer.
+ /// </summary>
+ public virtual Status CheckIndex_Renamed_Method()
+ {
+ return CheckIndex_Renamed_Method(null);
+ }
+
+ /// <summary>Returns a <see cref="Status" /> instance detailing
+ /// the state of the index.
+ ///
+ /// </summary>
+ /// <param name="onlySegments">list of specific segment names to check
+ ///
+ /// <p/>As this method checks every byte in the specified
+ /// segments, on a large index it can take quite a long
+ /// time to run.
+ ///
+ /// <p/><b>WARNING</b>: make sure
+ /// you only call this when the index is not opened by any
+ /// writer.
+ /// </param>
+ public virtual Status CheckIndex_Renamed_Method(List<string> onlySegments)
+ {
+ System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
+ SegmentInfos sis = new SegmentInfos();
+ Status result = new Status();
+ result.dir = dir;
+ try
+ {
+ sis.Read(dir);
+ }
+ catch (System.Exception t)
+ {
+ Msg("ERROR: could not read any segments file in directory");
+ result.missingSegments = true;
+ if (infoStream != null)
+ infoStream.WriteLine(t.StackTrace);
+ return result;
+ }
+
+ int numSegments = sis.Count;
+ var segmentsFileName = sis.GetCurrentSegmentFileName();
+ IndexInput input = null;
+ try
+ {
+ input = dir.OpenInput(segmentsFileName);
+ }
+ catch (System.Exception t)
+ {
+ Msg("ERROR: could not open segments file in directory");
+ if (infoStream != null)
+ infoStream.WriteLine(t.StackTrace);
+ result.cantOpenSegments = true;
+ return result;
+ }
+ int format = 0;
+ try
+ {
+ format = input.ReadInt();
+ }
+ catch (System.Exception t)
+ {
+ Msg("ERROR: could not read segment file version in directory");
+ if (infoStream != null)
+ infoStream.WriteLine(t.StackTrace);
+ result.missingSegmentVersion = true;
+ return result;
+ }
+ finally
+ {
+ if (input != null)
+ input.Close();
+ }
+
+ System.String sFormat = "";
+ bool skip = false;
+
+ if (format == SegmentInfos.FORMAT)
+ sFormat = "FORMAT [Lucene Pre-2.1]";
+ if (format == SegmentInfos.FORMAT_LOCKLESS)
+ sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
+ else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
+ sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
+ else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
+ sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
+ else
+ {
+ if (format == SegmentInfos.FORMAT_CHECKSUM)
+ sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
+ else if (format == SegmentInfos.FORMAT_DEL_COUNT)
+ sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
+ else if (format == SegmentInfos.FORMAT_HAS_PROX)
+ sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
+ else if (format == SegmentInfos.FORMAT_USER_DATA)
+ sFormat = "FORMAT_USER_DATA [Lucene 2.9]";
+ else if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
+ sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
+ else if (format < SegmentInfos.CURRENT_FORMAT)
+ {
+ sFormat = "int=" + format + " [newer version of Lucene than this tool]";
+ skip = true;
+ }
+ else
+ {
+ sFormat = format + " [Lucene 1.3 or prior]";
+ }
+ }
+
+ result.segmentsFileName = segmentsFileName;
+ result.numSegments = numSegments;
+ result.segmentFormat = sFormat;
+ result.userData = sis.UserData;
+ System.String userDataString;
+ if (sis.UserData.Count > 0)
+ {
+ userDataString = " userData=" + CollectionsHelper.CollectionToString(sis.UserData);
+ }
+ else
+ {
+ userDataString = "";
+ }
+
+ Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString);
+
+ if (onlySegments != null)
+ {
+ result.partial = true;
+ if (infoStream != null)
+ infoStream.Write("\nChecking only these segments:");
+ foreach(string s in onlySegments)
+ {
+ if (infoStream != null)
+ {
+ infoStream.Write(" " + s);
+ }
+ }
+ result.segmentsChecked.AddRange(onlySegments);
+ Msg(":");
+ }
+
+ if (skip)
+ {
+ Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
+ result.toolOutOfDate = true;
+ return result;
+ }
+
+
+ result.newSegments = (SegmentInfos) sis.Clone();
+ result.newSegments.Clear();
+
+ for (int i = 0; i < numSegments; i++)
+ {
+ SegmentInfo info = sis.Info(i);
+ if (onlySegments != null && !onlySegments.Contains(info.name))
+ continue;
+ var segInfoStat = new Status.SegmentInfoStatus();
+ result.segmentInfos.Add(segInfoStat);
+ Msg(" " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
+ segInfoStat.name = info.name;
+ segInfoStat.docCount = info.docCount;
+
+ int toLoseDocCount = info.docCount;
+
+ SegmentReader reader = null;
+
+ try
+ {
+ Msg(" compound=" + info.GetUseCompoundFile());
+ segInfoStat.compound = info.GetUseCompoundFile();
+ Msg(" hasProx=" + info.HasProx);
+ segInfoStat.hasProx = info.HasProx;
+ Msg(" numFiles=" + info.Files().Count);
+ segInfoStat.numFiles = info.Files().Count;
+ Msg(System.String.Format(nf, " size (MB)={0:f}", new System.Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));
+ segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0);
+ IDictionary<string, string> diagnostics = info.Diagnostics;
+ segInfoStat.diagnostics = diagnostics;
+ if (diagnostics.Count > 0)
+ {
+ Msg(" diagnostics = " + CollectionsHelper.CollectionToString(diagnostics));
+ }
+
+ int docStoreOffset = info.DocStoreOffset;
+ if (docStoreOffset != - 1)
+ {
+ Msg(" docStoreOffset=" + docStoreOffset);
+ segInfoStat.docStoreOffset = docStoreOffset;
+ Msg(" docStoreSegment=" + info.DocStoreSegment);
+ segInfoStat.docStoreSegment = info.DocStoreSegment;
+ Msg(" docStoreIsCompoundFile=" + info.DocStoreIsCompoundFile);
+ segInfoStat.docStoreCompoundFile = info.DocStoreIsCompoundFile;
+ }
+ System.String delFileName = info.GetDelFileName();
+ if (delFileName == null)
+ {
+ Msg(" no deletions");
+ segInfoStat.hasDeletions = false;
+ }
+ else
+ {
+ Msg(" has deletions [delFileName=" + delFileName + "]");
+ segInfoStat.hasDeletions = true;
+ segInfoStat.deletionsFileName = delFileName;
+ }
+ if (infoStream != null)
+ infoStream.Write(" test: open reader.........");
+ reader = SegmentReader.Get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
+
+ segInfoStat.openReaderPassed = true;
+
+ int numDocs = reader.NumDocs();
+ toLoseDocCount = numDocs;
+ if (reader.HasDeletions)
+ {
+ if (reader.deletedDocs.Count() != info.GetDelCount())
+ {
+ throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
+ }
+ if (reader.deletedDocs.Count() > reader.MaxDoc)
+ {
+ throw new System.SystemException("too many deleted docs: MaxDoc=" + reader.MaxDoc + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
+ }
+ if (info.docCount - numDocs != info.GetDelCount())
+ {
+ throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
+ }
+ segInfoStat.numDeleted = info.docCount - numDocs;
+ Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
+ }
+ else
+ {
+ if (info.GetDelCount() != 0)
+ {
+ throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
+ }
+ Msg("OK");
+ }
+ if (reader.MaxDoc != info.docCount)
+ throw new System.SystemException("SegmentReader.MaxDoc " + reader.MaxDoc + " != SegmentInfos.docCount " + info.docCount);
+
+ // Test getFieldNames()
+ if (infoStream != null)
+ {
+ infoStream.Write(" test: fields..............");
+ }
+ ICollection<string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL);
+ Msg("OK [" + fieldNames.Count + " fields]");
+ segInfoStat.numFields = fieldNames.Count;
+
+ // Test Field Norms
+ segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader);
+
+ // Test the Term Index
+ segInfoStat.termIndexStatus = TestTermIndex(info, reader);
+
+ // Test Stored Fields
+ segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf);
+
+ // Test Term Vectors
+ segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf);
+
+ // Rethrow the first exception we encountered
+ // This will cause stats for failed segments to be incremented properly
+ if (segInfoStat.fieldNormStatus.error != null)
+ {
+ throw new SystemException("Field Norm test failed");
+ }
+ else if (segInfoStat.termIndexStatus.error != null)
+ {
+ throw new SystemException("Term Index test failed");
+ }
+ else if (segInfoStat.storedFieldStatus.error != null)
+ {
+ throw new SystemException("Stored Field test failed");
+ }
+ else if (segInfoStat.termVectorStatus.error != null)
+ {
+ throw new System.SystemException("Term Vector test failed");
+ }
+
+ Msg("");
+ }
+ catch (System.Exception t)
+ {
+ Msg("FAILED");
+ const string comment = "fixIndex() would remove reference to this segment";
+ Msg(" WARNING: " + comment + "; full exception:");
+ if (infoStream != null)
+ infoStream.WriteLine(t.StackTrace);
+ Msg("");
+ result.totLoseDocCount += toLoseDocCount;
+ result.numBadSegments++;
+ continue;
+ }
+ finally
+ {
+ if (reader != null)
+ reader.Close();
+ }
+
+ // Keeper
+ result.newSegments.Add((SegmentInfo)info.Clone());
+ }
+
+ if (0 == result.numBadSegments)
+ {
+ result.clean = true;
+ Msg("No problems were detected with this index.\n");
+ }
+ else
+ Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");
+
+ return result;
+ }
+
+ /// <summary> Test field norms.</summary>
+ private Status.FieldNormStatus TestFieldNorms(IEnumerable<string> fieldNames, SegmentReader reader)
+ {
+ var status = new Status.FieldNormStatus();
+
+ try
+ {
+ // Test Field Norms
+ if (infoStream != null)
+ {
+ infoStream.Write(" test: field norms.........");
+ }
+
+ var b = new byte[reader.MaxDoc];
+ foreach(string fieldName in fieldNames)
+ {
+ if (reader.HasNorms(fieldName))
+ {
+ reader.Norms(fieldName, b, 0);
+ ++status.totFields;
+ }
+ }
+
+ Msg("OK [" + status.totFields + " fields]");
+ }
+ catch (System.Exception e)
+ {
+ Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
+ status.error = e;
+ if (infoStream != null)
+ {
+ infoStream.WriteLine(e.StackTrace);
+ }
+ }
+
+ return status;
+ }
+
+ /// <summary> Test the term index.</summary>
+ private Status.TermIndexStatus TestTermIndex(SegmentInfo info, SegmentReader reader)
+ {
+ var status = new Status.TermIndexStatus();
+
+ try
+ {
+ if (infoStream != null)
+ {
+ infoStream.Write(" test: terms, freq, prox...");
+ }
+
+ TermEnum termEnum = reader.Terms();
+ TermPositions termPositions = reader.TermPositions();
+
+ // Used only to count up # deleted docs for this term
+ var myTermDocs = new MySegmentTermDocs(reader);
+
+ int maxDoc = reader.MaxDoc;
+
+ while (termEnum.Next())
+ {
+ status.termCount++;
+ Term term = termEnum.Term;
+ int docFreq = termEnum.DocFreq();
+ termPositions.Seek(term);
+ int lastDoc = - 1;
+ int freq0 = 0;
+ status.totFreq += docFreq;
+ while (termPositions.Next())
+ {
+ freq0++;
+ int doc = termPositions.Doc;
+ int freq = termPositions.Freq;
+ if (doc <= lastDoc)
+ {
+ throw new System.SystemException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
+ }
+ if (doc >= maxDoc)
+ {
+ throw new System.SystemException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
+ }
+
+ lastDoc = doc;
+ if (freq <= 0)
+ {
+ throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
+ }
+
+ int lastPos = - 1;
+ status.totPos += freq;
+ for (int j = 0; j < freq; j++)
+ {
+ int pos = termPositions.NextPosition();
+ if (pos < - 1)
+ {
+ throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
+ }
+ if (pos < lastPos)
+ {
+ throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
+ }
+ lastPos = pos;
+ }
+ }
+
+ // Now count how many deleted docs occurred in
+ // this term:
+ int delCount;
+ if (reader.HasDeletions)
+ {
+ myTermDocs.Seek(term);
+ while (myTermDocs.Next())
+ {
+ }
+ delCount = myTermDocs.delCount;
+ }
+ else
+ {
+ delCount = 0;
+ }
+
+ if (freq0 + delCount != docFreq)
+ {
+ throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
+ }
+ }
+
+ Msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
+ }
+ catch (System.Exception e)
+ {
+ Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
+ status.error = e;
+ if (infoStream != null)
+ {
+ infoStream.WriteLine(e.StackTrace);
+ }
+ }
+
+ return status;
+ }
+
+ /// <summary> Test stored fields for a segment.</summary>
+ private Status.StoredFieldStatus TestStoredFields(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)
+ {
+ var status = new Status.StoredFieldStatus();
+
+ try
+ {
+ if (infoStream != null)
+ {
+ infoStream.Write(" test: stored fields.......");
+ }
+
+ // Scan stored fields for all documents
+ for (int j = 0; j < info.docCount; ++j)
+ {
+ if (!reader.IsDeleted(j))
+ {
+ status.docCount++;
+ Document doc = reader.Document(j);
+ status.totFields += doc.GetFields().Count;
+ }
+ }
+
+ // Validate docCount
+ if (status.docCount != reader.NumDocs())
+ {
+ throw new System.SystemException("docCount=" + status.docCount + " but saw " + status.docCount + " undeleted docs");
+ }
+
+ Msg(string.Format(format, "OK [{0:d} total field count; avg {1:f} fields per doc]", new object[] { status.totFields, (((float) status.totFields) / status.docCount) }));
+ }
+ catch (System.Exception e)
+ {
+ Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
+ status.error = e;
+ if (infoStream != null)
+ {
+ infoStream.WriteLine(e.StackTrace);
+ }
+ }
+
+ return status;
+ }
+
+ /// <summary> Test term vectors for a segment.</summary>
+ private Status.TermVectorStatus TestTermVectors(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)
+ {
+ var status = new Status.TermVectorStatus();
+
+ try
+ {
+ if (infoStream != null)
+ {
+ infoStream.Write(" test: term vectors........");
+ }
+
+ for (int j = 0; j < info.docCount; ++j)
+ {
+ if (!reader.IsDeleted(j))
+ {
+ status.docCount++;
+ ITermFreqVector[] tfv = reader.GetTermFreqVectors(j);
+ if (tfv != null)
+ {
+ status.totVectors += tfv.Length;
+ }
+ }
+ }
+
+ Msg(System.String.Format(format, "OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]", new object[] { status.totVectors, (((float) status.totVectors) / status.docCount) }));
+ }
+ catch (System.Exception e)
+ {
+ Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
+ status.error = e;
+ if (infoStream != null)
+ {
+ infoStream.WriteLine(e.StackTrace);
+ }
+ }
+
+ return status;
+ }
+
+ /// <summary>Repairs the index using a previously returned result
+ /// from <see cref="CheckIndex" />. Note that this does not
+ /// remove any of the unreferenced files after it's done;
+ /// you must separately open an <see cref="IndexWriter" />, which
+ /// deletes unreferenced files when it's created.
+ ///
+ /// <p/><b>WARNING</b>: this writes a
+ /// new segments file into the index, effectively removing
+ /// all documents in broken segments from the index.
+ /// BE CAREFUL.
+ ///
+ /// <p/><b>WARNING</b>: Make sure you only call this when the
+ /// index is not opened by any writer.
+ /// </summary>
+ public virtual void FixIndex(Status result)
+ {
+ if (result.partial)
+ throw new System.ArgumentException("can only fix an index that was fully checked (this status checked a subset of segments)");
+ result.newSegments.Commit(result.dir);
+ }
+
+ private static bool assertsOn;
+
+ private static bool TestAsserts()
+ {
+ assertsOn = true;
+ return true;
+ }
+
+ private static bool AssertsOn()
+ {
+ // Debug.Assert evaluates its argument only when assertions are enabled,
+ // so TestAsserts() runs (and sets assertsOn) only in that case.
+ System.Diagnostics.Debug.Assert(TestAsserts());
+ return assertsOn;
+ }
+
+ /// <summary>Command-line interface to check and fix an index.
+ /// <p/>
+ /// Run it like this:
+ /// <code>
+ /// java -ea:Lucene.Net... Lucene.Net.Index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]
+ /// </code>
+ /// <list type="bullet">
+ /// <item><c>-fix</c>: actually write a new segments_N file, removing any problematic segments</item>
+ /// <item><c>-segment X</c>: only check the specified
+ /// segment(s). This can be specified multiple times,
+ /// to check more than one segment, eg <c>-segment _2
+ /// -segment _a</c>. You can't use this with the -fix
+ /// option.</item>
+ /// </list>
+ /// <p/><b>WARNING</b>: <c>-fix</c> should only be used on an emergency basis as it will cause
+ /// documents (perhaps many) to be permanently removed from the index. Always make
+ /// a backup copy of your index before running this! Do not run this tool on an index
+ /// that is actively being written to. You have been warned!
+ /// <p/> Run without -fix, this tool will open the index, report version information
+ /// and report any exceptions it hits and what action it would take if -fix were
+ /// specified. With -fix, this tool will remove any segments that have issues and
+ /// write a new segments_N file. This means all documents contained in the affected
+ /// segments will be removed.
+ /// <p/>
+ /// This tool exits with exit code 1 if the index cannot be opened or has any
+ /// corruption, else 0.
+ /// </summary>
+ [STAThread]
+ public static void Main(System.String[] args)
+ {
+
+ bool doFix = false;
+ var onlySegments = new List<string>();
+ System.String indexPath = null;
+ int i = 0;
+ while (i < args.Length)
+ {
+ if (args[i].Equals("-fix"))
+ {
+ doFix = true;
+ i++;
+ }
+ else if (args[i].Equals("-segment"))
+ {
+ if (i == args.Length - 1)
+ {
+ System.Console.Out.WriteLine("ERROR: missing name for -segment option");
+ System.Environment.Exit(1);
+ }
+ onlySegments.Add(args[i + 1]);
+ i += 2;
+ }
+ else
+ {
+ if (indexPath != null)
+ {
+ System.Console.Out.WriteLine("ERROR: unexpected extra argument '" + args[i] + "'");
+ System.Environment.Exit(1);
+ }
+ indexPath = args[i];
+ i++;
+ }
+ }
+
+ if (indexPath == null)
+ {
+ System.Console.Out.WriteLine("\nERROR: index path not specified");
+ System.Console.Out.WriteLine("\nUsage: java Lucene.Net.Index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]\n" + "\n" + " -fix: actually write a new segments_N file, removing any problematic segments\n" + " -segment X: only check the specified segments. This can be specified multiple\n" + " times, to check more than one segment, eg '-segment _2 -segment _a'.\n" + " You can't use this with the -fix option\n" + "\n" + "**WARNING**: -fix should only be used on an emergency basis as it will cause\n" + "documents (perhaps many) to be permanently removed from the index. Always make\n" + "a backup copy of your index before running this! Do not run this tool on an index\n" + "that is actively being written to. You have been warned!\n" + "\n" + "Run without -fix, this tool will open the index, report version information\n" + "and report any exceptions it hits and what action it would take if -fix were\n" + "specified. With -fix, this tool will remove any segments that have issues and\n" + "write a new segments_N file. This means all documents contained in the affected\n" + "segments will be removed.\n" + "\n" + "This tool exits with exit code 1 if the index cannot be opened or has any\n" + "corruption, else 0.\n");
+ System.Environment.Exit(1);
+ }
+
+ if (!AssertsOn())
+ System.Console.Out.WriteLine("\nNOTE: testing will be more thorough if you run java with '-ea:Lucene.Net...', so assertions are enabled");
+
+ if (onlySegments.Count == 0)
+ onlySegments = null;
+ else if (doFix)
+ {
+ System.Console.Out.WriteLine("ERROR: cannot specify both -fix and -segment");
+ System.Environment.Exit(1);
+ }
+
+ System.Console.Out.WriteLine("\nOpening index @ " + indexPath + "\n");
+ Directory dir = null;
+ try
+ {
+ dir = FSDirectory.Open(new System.IO.DirectoryInfo(indexPath));
+ }
+ catch (Exception t)
+ {
+ Console.Out.WriteLine("ERROR: could not open directory \"" + indexPath + "\"; exiting");
+ Console.Out.WriteLine(t.StackTrace);
+ Environment.Exit(1);
+ }
+
+ var checker = new CheckIndex(dir);
+ var tempWriter = new System.IO.StreamWriter(System.Console.OpenStandardOutput(), System.Console.Out.Encoding)
+ {AutoFlush = true};
+ checker.SetInfoStream(tempWriter);
+
+ Status result = checker.CheckIndex_Renamed_Method(onlySegments);
+ if (result.missingSegments)
+ {
+ System.Environment.Exit(1);
+ }
+
+ if (!result.clean)
+ {
+ if (!doFix)
+ {
+ System.Console.Out.WriteLine("WARNING: would write new segments file, and " + result.totLoseDocCount + " documents would be lost, if -fix were specified\n");
+ }
+ else
+ {
+ Console.Out.WriteLine("WARNING: " + result.totLoseDocCount + " documents will be lost\n");
+ Console.Out.WriteLine("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!");
+ for (var s = 0; s < 5; s++)
+ {
+ System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 1000));
+ System.Console.Out.WriteLine(" " + (5 - s) + "...");
+ }
+ Console.Out.WriteLine("Writing...");
+ checker.FixIndex(result);
+ Console.Out.WriteLine("OK");
+ Console.Out.WriteLine("Wrote new segments file \"" + result.newSegments.GetCurrentSegmentFileName() + "\"");
+ }
+ }
+ System.Console.Out.WriteLine("");
+
+ int exitCode;
+ if (result.clean)
+ exitCode = 0;
+ else
+ exitCode = 1;
+ System.Environment.Exit(exitCode);
+ }
+ }
+}
\ No newline at end of file
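The command-line Main above wraps the same calls that an embedding application would make directly. A minimal sketch of driving the CheckIndex API programmatically follows; it uses only members defined in this file, "path/to/index" is a placeholder, and error handling is omitted.

using System;
using System.IO;
using Lucene.Net.Index;
using Directory = Lucene.Net.Store.Directory;
using FSDirectory = Lucene.Net.Store.FSDirectory;

public static class CheckIndexExample
{
    public static void Main()
    {
        Directory dir = FSDirectory.Open(new DirectoryInfo("path/to/index"));
        var checker = new CheckIndex(dir);

        // Send progress messages to stdout.
        checker.SetInfoStream(new StreamWriter(Console.OpenStandardOutput()) { AutoFlush = true });

        // Check every segment (pass a List<string> of names to check a subset).
        CheckIndex.Status status = checker.CheckIndex_Renamed_Method();

        if (!status.clean)
        {
            // Destructive: drops every document held in the broken segments.
            checker.FixIndex(status);
        }
    }
}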
diff --git a/src/core/Index/CompoundFileReader.cs b/src/core/Index/CompoundFileReader.cs
new file mode 100644
index 0000000..74f4fb4
--- /dev/null
+++ b/src/core/Index/CompoundFileReader.cs
@@ -0,0 +1,317 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.Linq;
+using Lucene.Net.Support;
+using BufferedIndexInput = Lucene.Net.Store.BufferedIndexInput;
+using Directory = Lucene.Net.Store.Directory;
+using IndexInput = Lucene.Net.Store.IndexInput;
+using IndexOutput = Lucene.Net.Store.IndexOutput;
+using Lock = Lucene.Net.Store.Lock;
+
+namespace Lucene.Net.Index
+{
+
+
+ /// <summary> Class for accessing a compound stream.
+ /// This class implements a directory, but is limited to only read operations.
+ /// Directory methods that would normally modify data throw an exception.
+ /// </summary>
+ public class CompoundFileReader : Directory
+ {
+
+ private readonly int readBufferSize;
+
+ private sealed class FileEntry
+ {
+ internal long offset;
+ internal long length;
+ }
+
+ private bool isDisposed;
+
+ // Base info
+ private readonly Directory directory;
+ private readonly System.String fileName;
+
+ private IndexInput stream;
+ private HashMap<string, FileEntry> entries = new HashMap<string, FileEntry>();
+
+
+ public CompoundFileReader(Directory dir, System.String name):this(dir, name, BufferedIndexInput.BUFFER_SIZE)
+ {
+ }
+
+ public CompoundFileReader(Directory dir, System.String name, int readBufferSize)
+ {
+ directory = dir;
+ fileName = name;
+ this.readBufferSize = readBufferSize;
+
+ bool success = false;
+
+ try
+ {
+ stream = dir.OpenInput(name, readBufferSize);
+
+ // read the directory and init files
+ int count = stream.ReadVInt();
+ FileEntry entry = null;
+ for (int i = 0; i < count; i++)
+ {
+ long offset = stream.ReadLong();
+ System.String id = stream.ReadString();
+
+ if (entry != null)
+ {
+ // set length of the previous entry
+ entry.length = offset - entry.offset;
+ }
+
+ entry = new FileEntry {offset = offset};
+ entries[id] = entry;
+ }
+
+ // set the length of the final entry
+ if (entry != null)
+ {
+ entry.length = stream.Length() - entry.offset;
+ }
+
+ success = true;
+ }
+ finally
+ {
+ if (!success && (stream != null))
+ {
+ try
+ {
+ stream.Close();
+ }
+ catch (System.IO.IOException)
+ {
+ }
+ }
+ }
+ }
+
+ public virtual Directory Directory
+ {
+ get { return directory; }
+ }
+
+ public virtual string Name
+ {
+ get { return fileName; }
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ lock (this)
+ {
+ if (isDisposed) return;
+ if (disposing)
+ {
+ if (entries != null)
+ {
+ entries.Clear();
+ }
+ if (stream != null)
+ {
+ stream.Close();
+ }
+ }
+
+ entries = null;
+ stream = null;
+ isDisposed = true;
+ }
+ }
+
+ public override IndexInput OpenInput(System.String id)
+ {
+ lock (this)
+ {
+ // Default to readBufferSize passed in when we were opened
+ return OpenInput(id, readBufferSize);
+ }
+ }
+
+ public override IndexInput OpenInput(System.String id, int readBufferSize)
+ {
+ lock (this)
+ {
+ if (stream == null)
+ throw new System.IO.IOException("Stream closed");
+
+ FileEntry entry = entries[id];
+ if (entry == null)
+ throw new System.IO.IOException("No sub-file with id " + id + " found");
+
+ return new CSIndexInput(stream, entry.offset, entry.length, readBufferSize);
+ }
+ }
+
+ /// <summary>Returns an array of strings, one for each file in the directory. </summary>
+ public override System.String[] ListAll()
+ {
+ return entries.Keys.ToArray();
+ }
+
+ /// <summary>Returns true iff a file with the given name exists. </summary>
+ public override bool FileExists(System.String name)
+ {
+ return entries.ContainsKey(name);
+ }
+
+ /// <summary>Returns the time the compound file was last modified. </summary>
+ public override long FileModified(System.String name)
+ {
+ return directory.FileModified(fileName);
+ }
+
+ /// <summary>Set the modified time of the compound file to now. </summary>
+ public override void TouchFile(System.String name)
+ {
+ directory.TouchFile(fileName);
+ }
+
+ /// <summary>Not implemented</summary>
+ /// <throws> UnsupportedOperationException </throws>
+ public override void DeleteFile(System.String name)
+ {
+ throw new System.NotSupportedException();
+ }
+
+ /// <summary>Not implemented</summary>
+ /// <throws> UnsupportedOperationException </throws>
+ public void RenameFile(System.String from, System.String to)
+ {
+ throw new System.NotSupportedException();
+ }
+
+ /// <summary>Returns the length of a file in the directory.</summary>
+ /// <throws> IOException if the file does not exist </throws>
+ public override long FileLength(System.String name)
+ {
+ FileEntry e = entries[name];
+ if (e == null)
+ throw new System.IO.IOException("File " + name + " does not exist");
+ return e.length;
+ }
+
+ /// <summary>Not implemented</summary>
+ /// <throws> UnsupportedOperationException </throws>
+ public override IndexOutput CreateOutput(System.String name)
+ {
+ throw new System.NotSupportedException();
+ }
+
+ /// <summary>Not implemented</summary>
+ /// <throws> UnsupportedOperationException </throws>
+ public override Lock MakeLock(System.String name)
+ {
+ throw new System.NotSupportedException();
+ }
+
+ /// <summary>Implementation of an IndexInput that reads from a portion of the
+ /// compound file. The visibility is left as "package" *only* because
+ /// this helps with testing since NUnit test cases in a different class
+ /// can then access package fields of this class.
+ /// </summary>
+ public /*internal*/ sealed class CSIndexInput : BufferedIndexInput
+ {
+ internal IndexInput base_Renamed;
+ internal long fileOffset;
+ internal long length;
+
+ private bool isDisposed;
+
+ internal CSIndexInput(IndexInput @base, long fileOffset, long length):this(@base, fileOffset, length, BufferedIndexInput.BUFFER_SIZE)
+ {
+ }
+
+ internal CSIndexInput(IndexInput @base, long fileOffset, long length, int readBufferSize):base(readBufferSize)
+ {
+ this.base_Renamed = (IndexInput) @base.Clone();
+ this.fileOffset = fileOffset;
+ this.length = length;
+ }
+
+ public override System.Object Clone()
+ {
+ var clone = (CSIndexInput) base.Clone();
+ clone.base_Renamed = (IndexInput) base_Renamed.Clone();
+ clone.fileOffset = fileOffset;
+ clone.length = length;
+ return clone;
+ }
+
+ /// <summary>Expert: implements buffer refill. Reads bytes from the current
+ /// position in the input.
+ /// </summary>
+ /// <param name="b">the array to read bytes into
+ /// </param>
+ /// <param name="offset">the offset in the array to start storing bytes
+ /// </param>
+ /// <param name="len">the number of bytes to read
+ /// </param>
+ public override void ReadInternal(byte[] b, int offset, int len)
+ {
+ long start = FilePointer;
+ if (start + len > length)
+ throw new System.IO.IOException("read past EOF");
+ base_Renamed.Seek(fileOffset + start);
+ base_Renamed.ReadBytes(b, offset, len, false);
+ }
+
+ /// <summary>Expert: implements seek. Sets current position in this file, where
+ /// the next <see cref="ReadInternal(byte[],int,int)" /> will occur.
+ /// </summary>
+ /// <seealso cref="ReadInternal(byte[],int,int)">
+ /// </seealso>
+ public override void SeekInternal(long pos)
+ {
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ if (isDisposed) return;
+
+ if (disposing)
+ {
+ if (base_Renamed != null)
+ {
+ base_Renamed.Close();
+ }
+ }
+
+ isDisposed = true;
+ }
+
+ public override long Length()
+ {
+ return length;
+ }
+
+ public IndexInput base_Renamed_ForNUnit
+ {
+ get { return base_Renamed; }
+ }
+ }
+ }
+}
\ No newline at end of file
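A minimal sketch of reading one sub-file out of a compound (.cfs) file with the reader above; the index path and the "_1.cfs" / "_1.fnm" file names are placeholders, not taken from a real index.

using System;
using Lucene.Net.Index;
using Directory = Lucene.Net.Store.Directory;
using FSDirectory = Lucene.Net.Store.FSDirectory;
using IndexInput = Lucene.Net.Store.IndexInput;

public static class CompoundReadExample
{
    public static void Main()
    {
        Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo("path/to/index"));
        var cfs = new CompoundFileReader(dir, "_1.cfs");

        // List every sub-file packed into the compound file.
        foreach (string name in cfs.ListAll())
            Console.WriteLine(name + " (" + cfs.FileLength(name) + " bytes)");

        // Read one sub-file in full.
        IndexInput input = cfs.OpenInput("_1.fnm");
        var bytes = new byte[input.Length()];
        input.ReadBytes(bytes, 0, bytes.Length);
        input.Close();

        cfs.Dispose();
        dir.Dispose();
    }
}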
diff --git a/src/core/Index/CompoundFileWriter.cs b/src/core/Index/CompoundFileWriter.cs
new file mode 100644
index 0000000..e2905e1
--- /dev/null
+++ b/src/core/Index/CompoundFileWriter.cs
@@ -0,0 +1,275 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using Directory = Lucene.Net.Store.Directory;
+using IndexInput = Lucene.Net.Store.IndexInput;
+using IndexOutput = Lucene.Net.Store.IndexOutput;
+
+namespace Lucene.Net.Index
+{
+
+
+ /// <summary> Combines multiple files into a single compound file.
+ /// The file format:<br/>
+ /// <list type="bullet">
+ /// <item>VInt fileCount</item>
+ /// <item>{Directory}
+ /// fileCount entries with the following structure:</item>
+ /// <list type="bullet">
+ /// <item>long dataOffset</item>
+ /// <item>String fileName</item>
+ /// </list>
+ /// <item>{File Data}
+ /// fileCount entries with the raw data of the corresponding file</item>
+ /// </list>
+ ///
+ /// The fileCount integer indicates how many files are contained in this compound
+ /// file. The {directory} that follows has that many entries. Each directory entry
+ /// contains a long pointer to the start of this file's data section, and a String
+ /// with that file's name.
+ /// </summary>
+ public sealed class CompoundFileWriter : IDisposable
+ {
+
+ private sealed class FileEntry
+ {
+ /// <summary>source file </summary>
+ internal System.String file;
+
+ /// <summary>temporary holder for the start of directory entry for this file </summary>
+ internal long directoryOffset;
+
+ /// <summary>temporary holder for the start of this file's data section </summary>
+ internal long dataOffset;
+ }
+
+
+ private readonly Directory directory;
+ private readonly String fileName;
+ private readonly HashSet<string> ids;
+ private readonly LinkedList<FileEntry> entries;
+ private bool merged = false;
+ private readonly SegmentMerger.CheckAbort checkAbort;
+
+ /// <summary>Create the compound stream in the specified file. The file name is the
+ /// entire name (no extensions are added).
+ /// </summary>
+ /// <throws> NullPointerException if <c>dir</c> or <c>name</c> is null </throws>
+ public CompoundFileWriter(Directory dir, System.String name):this(dir, name, null)
+ {
+ }
+
+ internal CompoundFileWriter(Directory dir, System.String name, SegmentMerger.CheckAbort checkAbort)
+ {
+ if (dir == null)
+ throw new ArgumentNullException("dir");
+ if (name == null)
+ throw new ArgumentNullException("name");
+ this.checkAbort = checkAbort;
+ directory = dir;
+ fileName = name;
+ ids = new HashSet<string>();
+ entries = new LinkedList<FileEntry>();
+ }
+
+ /// <summary>Returns the directory of the compound file. </summary>
+ public Directory Directory
+ {
+ get { return directory; }
+ }
+
+ /// <summary>Returns the name of the compound file. </summary>
+ public string Name
+ {
+ get { return fileName; }
+ }
+
+ /// <summary>Add a source stream. <c>file</c> is the string by which the
+ /// sub-stream will be known in the compound stream.
+ ///
+ /// </summary>
+ /// <throws> IllegalStateException if this writer is closed </throws>
+ /// <throws> NullPointerException if <c>file</c> is null </throws>
+ /// <throws> IllegalArgumentException if a file with the same name
+ /// has been added already </throws>
+ public void AddFile(String file)
+ {
+ if (merged)
+ throw new InvalidOperationException("Can't add extensions after merge has been called");
+
+ if (file == null)
+ throw new ArgumentNullException("file");
+
+ // HashSet<string>.Add returns false rather than throwing when the item
+ // is already present, so the duplicate check must use the return value.
+ if (!ids.Add(file))
+ {
+ throw new ArgumentException("File " + file + " already added");
+ }
+
+ var entry = new FileEntry {file = file};
+ entries.AddLast(entry);
+ }
+
+ [Obsolete("Use Dispose() instead")]
+ public void Close()
+ {
+ Dispose();
+ }
+
+ /// <summary>Merge files with the extensions added up to now.
+ /// All files with these extensions are combined sequentially into the
+ /// compound stream. After successful merge, the source files
+ /// are deleted.
+ /// </summary>
+ /// <throws> IllegalStateException if close() had been called before or
+ /// if no file has been added to this object </throws>
+ public void Dispose()
+ {
+ // Extract into protected method if class ever becomes unsealed
+
+ // TODO: Dispose shouldn't throw exceptions!
+ if (merged)
+ throw new SystemException("Merge already performed");
+
+ if ((entries.Count == 0))
+ throw new SystemException("No entries to merge have been defined");
+
+ merged = true;
+
+ // open the compound stream
+ IndexOutput os = null;
+ try
+ {
+ os = directory.CreateOutput(fileName);
+
+ // Write the number of entries
+ os.WriteVInt(entries.Count);
+
+ // Write the directory with all offsets at 0.
+ // Remember the positions of directory entries so that we can
+ // adjust the offsets later
+ long totalSize = 0;
+ foreach (FileEntry fe in entries)
+ {
+ fe.directoryOffset = os.FilePointer;
+ os.WriteLong(0); // for now
+ os.WriteString(fe.file);
+ totalSize += directory.FileLength(fe.file);
+ }
+
+ // Pre-allocate size of file as optimization --
+ // this can potentially help IO performance as
+ // we write the file and also later during
+ // searching. It also uncovers a disk-full
+ // situation earlier and hopefully without
+ // actually filling disk to 100%:
+ long finalLength = totalSize + os.FilePointer;
+ os.SetLength(finalLength);
+
+ // Open the files and copy their data into the stream.
+ // Remember the locations of each file's data section.
+ var buffer = new byte[16384];
+ foreach (FileEntry fe in entries)
+ {
+ fe.dataOffset = os.FilePointer;
+ CopyFile(fe, os, buffer);
+ }
+
+ // Write the data offsets into the directory of the compound stream
+ foreach (FileEntry fe in entries)
+ {
+ os.Seek(fe.directoryOffset);
+ os.WriteLong(fe.dataOffset);
+ }
+
+ System.Diagnostics.Debug.Assert(finalLength == os.Length);
+
+ // Close the output stream. Set the os to null before trying to
+ // close so that if an exception occurs during the close, the
+ // finally clause below will not attempt to close the stream
+ // the second time.
+ IndexOutput tmp = os;
+ os = null;
+ tmp.Close();
+ }
+ finally
+ {
+ if (os != null)
+ try
+ {
+ os.Close();
+ }
+ catch (System.IO.IOException)
+ {
+ }
+ }
+ }
+
+
+ /// <summary>Copy the contents of the file with specified extension into the
+ /// provided output stream. Use the provided buffer for moving data
+ /// to reduce memory allocation.
+ /// </summary>
+ private void CopyFile(FileEntry source, IndexOutput os, byte[] buffer)
+ {
+ IndexInput isRenamed = null;
+ try
+ {
+ long startPtr = os.FilePointer;
+
+ isRenamed = directory.OpenInput(source.file);
+ long length = isRenamed.Length();
+ long remainder = length;
+ int chunk = buffer.Length;
+
+ while (remainder > 0)
+ {
+ var len = (int) Math.Min(chunk, remainder);
+ isRenamed.ReadBytes(buffer, 0, len, false);
+ os.WriteBytes(buffer, len);
+ remainder -= len;
+ if (checkAbort != null)
+ // Roughly every 2 MB we will check if
+ // it's time to abort
+ checkAbort.Work(80);
+ }
+
+ // Verify that remainder is 0
+ if (remainder != 0)
+ throw new System.IO.IOException("Non-zero remainder length after copying: " + remainder + " (id: " + source.file + ", length: " + length + ", buffer size: " + chunk + ")");
+
+ // Verify that the output length diff is equal to original file
+ long endPtr = os.FilePointer;
+ long diff = endPtr - startPtr;
+ if (diff != length)
+ throw new System.IO.IOException("Difference in the output file offsets " + diff + " does not match the original file length " + length);
+ }
+ finally
+ {
+ if (isRenamed != null)
+ isRenamed.Close();
+ }
+ }
+ }
+}
\ No newline at end of file
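A minimal sketch of packing existing per-segment files into a compound file with the writer above; the index path and the "_1.*" file names are placeholders, and the named sub-files must already exist in the directory before Dispose() is called.

using Lucene.Net.Index;
using Directory = Lucene.Net.Store.Directory;
using FSDirectory = Lucene.Net.Store.FSDirectory;

public static class CompoundWriteExample
{
    public static void Main()
    {
        Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo("path/to/index"));
        var writer = new CompoundFileWriter(dir, "_1.cfs");

        writer.AddFile("_1.fnm");   // placeholder per-segment files
        writer.AddFile("_1.fdt");

        writer.Dispose();           // performs the merge and writes the entry table
        dir.Dispose();
    }
}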
diff --git a/src/core/Index/ConcurrentMergeScheduler.cs b/src/core/Index/ConcurrentMergeScheduler.cs
new file mode 100644
index 0000000..8b8a300
--- /dev/null
+++ b/src/core/Index/ConcurrentMergeScheduler.cs
@@ -0,0 +1,504 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.Collections.Generic;
+using Lucene.Net.Support;
+using Directory = Lucene.Net.Store.Directory;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary>A <see cref="MergeScheduler" /> that runs each merge using a
+ /// separate thread, up until a maximum number of threads
+ /// (<see cref="MaxThreadCount" />) at which when a merge is
+ /// needed, the thread(s) that are updating the index will
+ /// pause until one or more merges completes. This is a
+ /// simple way to use concurrency in the indexing process
+ /// without having to create and manage application level
+ /// threads.
+ /// </summary>
+
+ public class ConcurrentMergeScheduler:MergeScheduler
+ {
+
+ private int mergeThreadPriority = - 1;
+
+ protected internal IList<MergeThread> mergeThreads = new List<MergeThread>();
+
+ // Max number of threads allowed to be merging at once
+ private int _maxThreadCount = 1;
+
+ protected internal Directory dir;
+
+ private bool closed;
+ protected internal IndexWriter writer;
+ protected internal int mergeThreadCount;
+
+ public ConcurrentMergeScheduler()
+ {
+ if (allInstances != null)
+ {
+ // Only for testing
+ AddMyself();
+ }
+ }
+
+ /// <summary>Gets or sets the maximum number of simultaneous threads that may be
+ /// running. If a merge is necessary yet we already have
+ /// this many threads running, the incoming thread (that
+ /// is calling add/updateDocument) will block until
+ /// a merge thread has completed.
+ /// </summary>
+ public virtual int MaxThreadCount
+ {
+ set
+ {
+ if (value < 1)
+ throw new System.ArgumentException("count should be at least 1");
+ _maxThreadCount = value;
+ }
+ get { return _maxThreadCount; }
+ }
+
+ /// <summary>Return the priority that merge threads run at. By
+ /// default the priority is 1 plus the priority of (ie,
+ /// slightly higher priority than) the first thread that
+ /// calls merge.
+ /// </summary>
+ [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
+ public virtual int GetMergeThreadPriority()
+ {
+ lock (this)
+ {
+ InitMergeThreadPriority();
+ return mergeThreadPriority;
+ }
+ }
+
+ /// <summary>Set the priority that merge threads run at. </summary>
+ public virtual void SetMergeThreadPriority(int pri)
+ {
+ lock (this)
+ {
+ if (pri > (int) System.Threading.ThreadPriority.Highest || pri < (int) System.Threading.ThreadPriority.Lowest)
+ throw new System.ArgumentException("priority must be in range " + (int) System.Threading.ThreadPriority.Lowest + " .. " + (int) System.Threading.ThreadPriority.Highest + " inclusive");
+ mergeThreadPriority = pri;
+
+ int numThreads = MergeThreadCount();
+ for (int i = 0; i < numThreads; i++)
+ {
+ MergeThread merge = mergeThreads[i];
+ merge.SetThreadPriority(pri);
+ }
+ }
+ }
+
+ private bool Verbose()
+ {
+ return writer != null && writer.Verbose;
+ }
+
+ private void Message(System.String message)
+ {
+ if (Verbose())
+ writer.Message("CMS: " + message);
+ }
+
+ private void InitMergeThreadPriority()
+ {
+ lock (this)
+ {
+ if (mergeThreadPriority == - 1)
+ {
+ // Default to slightly higher priority than our
+ // calling thread
+ mergeThreadPriority = 1 + (System.Int32) ThreadClass.Current().Priority;
+ if (mergeThreadPriority > (int) System.Threading.ThreadPriority.Highest)
+ mergeThreadPriority = (int) System.Threading.ThreadPriority.Highest;
+ }
+ }
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ //if (disposing)
+ //{
+ closed = true;
+ //}
+ }
+
+ public virtual void Sync()
+ {
+ lock (this)
+ {
+ while (MergeThreadCount() > 0)
+ {
+ if (Verbose())
+ Message("now wait for threads; currently " + mergeThreads.Count + " still running");
+ int count = mergeThreads.Count;
+ if (Verbose())
+ {
+ for (int i = 0; i < count; i++)
+ Message(" " + i + ": " + mergeThreads[i]);
+ }
+
+ System.Threading.Monitor.Wait(this);
+
+ }
+ }
+ }
+
+ private int MergeThreadCount()
+ {
+ lock (this)
+ {
+ int count = 0;
+ int numThreads = mergeThreads.Count;
+ for (int i = 0; i < numThreads; i++)
+ {
+ if (mergeThreads[i].IsAlive)
+ {
+ count++;
+ }
+ }
+ return count;
+ }
+ }
+
+ public override void Merge(IndexWriter writer)
+ {
+ // TODO: .NET doesn't support this
+ // assert !Thread.holdsLock(writer);
+
+ this.writer = writer;
+
+ InitMergeThreadPriority();
+
+ dir = writer.Directory;
+
+ // First, quickly run through the newly proposed merges
+ // and add any orthogonal merges (ie a merge not
+ // involving segments already pending to be merged) to
+ // the queue. If we are way behind on merging, many of
+ // these newly proposed merges will likely already be
+ // registered.
+
+ if (Verbose())
+ {
+ Message("now merge");
+ Message(" index: " + writer.SegString());
+ }
+
+ // Iterate, pulling from the IndexWriter's queue of
+ // pending merges, until it's empty:
+ while (true)
+ {
+ // TODO: we could be careful about which merges to do in
+ // the BG (eg maybe the "biggest" ones) vs FG, which
+ // merges to do first (the easiest ones?), etc.
+
+ MergePolicy.OneMerge merge = writer.GetNextMerge();
+ if (merge == null)
+ {
+ if (Verbose())
+ Message(" no more merges pending; now return");
+ return ;
+ }
+
+ // We do this w/ the primary thread to keep
+ // deterministic assignment of segment names
+ writer.MergeInit(merge);
+
+ bool success = false;
+ try
+ {
+ lock (this)
+ {
+ while (MergeThreadCount() >= _maxThreadCount)
+ {
+ if (Verbose())
+ Message(" too many merge threads running; stalling...");
+
+ System.Threading.Monitor.Wait(this);
+
+
+ }
+
+ if (Verbose())
+ Message(" consider merge " + merge.SegString(dir));
+
+ System.Diagnostics.Debug.Assert(MergeThreadCount() < _maxThreadCount);
+
+ // OK to spawn a new merge thread to handle this
+ // merge:
+ MergeThread merger = GetMergeThread(writer, merge);
+ mergeThreads.Add(merger);
+ if (Verbose())
+ Message(" launch new thread [" + merger.Name + "]");
+
+ merger.Start();
+ success = true;
+ }
+ }
+ finally
+ {
+ if (!success)
+ {
+ writer.MergeFinish(merge);
+ }
+ }
+ }
+ }
+
+ /// <summary>Does the actual merge, by calling <see cref="IndexWriter.Merge" /> </summary>
+ protected internal virtual void DoMerge(MergePolicy.OneMerge merge)
+ {
+ writer.Merge(merge);
+ }
+
+ /// <summary>Create and return a new MergeThread </summary>
+ protected internal virtual MergeThread GetMergeThread(IndexWriter writer, MergePolicy.OneMerge merge)
+ {
+ lock (this)
+ {
+ var thread = new MergeThread(this, writer, merge);
+ thread.SetThreadPriority(mergeThreadPriority);
+ thread.IsBackground = true;
+ thread.Name = "Lucene Merge Thread #" + mergeThreadCount++;
+ return thread;
+ }
+ }
+
+ public /*protected internal*/ class MergeThread:ThreadClass
+ {
+ private void InitBlock(ConcurrentMergeScheduler enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private ConcurrentMergeScheduler enclosingInstance;
+ public ConcurrentMergeScheduler Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+
+ internal IndexWriter writer;
+ internal MergePolicy.OneMerge startMerge;
+ internal MergePolicy.OneMerge runningMerge;
+
+ public MergeThread(ConcurrentMergeScheduler enclosingInstance, IndexWriter writer, MergePolicy.OneMerge startMerge)
+ {
+ InitBlock(enclosingInstance);
+ this.writer = writer;
+ this.startMerge = startMerge;
+ }
+
+ public virtual void SetRunningMerge(MergePolicy.OneMerge merge)
+ {
+ lock (this)
+ {
+ runningMerge = merge;
+ }
+ }
+
+ public virtual MergePolicy.OneMerge RunningMerge
+ {
+ get
+ {
+ lock (this)
+ {
+ return runningMerge;
+ }
+ }
+ }
+
+ public virtual void SetThreadPriority(int pri)
+ {
+ try
+ {
+ Priority = (System.Threading.ThreadPriority) pri;
+ }
+ catch (System.NullReferenceException)
+ {
+ // Strangely, Sun's JDK 1.5 on Linux sometimes
+ // throws NPE out of here...
+ }
+ catch (System.Security.SecurityException)
+ {
+ // Ignore this because we will still run fine with
+ // normal thread priority
+ }
+ }
+
+ override public void Run()
+ {
+
+ // First time through the while loop we do the merge
+ // that we were started with:
+ MergePolicy.OneMerge merge = this.startMerge;
+
+ try
+ {
+
+ if (Enclosing_Instance.Verbose())
+ Enclosing_Instance.Message(" merge thread: start");
+
+ while (true)
+ {
+ SetRunningMerge(merge);
+ Enclosing_Instance.DoMerge(merge);
+
+ // Subsequent times through the loop we do any new
+ // merge that writer says is necessary:
+ merge = writer.GetNextMerge();
+ if (merge != null)
+ {
+ writer.MergeInit(merge);
+ if (Enclosing_Instance.Verbose())
+ Enclosing_Instance.Message(" merge thread: do another merge " + merge.SegString(Enclosing_Instance.dir));
+ }
+ else
+ break;
+ }
+
+ if (Enclosing_Instance.Verbose())
+ Enclosing_Instance.Message(" merge thread: done");
+ }
+ catch (System.Exception exc)
+ {
+ // Ignore the exception if it was due to abort:
+ if (!(exc is MergePolicy.MergeAbortedException))
+ {
+ if (!Enclosing_Instance.suppressExceptions)
+ {
+ // suppressExceptions is normally only set during
+ // testing.
+ Lucene.Net.Index.ConcurrentMergeScheduler.anyExceptions = true;
+ Enclosing_Instance.HandleMergeException(exc);
+ }
+ }
+ }
+ finally
+ {
+ lock (Enclosing_Instance)
+ {
+ System.Threading.Monitor.PulseAll(Enclosing_Instance);
+ Enclosing_Instance.mergeThreads.Remove(this);
+ bool removed = !Enclosing_Instance.mergeThreads.Contains(this);
+ System.Diagnostics.Debug.Assert(removed);
+ }
+ }
+ }
+
+ public override System.String ToString()
+ {
+ MergePolicy.OneMerge merge = RunningMerge ?? startMerge;
+ return "merge thread: " + merge.SegString(Enclosing_Instance.dir);
+ }
+ }
+
+ /// <summary>Called when an exception is hit in a background merge
+ /// thread
+ /// </summary>
+ protected internal virtual void HandleMergeException(System.Exception exc)
+ {
+ // When an exception is hit during merge, IndexWriter
+ // removes any partial files and then allows another
+ // merge to run. If whatever caused the error is not
+ // transient then the exception will keep happening,
+ // so, we sleep here to avoid saturating CPU in such
+ // cases:
+ System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 250));
+
+ throw new MergePolicy.MergeException(exc, dir);
+ }
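+
+ // Note: the sleep above is 250 msec (10,000 ticks per msec). As a sketch,
+ // a subclass compiled alongside this scheduler could override this method
+ // to log and swallow merge failures instead of rethrowing:
+ //
+ // protected internal override void HandleMergeException(System.Exception exc)
+ // {
+ // System.Console.Error.WriteLine("background merge failed: " + exc);
+ // }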
+
+ internal static bool anyExceptions = false;
+
+ /// <summary>Used for testing </summary>
+ public static bool AnyUnhandledExceptions()
+ {
+ if (allInstances == null)
+ {
+ throw new System.SystemException("setTestMode() was not called; often this is because your test case's setUp method fails to call super.setUp in LuceneTestCase");
+ }
+ lock (allInstances)
+ {
+ int count = allInstances.Count;
+ // Make sure all outstanding threads are done so we see
+ // any exceptions they may produce:
+ for (int i = 0; i < count; i++)
+ allInstances[i].Sync();
+ bool v = anyExceptions;
+ anyExceptions = false;
+ return v;
+ }
+ }
+
+ public static void ClearUnhandledExceptions()
+ {
+ lock (allInstances)
+ {
+ anyExceptions = false;
+ }
+ }
+
+ /// <summary>Used for testing </summary>
+ private void AddMyself()
+ {
+ lock (allInstances)
+ {
+ int size = allInstances.Count;
+ int upto = 0;
+ for (int i = 0; i < size; i++)
+ {
+ ConcurrentMergeScheduler other = allInstances[i];
+ if (!(other.closed && 0 == other.MergeThreadCount()))
+ // Keep this one for now: it still has threads or
+ // may spawn new threads
+ allInstances[upto++] = other;
+ }
+ allInstances.RemoveRange(upto, allInstances.Count - upto);
+ allInstances.Add(this);
+ }
+ }
+
+ private bool suppressExceptions;
+
+ /// <summary>Used for testing </summary>
+ public /*internal*/ virtual void SetSuppressExceptions()
+ {
+ suppressExceptions = true;
+ }
+
+ /// <summary>Used for testing </summary>
+ public /*internal*/ virtual void ClearSuppressExceptions()
+ {
+ suppressExceptions = false;
+ }
+
+ /// <summary>Used for testing </summary>
+ private static List<ConcurrentMergeScheduler> allInstances;
+ public static void SetTestMode()
+ {
+ allInstances = new List<ConcurrentMergeScheduler>();
+ }
+ }
+} \ No newline at end of file
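
ConcurrentMergeScheduler is wired into an IndexWriter and then largely left alone: the writer
queues merges, and the Merge() loop above spawns up to _maxThreadCount background threads to
drain them. A minimal usage sketch (member names follow the full Lucene.Net 3.0 API that this
lightweight port tracks; the path and analyzer choice are illustrative only):

    using Lucene.Net.Analysis.Standard;
    using Lucene.Net.Index;
    using Lucene.Net.Store;

    var dir = FSDirectory.Open(new System.IO.DirectoryInfo("/tmp/example-index"));
    var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    var writer = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);

    // ConcurrentMergeScheduler is typically the default; set explicitly here
    // only to show where it plugs in.
    writer.SetMergeScheduler(new ConcurrentMergeScheduler());

    // ... AddDocument calls ...

    writer.Dispose();   // finishes or aborts outstanding background merges
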
diff --git a/src/core/Index/CorruptIndexException.cs b/src/core/Index/CorruptIndexException.cs
new file mode 100644
index 0000000..d846cb3
--- /dev/null
+++ b/src/core/Index/CorruptIndexException.cs
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> This exception is thrown when Lucene detects
+ /// an inconsistency in the index.
+ /// </summary>
+ [Serializable]
+ public class CorruptIndexException:System.IO.IOException
+ {
+ public CorruptIndexException(String message):base(message)
+ {
+ }
+ public CorruptIndexException(String message, Exception exp):base(message, exp)
+ {
+ }
+ }
+} \ No newline at end of file
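
Because CorruptIndexException derives from System.IO.IOException, callers can separate a
structurally damaged index from ordinary I/O failures when opening it. A small sketch (the
opening call and variable names are illustrative):

    try
    {
        IndexReader reader = IndexReader.Open(dir, true);   // true = read-only
        // ... run searches ...
    }
    catch (CorruptIndexException)
    {
        // the index itself is inconsistent: restore from backup or rebuild
    }
    catch (System.IO.IOException)
    {
        // transient or environmental I/O problem: often worth retrying
    }
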
diff --git a/src/core/Index/DefaultSkipListReader.cs b/src/core/Index/DefaultSkipListReader.cs
new file mode 100644
index 0000000..a1cddde
--- /dev/null
+++ b/src/core/Index/DefaultSkipListReader.cs
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Lucene.Net.Support;
+using IndexInput = Lucene.Net.Store.IndexInput;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> Implements the skip list reader for the default posting list format
+ /// that stores positions and payloads.
+ ///
+ /// </summary>
+ class DefaultSkipListReader:MultiLevelSkipListReader
+ {
+ private bool currentFieldStoresPayloads;
+ private readonly long[] freqPointer;
+ private readonly long[] proxPointer;
+ private readonly int[] payloadLength;
+
+ private long lastFreqPointer;
+ private long lastProxPointer;
+ private int lastPayloadLength;
+
+
+ internal DefaultSkipListReader(IndexInput skipStream, int maxSkipLevels, int skipInterval):base(skipStream, maxSkipLevels, skipInterval)
+ {
+ freqPointer = new long[maxSkipLevels];
+ proxPointer = new long[maxSkipLevels];
+ payloadLength = new int[maxSkipLevels];
+ }
+
+ internal virtual void Init(long skipPointer, long freqBasePointer, long proxBasePointer, int df, bool storesPayloads)
+ {
+ base.Init(skipPointer, df);
+ this.currentFieldStoresPayloads = storesPayloads;
+ lastFreqPointer = freqBasePointer;
+ lastProxPointer = proxBasePointer;
+
+ for (int i = 0; i < freqPointer.Length; i++) freqPointer[i] = freqBasePointer;
+ for (int i = 0; i < proxPointer.Length; i++) proxPointer[i] = proxBasePointer;
+ for (int i = 0; i < payloadLength.Length; i++) payloadLength[i] = 0;
+ }
+
+ /// <summary>Returns the freq pointer of the doc to which the last call of
+ /// <see cref="MultiLevelSkipListReader.SkipTo(int)" /> has skipped.
+ /// </summary>
+ internal virtual long GetFreqPointer()
+ {
+ return lastFreqPointer;
+ }
+
+ /// <summary>Returns the prox pointer of the doc to which the last call of
+ /// <see cref="MultiLevelSkipListReader.SkipTo(int)" /> has skipped.
+ /// </summary>
+ internal virtual long GetProxPointer()
+ {
+ return lastProxPointer;
+ }
+
+ /// <summary>Returns the payload length of the payload stored just before
+ /// the doc to which the last call of <see cref="MultiLevelSkipListReader.SkipTo(int)" />
+ /// has skipped.
+ /// </summary>
+ internal virtual int GetPayloadLength()
+ {
+ return lastPayloadLength;
+ }
+
+ protected internal override void SeekChild(int level)
+ {
+ base.SeekChild(level);
+ freqPointer[level] = lastFreqPointer;
+ proxPointer[level] = lastProxPointer;
+ payloadLength[level] = lastPayloadLength;
+ }
+
+ protected internal override void SetLastSkipData(int level)
+ {
+ base.SetLastSkipData(level);
+ lastFreqPointer = freqPointer[level];
+ lastProxPointer = proxPointer[level];
+ lastPayloadLength = payloadLength[level];
+ }
+
+
+ protected internal override int ReadSkipData(int level, IndexInput skipStream)
+ {
+ int delta;
+ if (currentFieldStoresPayloads)
+ {
+ // the current field stores payloads.
+ // if the doc delta is odd then we have
+ // to read the current payload length
+ // because it differs from the length of the
+ // previous payload
+ delta = skipStream.ReadVInt();
+ if ((delta & 1) != 0)
+ {
+ payloadLength[level] = skipStream.ReadVInt();
+ }
+ delta = Number.URShift(delta, 1);
+ }
+ else
+ {
+ delta = skipStream.ReadVInt();
+ }
+ freqPointer[level] += skipStream.ReadVInt();
+ proxPointer[level] += skipStream.ReadVInt();
+
+ return delta;
+ }
+ }
+} \ No newline at end of file
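
The only non-obvious part of ReadSkipData is the flag bit: for payload-storing fields the doc
delta is shifted left one bit by the writer, and the low bit says whether a new payload length
precedes the pointer deltas. A free-standing sketch of that decode step (assumes the same
usings as the file above; the helper name and out parameters are illustrative, not part of
this class):

    internal static int ReadPayloadSkipEntry(IndexInput skipStream, ref int payloadLength,
                                             out long freqDelta, out long proxDelta)
    {
        int raw = skipStream.ReadVInt();
        if ((raw & 1) != 0)
            payloadLength = skipStream.ReadVInt();   // odd: a new payload length was written
        freqDelta = skipStream.ReadVInt();           // delta against the last freq file pointer
        proxDelta = skipStream.ReadVInt();           // delta against the last prox file pointer
        return Number.URShift(raw, 1);               // doc delta with the flag bit stripped
    }
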
diff --git a/src/core/Index/DefaultSkipListWriter.cs b/src/core/Index/DefaultSkipListWriter.cs
new file mode 100644
index 0000000..77412af
--- /dev/null
+++ b/src/core/Index/DefaultSkipListWriter.cs
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using IndexOutput = Lucene.Net.Store.IndexOutput;
+
+namespace Lucene.Net.Index
+{
+
+
+ /// <summary> Implements the skip list writer for the default posting list format
+ /// that stores positions and payloads.
+ ///
+ /// </summary>
+ class DefaultSkipListWriter:MultiLevelSkipListWriter
+ {
+ private int[] lastSkipDoc;
+ private int[] lastSkipPayloadLength;
+ private long[] lastSkipFreqPointer;
+ private long[] lastSkipProxPointer;
+
+ private IndexOutput freqOutput;
+ private IndexOutput proxOutput;
+
+ private int curDoc;
+ private bool curStorePayloads;
+ private int curPayloadLength;
+ private long curFreqPointer;
+ private long curProxPointer;
+
+ internal DefaultSkipListWriter(int skipInterval, int numberOfSkipLevels, int docCount, IndexOutput freqOutput, IndexOutput proxOutput):base(skipInterval, numberOfSkipLevels, docCount)
+ {
+ this.freqOutput = freqOutput;
+ this.proxOutput = proxOutput;
+
+ lastSkipDoc = new int[numberOfSkipLevels];
+ lastSkipPayloadLength = new int[numberOfSkipLevels];
+ lastSkipFreqPointer = new long[numberOfSkipLevels];
+ lastSkipProxPointer = new long[numberOfSkipLevels];
+ }
+
+ internal virtual void SetFreqOutput(IndexOutput freqOutput)
+ {
+ this.freqOutput = freqOutput;
+ }
+
+ internal virtual void SetProxOutput(IndexOutput proxOutput)
+ {
+ this.proxOutput = proxOutput;
+ }
+
+ /// <summary> Sets the values for the current skip data. </summary>
+ internal virtual void SetSkipData(int doc, bool storePayloads, int payloadLength)
+ {
+ this.curDoc = doc;
+ this.curStorePayloads = storePayloads;
+ this.curPayloadLength = payloadLength;
+ this.curFreqPointer = freqOutput.FilePointer;
+ if (proxOutput != null)
+ this.curProxPointer = proxOutput.FilePointer;
+ }
+
+ protected internal override void ResetSkip()
+ {
+ base.ResetSkip();
+ for (int i = 0; i < lastSkipDoc.Length; i++) lastSkipDoc[i] = 0;
+ for (int i = 0; i < lastSkipPayloadLength.Length; i++) lastSkipPayloadLength[i] = -1; // we don't have to write the first length in the skip list
+ for (int i = 0; i < lastSkipFreqPointer.Length; i++) lastSkipFreqPointer[i] = freqOutput.FilePointer;
+ if (proxOutput != null)
+ for (int i = 0; i < lastSkipProxPointer.Length; i++) lastSkipProxPointer[i] = proxOutput.FilePointer;
+ }
+
+ protected internal override void WriteSkipData(int level, IndexOutput skipBuffer)
+ {
+ // To efficiently store payloads in the posting lists we do not store the length of
+ // every payload. Instead we omit the length for a payload if the previous payload had
+ // the same length.
+ // However, in order to support skipping, the payload length at every skip point must be known.
+ // So we use the same length encoding that we use for the posting lists for the skip data as well:
+ // Case 1: current field does not store payloads
+ // SkipDatum --> DocSkip, FreqSkip, ProxSkip
+ // DocSkip,FreqSkip,ProxSkip --> VInt
+ // DocSkip records the document number before every SkipInterval-th document in TermFreqs.
+ // Document numbers are represented as differences from the previous value in the sequence.
+ // Case 2: current field stores payloads
+ // SkipDatum --> DocSkip, PayloadLength?, FreqSkip,ProxSkip
+ // DocSkip,FreqSkip,ProxSkip --> VInt
+ // PayloadLength --> VInt
+ // In this case DocSkip/2 is the difference between
+ // the current and the previous value. If DocSkip
+ // is odd, then a PayloadLength encoded as VInt follows;
+ // if DocSkip is even, then it is assumed that the
+ // current payload length equals the length at the previous
+ // skip point
+ if (curStorePayloads)
+ {
+ int delta = curDoc - lastSkipDoc[level];
+ if (curPayloadLength == lastSkipPayloadLength[level])
+ {
+ // the current payload length equals the length at the previous skip point,
+ // so we don't store the length again
+ skipBuffer.WriteVInt(delta * 2);
+ }
+ else
+ {
+ // the payload length is different from the previous one. We shift the DocSkip,
+ // set the lowest bit and store the current payload length as VInt.
+ skipBuffer.WriteVInt(delta * 2 + 1);
+ skipBuffer.WriteVInt(curPayloadLength);
+ lastSkipPayloadLength[level] = curPayloadLength;
+ }
+ }
+ else
+ {
+ // current field does not store payloads
+ skipBuffer.WriteVInt(curDoc - lastSkipDoc[level]);
+ }
+ skipBuffer.WriteVInt((int) (curFreqPointer - lastSkipFreqPointer[level]));
+ skipBuffer.WriteVInt((int) (curProxPointer - lastSkipProxPointer[level]));
+
+ lastSkipDoc[level] = curDoc;
+ //System.out.println("write doc at level " + level + ": " + curDoc);
+
+ lastSkipFreqPointer[level] = curFreqPointer;
+ lastSkipProxPointer[level] = curProxPointer;
+ }
+ }
+} \ No newline at end of file
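
The writer side of the same encoding: the doc delta is doubled and the low bit is set only
when the payload length changed since the previous skip point. A free-standing sketch
mirroring WriteSkipData (helper name is illustrative; assumes the usings of the file above):

    internal static void WritePayloadSkipEntry(IndexOutput skipBuffer, int docDelta,
                                               int payloadLength, int lastPayloadLength,
                                               long freqDelta, long proxDelta)
    {
        if (payloadLength == lastPayloadLength)
        {
            skipBuffer.WriteVInt(docDelta * 2);       // even: length unchanged, not re-written
        }
        else
        {
            skipBuffer.WriteVInt(docDelta * 2 + 1);   // odd: the new length follows
            skipBuffer.WriteVInt(payloadLength);
        }
        skipBuffer.WriteVInt((int) freqDelta);        // freq file pointer delta
        skipBuffer.WriteVInt((int) proxDelta);        // prox file pointer delta
    }
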
diff --git a/src/core/Index/DirectoryReader.cs b/src/core/Index/DirectoryReader.cs
new file mode 100644
index 0000000..574448d
--- /dev/null
+++ b/src/core/Index/DirectoryReader.cs
@@ -0,0 +1,1548 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Lucene.Net.Support;
+using Document = Lucene.Net.Documents.Document;
+using FieldSelector = Lucene.Net.Documents.FieldSelector;
+using Directory = Lucene.Net.Store.Directory;
+using Lock = Lucene.Net.Store.Lock;
+using LockObtainFailedException = Lucene.Net.Store.LockObtainFailedException;
+using DefaultSimilarity = Lucene.Net.Search.DefaultSimilarity;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> An IndexReader which reads indexes with multiple segments.</summary>
+ public class DirectoryReader:IndexReader
+ {
+ /*new*/ private class AnonymousClassFindSegmentsFile:SegmentInfos.FindSegmentsFile
+ {
+ private void InitBlock(bool readOnly, IndexDeletionPolicy deletionPolicy, int termInfosIndexDivisor)
+ {
+ this.readOnly = readOnly;
+ this.deletionPolicy = deletionPolicy;
+ this.termInfosIndexDivisor = termInfosIndexDivisor;
+ }
+ private bool readOnly;
+ private IndexDeletionPolicy deletionPolicy;
+ private int termInfosIndexDivisor;
+ internal AnonymousClassFindSegmentsFile(bool readOnly, Lucene.Net.Index.IndexDeletionPolicy deletionPolicy, int termInfosIndexDivisor, Lucene.Net.Store.Directory Param1):base(Param1)
+ {
+ InitBlock(readOnly, deletionPolicy, termInfosIndexDivisor);
+ }
+ public /*protected internal*/ override System.Object DoBody(System.String segmentFileName)
+ {
+ var infos = new SegmentInfos();
+ infos.Read(directory, segmentFileName);
+ if (readOnly)
+ return new ReadOnlyDirectoryReader(directory, infos, deletionPolicy, termInfosIndexDivisor);
+ else
+ return new DirectoryReader(directory, infos, deletionPolicy, false, termInfosIndexDivisor);
+ }
+ }
+ private class AnonymousClassFindSegmentsFile1:SegmentInfos.FindSegmentsFile
+ {
+ private void InitBlock(bool openReadOnly, DirectoryReader enclosingInstance)
+ {
+ this.openReadOnly = openReadOnly;
+ this.enclosingInstance = enclosingInstance;
+ }
+ private bool openReadOnly;
+ private DirectoryReader enclosingInstance;
+ public DirectoryReader Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ internal AnonymousClassFindSegmentsFile1(bool openReadOnly, DirectoryReader enclosingInstance, Lucene.Net.Store.Directory Param1):base(Param1)
+ {
+ InitBlock(openReadOnly, enclosingInstance);
+ }
+ public /*protected internal*/ override System.Object DoBody(System.String segmentFileName)
+ {
+ var infos = new SegmentInfos();
+ infos.Read(directory, segmentFileName);
+ return Enclosing_Instance.DoReopen(infos, false, openReadOnly);
+ }
+ }
+ protected internal Directory internalDirectory;
+ protected internal bool readOnly;
+
+ internal IndexWriter writer;
+
+ private IndexDeletionPolicy deletionPolicy;
+ private readonly HashSet<string> synced = new HashSet<string>();
+ private Lock writeLock;
+ private readonly SegmentInfos segmentInfos;
+ private readonly SegmentInfos segmentInfosStart;
+ private bool stale;
+ private readonly int termInfosIndexDivisor;
+
+ private bool rollbackHasChanges;
+
+ private SegmentReader[] subReaders;
+ private int[] starts; // 1st docno for each segment
+ private System.Collections.Generic.IDictionary<string, byte[]> normsCache = new HashMap<string, byte[]>();
+ private int maxDoc = 0;
+ private int numDocs = - 1;
+ private bool hasDeletions = false;
+
+ // Max version in index as of when we opened; this can be
+ // > our current segmentInfos version in case we were
+ // opened on a past IndexCommit:
+ private long maxIndexVersion;
+
+ internal static IndexReader Open(Directory directory, IndexDeletionPolicy deletionPolicy, IndexCommit commit, bool readOnly, int termInfosIndexDivisor)
+ {
+ return (IndexReader) new AnonymousClassFindSegmentsFile(readOnly, deletionPolicy, termInfosIndexDivisor, directory).Run(commit);
+ }
+
+ /// <summary>Construct reading the named set of readers. </summary>
+ internal DirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy, bool readOnly, int termInfosIndexDivisor)
+ {
+ internalDirectory = directory;
+ this.readOnly = readOnly;
+ this.segmentInfos = sis;
+ this.deletionPolicy = deletionPolicy;
+ this.termInfosIndexDivisor = termInfosIndexDivisor;
+
+ if (!readOnly)
+ {
+ // We assume that this segments_N was previously
+ // properly sync'd:
+ synced.UnionWith(sis.Files(directory, true));
+ }
+
+ // To reduce the chance of hitting FileNotFound
+ // (and having to retry), we open segments in
+ // reverse because IndexWriter merges & deletes
+ // the newest segments first.
+
+ var readers = new SegmentReader[sis.Count];
+ for (int i = sis.Count - 1; i >= 0; i--)
+ {
+ bool success = false;
+ try
+ {
+ readers[i] = SegmentReader.Get(readOnly, sis.Info(i), termInfosIndexDivisor);
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ // Close all readers we had opened:
+ for (i++; i < sis.Count; i++)
+ {
+ try
+ {
+ readers[i].Close();
+ }
+ catch (System.Exception)
+ {
+ // keep going - we want to clean up as much as possible
+ }
+ }
+ }
+ }
+ }
+
+ Initialize(readers);
+ }
+
+ // Used by near real-time search
+ internal DirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor)
+ {
+ this.internalDirectory = writer.Directory;
+ this.readOnly = true;
+ segmentInfos = infos;
+ segmentInfosStart = (SegmentInfos) infos.Clone();
+ this.termInfosIndexDivisor = termInfosIndexDivisor;
+ if (!readOnly)
+ {
+ // We assume that this segments_N was previously
+ // properly sync'd:
+ synced.UnionWith(infos.Files(internalDirectory, true));
+ }
+
+ // IndexWriter synchronizes externally before calling
+ // us, which ensures infos will not change; so there's
+ // no need to process segments in reverse order
+ int numSegments = infos.Count;
+ var readers = new SegmentReader[numSegments];
+ Directory dir = writer.Directory;
+ int upto = 0;
+
+ for (int i = 0; i < numSegments; i++)
+ {
+ bool success = false;
+ try
+ {
+ SegmentInfo info = infos.Info(i);
+ if (info.dir == dir)
+ {
+ readers[upto++] = writer.readerPool.GetReadOnlyClone(info, true, termInfosIndexDivisor);
+ }
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ // Close all readers we had opened:
+ for (upto--; upto >= 0; upto--)
+ {
+ try
+ {
+ readers[upto].Close();
+ }
+ catch (System.Exception)
+ {
+ // keep going - we want to clean up as much as possible
+ }
+ }
+ }
+ }
+ }
+
+ this.writer = writer;
+
+ if (upto < readers.Length)
+ {
+ // This means some segments were in a foreign Directory
+ var newReaders = new SegmentReader[upto];
+ Array.Copy(readers, 0, newReaders, 0, upto);
+ readers = newReaders;
+ }
+
+ Initialize(readers);
+ }
+
+ /// <summary>This constructor is only used for <see cref="Reopen()" /> </summary>
+ internal DirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts,
+ IEnumerable<KeyValuePair<string, byte[]>> oldNormsCache, bool readOnly, bool doClone, int termInfosIndexDivisor)
+ {
+ this.internalDirectory = directory;
+ this.readOnly = readOnly;
+ this.segmentInfos = infos;
+ this.termInfosIndexDivisor = termInfosIndexDivisor;
+ if (!readOnly)
+ {
+ // We assume that this segments_N was previously
+ // properly sync'd:
+ synced.UnionWith(infos.Files(directory, true));
+ }
+
+ // we put the old SegmentReaders in a map that allows us
+ // to look up a reader using its segment name
+ IDictionary<string, int> segmentReaders = new HashMap<string, int>();
+
+ if (oldReaders != null)
+ {
+ // create a Map SegmentName->SegmentReader
+ for (int i = 0; i < oldReaders.Length; i++)
+ {
+ segmentReaders[oldReaders[i].SegmentName] = i;
+ }
+ }
+
+ var newReaders = new SegmentReader[infos.Count];
+
+ // remember which readers are shared between the old and the re-opened
+ // DirectoryReader - we have to incRef those readers
+ var readerShared = new bool[infos.Count];
+
+ for (int i = infos.Count - 1; i >= 0; i--)
+ {
+ // find SegmentReader for this segment
+ if (!segmentReaders.ContainsKey(infos.Info(i).name))
+ {
+ // this is a new segment, no old SegmentReader can be reused
+ newReaders[i] = null;
+ }
+ else
+ {
+ // there is an old reader for this segment - we'll try to reopen it
+ newReaders[i] = oldReaders[segmentReaders[infos.Info(i).name]];
+ }
+
+ bool success = false;
+ try
+ {
+ SegmentReader newReader;
+ if (newReaders[i] == null || infos.Info(i).GetUseCompoundFile() != newReaders[i].SegmentInfo.GetUseCompoundFile())
+ {
+
+ // We should never see a totally new segment during cloning
+ System.Diagnostics.Debug.Assert(!doClone);
+
+ // this is a new reader; in case we hit an exception we can close it safely
+ newReader = SegmentReader.Get(readOnly, infos.Info(i), termInfosIndexDivisor);
+ }
+ else
+ {
+ newReader = newReaders[i].ReopenSegment(infos.Info(i), doClone, readOnly);
+ }
+ if (newReader == newReaders[i])
+ {
+ // this reader will be shared between the old and the new one,
+ // so we must incRef it
+ readerShared[i] = true;
+ newReader.IncRef();
+ }
+ else
+ {
+ readerShared[i] = false;
+ newReaders[i] = newReader;
+ }
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ for (i++; i < infos.Count; i++)
+ {
+ if (newReaders[i] != null)
+ {
+ try
+ {
+ if (!readerShared[i])
+ {
+ // this is a new subReader that is not used by the old one,
+ // we can close it
+ newReaders[i].Close();
+ }
+ else
+ {
+ // this subReader is also used by the old reader, so instead
+ // closing we must decRef it
+ newReaders[i].DecRef();
+ }
+ }
+ catch (System.IO.IOException)
+ {
+ // keep going - we want to clean up as much as possible
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // initialize the readers to calculate maxDoc before we try to reuse the old normsCache
+ Initialize(newReaders);
+
+ // try to copy unchanged norms from the old normsCache to the new one
+ if (oldNormsCache != null)
+ {
+ foreach(var entry in oldNormsCache)
+ {
+ String field = entry.Key;
+ if (!HasNorms(field))
+ {
+ continue;
+ }
+
+ byte[] oldBytes = entry.Value;
+
+ var bytes = new byte[MaxDoc];
+
+ for (int i = 0; i < subReaders.Length; i++)
+ {
+ int oldReaderIndex = segmentReaders[subReaders[i].SegmentName];
+
+ // this SegmentReader was not re-opened, we can copy all of its norms
+ if (segmentReaders.ContainsKey(subReaders[i].SegmentName) &&
+ (oldReaders[oldReaderIndex] == subReaders[i]
+ || oldReaders[oldReaderIndex].norms[field] == subReaders[i].norms[field]))
+ {
+ // we don't have to synchronize here: either this constructor is called from a SegmentReader,
+ // in which case no old norms cache is present, or it is called from MultiReader.reopen(),
+ // which is synchronized
+ Array.Copy(oldBytes, oldStarts[oldReaderIndex], bytes, starts[i], starts[i + 1] - starts[i]);
+ }
+ else
+ {
+ subReaders[i].Norms(field, bytes, starts[i]);
+ }
+ }
+
+ normsCache[field] = bytes; // update cache
+ }
+ }
+ }
+
+ private void Initialize(SegmentReader[] subReaders)
+ {
+ this.subReaders = subReaders;
+ starts = new int[subReaders.Length + 1]; // build starts array
+ for (int i = 0; i < subReaders.Length; i++)
+ {
+ starts[i] = maxDoc;
+ maxDoc += subReaders[i].MaxDoc; // compute maxDocs
+
+ if (subReaders[i].HasDeletions)
+ hasDeletions = true;
+ }
+ starts[subReaders.Length] = maxDoc;
+
+ if (!readOnly)
+ {
+ maxIndexVersion = SegmentInfos.ReadCurrentVersion(internalDirectory);
+ }
+ }
+
+ public override Object Clone()
+ {
+ lock (this)
+ {
+ try
+ {
+ return Clone(readOnly); // Preserve current readOnly
+ }
+ catch (Exception ex)
+ {
+ throw new SystemException(ex.Message, ex); // TODO: why rethrow this way?
+ }
+ }
+ }
+
+ public override IndexReader Clone(bool openReadOnly)
+ {
+ lock (this)
+ {
+ DirectoryReader newReader = DoReopen((SegmentInfos) segmentInfos.Clone(), true, openReadOnly);
+
+ if (this != newReader)
+ {
+ newReader.deletionPolicy = deletionPolicy;
+ }
+ newReader.writer = writer;
+ // If we're cloning a non-readOnly reader, move the
+ // writeLock (if there is one) to the new reader:
+ if (!openReadOnly && writeLock != null)
+ {
+ // In near real-time search, reader is always readonly
+ System.Diagnostics.Debug.Assert(writer == null);
+ newReader.writeLock = writeLock;
+ newReader.hasChanges = hasChanges;
+ newReader.hasDeletions = hasDeletions;
+ writeLock = null;
+ hasChanges = false;
+ }
+
+ return newReader;
+ }
+ }
+
+ public override IndexReader Reopen()
+ {
+ // Preserve current readOnly
+ return DoReopen(readOnly, null);
+ }
+
+ public override IndexReader Reopen(bool openReadOnly)
+ {
+ return DoReopen(openReadOnly, null);
+ }
+
+ public override IndexReader Reopen(IndexCommit commit)
+ {
+ return DoReopen(true, commit);
+ }
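+
+ // Reopen may hand back this same instance when nothing changed, so the usual
+ // calling pattern (a sketch) swaps and closes only on a new instance:
+ //
+ // IndexReader newReader = reader.Reopen();
+ // if (newReader != reader)
+ // {
+ // reader.Close();
+ // reader = newReader;
+ // }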
+
+ private IndexReader DoReopenFromWriter(bool openReadOnly, IndexCommit commit)
+ {
+ System.Diagnostics.Debug.Assert(readOnly);
+
+ if (!openReadOnly)
+ {
+ throw new System.ArgumentException("a reader obtained from IndexWriter.getReader() can only be reopened with openReadOnly=true (got false)");
+ }
+
+ if (commit != null)
+ {
+ throw new System.ArgumentException("a reader obtained from IndexWriter.getReader() cannot currently accept a commit");
+ }
+
+ // TODO: right now we *always* make a new reader; in
+ // the future we could have the writer make some effort to
+ // detect that no changes have occurred
+ return writer.GetReader();
+ }
+
+ internal virtual IndexReader DoReopen(bool openReadOnly, IndexCommit commit)
+ {
+ EnsureOpen();
+
+ System.Diagnostics.Debug.Assert(commit == null || openReadOnly);
+
+ // If we were obtained by writer.getReader(), re-ask the
+ // writer to get a new reader.
+ if (writer != null)
+ {
+ return DoReopenFromWriter(openReadOnly, commit);
+ }
+ else
+ {
+ return DoReopenNoWriter(openReadOnly, commit);
+ }
+ }
+
+ private IndexReader DoReopenNoWriter(bool openReadOnly, IndexCommit commit)
+ {
+ lock (this)
+ {
+ if (commit == null)
+ {
+ if (hasChanges)
+ {
+ // We have changes, which means we are not readOnly:
+ System.Diagnostics.Debug.Assert(readOnly == false);
+ // and we hold the write lock:
+ System.Diagnostics.Debug.Assert(writeLock != null);
+ // so no other writer holds the write lock, which
+ // means no changes could have been done to the index:
+ System.Diagnostics.Debug.Assert(IsCurrent());
+
+ if (openReadOnly)
+ {
+ return Clone(openReadOnly);
+ }
+ else
+ {
+ return this;
+ }
+ }
+ else if (IsCurrent())
+ {
+ if (openReadOnly != readOnly)
+ {
+ // Just fallback to clone
+ return Clone(openReadOnly);
+ }
+ else
+ {
+ return this;
+ }
+ }
+ }
+ else
+ {
+ if (internalDirectory != commit.Directory)
+ throw new System.IO.IOException("the specified commit does not match the specified Directory");
+ if (segmentInfos != null && commit.SegmentsFileName.Equals(segmentInfos.GetCurrentSegmentFileName()))
+ {
+ if (readOnly != openReadOnly)
+ {
+ // Just fallback to clone
+ return Clone(openReadOnly);
+ }
+ else
+ {
+ return this;
+ }
+ }
+ }
+
+ return (IndexReader)new AnonymousFindSegmentsFile(internalDirectory, openReadOnly, this).Run(commit);
+ }
+ }
+
+ class AnonymousFindSegmentsFile : SegmentInfos.FindSegmentsFile
+ {
+ readonly DirectoryReader enclosingInstance;
+ readonly bool openReadOnly;
+ readonly Directory dir;
+ public AnonymousFindSegmentsFile(Directory directory, bool openReadOnly, DirectoryReader dirReader) : base(directory)
+ {
+ this.dir = directory;
+ this.openReadOnly = openReadOnly;
+ enclosingInstance = dirReader;
+ }
+
+ public override object DoBody(string segmentFileName)
+ {
+ var infos = new SegmentInfos();
+ infos.Read(dir, segmentFileName);
+ return enclosingInstance.DoReopen(infos, false, openReadOnly);
+ }
+ }
+
+ private DirectoryReader DoReopen(SegmentInfos infos, bool doClone, bool openReadOnly)
+ {
+ lock (this)
+ {
+ DirectoryReader reader;
+ if (openReadOnly)
+ {
+ reader = new ReadOnlyDirectoryReader(internalDirectory, infos, subReaders, starts, normsCache, doClone, termInfosIndexDivisor);
+ }
+ else
+ {
+ reader = new DirectoryReader(internalDirectory, infos, subReaders, starts, normsCache, false, doClone, termInfosIndexDivisor);
+ }
+ return reader;
+ }
+ }
+
+
+ /// <summary>Version number when this IndexReader was opened. </summary>
+ public override long Version
+ {
+ get
+ {
+ EnsureOpen();
+ return segmentInfos.Version;
+ }
+ }
+
+ public override ITermFreqVector[] GetTermFreqVectors(int n)
+ {
+ EnsureOpen();
+ int i = ReaderIndex(n); // find segment num
+ return subReaders[i].GetTermFreqVectors(n - starts[i]); // dispatch to segment
+ }
+
+ public override ITermFreqVector GetTermFreqVector(int n, System.String field)
+ {
+ EnsureOpen();
+ int i = ReaderIndex(n); // find segment num
+ return subReaders[i].GetTermFreqVector(n - starts[i], field);
+ }
+
+
+ public override void GetTermFreqVector(int docNumber, System.String field, TermVectorMapper mapper)
+ {
+ EnsureOpen();
+ int i = ReaderIndex(docNumber); // find segment num
+ subReaders[i].GetTermFreqVector(docNumber - starts[i], field, mapper);
+ }
+
+ public override void GetTermFreqVector(int docNumber, TermVectorMapper mapper)
+ {
+ EnsureOpen();
+ int i = ReaderIndex(docNumber); // find segment num
+ subReaders[i].GetTermFreqVector(docNumber - starts[i], mapper);
+ }
+
+ /// <summary> Checks whether the index is optimized (i.e. it has a single segment and no deletions)</summary>
+ /// <returns> <c>true</c> if the index is optimized; <c>false</c> otherwise </returns>
+ public override bool IsOptimized()
+ {
+ EnsureOpen();
+ return segmentInfos.Count == 1 && !HasDeletions;
+ }
+
+ public override int NumDocs()
+ {
+ // Don't call ensureOpen() here (it could affect performance)
+ // NOTE: multiple threads may wind up init'ing
+ // numDocs... but that's harmless
+ if (numDocs == - 1)
+ {
+ // check cache
+ int n = subReaders.Sum(t => t.NumDocs()); // cache miss--recompute
+ numDocs = n;
+ }
+ return numDocs;
+ }
+
+ public override int MaxDoc
+ {
+ get
+ {
+ // Don't call ensureOpen() here (it could affect performance)
+ return maxDoc;
+ }
+ }
+
+ // inherit javadoc
+ public override Document Document(int n, FieldSelector fieldSelector)
+ {
+ EnsureOpen();
+ int i = ReaderIndex(n); // find segment num
+ return subReaders[i].Document(n - starts[i], fieldSelector); // dispatch to segment reader
+ }
+
+ public override bool IsDeleted(int n)
+ {
+ // Don't call ensureOpen() here (it could affect performance)
+ int i = ReaderIndex(n); // find segment num
+ return subReaders[i].IsDeleted(n - starts[i]); // dispatch to segment reader
+ }
+
+ public override bool HasDeletions
+ {
+ get
+ {
+ // Don't call ensureOpen() here (it could affect performance)
+ return hasDeletions;
+ }
+ }
+
+ protected internal override void DoDelete(int n)
+ {
+ numDocs = - 1; // invalidate cache
+ int i = ReaderIndex(n); // find segment num
+ subReaders[i].DeleteDocument(n - starts[i]); // dispatch to segment reader
+ hasDeletions = true;
+ }
+
+ protected internal override void DoUndeleteAll()
+ {
+ foreach (SegmentReader t in subReaders)
+ t.UndeleteAll();
+
+ hasDeletions = false;
+ numDocs = - 1; // invalidate cache
+ }
+
+ private int ReaderIndex(int n)
+ {
+ // find reader for doc n:
+ return ReaderIndex(n, this.starts, this.subReaders.Length);
+ }
+
+ internal static int ReaderIndex(int n, int[] starts, int numSubReaders)
+ {
+ // find reader for doc n:
+ int lo = 0; // search starts array
+ int hi = numSubReaders - 1; // for first element less
+
+ while (hi >= lo)
+ {
+ int mid = Number.URShift((lo + hi), 1);
+ int midValue = starts[mid];
+ if (n < midValue)
+ hi = mid - 1;
+ else if (n > midValue)
+ lo = mid + 1;
+ else
+ {
+ // found a match
+ while (mid + 1 < numSubReaders && starts[mid + 1] == midValue)
+ {
+ mid++; // scan to last match
+ }
+ return mid;
+ }
+ }
+ return hi;
+ }
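+
+ // Worked example: with starts = {0, 10, 25, 40} and three sub-readers,
+ // doc 17 probes starts[1] = 10 (go right), then starts[2] = 25 (go left),
+ // and the loop exits with hi = 1: the doc lives in subReaders[1]
+ // (global docs 10..24) at local position 17 - starts[1] = 7.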
+
+ public override bool HasNorms(System.String field)
+ {
+ EnsureOpen();
+ return subReaders.Any(t => t.HasNorms(field));
+ }
+
+ public override byte[] Norms(System.String field)
+ {
+ lock (this)
+ {
+ EnsureOpen();
+ byte[] bytes = normsCache[field];
+ if (bytes != null)
+ return bytes; // cache hit
+ if (!HasNorms(field))
+ return null;
+
+ bytes = new byte[MaxDoc];
+ for (int i = 0; i < subReaders.Length; i++)
+ subReaders[i].Norms(field, bytes, starts[i]);
+ normsCache[field] = bytes; // update cache
+ return bytes;
+ }
+ }
+
+ public override void Norms(System.String field, byte[] result, int offset)
+ {
+ lock (this)
+ {
+ EnsureOpen();
+ byte[] bytes = normsCache[field];
+ if (bytes == null && !HasNorms(field))
+ {
+ byte val = DefaultSimilarity.EncodeNorm(1.0f);
+ for (int index = offset; index < result.Length; index++)
+ result.SetValue(val, index);
+ }
+ else if (bytes != null)
+ {
+ // cache hit
+ Array.Copy(bytes, 0, result, offset, MaxDoc);
+ }
+ else
+ {
+ for (int i = 0; i < subReaders.Length; i++)
+ {
+ // read from segments
+ subReaders[i].Norms(field, result, offset + starts[i]);
+ }
+ }
+ }
+ }
+
+ protected internal override void DoSetNorm(int n, System.String field, byte value_Renamed)
+ {
+ lock (normsCache)
+ {
+ normsCache.Remove(field); // clear cache
+ }
+ int i = ReaderIndex(n); // find segment num
+ subReaders[i].SetNorm(n - starts[i], field, value_Renamed); // dispatch
+ }
+
+ public override TermEnum Terms()
+ {
+ EnsureOpen();
+ return new MultiTermEnum(this, subReaders, starts, null);
+ }
+
+ public override TermEnum Terms(Term term)
+ {
+ EnsureOpen();
+ return new MultiTermEnum(this, subReaders, starts, term);
+ }
+
+ public override int DocFreq(Term t)
+ {
+ EnsureOpen();
+ int total = 0; // sum freqs in segments
+ for (int i = 0; i < subReaders.Length; i++)
+ total += subReaders[i].DocFreq(t);
+ return total;
+ }
+
+ public override TermDocs TermDocs()
+ {
+ EnsureOpen();
+ return new MultiTermDocs(this, subReaders, starts);
+ }
+
+ public override TermPositions TermPositions()
+ {
+ EnsureOpen();
+ return new MultiTermPositions(this, subReaders, starts);
+ }
+
+ /// <summary> Tries to acquire the WriteLock on this directory. This method is only valid if this IndexReader is the directory
+ /// owner.
+ ///
+ /// </summary>
+ /// <throws> StaleReaderException if the index has changed since this reader was opened </throws>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> Lucene.Net.Store.LockObtainFailedException if another writer has this index open
+ /// (<c>write.lock</c> could not be obtained)
+ /// </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
+ protected internal override void AcquireWriteLock()
+ {
+
+ if (readOnly)
+ {
+ // NOTE: we should not reach this code w/ the core
+ // IndexReader classes; however, an external subclass
+ // of IndexReader could reach this.
+ ReadOnlySegmentReader.NoWrite();
+ }
+
+ if (segmentInfos != null)
+ {
+ EnsureOpen();
+ if (stale)
+ throw new StaleReaderException("IndexReader out of date and no longer valid for delete, undelete, or setNorm operations");
+
+ if (this.writeLock == null)
+ {
+ Lock writeLock = internalDirectory.MakeLock(IndexWriter.WRITE_LOCK_NAME);
+ if (!writeLock.Obtain(IndexWriter.WRITE_LOCK_TIMEOUT))
+ // obtain write lock
+ {
+ throw new LockObtainFailedException("Index locked for write: " + writeLock);
+ }
+ this.writeLock = writeLock;
+
+ // we have to check whether the index has changed since this reader was opened;
+ // if so, this reader is no longer valid for
+ // deletion
+ if (SegmentInfos.ReadCurrentVersion(internalDirectory) > maxIndexVersion)
+ {
+ stale = true;
+ this.writeLock.Release();
+ this.writeLock = null;
+ throw new StaleReaderException("IndexReader out of date and no longer valid for delete, undelete, or setNorm operations");
+ }
+ }
+ }
+ }
+
+ /// <summary> Commit changes resulting from delete, undeleteAll, or setNorm operations
+ /// <p/>
+ /// If an exception is hit, then either no changes or all changes will have been committed to the index (transactional
+ /// semantics).
+ ///
+ /// </summary>
+ /// <throws> IOException if there is a low-level IO error </throws>
+ protected internal override void DoCommit(IDictionary<string, string> commitUserData)
+ {
+ if (hasChanges)
+ {
+ segmentInfos.UserData = commitUserData;
+ // Default deleter (for backwards compatibility) is
+ // KeepOnlyLastCommitDeleter:
+ var deleter = new IndexFileDeleter(internalDirectory, deletionPolicy ?? new KeepOnlyLastCommitDeletionPolicy(), segmentInfos, null, null, synced);
+
+ segmentInfos.UpdateGeneration(deleter.LastSegmentInfos);
+
+ // Checkpoint the state we are about to change, in
+ // case we have to roll back:
+ StartCommit();
+
+ bool success = false;
+ try
+ {
+ foreach (SegmentReader t in subReaders)
+ t.Commit();
+
+ // Sync all files we just wrote
+ foreach(string fileName in segmentInfos.Files(internalDirectory, false))
+ {
+ if(!synced.Contains(fileName))
+ {
+ System.Diagnostics.Debug.Assert(internalDirectory.FileExists(fileName));
+ internalDirectory.Sync(fileName);
+ synced.Add(fileName);
+ }
+ }
+
+ segmentInfos.Commit(internalDirectory);
+ success = true;
+ }
+ finally
+ {
+
+ if (!success)
+ {
+
+ // Rollback changes that were made to
+ // SegmentInfos but failed to get [fully]
+ // committed. This way this reader instance
+ // remains consistent (matched to what's
+ // actually in the index):
+ RollbackCommit();
+
+ // Recompute deletable files & remove them (so
+ // partially written .del files, etc, are
+ // removed):
+ deleter.Refresh();
+ }
+ }
+
+ // Have the deleter remove any now unreferenced
+ // files due to this commit:
+ deleter.Checkpoint(segmentInfos, true);
+ deleter.Dispose();
+
+ maxIndexVersion = segmentInfos.Version;
+
+ if (writeLock != null)
+ {
+ writeLock.Release(); // release write lock
+ writeLock = null;
+ }
+ }
+ hasChanges = false;
+ }
+
+ internal virtual void StartCommit()
+ {
+ rollbackHasChanges = hasChanges;
+ foreach (SegmentReader t in subReaders)
+ {
+ t.StartCommit();
+ }
+ }
+
+ internal virtual void RollbackCommit()
+ {
+ hasChanges = rollbackHasChanges;
+ foreach (SegmentReader t in subReaders)
+ {
+ t.RollbackCommit();
+ }
+ }
+
+ public override IDictionary<string, string> CommitUserData
+ {
+ get
+ {
+ EnsureOpen();
+ return segmentInfos.UserData;
+ }
+ }
+
+ public override bool IsCurrent()
+ {
+ EnsureOpen();
+ if (writer == null || writer.IsClosed())
+ {
+ // we loaded SegmentInfos from the directory
+ return SegmentInfos.ReadCurrentVersion(internalDirectory) == segmentInfos.Version;
+ }
+ else
+ {
+ return writer.NrtIsCurrent(segmentInfosStart);
+ }
+ }
+
+ protected internal override void DoClose()
+ {
+ lock (this)
+ {
+ System.IO.IOException ioe = null;
+ normsCache = null;
+ foreach (SegmentReader t in subReaders)
+ {
+ // try to close each reader, even if an exception is thrown
+ try
+ {
+ t.DecRef();
+ }
+ catch (System.IO.IOException e)
+ {
+ if (ioe == null)
+ ioe = e;
+ }
+ }
+
+ // NOTE: only needed in case someone had asked for
+ // FieldCache for top-level reader (which is generally
+ // not a good idea):
+ Search.FieldCache_Fields.DEFAULT.Purge(this);
+
+ // throw the first exception
+ if (ioe != null)
+ throw ioe;
+ }
+ }
+
+ public override ICollection<string> GetFieldNames(IndexReader.FieldOption fieldNames)
+ {
+ EnsureOpen();
+ return GetFieldNames(fieldNames, this.subReaders);
+ }
+
+ internal static ICollection<string> GetFieldNames(IndexReader.FieldOption fieldNames, IndexReader[] subReaders)
+ {
+ // maintain a unique set of field names
+ ISet<string> fieldSet = Support.Compatibility.SetFactory.CreateHashSet<string>();
+ foreach (IndexReader reader in subReaders)
+ {
+ fieldSet.UnionWith(reader.GetFieldNames(fieldNames));
+ }
+ return fieldSet;
+ }
+
+ public override IndexReader[] GetSequentialSubReaders()
+ {
+ return subReaders;
+ }
+
+ /// <summary>Returns the directory this index resides in. </summary>
+ public override Directory Directory()
+ {
+ // Don't ensureOpen here -- in certain cases, when a
+ // cloned/reopened reader needs to commit, it may call
+ // this method on the closed original reader
+ return internalDirectory;
+ }
+
+ public override int TermInfosIndexDivisor
+ {
+ get { return termInfosIndexDivisor; }
+ }
+
+ /// <summary> Expert: return the IndexCommit that this reader has opened.
+ /// <p/>
+ /// <p/><b>WARNING</b>: this API is new and experimental and may suddenly change.<p/>
+ /// </summary>
+ public override IndexCommit IndexCommit
+ {
+ get { return new ReaderCommit(segmentInfos, internalDirectory); }
+ }
+
+ /// <seealso cref="Lucene.Net.Index.IndexReader.ListCommits">
+ /// </seealso>
+ public static new ICollection<IndexCommit> ListCommits(Directory dir)
+ {
+ String[] files = dir.ListAll();
+
+ ICollection<IndexCommit> commits = new List<IndexCommit>();
+
+ var latest = new SegmentInfos();
+ latest.Read(dir);
+ long currentGen = latest.Generation;
+
+ commits.Add(new ReaderCommit(latest, dir));
+
+ foreach (string fileName in files)
+ {
+ if (fileName.StartsWith(IndexFileNames.SEGMENTS) && !fileName.Equals(IndexFileNames.SEGMENTS_GEN) && SegmentInfos.GenerationFromSegmentsFileName(fileName) < currentGen)
+ {
+
+ var sis = new SegmentInfos();
+ try
+ {
+ // IOException allowed to throw there, in case
+ // segments_N is corrupt
+ sis.Read(dir, fileName);
+ }
+ catch (System.IO.FileNotFoundException)
+ {
+ // LUCENE-948: on NFS (and maybe others), if
+ // you have writers switching back and forth
+ // between machines, it's very likely that the
+ // dir listing will be stale and will claim a
+ // file segments_X exists when in fact it
+ // doesn't. So, we catch this and handle it
+ // as if the file does not exist
+ sis = null;
+ }
+
+ if (sis != null)
+ commits.Add(new ReaderCommit(sis, dir));
+ }
+ }
+
+ return commits;
+ }
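+
+ // Usage sketch: enumerate the commit points still present in a Directory,
+ // e.g. to open an older one (a custom IndexDeletionPolicy must have kept it):
+ //
+ // foreach (IndexCommit commit in DirectoryReader.ListCommits(dir))
+ // System.Console.WriteLine(commit.SegmentsFileName + " gen=" + commit.Generation);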
+
+ private sealed class ReaderCommit:IndexCommit
+ {
+ private readonly String segmentsFileName;
+ private readonly ICollection<string> files;
+ private readonly Directory dir;
+ private readonly long generation;
+ private readonly long version;
+ private readonly bool isOptimized;
+ private readonly IDictionary<string, string> userData;
+
+ internal ReaderCommit(SegmentInfos infos, Directory dir)
+ {
+ segmentsFileName = infos.GetCurrentSegmentFileName();
+ this.dir = dir;
+ userData = infos.UserData;
+ files = infos.Files(dir, true);
+ version = infos.Version;
+ generation = infos.Generation;
+ isOptimized = infos.Count == 1 && !infos.Info(0).HasDeletions();
+ }
+ public override string ToString()
+ {
+ return "DirectoryReader.ReaderCommit(" + segmentsFileName + ")";
+ }
+
+ public override bool IsOptimized
+ {
+ get { return isOptimized; }
+ }
+
+ public override string SegmentsFileName
+ {
+ get { return segmentsFileName; }
+ }
+
+ public override ICollection<string> FileNames
+ {
+ get { return files; }
+ }
+
+ public override Directory Directory
+ {
+ get { return dir; }
+ }
+
+ public override long Version
+ {
+ get { return version; }
+ }
+
+ public override long Generation
+ {
+ get { return generation; }
+ }
+
+ public override bool IsDeleted
+ {
+ get { return false; }
+ }
+
+ public override IDictionary<string, string> UserData
+ {
+ get { return userData; }
+ }
+
+ public override void Delete()
+ {
+ throw new System.NotSupportedException("This IndexCommit does not support deletions");
+ }
+ }
+
+ internal class MultiTermEnum:TermEnum
+ {
+ internal IndexReader topReader; // used for matching TermEnum to TermDocs
+ private readonly SegmentMergeQueue queue;
+
+ private Term term;
+ private int docFreq;
+ internal SegmentMergeInfo[] matchingSegments; // null terminated array of matching segments
+
+ public MultiTermEnum(IndexReader topReader, IndexReader[] readers, int[] starts, Term t)
+ {
+ this.topReader = topReader;
+ queue = new SegmentMergeQueue(readers.Length);
+ matchingSegments = new SegmentMergeInfo[readers.Length + 1];
+ for (int i = 0; i < readers.Length; i++)
+ {
+ IndexReader reader = readers[i];
+
+ TermEnum termEnum = t != null ? reader.Terms(t) : reader.Terms();
+
+ var smi = new SegmentMergeInfo(starts[i], termEnum, reader) {ord = i};
+ if ((t == null) ? smi.Next() : termEnum.Term != null)
+ queue.Add(smi); // initialize queue
+ else
+ smi.Dispose();
+ }
+
+ if (t != null && queue.Size() > 0)
+ {
+ Next();
+ }
+ }
+
+ public override bool Next()
+ {
+ foreach (SegmentMergeInfo smi in matchingSegments)
+ {
+ if (smi == null)
+ break;
+ if (smi.Next())
+ queue.Add(smi);
+ else
+ smi.Dispose(); // done with segment
+ }
+
+ int numMatchingSegments = 0;
+ matchingSegments[0] = null;
+
+ SegmentMergeInfo top = queue.Top();
+
+ if (top == null)
+ {
+ term = null;
+ return false;
+ }
+
+ term = top.term;
+ docFreq = 0;
+
+ while (top != null && term.CompareTo(top.term) == 0)
+ {
+ matchingSegments[numMatchingSegments++] = top;
+ queue.Pop();
+ docFreq += top.termEnum.DocFreq(); // increment freq
+ top = queue.Top();
+ }
+
+ matchingSegments[numMatchingSegments] = null;
+ return true;
+ }
+
+ public override Term Term
+ {
+ get { return term; }
+ }
+
+ public override int DocFreq()
+ {
+ return docFreq;
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ if (disposing)
+ {
+ queue.Dispose();
+ }
+ }
+ }
+
+ internal class MultiTermDocs : TermDocs
+ {
+ internal IndexReader topReader; // used for matching TermEnum to TermDocs
+ protected internal IndexReader[] readers;
+ protected internal int[] starts;
+ protected internal Term term;
+
+ protected internal int base_Renamed = 0;
+ protected internal int pointer = 0;
+
+ private readonly TermDocs[] readerTermDocs;
+ protected internal TermDocs current; // == readerTermDocs[pointer]
+
+ private MultiTermEnum tenum; // the term enum used for seeking... can be null
+ internal int matchingSegmentPos; // position into the matching segments from tenum
+ internal SegmentMergeInfo smi; // current segment merge info... can be null
+
+ public MultiTermDocs(IndexReader topReader, IndexReader[] r, int[] s)
+ {
+ this.topReader = topReader;
+ readers = r;
+ starts = s;
+
+ readerTermDocs = new TermDocs[r.Length];
+ }
+
+ public virtual int Doc
+ {
+ get { return base_Renamed + current.Doc; }
+ }
+
+ public virtual int Freq
+ {
+ get { return current.Freq; }
+ }
+
+ public virtual void Seek(Term term)
+ {
+ this.term = term;
+ this.base_Renamed = 0;
+ this.pointer = 0;
+ this.current = null;
+ this.tenum = null;
+ this.smi = null;
+ this.matchingSegmentPos = 0;
+ }
+
+ public virtual void Seek(TermEnum termEnum)
+ {
+ Seek(termEnum.Term);
+ var multiTermEnum = termEnum as MultiTermEnum;
+ if (multiTermEnum != null)
+ {
+ tenum = multiTermEnum;
+ if (topReader != tenum.topReader)
+ tenum = null;
+ }
+ }
+
+ public virtual bool Next()
+ {
+ for (; ; )
+ {
+ if (current != null && current.Next())
+ {
+ return true;
+ }
+ else if (pointer < readers.Length)
+ {
+ if (tenum != null)
+ {
+ smi = tenum.matchingSegments[matchingSegmentPos++];
+ if (smi == null)
+ {
+ pointer = readers.Length;
+ return false;
+ }
+ pointer = smi.ord;
+ }
+ base_Renamed = starts[pointer];
+ current = TermDocs(pointer++);
+ }
+ else
+ {
+ return false;
+ }
+ }
+ }
+
+ /// <summary>Optimized implementation. </summary>
+ public virtual int Read(int[] docs, int[] freqs)
+ {
+ while (true)
+ {
+ while (current == null)
+ {
+ if (pointer < readers.Length)
+ {
+ // try next segment
+ if (tenum != null)
+ {
+ smi = tenum.matchingSegments[matchingSegmentPos++];
+ if (smi == null)
+ {
+ pointer = readers.Length;
+ return 0;
+ }
+ pointer = smi.ord;
+ }
+ base_Renamed = starts[pointer];
+ current = TermDocs(pointer++);
+ }
+ else
+ {
+ return 0;
+ }
+ }
+ int end = current.Read(docs, freqs);
+ if (end == 0)
+ {
+ // none left in segment
+ current = null;
+ }
+ else
+ {
+ // got some
+ int b = base_Renamed; // adjust doc numbers
+ for (int i = 0; i < end; i++)
+ docs[i] += b;
+ return end;
+ }
+ }
+ }
+
+ /* A possible future optimization could skip entire segments */
+ public virtual bool SkipTo(int target)
+ {
+ for (; ; )
+ {
+ if (current != null && current.SkipTo(target - base_Renamed))
+ {
+ return true;
+ }
+ else if (pointer < readers.Length)
+ {
+ if (tenum != null)
+ {
+ SegmentMergeInfo smi = tenum.matchingSegments[matchingSegmentPos++];
+ if (smi == null)
+ {
+ pointer = readers.Length;
+ return false;
+ }
+ pointer = smi.ord;
+ }
+ base_Renamed = starts[pointer];
+ current = TermDocs(pointer++);
+ }
+ else
+ return false;
+ }
+ }
+
+ private TermDocs TermDocs(int i)
+ {
+ TermDocs result = readerTermDocs[i] ?? (readerTermDocs[i] = TermDocs(readers[i]));
+ if (smi != null)
+ {
+ System.Diagnostics.Debug.Assert((smi.ord == i));
+ System.Diagnostics.Debug.Assert((smi.termEnum.Term.Equals(term)));
+ result.Seek(smi.termEnum);
+ }
+ else
+ {
+ result.Seek(term);
+ }
+ return result;
+ }
+
+ protected internal virtual TermDocs TermDocs(IndexReader reader)
+ {
+ return term == null ? reader.TermDocs(null):reader.TermDocs();
+ }
+
+ public virtual void Close()
+ {
+ Dispose();
+ }
+
+ public virtual void Dispose()
+ {
+ Dispose(true);
+ }
+
+ protected virtual void Dispose(bool disposing)
+ {
+ if (disposing)
+ {
+ foreach (TermDocs t in readerTermDocs)
+ {
+ if (t != null)
+ t.Close();
+ }
+ }
+ }
+ }
+
+ internal class MultiTermPositions:MultiTermDocs, TermPositions
+ {
+ public MultiTermPositions(IndexReader topReader, IndexReader[] r, int[] s):base(topReader, r, s)
+ {
+ }
+
+ protected internal override TermDocs TermDocs(IndexReader reader)
+ {
+ return reader.TermPositions();
+ }
+
+ public virtual int NextPosition()
+ {
+ return ((TermPositions) current).NextPosition();
+ }
+
+ public virtual int PayloadLength
+ {
+ get { return ((TermPositions) current).PayloadLength; }
+ }
+
+ public virtual byte[] GetPayload(byte[] data, int offset)
+ {
+ return ((TermPositions) current).GetPayload(data, offset);
+ }
+
+
+ // TODO: Remove warning after API has been finalized
+
+ public virtual bool IsPayloadAvailable
+ {
+ get { return ((TermPositions) current).IsPayloadAvailable; }
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/DocConsumer.cs b/src/core/Index/DocConsumer.cs
new file mode 100644
index 0000000..238e38c
--- /dev/null
+++ b/src/core/Index/DocConsumer.cs
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ abstract class DocConsumer
+ {
+ public abstract DocConsumerPerThread AddThread(DocumentsWriterThreadState perThread);
+ public abstract void Flush(System.Collections.Generic.ICollection<DocConsumerPerThread> threads, SegmentWriteState state);
+ public abstract void CloseDocStore(SegmentWriteState state);
+ public abstract void Abort();
+ public abstract bool FreeRAM();
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/DocConsumerPerThread.cs b/src/core/Index/DocConsumerPerThread.cs
new file mode 100644
index 0000000..7c7ed02
--- /dev/null
+++ b/src/core/Index/DocConsumerPerThread.cs
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ abstract class DocConsumerPerThread
+ {
+
+ /// <summary>Process the document. If there is
+ /// something for this document to be done in docID order,
+ /// you should encapsulate that as a
+ /// DocumentsWriter.DocWriter and return it.
+ /// DocumentsWriter then calls finish() on this object
+ /// when its turn comes.
+ /// </summary>
+ public abstract DocumentsWriter.DocWriter ProcessDocument();
+
+ public abstract void Abort();
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/DocFieldConsumer.cs b/src/core/Index/DocFieldConsumer.cs
new file mode 100644
index 0000000..7fc59da
--- /dev/null
+++ b/src/core/Index/DocFieldConsumer.cs
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Index
+{
+
+ abstract class DocFieldConsumer
+ {
+
+ internal FieldInfos fieldInfos;
+
+ /// <summary>Called when DocumentsWriter decides to create a new
+ /// segment
+ /// </summary>
+ public abstract void Flush(IDictionary<DocFieldConsumerPerThread, ICollection<DocFieldConsumerPerField>> threadsAndFields, SegmentWriteState state);
+
+ /// <summary>Called when DocumentsWriter decides to close the doc
+ /// stores
+ /// </summary>
+ public abstract void CloseDocStore(SegmentWriteState state);
+
+ /// <summary>Called when an aborting exception is hit </summary>
+ public abstract void Abort();
+
+ /// <summary>Add a new thread </summary>
+ public abstract DocFieldConsumerPerThread AddThread(DocFieldProcessorPerThread docFieldProcessorPerThread);
+
+ /// <summary>Called when DocumentsWriter is using too much RAM.
+ /// The consumer should free RAM, if possible, returning
+ /// true if any RAM was in fact freed.
+ /// </summary>
+ public abstract bool FreeRAM();
+
+ internal virtual void SetFieldInfos(FieldInfos fieldInfos)
+ {
+ this.fieldInfos = fieldInfos;
+ }
+ }
+} \ No newline at end of file
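DocFieldConsumer sits in the middle of a three-level decomposition that recurs throughout this indexing chain: one shared consumer, one object per indexing thread (AddThread), and one object per field under that thread. Below is a deliberately simplified, hypothetical analogue in plain C# — the Counting* names are illustrative, not Lucene.Net types — showing why the split pays off: the per-field leaves do the hot-path work without any locking because each one is owned by a single thread, and only registration and the final flush synchronize.

    using System;
    using System.Collections.Generic;

    // Shared across all threads; hands out per-thread objects and merges results.
    abstract class Consumer
    {
        public abstract ConsumerPerThread AddThread();
        public abstract void Flush();
    }

    // One instance per indexing thread; hands out per-field objects.
    abstract class ConsumerPerThread
    {
        public abstract ConsumerPerField AddField(string fieldName);
    }

    // One instance per (thread, field); this is where the actual work happens.
    abstract class ConsumerPerField
    {
        public abstract void Process(string value);
    }

    // Trivial concrete chain that just counts how many values each field saw.
    sealed class CountingConsumer : Consumer
    {
        private readonly List<CountingPerField> fields = new List<CountingPerField>();

        internal void Register(CountingPerField field)
        {
            lock (fields) { fields.Add(field); }
        }

        public override ConsumerPerThread AddThread()
        {
            return new CountingPerThread(this);
        }

        public override void Flush()
        {
            lock (fields)
            {
                foreach (CountingPerField f in fields)
                    Console.WriteLine(f.Name + ": " + f.Count);
            }
        }
    }

    sealed class CountingPerThread : ConsumerPerThread
    {
        private readonly CountingConsumer parent;
        public CountingPerThread(CountingConsumer parent) { this.parent = parent; }

        public override ConsumerPerField AddField(string fieldName)
        {
            CountingPerField field = new CountingPerField(fieldName);
            parent.Register(field);
            return field;
        }
    }

    sealed class CountingPerField : ConsumerPerField
    {
        public readonly string Name;
        public int Count;
        public CountingPerField(string name) { Name = name; }

        // No locking needed: each instance is owned by exactly one thread.
        public override void Process(string value) { Count++; }
    }

The real chain passes the per-thread-to-per-field mapping into Flush explicitly (the threadsAndFields dictionary in the signatures above) instead of registering fields centrally, but the ownership idea is the same.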
diff --git a/src/core/Index/DocFieldConsumerPerField.cs b/src/core/Index/DocFieldConsumerPerField.cs
new file mode 100644
index 0000000..27636e2
--- /dev/null
+++ b/src/core/Index/DocFieldConsumerPerField.cs
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Documents;
+
+namespace Lucene.Net.Index
+{
+
+ abstract class DocFieldConsumerPerField
+ {
+ /// <summary>Processes all occurrences of a single field </summary>
+ public abstract void ProcessFields(IFieldable[] fields, int count);
+ public abstract void Abort();
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/DocFieldConsumerPerThread.cs b/src/core/Index/DocFieldConsumerPerThread.cs
new file mode 100644
index 0000000..8f533ac
--- /dev/null
+++ b/src/core/Index/DocFieldConsumerPerThread.cs
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ abstract class DocFieldConsumerPerThread
+ {
+ public abstract void StartDocument();
+ public abstract DocumentsWriter.DocWriter FinishDocument();
+ public abstract DocFieldConsumerPerField AddField(FieldInfo fi);
+ public abstract void Abort();
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/DocFieldConsumers.cs b/src/core/Index/DocFieldConsumers.cs
new file mode 100644
index 0000000..61b9b1d
--- /dev/null
+++ b/src/core/Index/DocFieldConsumers.cs
@@ -0,0 +1,221 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Support;
+using ArrayUtil = Lucene.Net.Util.ArrayUtil;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary>This is just a "splitter" class: it lets you wrap two
+ /// DocFieldConsumer instances as a single consumer.
+ /// </summary>
+
+ sealed class DocFieldConsumers : DocFieldConsumer
+ {
+ private void InitBlock()
+ {
+ docFreeList = new PerDoc[1];
+ }
+ internal DocFieldConsumer one;
+ internal DocFieldConsumer two;
+
+ public DocFieldConsumers(DocFieldConsumer one, DocFieldConsumer two)
+ {
+ InitBlock();
+ this.one = one;
+ this.two = two;
+ }
+
+ internal override void SetFieldInfos(FieldInfos fieldInfos)
+ {
+ base.SetFieldInfos(fieldInfos);
+ one.SetFieldInfos(fieldInfos);
+ two.SetFieldInfos(fieldInfos);
+ }
+
+ public override void Flush(IDictionary<DocFieldConsumerPerThread, ICollection<DocFieldConsumerPerField>> threadsAndFields, SegmentWriteState state)
+ {
+
+ var oneThreadsAndFields = new HashMap<DocFieldConsumerPerThread, ICollection<DocFieldConsumerPerField>>();
+ var twoThreadsAndFields = new HashMap<DocFieldConsumerPerThread, ICollection<DocFieldConsumerPerField>>();
+
+ foreach(var entry in threadsAndFields)
+ {
+ DocFieldConsumersPerThread perThread = (DocFieldConsumersPerThread) entry.Key;
+ ICollection<DocFieldConsumerPerField> fields = entry.Value;
+
+ IEnumerator<DocFieldConsumerPerField> fieldsIt = fields.GetEnumerator();
+ ICollection<DocFieldConsumerPerField> oneFields = new HashSet<DocFieldConsumerPerField>();
+ ICollection<DocFieldConsumerPerField> twoFields = new HashSet<DocFieldConsumerPerField>();
+ while (fieldsIt.MoveNext())
+ {
+ DocFieldConsumersPerField perField = (DocFieldConsumersPerField) fieldsIt.Current;
+ oneFields.Add(perField.one);
+ twoFields.Add(perField.two);
+ }
+
+ oneThreadsAndFields[perThread.one] = oneFields;
+ twoThreadsAndFields[perThread.two] = twoFields;
+ }
+
+
+ one.Flush(oneThreadsAndFields, state);
+ two.Flush(twoThreadsAndFields, state);
+ }
+
+ public override void CloseDocStore(SegmentWriteState state)
+ {
+ try
+ {
+ one.CloseDocStore(state);
+ }
+ finally
+ {
+ two.CloseDocStore(state);
+ }
+ }
+
+ public override void Abort()
+ {
+ try
+ {
+ one.Abort();
+ }
+ finally
+ {
+ two.Abort();
+ }
+ }
+
+ public override bool FreeRAM()
+ {
+ bool any = one.FreeRAM();
+ any |= two.FreeRAM();
+ return any;
+ }
+
+ public override DocFieldConsumerPerThread AddThread(DocFieldProcessorPerThread docFieldProcessorPerThread)
+ {
+ return new DocFieldConsumersPerThread(docFieldProcessorPerThread, this, one.AddThread(docFieldProcessorPerThread), two.AddThread(docFieldProcessorPerThread));
+ }
+
+ internal PerDoc[] docFreeList;
+ internal int freeCount;
+ internal int allocCount;
+
+ internal PerDoc GetPerDoc()
+ {
+ lock (this)
+ {
+ if (freeCount == 0)
+ {
+ allocCount++;
+ if (allocCount > docFreeList.Length)
+ {
+ // Grow our free list up front to make sure we have
+ // enough space to recycle all outstanding PerDoc
+ // instances
+ System.Diagnostics.Debug.Assert(allocCount == 1 + docFreeList.Length);
+ docFreeList = new PerDoc[ArrayUtil.GetNextSize(allocCount)];
+ }
+ return new PerDoc(this);
+ }
+ else
+ return docFreeList[--freeCount];
+ }
+ }
+
+ internal void FreePerDoc(PerDoc perDoc)
+ {
+ lock (this)
+ {
+ System.Diagnostics.Debug.Assert(freeCount < docFreeList.Length);
+ docFreeList[freeCount++] = perDoc;
+ }
+ }
+
+ internal class PerDoc:DocumentsWriter.DocWriter
+ {
+ public PerDoc(DocFieldConsumers enclosingInstance)
+ {
+ InitBlock(enclosingInstance);
+ }
+ private void InitBlock(DocFieldConsumers enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private DocFieldConsumers enclosingInstance;
+ public DocFieldConsumers Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+
+ internal DocumentsWriter.DocWriter one;
+ internal DocumentsWriter.DocWriter two;
+
+ public override long SizeInBytes()
+ {
+ return one.SizeInBytes() + two.SizeInBytes();
+ }
+
+ public override void Finish()
+ {
+ try
+ {
+ try
+ {
+ one.Finish();
+ }
+ finally
+ {
+ two.Finish();
+ }
+ }
+ finally
+ {
+ Enclosing_Instance.FreePerDoc(this);
+ }
+ }
+
+ public override void Abort()
+ {
+ try
+ {
+ try
+ {
+ one.Abort();
+ }
+ finally
+ {
+ two.Abort();
+ }
+ }
+ finally
+ {
+ Enclosing_Instance.FreePerDoc(this);
+ }
+ }
+ }
+ }
+} \ No newline at end of file
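DocFieldConsumers is essentially a tee: every operation fans out to the two wrapped consumers, and the try/finally pairs guarantee the second consumer is still invoked when the first one throws, while the original exception keeps propagating. A minimal, hypothetical sketch of that fan-out pattern with plain delegates (not Lucene.Net types):

    using System;

    sealed class Tee
    {
        private readonly Action one;
        private readonly Action two;

        public Tee(Action one, Action two)
        {
            this.one = one;
            this.two = two;
        }

        // Invoke both targets; the finally block runs "two" even if "one" throws,
        // and the exception from "one" still propagates to the caller.
        public void Invoke()
        {
            try
            {
                one();
            }
            finally
            {
                two();
            }
        }
    }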
diff --git a/src/core/Index/DocFieldConsumersPerField.cs b/src/core/Index/DocFieldConsumersPerField.cs
new file mode 100644
index 0000000..71e96e0
--- /dev/null
+++ b/src/core/Index/DocFieldConsumersPerField.cs
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Documents;
+
+namespace Lucene.Net.Index
+{
+
+ sealed class DocFieldConsumersPerField:DocFieldConsumerPerField
+ {
+
+ internal DocFieldConsumerPerField one;
+ internal DocFieldConsumerPerField two;
+ internal DocFieldConsumersPerThread perThread;
+
+ public DocFieldConsumersPerField(DocFieldConsumersPerThread perThread, DocFieldConsumerPerField one, DocFieldConsumerPerField two)
+ {
+ this.perThread = perThread;
+ this.one = one;
+ this.two = two;
+ }
+
+ public override void ProcessFields(IFieldable[] fields, int count)
+ {
+ one.ProcessFields(fields, count);
+ two.ProcessFields(fields, count);
+ }
+
+ public override void Abort()
+ {
+ try
+ {
+ one.Abort();
+ }
+ finally
+ {
+ two.Abort();
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/DocFieldConsumersPerThread.cs b/src/core/Index/DocFieldConsumersPerThread.cs
new file mode 100644
index 0000000..7098966
--- /dev/null
+++ b/src/core/Index/DocFieldConsumersPerThread.cs
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ sealed class DocFieldConsumersPerThread:DocFieldConsumerPerThread
+ {
+
+ internal DocFieldConsumerPerThread one;
+ internal DocFieldConsumerPerThread two;
+ internal DocFieldConsumers parent;
+ internal DocumentsWriter.DocState docState;
+
+ public DocFieldConsumersPerThread(DocFieldProcessorPerThread docFieldProcessorPerThread, DocFieldConsumers parent, DocFieldConsumerPerThread one, DocFieldConsumerPerThread two)
+ {
+ this.parent = parent;
+ this.one = one;
+ this.two = two;
+ docState = docFieldProcessorPerThread.docState;
+ }
+
+ public override void StartDocument()
+ {
+ one.StartDocument();
+ two.StartDocument();
+ }
+
+ public override void Abort()
+ {
+ try
+ {
+ one.Abort();
+ }
+ finally
+ {
+ two.Abort();
+ }
+ }
+
+ public override DocumentsWriter.DocWriter FinishDocument()
+ {
+ DocumentsWriter.DocWriter oneDoc = one.FinishDocument();
+ DocumentsWriter.DocWriter twoDoc = two.FinishDocument();
+ if (oneDoc == null)
+ return twoDoc;
+ else if (twoDoc == null)
+ return oneDoc;
+ else
+ {
+ DocFieldConsumers.PerDoc both = parent.GetPerDoc();
+ both.docID = docState.docID;
+ System.Diagnostics.Debug.Assert(oneDoc.docID == docState.docID);
+ System.Diagnostics.Debug.Assert(twoDoc.docID == docState.docID);
+ both.one = oneDoc;
+ both.two = twoDoc;
+ return both;
+ }
+ }
+
+ public override DocFieldConsumerPerField AddField(FieldInfo fi)
+ {
+ return new DocFieldConsumersPerField(this, one.AddField(fi), two.AddField(fi));
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/DocFieldProcessor.cs b/src/core/Index/DocFieldProcessor.cs
new file mode 100644
index 0000000..4289118
--- /dev/null
+++ b/src/core/Index/DocFieldProcessor.cs
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> This is a DocConsumer that gathers all fields under the
+ /// same name, and calls per-field consumers to process field
+ /// by field. This class doesn't do any "real" work
+ /// of its own: it just forwards the fields to a
+ /// DocFieldConsumer.
+ /// </summary>
+
+ sealed class DocFieldProcessor : DocConsumer
+ {
+
+ internal DocumentsWriter docWriter;
+ internal FieldInfos fieldInfos = new FieldInfos();
+ internal DocFieldConsumer consumer;
+ internal StoredFieldsWriter fieldsWriter;
+
+ public DocFieldProcessor(DocumentsWriter docWriter, DocFieldConsumer consumer)
+ {
+ this.docWriter = docWriter;
+ this.consumer = consumer;
+ consumer.SetFieldInfos(fieldInfos);
+ fieldsWriter = new StoredFieldsWriter(docWriter, fieldInfos);
+ }
+
+ public override void CloseDocStore(SegmentWriteState state)
+ {
+ consumer.CloseDocStore(state);
+ fieldsWriter.CloseDocStore(state);
+ }
+
+ public override void Flush(ICollection<DocConsumerPerThread> threads, SegmentWriteState state)
+ {
+ var childThreadsAndFields = new HashMap<DocFieldConsumerPerThread, ICollection<DocFieldConsumerPerField>>();
+ foreach(DocConsumerPerThread thread in threads)
+ {
+ DocFieldProcessorPerThread perThread = (DocFieldProcessorPerThread)thread;
+ childThreadsAndFields[perThread.consumer] = perThread.Fields();
+ perThread.TrimFields(state);
+ }
+ fieldsWriter.Flush(state);
+ consumer.Flush(childThreadsAndFields, state);
+
+ // Important to save after asking consumer to flush so
+ // consumer can alter the FieldInfo* if necessary. EG,
+ // FreqProxTermsWriter does this with
+ // FieldInfo.storePayload.
+ System.String fileName = state.SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION);
+ fieldInfos.Write(state.directory, fileName);
+ state.flushedFiles.Add(fileName);
+ }
+
+ public override void Abort()
+ {
+ fieldsWriter.Abort();
+ consumer.Abort();
+ }
+
+ public override bool FreeRAM()
+ {
+ return consumer.FreeRAM();
+ }
+
+ public override DocConsumerPerThread AddThread(DocumentsWriterThreadState threadState)
+ {
+ return new DocFieldProcessorPerThread(threadState, this);
+ }
+ }
+} \ No newline at end of file
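Both DocFieldConsumers above and DocFieldProcessorPerThread below recycle their PerDoc instances through a small synchronized free list (GetPerDoc/FreePerDoc) so that steady-state indexing does not allocate one per document. A simplified, hypothetical sketch of that pooling idea follows; this version grows the array in Release rather than pre-sizing it in Get as the real code does, but the effect is the same.

    using System;

    sealed class Pool<T> where T : class, new()
    {
        private T[] free = new T[1];
        private int freeCount;

        // Hand back a recycled instance when one is available, otherwise allocate.
        public T Get()
        {
            lock (this)
            {
                if (freeCount == 0)
                    return new T();
                return free[--freeCount];
            }
        }

        // Park the instance for reuse, growing the free list as needed.
        public void Release(T item)
        {
            lock (this)
            {
                if (freeCount == free.Length)
                {
                    T[] bigger = new T[free.Length * 2];
                    Array.Copy(free, 0, bigger, 0, free.Length);
                    free = bigger;
                }
                free[freeCount++] = item;
            }
        }
    }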
diff --git a/src/core/Index/DocFieldProcessorPerField.cs b/src/core/Index/DocFieldProcessorPerField.cs
new file mode 100644
index 0000000..1078988
--- /dev/null
+++ b/src/core/Index/DocFieldProcessorPerField.cs
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Documents;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> Holds all per-thread, per-field state.</summary>
+
+ sealed class DocFieldProcessorPerField
+ {
+
+ internal DocFieldConsumerPerField consumer;
+ internal FieldInfo fieldInfo;
+
+ internal DocFieldProcessorPerField next;
+ internal int lastGen = - 1;
+
+ internal int fieldCount;
+ internal IFieldable[] fields = new IFieldable[1];
+
+ public DocFieldProcessorPerField(DocFieldProcessorPerThread perThread, FieldInfo fieldInfo)
+ {
+ this.consumer = perThread.consumer.AddField(fieldInfo);
+ this.fieldInfo = fieldInfo;
+ }
+
+ public void Abort()
+ {
+ consumer.Abort();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/DocFieldProcessorPerThread.cs b/src/core/Index/DocFieldProcessorPerThread.cs
new file mode 100644
index 0000000..d108116
--- /dev/null
+++ b/src/core/Index/DocFieldProcessorPerThread.cs
@@ -0,0 +1,478 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Documents;
+using Lucene.Net.Support;
+using Document = Lucene.Net.Documents.Document;
+using ArrayUtil = Lucene.Net.Util.ArrayUtil;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> Gathers all Fieldables for a document under the same
+ /// name, updates FieldInfos, and calls per-field consumers
+ /// to process field by field.
+ ///
+ /// Currently, only a single thread visits the fields,
+ /// sequentially, for processing.
+ /// </summary>
+
+ sealed class DocFieldProcessorPerThread:DocConsumerPerThread
+ {
+ private void InitBlock()
+ {
+ docFreeList = new PerDoc[1];
+ }
+
+ internal float docBoost;
+ internal int fieldGen;
+ internal DocFieldProcessor docFieldProcessor;
+ internal FieldInfos fieldInfos;
+ internal DocFieldConsumerPerThread consumer;
+
+ // Holds all fields seen in current doc
+ internal DocFieldProcessorPerField[] fields = new DocFieldProcessorPerField[1];
+ internal int fieldCount;
+
+ // Hash table for all fields ever seen
+ internal DocFieldProcessorPerField[] fieldHash = new DocFieldProcessorPerField[2];
+ internal int hashMask = 1;
+ internal int totalFieldCount;
+
+ internal StoredFieldsWriterPerThread fieldsWriter;
+
+ internal DocumentsWriter.DocState docState;
+
+ public DocFieldProcessorPerThread(DocumentsWriterThreadState threadState, DocFieldProcessor docFieldProcessor)
+ {
+ InitBlock();
+ this.docState = threadState.docState;
+ this.docFieldProcessor = docFieldProcessor;
+ this.fieldInfos = docFieldProcessor.fieldInfos;
+ this.consumer = docFieldProcessor.consumer.AddThread(this);
+ fieldsWriter = docFieldProcessor.fieldsWriter.AddThread(docState);
+ }
+
+ public override void Abort()
+ {
+ for (int i = 0; i < fieldHash.Length; i++)
+ {
+ DocFieldProcessorPerField field = fieldHash[i];
+ while (field != null)
+ {
+ DocFieldProcessorPerField next = field.next;
+ field.Abort();
+ field = next;
+ }
+ }
+ fieldsWriter.Abort();
+ consumer.Abort();
+ }
+
+ public System.Collections.Generic.ICollection<DocFieldConsumerPerField> Fields()
+ {
+ System.Collections.Generic.ICollection<DocFieldConsumerPerField> fields =
+ new System.Collections.Generic.HashSet<DocFieldConsumerPerField>();
+ for (int i = 0; i < fieldHash.Length; i++)
+ {
+ DocFieldProcessorPerField field = fieldHash[i];
+ while (field != null)
+ {
+ fields.Add(field.consumer);
+ field = field.next;
+ }
+ }
+ System.Diagnostics.Debug.Assert(fields.Count == totalFieldCount);
+ return fields;
+ }
+
+ /// <summary>If there are fields we've seen but did not see again
+ /// in the last run, then free them up.
+ /// </summary>
+
+ internal void TrimFields(SegmentWriteState state)
+ {
+
+ for (int i = 0; i < fieldHash.Length; i++)
+ {
+ DocFieldProcessorPerField perField = fieldHash[i];
+ DocFieldProcessorPerField lastPerField = null;
+
+ while (perField != null)
+ {
+
+ if (perField.lastGen == - 1)
+ {
+
+ // This field was not seen since the previous
+ // flush, so, free up its resources now
+
+ // Unhash
+ if (lastPerField == null)
+ fieldHash[i] = perField.next;
+ else
+ lastPerField.next = perField.next;
+
+ if (state.docWriter.infoStream != null)
+ state.docWriter.infoStream.WriteLine(" purge field=" + perField.fieldInfo.name);
+
+ totalFieldCount--;
+ }
+ else
+ {
+ // Reset
+ perField.lastGen = - 1;
+ lastPerField = perField;
+ }
+
+ perField = perField.next;
+ }
+ }
+ }
+
+ private void Rehash()
+ {
+ int newHashSize = (fieldHash.Length * 2);
+ System.Diagnostics.Debug.Assert(newHashSize > fieldHash.Length);
+
+ DocFieldProcessorPerField[] newHashArray = new DocFieldProcessorPerField[newHashSize];
+
+ // Rehash
+ int newHashMask = newHashSize - 1;
+ for (int j = 0; j < fieldHash.Length; j++)
+ {
+ DocFieldProcessorPerField fp0 = fieldHash[j];
+ while (fp0 != null)
+ {
+ int hashPos2 = fp0.fieldInfo.name.GetHashCode() & newHashMask;
+ DocFieldProcessorPerField nextFP0 = fp0.next;
+ fp0.next = newHashArray[hashPos2];
+ newHashArray[hashPos2] = fp0;
+ fp0 = nextFP0;
+ }
+ }
+
+ fieldHash = newHashArray;
+ hashMask = newHashMask;
+ }
+
+ public override DocumentsWriter.DocWriter ProcessDocument()
+ {
+
+ consumer.StartDocument();
+ fieldsWriter.StartDocument();
+
+ Document doc = docState.doc;
+
+ System.Diagnostics.Debug.Assert(docFieldProcessor.docWriter.writer.TestPoint("DocumentsWriter.ThreadState.init start"));
+
+ fieldCount = 0;
+
+ int thisFieldGen = fieldGen++;
+
+ System.Collections.Generic.IList<IFieldable> docFields = doc.GetFields();
+ int numDocFields = docFields.Count;
+
+ // Absorb any new fields first seen in this document.
+ // Also absorb any changes to fields we had already
+ // seen before (eg suddenly turning on norms or
+ // vectors, etc.):
+
+ for (int i = 0; i < numDocFields; i++)
+ {
+ IFieldable field = docFields[i];
+ System.String fieldName = field.Name;
+
+ // Make sure we have a PerField allocated
+ int hashPos = fieldName.GetHashCode() & hashMask;
+ DocFieldProcessorPerField fp = fieldHash[hashPos];
+ while (fp != null && !fp.fieldInfo.name.Equals(fieldName))
+ fp = fp.next;
+
+ if (fp == null)
+ {
+
+ // TODO FI: we need to genericize the "flags" that a
+ // field holds, and how these flags are merged; it
+ // needs to be more "pluggable" such that if I want
+ // to have a new "thing" my Fields can do, I can
+ // easily add it
+ FieldInfo fi = fieldInfos.Add(fieldName, field.IsIndexed, field.IsTermVectorStored,
+ field.IsStorePositionWithTermVector, field.IsStoreOffsetWithTermVector,
+ field.OmitNorms, false, field.OmitTermFreqAndPositions);
+
+ fp = new DocFieldProcessorPerField(this, fi);
+ fp.next = fieldHash[hashPos];
+ fieldHash[hashPos] = fp;
+ totalFieldCount++;
+
+ if (totalFieldCount >= fieldHash.Length / 2)
+ Rehash();
+ }
+ else
+ {
+ fp.fieldInfo.Update(field.IsIndexed, field.IsTermVectorStored,
+ field.IsStorePositionWithTermVector, field.IsStoreOffsetWithTermVector,
+ field.OmitNorms, false, field.OmitTermFreqAndPositions);
+ }
+
+ if (thisFieldGen != fp.lastGen)
+ {
+
+ // First time we're seeing this field for this doc
+ fp.fieldCount = 0;
+
+ if (fieldCount == fields.Length)
+ {
+ int newSize = fields.Length * 2;
+ DocFieldProcessorPerField[] newArray = new DocFieldProcessorPerField[newSize];
+ Array.Copy(fields, 0, newArray, 0, fieldCount);
+ fields = newArray;
+ }
+
+ fields[fieldCount++] = fp;
+ fp.lastGen = thisFieldGen;
+ }
+
+ if (fp.fieldCount == fp.fields.Length)
+ {
+ IFieldable[] newArray = new IFieldable[fp.fields.Length * 2];
+ Array.Copy(fp.fields, 0, newArray, 0, fp.fieldCount);
+ fp.fields = newArray;
+ }
+
+ fp.fields[fp.fieldCount++] = field;
+ if (field.IsStored)
+ {
+ fieldsWriter.AddField(field, fp.fieldInfo);
+ }
+ }
+
+ // If we are writing vectors then we must visit
+ // fields in sorted order so they are written in
+ // sorted order. TODO: we actually only need to
+ // sort the subset of fields that have vectors
+ // enabled; we could save [small amount of] CPU
+ // here.
+ QuickSort(fields, 0, fieldCount - 1);
+
+ for (int i = 0; i < fieldCount; i++)
+ fields[i].consumer.ProcessFields(fields[i].fields, fields[i].fieldCount);
+
+ if (docState.maxTermPrefix != null && docState.infoStream != null)
+ {
+ docState.infoStream.WriteLine("WARNING: document contains at least one immense term (longer than the max length " + DocumentsWriter.MAX_TERM_LENGTH + "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");
+ docState.maxTermPrefix = null;
+ }
+
+ DocumentsWriter.DocWriter one = fieldsWriter.FinishDocument();
+ DocumentsWriter.DocWriter two = consumer.FinishDocument();
+ if (one == null)
+ {
+ return two;
+ }
+ else if (two == null)
+ {
+ return one;
+ }
+ else
+ {
+ PerDoc both = GetPerDoc();
+ both.docID = docState.docID;
+ System.Diagnostics.Debug.Assert(one.docID == docState.docID);
+ System.Diagnostics.Debug.Assert(two.docID == docState.docID);
+ both.one = one;
+ both.two = two;
+ return both;
+ }
+ }
+
+ internal void QuickSort(DocFieldProcessorPerField[] array, int lo, int hi)
+ {
+ if (lo >= hi)
+ return ;
+ else if (hi == 1 + lo)
+ {
+ if (String.CompareOrdinal(array[lo].fieldInfo.name, array[hi].fieldInfo.name) > 0)
+ {
+ DocFieldProcessorPerField tmp = array[lo];
+ array[lo] = array[hi];
+ array[hi] = tmp;
+ }
+ return ;
+ }
+
+ int mid = Number.URShift((lo + hi), 1);
+
+ if (String.CompareOrdinal(array[lo].fieldInfo.name, array[mid].fieldInfo.name) > 0)
+ {
+ DocFieldProcessorPerField tmp = array[lo];
+ array[lo] = array[mid];
+ array[mid] = tmp;
+ }
+
+ if (String.CompareOrdinal(array[mid].fieldInfo.name, array[hi].fieldInfo.name) > 0)
+ {
+ DocFieldProcessorPerField tmp = array[mid];
+ array[mid] = array[hi];
+ array[hi] = tmp;
+
+ if (String.CompareOrdinal(array[lo].fieldInfo.name, array[mid].fieldInfo.name) > 0)
+ {
+ DocFieldProcessorPerField tmp2 = array[lo];
+ array[lo] = array[mid];
+ array[mid] = tmp2;
+ }
+ }
+
+ int left = lo + 1;
+ int right = hi - 1;
+
+ if (left >= right)
+ return ;
+
+ DocFieldProcessorPerField partition = array[mid];
+
+ for (; ; )
+ {
+ while (String.CompareOrdinal(array[right].fieldInfo.name, partition.fieldInfo.name) > 0)
+ --right;
+
+ while (left < right && String.CompareOrdinal(array[left].fieldInfo.name, partition.fieldInfo.name) <= 0)
+ ++left;
+
+ if (left < right)
+ {
+ DocFieldProcessorPerField tmp = array[left];
+ array[left] = array[right];
+ array[right] = tmp;
+ --right;
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ QuickSort(array, lo, left);
+ QuickSort(array, left + 1, hi);
+ }
+
+ internal PerDoc[] docFreeList;
+ internal int freeCount;
+ internal int allocCount;
+
+ internal PerDoc GetPerDoc()
+ {
+ lock (this)
+ {
+ if (freeCount == 0)
+ {
+ allocCount++;
+ if (allocCount > docFreeList.Length)
+ {
+ // Grow our free list up front to make sure we have
+ // enough space to recycle all outstanding PerDoc
+ // instances
+ System.Diagnostics.Debug.Assert(allocCount == 1 + docFreeList.Length);
+ docFreeList = new PerDoc[ArrayUtil.GetNextSize(allocCount)];
+ }
+ return new PerDoc(this);
+ }
+ else
+ return docFreeList[--freeCount];
+ }
+ }
+
+ internal void FreePerDoc(PerDoc perDoc)
+ {
+ lock (this)
+ {
+ System.Diagnostics.Debug.Assert(freeCount < docFreeList.Length);
+ docFreeList[freeCount++] = perDoc;
+ }
+ }
+
+ internal class PerDoc:DocumentsWriter.DocWriter
+ {
+ public PerDoc(DocFieldProcessorPerThread enclosingInstance)
+ {
+ InitBlock(enclosingInstance);
+ }
+ private void InitBlock(DocFieldProcessorPerThread enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private DocFieldProcessorPerThread enclosingInstance;
+ public DocFieldProcessorPerThread Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+
+ internal DocumentsWriter.DocWriter one;
+ internal DocumentsWriter.DocWriter two;
+
+ public override long SizeInBytes()
+ {
+ return one.SizeInBytes() + two.SizeInBytes();
+ }
+
+ public override void Finish()
+ {
+ try
+ {
+ try
+ {
+ one.Finish();
+ }
+ finally
+ {
+ two.Finish();
+ }
+ }
+ finally
+ {
+ Enclosing_Instance.FreePerDoc(this);
+ }
+ }
+
+ public override void Abort()
+ {
+ try
+ {
+ try
+ {
+ one.Abort();
+ }
+ finally
+ {
+ two.Abort();
+ }
+ }
+ finally
+ {
+ Enclosing_Instance.FreePerDoc(this);
+ }
+ }
+ }
+ }
+} \ No newline at end of file
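DocFieldProcessorPerThread keeps its per-field objects in a hand-rolled hash table: a power-of-two bucket array, collision chains threaded through each entry's next field, bucket selection by masking the field name's hash code, and a doubling rehash once the table is half full. A self-contained, hypothetical sketch of that structure (FieldEntry and FieldTable are illustrative names, not Lucene.Net classes):

    using System;

    sealed class FieldEntry
    {
        public readonly string Name;
        public FieldEntry Next;          // collision chain within a bucket
        public FieldEntry(string name) { Name = name; }
    }

    sealed class FieldTable
    {
        private FieldEntry[] buckets = new FieldEntry[2];
        private int mask = 1;            // buckets.Length - 1, always a power of two minus one
        private int count;

        public FieldEntry GetOrAdd(string name)
        {
            int pos = name.GetHashCode() & mask;
            FieldEntry e = buckets[pos];
            while (e != null && !e.Name.Equals(name))
                e = e.Next;
            if (e == null)
            {
                e = new FieldEntry(name);
                e.Next = buckets[pos];
                buckets[pos] = e;
                if (++count >= buckets.Length / 2)
                    Rehash();
            }
            return e;
        }

        private void Rehash()
        {
            FieldEntry[] newBuckets = new FieldEntry[buckets.Length * 2];
            int newMask = newBuckets.Length - 1;
            foreach (FieldEntry head in buckets)
            {
                FieldEntry e = head;
                while (e != null)
                {
                    FieldEntry next = e.Next;
                    int pos = e.Name.GetHashCode() & newMask;
                    e.Next = newBuckets[pos];
                    newBuckets[pos] = e;
                    e = next;
                }
            }
            buckets = newBuckets;
            mask = newMask;
        }
    }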
diff --git a/src/core/Index/DocInverter.cs b/src/core/Index/DocInverter.cs
new file mode 100644
index 0000000..4153465
--- /dev/null
+++ b/src/core/Index/DocInverter.cs
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.Collections.Generic;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary>This is a DocFieldConsumer that inverts each field,
+ /// separately, from a Document, and accepts an
+ /// InvertedTermsConsumer to process those terms.
+ /// </summary>
+
+ sealed class DocInverter : DocFieldConsumer
+ {
+
+ internal InvertedDocConsumer consumer;
+ internal InvertedDocEndConsumer endConsumer;
+
+ public DocInverter(InvertedDocConsumer consumer, InvertedDocEndConsumer endConsumer)
+ {
+ this.consumer = consumer;
+ this.endConsumer = endConsumer;
+ }
+
+ internal override void SetFieldInfos(FieldInfos fieldInfos)
+ {
+ base.SetFieldInfos(fieldInfos);
+ consumer.SetFieldInfos(fieldInfos);
+ endConsumer.SetFieldInfos(fieldInfos);
+ }
+
+ public override void Flush(IDictionary<DocFieldConsumerPerThread, ICollection<DocFieldConsumerPerField>> threadsAndFields, SegmentWriteState state)
+ {
+
+ var childThreadsAndFields = new HashMap<InvertedDocConsumerPerThread, ICollection<InvertedDocConsumerPerField>>();
+ var endChildThreadsAndFields = new HashMap<InvertedDocEndConsumerPerThread, ICollection<InvertedDocEndConsumerPerField>>();
+
+ foreach (var entry in threadsAndFields)
+ {
+ var perThread = (DocInverterPerThread) entry.Key;
+
+ ICollection<InvertedDocConsumerPerField> childFields = new HashSet<InvertedDocConsumerPerField>();
+ ICollection<InvertedDocEndConsumerPerField> endChildFields = new HashSet<InvertedDocEndConsumerPerField>();
+ foreach(DocFieldConsumerPerField field in entry.Value)
+ {
+ var perField = (DocInverterPerField)field;
+ childFields.Add(perField.consumer);
+ endChildFields.Add(perField.endConsumer);
+ }
+
+ childThreadsAndFields[perThread.consumer] = childFields;
+ endChildThreadsAndFields[perThread.endConsumer] = endChildFields;
+ }
+
+ consumer.Flush(childThreadsAndFields, state);
+ endConsumer.Flush(endChildThreadsAndFields, state);
+ }
+
+ public override void CloseDocStore(SegmentWriteState state)
+ {
+ consumer.CloseDocStore(state);
+ endConsumer.CloseDocStore(state);
+ }
+
+ public override void Abort()
+ {
+ consumer.Abort();
+ endConsumer.Abort();
+ }
+
+ public override bool FreeRAM()
+ {
+ return consumer.FreeRAM();
+ }
+
+ public override DocFieldConsumerPerThread AddThread(DocFieldProcessorPerThread docFieldProcessorPerThread)
+ {
+ return new DocInverterPerThread(docFieldProcessorPerThread, this);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/DocInverterPerField.cs b/src/core/Index/DocInverterPerField.cs
new file mode 100644
index 0000000..8cd7c0a
--- /dev/null
+++ b/src/core/Index/DocInverterPerField.cs
@@ -0,0 +1,235 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Documents;
+using TokenStream = Lucene.Net.Analysis.TokenStream;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> Holds state for inverting all occurrences of a single
+ /// field in the document. This class doesn't do anything
+ /// itself; instead, it forwards the tokens produced by
+ /// analysis to its own consumer
+ /// (InvertedDocConsumerPerField). It also interacts with an
+ /// endConsumer (InvertedDocEndConsumerPerField).
+ /// </summary>
+
+ sealed class DocInverterPerField:DocFieldConsumerPerField
+ {
+
+ private DocInverterPerThread perThread;
+ private FieldInfo fieldInfo;
+ internal InvertedDocConsumerPerField consumer;
+ internal InvertedDocEndConsumerPerField endConsumer;
+ internal DocumentsWriter.DocState docState;
+ internal FieldInvertState fieldState;
+
+ public DocInverterPerField(DocInverterPerThread perThread, FieldInfo fieldInfo)
+ {
+ this.perThread = perThread;
+ this.fieldInfo = fieldInfo;
+ docState = perThread.docState;
+ fieldState = perThread.fieldState;
+ this.consumer = perThread.consumer.AddField(this, fieldInfo);
+ this.endConsumer = perThread.endConsumer.AddField(this, fieldInfo);
+ }
+
+ public override void Abort()
+ {
+ consumer.Abort();
+ endConsumer.Abort();
+ }
+
+ public override void ProcessFields(IFieldable[] fields, int count)
+ {
+
+ fieldState.Reset(docState.doc.Boost);
+
+ int maxFieldLength = docState.maxFieldLength;
+
+ bool doInvert = consumer.Start(fields, count);
+
+ for (int i = 0; i < count; i++)
+ {
+
+ IFieldable field = fields[i];
+
+ // TODO FI: this should be "genericized" to querying
+ // consumer if it wants to see this particular field
+ // tokenized.
+ if (field.IsIndexed && doInvert)
+ {
+
+ bool anyToken;
+
+ if (fieldState.length > 0)
+ fieldState.position += docState.analyzer.GetPositionIncrementGap(fieldInfo.name);
+
+ if (!field.IsTokenized)
+ {
+ // un-tokenized field
+ System.String stringValue = field.StringValue;
+ int valueLength = stringValue.Length;
+ perThread.singleToken.Reinit(stringValue, 0, valueLength);
+ fieldState.attributeSource = perThread.singleToken;
+ consumer.Start(field);
+
+ bool success = false;
+ try
+ {
+ consumer.Add();
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ docState.docWriter.SetAborting();
+ }
+ fieldState.offset += valueLength;
+ fieldState.length++;
+ fieldState.position++;
+ anyToken = valueLength > 0;
+ }
+ else
+ {
+ // tokenized field
+ TokenStream stream;
+ TokenStream streamValue = field.TokenStreamValue;
+
+ if (streamValue != null)
+ stream = streamValue;
+ else
+ {
+ // the field does not have a TokenStream,
+ // so we have to obtain one from the analyzer
+ System.IO.TextReader reader; // find or make Reader
+ System.IO.TextReader readerValue = field.ReaderValue;
+
+ if (readerValue != null)
+ reader = readerValue;
+ else
+ {
+ System.String stringValue = field.StringValue;
+ if (stringValue == null)
+ throw new System.ArgumentException("field must have either TokenStream, String or Reader value");
+ perThread.stringReader.Init(stringValue);
+ reader = perThread.stringReader;
+ }
+
+ // Tokenize field and add to postingTable
+ stream = docState.analyzer.ReusableTokenStream(fieldInfo.name, reader);
+ }
+
+ // reset the TokenStream to the first token
+ stream.Reset();
+
+ int startLength = fieldState.length;
+
+ try
+ {
+ int offsetEnd = fieldState.offset - 1;
+
+ bool hasMoreTokens = stream.IncrementToken();
+
+ fieldState.attributeSource = stream;
+
+ IOffsetAttribute offsetAttribute = fieldState.attributeSource.AddAttribute<IOffsetAttribute>();
+ IPositionIncrementAttribute posIncrAttribute = fieldState.attributeSource.AddAttribute<IPositionIncrementAttribute>();
+
+ consumer.Start(field);
+
+ for (; ; )
+ {
+
+ // If we hit an exception in stream.next below
+ // (which is fairly common, eg if analyzer
+ // chokes on a given document), then it's
+ // non-aborting and (above) this one document
+ // will be marked as deleted, but still
+ // consume a docID
+
+ if (!hasMoreTokens)
+ break;
+
+ int posIncr = posIncrAttribute.PositionIncrement;
+ fieldState.position += posIncr;
+ if (fieldState.position > 0)
+ {
+ fieldState.position--;
+ }
+
+ if (posIncr == 0)
+ fieldState.numOverlap++;
+
+ bool success = false;
+ try
+ {
+ // If we hit an exception in here, we abort
+ // all buffered documents since the last
+ // flush, on the likelihood that the
+ // internal state of the consumer is now
+ // corrupt and should not be flushed to a
+ // new segment:
+ consumer.Add();
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ docState.docWriter.SetAborting();
+ }
+ fieldState.position++;
+ offsetEnd = fieldState.offset + offsetAttribute.EndOffset;
+ if (++fieldState.length >= maxFieldLength)
+ {
+ if (docState.infoStream != null)
+ docState.infoStream.WriteLine("maxFieldLength " + maxFieldLength + " reached for field " + fieldInfo.name + ", ignoring following tokens");
+ break;
+ }
+
+ hasMoreTokens = stream.IncrementToken();
+ }
+ // trigger streams to perform end-of-stream operations
+ stream.End();
+
+ fieldState.offset += offsetAttribute.EndOffset;
+ anyToken = fieldState.length > startLength;
+ }
+ finally
+ {
+ stream.Close();
+ }
+ }
+
+ if (anyToken)
+ fieldState.offset += docState.analyzer.GetOffsetGap(field);
+ fieldState.boost *= field.Boost;
+ }
+
+ // LUCENE-2387: don't hang onto the field, so GC can
+ // reclaim
+ fields[i] = null;
+ }
+
+ consumer.Finish();
+ endConsumer.Finish();
+ }
+ }
+} \ No newline at end of file
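The loop in ProcessFields above is the heart of indexing a tokenized field: pull tokens one at a time with IncrementToken(), read the position-increment and offset attributes after each call, and make sure End() and Close() still run when something fails. A minimal, hypothetical standalone sketch of that consumption pattern, using only the analysis calls already seen in this file (the InvertSketch name is invented for illustration):

    using System;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Tokenattributes;

    static class InvertSketch
    {
        // Register the attributes once, then read their current values after
        // every IncrementToken() call, tracking a running absolute position.
        public static void Walk(TokenStream stream)
        {
            IPositionIncrementAttribute posIncr = stream.AddAttribute<IPositionIncrementAttribute>();
            IOffsetAttribute offsets = stream.AddAttribute<IOffsetAttribute>();

            int position = -1; // first token with increment 1 lands at position 0
            int tokens = 0;

            stream.Reset();
            try
            {
                while (stream.IncrementToken())
                {
                    position += posIncr.PositionIncrement;
                    tokens++;
                    Console.WriteLine("token " + tokens + ": position=" + position + " endOffset=" + offsets.EndOffset);
                }
                stream.End(); // lets filters report the true final offset
            }
            finally
            {
                stream.Close();
            }
        }
    }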
diff --git a/src/core/Index/DocInverterPerThread.cs b/src/core/Index/DocInverterPerThread.cs
new file mode 100644
index 0000000..c38ed35
--- /dev/null
+++ b/src/core/Index/DocInverterPerThread.cs
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+using TokenStream = Lucene.Net.Analysis.TokenStream;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary>This is a DocFieldConsumer that inverts each field,
+ /// separately, from a Document, and accepts an
+ /// InvertedTermsConsumer to process those terms.
+ /// </summary>
+
+ sealed class DocInverterPerThread : DocFieldConsumerPerThread
+ {
+ private void InitBlock()
+ {
+ singleToken = new SingleTokenAttributeSource();
+ }
+ internal DocInverter docInverter;
+ internal InvertedDocConsumerPerThread consumer;
+ internal InvertedDocEndConsumerPerThread endConsumer;
+ internal SingleTokenAttributeSource singleToken;
+
+ internal class SingleTokenAttributeSource : AttributeSource
+ {
+ internal ITermAttribute termAttribute;
+ internal IOffsetAttribute offsetAttribute;
+
+ internal SingleTokenAttributeSource()
+ {
+ termAttribute = AddAttribute<ITermAttribute>();
+ offsetAttribute = AddAttribute<IOffsetAttribute>();
+ }
+
+ public void Reinit(System.String stringValue, int startOffset, int endOffset)
+ {
+ termAttribute.SetTermBuffer(stringValue);
+ offsetAttribute.SetOffset(startOffset, endOffset);
+ }
+ }
+
+ internal DocumentsWriter.DocState docState;
+
+ internal FieldInvertState fieldState = new FieldInvertState();
+
+ // Used to read a string value for a field
+ internal ReusableStringReader stringReader = new ReusableStringReader();
+
+ public DocInverterPerThread(DocFieldProcessorPerThread docFieldProcessorPerThread, DocInverter docInverter)
+ {
+ InitBlock();
+ this.docInverter = docInverter;
+ docState = docFieldProcessorPerThread.docState;
+ consumer = docInverter.consumer.AddThread(this);
+ endConsumer = docInverter.endConsumer.AddThread(this);
+ }
+
+ public override void StartDocument()
+ {
+ consumer.StartDocument();
+ endConsumer.StartDocument();
+ }
+
+ public override DocumentsWriter.DocWriter FinishDocument()
+ {
+ // TODO: allow endConsumer.finishDocument to also return
+ // a DocWriter
+ endConsumer.FinishDocument();
+ return consumer.FinishDocument();
+ }
+
+ public override void Abort()
+ {
+ try
+ {
+ consumer.Abort();
+ }
+ finally
+ {
+ endConsumer.Abort();
+ }
+ }
+
+ public override DocFieldConsumerPerField AddField(FieldInfo fi)
+ {
+ return new DocInverterPerField(this, fi);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/DocumentsWriter.cs b/src/core/Index/DocumentsWriter.cs
new file mode 100644
index 0000000..6545d11
--- /dev/null
+++ b/src/core/Index/DocumentsWriter.cs
@@ -0,0 +1,2075 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Threading;
+using Lucene.Net.Support;
+using Analyzer = Lucene.Net.Analysis.Analyzer;
+using Document = Lucene.Net.Documents.Document;
+using AlreadyClosedException = Lucene.Net.Store.AlreadyClosedException;
+using Directory = Lucene.Net.Store.Directory;
+using ArrayUtil = Lucene.Net.Util.ArrayUtil;
+using Constants = Lucene.Net.Util.Constants;
+using IndexSearcher = Lucene.Net.Search.IndexSearcher;
+using Query = Lucene.Net.Search.Query;
+using Scorer = Lucene.Net.Search.Scorer;
+using Similarity = Lucene.Net.Search.Similarity;
+using Weight = Lucene.Net.Search.Weight;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> This class accepts multiple added documents and directly
+ /// writes a single segment file. It does this more
+ /// efficiently than creating a single segment per document
+ /// (with DocumentWriter) and doing standard merges on those
+ /// segments.
+ ///
+ /// Each added document is passed to the <see cref="DocConsumer" />,
+ /// which in turn processes the document and interacts with
+ /// other consumers in the indexing chain. Certain
+ /// consumers, like <see cref="StoredFieldsWriter" /> and <see cref="TermVectorsTermsWriter" />,
+ /// digest a document and
+ /// immediately write bytes to the "doc store" files (ie,
+ /// they do not consume RAM per document, except while they
+ /// are processing the document).
+ ///
+ /// Other consumers, eg <see cref="FreqProxTermsWriter" /> and
+ /// <see cref="NormsWriter" />, buffer bytes in RAM and flush only
+ /// when a new segment is produced.
+ /// Once we have used our allowed RAM buffer, or the number
+ /// of added docs is large enough (in the case we are
+ /// flushing by doc count instead of RAM usage), we create a
+ /// real segment and flush it to the Directory.
+ ///
+ /// Threads:
+ ///
+ /// Multiple threads are allowed into addDocument at once.
+ /// There is an initial synchronized call to getThreadState
+ /// which allocates a ThreadState for this thread. The same
+ /// thread will get the same ThreadState over time (thread
+ /// affinity) so that if there are consistent patterns (for
+ /// example each thread is indexing a different content
+ /// source) then we make better use of RAM. Then
+ /// processDocument is called on that ThreadState without
+ /// synchronization (most of the "heavy lifting" is in this
+ /// call). Finally the synchronized "finishDocument" is
+ /// called to flush changes to the directory.
+ ///
+ /// When flush is called by IndexWriter we forcefully idle
+ /// all threads and flush only once they are all idle. This
+ /// means you can call flush with a given thread even while
+ /// other threads are actively adding/deleting documents.
+ ///
+ ///
+ /// Exceptions:
+ ///
+ /// Because this class directly updates in-memory posting
+ /// lists, and flushes stored fields and term vectors
+ /// directly to files in the directory, there are certain
+ /// limited times when an exception can corrupt this state.
+ /// For example, a disk full while flushing stored fields
+ /// leaves this file in a corrupt state. Or, an OOM
+ /// exception while appending to the in-memory posting lists
+ /// can corrupt that posting list. We call such exceptions
+ /// "aborting exceptions". In these cases we must call
+ /// abort() to discard all docs added since the last flush.
+ ///
+ /// All other exceptions ("non-aborting exceptions") can
+ /// still partially update the index structures. These
+ /// updates are consistent, but they represent only the part
+ /// of the document seen up until the exception was hit.
+ /// When this happens, we immediately mark the document as
+ /// deleted so that the document is always atomically ("all
+ /// or none") added to the index.
+ /// </summary>
+
+ public sealed class DocumentsWriter : IDisposable
+ {
+ internal class AnonymousClassIndexingChain:IndexingChain
+ {
+
+ internal override DocConsumer GetChain(DocumentsWriter documentsWriter)
+ {
+ /*
+ This is the current indexing chain:
+
+ DocConsumer / DocConsumerPerThread
+ --> code: DocFieldProcessor / DocFieldProcessorPerThread
+ --> DocFieldConsumer / DocFieldConsumerPerThread / DocFieldConsumerPerField
+ --> code: DocFieldConsumers / DocFieldConsumersPerThread / DocFieldConsumersPerField
+ --> code: DocInverter / DocInverterPerThread / DocInverterPerField
+ --> InvertedDocConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField
+ --> code: TermsHash / TermsHashPerThread / TermsHashPerField
+ --> TermsHashConsumer / TermsHashConsumerPerThread / TermsHashConsumerPerField
+ --> code: FreqProxTermsWriter / FreqProxTermsWriterPerThread / FreqProxTermsWriterPerField
+ --> code: TermVectorsTermsWriter / TermVectorsTermsWriterPerThread / TermVectorsTermsWriterPerField
+ --> InvertedDocEndConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField
+ --> code: NormsWriter / NormsWriterPerThread / NormsWriterPerField
+ --> code: StoredFieldsWriter / StoredFieldsWriterPerThread / StoredFieldsWriterPerField
+ */
+
+ // Build up indexing chain:
+
+ TermsHashConsumer termVectorsWriter = new TermVectorsTermsWriter(documentsWriter);
+ TermsHashConsumer freqProxWriter = new FreqProxTermsWriter();
+
+ InvertedDocConsumer termsHash = new TermsHash(documentsWriter, true, freqProxWriter, new TermsHash(documentsWriter, false, termVectorsWriter, null));
+ NormsWriter normsWriter = new NormsWriter();
+ DocInverter docInverter = new DocInverter(termsHash, normsWriter);
+ return new DocFieldProcessor(documentsWriter, docInverter);
+ }
+ }
+ private void InitBlock()
+ {
+ maxFieldLength = IndexWriter.DEFAULT_MAX_FIELD_LENGTH;
+ maxBufferedDeleteTerms = IndexWriter.DEFAULT_MAX_BUFFERED_DELETE_TERMS;
+ ramBufferSize = (long) (IndexWriter.DEFAULT_RAM_BUFFER_SIZE_MB * 1024 * 1024);
+ waitQueuePauseBytes = (long) (ramBufferSize * 0.1);
+ waitQueueResumeBytes = (long) (ramBufferSize * 0.05);
+ freeTrigger = (long) (IndexWriter.DEFAULT_RAM_BUFFER_SIZE_MB * 1024 * 1024 * 1.05);
+ freeLevel = (long) (IndexWriter.DEFAULT_RAM_BUFFER_SIZE_MB * 1024 * 1024 * 0.95);
+ maxBufferedDocs = IndexWriter.DEFAULT_MAX_BUFFERED_DOCS;
+ skipDocWriter = new SkipDocWriter();
+ byteBlockAllocator = new ByteBlockAllocator(this, DocumentsWriter.BYTE_BLOCK_SIZE);
+ perDocAllocator = new ByteBlockAllocator(this,DocumentsWriter.PER_DOC_BLOCK_SIZE);
+ waitQueue = new WaitQueue(this);
+ }
+
+ internal IndexWriter writer;
+ internal Directory directory;
+
+ internal System.String segment; // Current segment we are working on
+ private System.String docStoreSegment; // Current doc-store segment we are writing
+ private int docStoreOffset; // Current starting doc-store offset of current segment
+
+ private int nextDocID; // Next docID to be added
+ private int numDocsInRAM; // # docs buffered in RAM
+ internal int numDocsInStore; // # docs written to doc stores
+
+ // Max # ThreadState instances; if there are more threads
+ // than this they share ThreadStates
+ private const int MAX_THREAD_STATE = 5;
+ private DocumentsWriterThreadState[] threadStates = new DocumentsWriterThreadState[0];
+ private HashMap<ThreadClass, DocumentsWriterThreadState> threadBindings = new HashMap<ThreadClass, DocumentsWriterThreadState>();
+
+ private int pauseThreads; // Non-zero when we need all threads to
+ // pause (eg to flush)
+ internal bool flushPending; // True when a thread has decided to flush
+ internal bool bufferIsFull; // True when it's time to write segment
+ private bool aborting; // True if an abort is pending
+
+ private DocFieldProcessor docFieldProcessor;
+
+ internal System.IO.StreamWriter infoStream;
+ internal int maxFieldLength;
+ internal Similarity similarity;
+
+ internal IList<string> newFiles;
+
+ internal class DocState
+ {
+ internal DocumentsWriter docWriter;
+ internal Analyzer analyzer;
+ internal int maxFieldLength;
+ internal System.IO.StreamWriter infoStream;
+ internal Similarity similarity;
+ internal int docID;
+ internal Document doc;
+ internal System.String maxTermPrefix;
+
+ // Only called by asserts
+ public bool TestPoint(System.String name)
+ {
+ return docWriter.writer.TestPoint(name);
+ }
+
+ public void Clear()
+ {
+ // don't hold onto doc nor analyzer, in case it is
+ // largish:
+ doc = null;
+ analyzer = null;
+ }
+ }
+
+ /// <summary>Consumer returns this on each doc. This holds any
+ /// state that must be flushed synchronously, in docID
+ /// order. We gather these and flush them in order.
+ /// </summary>
+ internal abstract class DocWriter
+ {
+ internal DocWriter next;
+ internal int docID;
+ public abstract void Finish();
+ public abstract void Abort();
+ public abstract long SizeInBytes();
+
+ internal void SetNext(DocWriter next)
+ {
+ this.next = next;
+ }
+ }
+
+ /*
+ * Create and return a new PerDocBuffer.
+ */
+ internal PerDocBuffer NewPerDocBuffer()
+ {
+ return new PerDocBuffer(this);
+ }
+
+ /*
+ * RAMFile buffer for DocWriters.
+ */
+ internal class PerDocBuffer : Lucene.Net.Store.RAMFile
+ {
+ DocumentsWriter enclosingInstance;
+ public PerDocBuffer(DocumentsWriter enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ /*
+ * Allocate bytes used from shared pool.
+ */
+ public override byte[] NewBuffer(int size)
+ {
+ System.Diagnostics.Debug.Assert(size == PER_DOC_BLOCK_SIZE);
+ return enclosingInstance.perDocAllocator.GetByteBlock(false);
+ }
+
+ /*
+ * Recycle the bytes used.
+ */
+ internal void Recycle()
+ {
+ lock (this)
+ {
+ if (buffers.Count > 0)
+ {
+ Length = 0;
+
+ // Recycle the blocks
+ enclosingInstance.perDocAllocator.RecycleByteBlocks(buffers);
+ buffers.Clear();
+ sizeInBytes = 0;
+
+ System.Diagnostics.Debug.Assert(NumBuffers() == 0);
+ }
+ }
+ }
+ }
+
+ /// <summary> The IndexingChain must define the <see cref="GetChain(DocumentsWriter)" /> method
+ /// which returns the DocConsumer that the DocumentsWriter calls to process the
+ /// documents.
+ /// </summary>
+ internal abstract class IndexingChain
+ {
+ internal abstract DocConsumer GetChain(DocumentsWriter documentsWriter);
+ }
+
+ internal static readonly IndexingChain DefaultIndexingChain;
+
+ internal DocConsumer consumer;
+
+ // Deletes done after the last flush; these are discarded
+ // on abort
+ private BufferedDeletes deletesInRAM = new BufferedDeletes(false);
+
+ // Deletes done before the last flush; these are still
+ // kept on abort
+ private BufferedDeletes deletesFlushed = new BufferedDeletes(true);
+
+ // The max number of delete terms that can be buffered before
+ // they must be flushed to disk.
+ private int maxBufferedDeleteTerms;
+
+ // How much RAM we can use before flushing. This is
+ // IndexWriter.DISABLE_AUTO_FLUSH if we are flushing by doc count instead.
+ private long ramBufferSize;
+ private long waitQueuePauseBytes;
+ private long waitQueueResumeBytes;
+
+ // If we've allocated 5% over our RAM budget, we then
+ // free down to 95%
+ private long freeTrigger;
+ private long freeLevel;
+
+ // Flush @ this number of docs. If ramBufferSize is
+ // enabled (not DISABLE_AUTO_FLUSH) we will flush by RAM usage instead.
+ private int maxBufferedDocs;
+
+ private int flushedDocCount; // How many docs already flushed to index
+
+ internal void UpdateFlushedDocCount(int n)
+ {
+ lock (this)
+ {
+ flushedDocCount += n;
+ }
+ }
+ internal int GetFlushedDocCount()
+ {
+ lock (this)
+ {
+ return flushedDocCount;
+ }
+ }
+ internal void SetFlushedDocCount(int n)
+ {
+ lock (this)
+ {
+ flushedDocCount = n;
+ }
+ }
+
+ private bool closed;
+
+ internal DocumentsWriter(Directory directory, IndexWriter writer, IndexingChain indexingChain)
+ {
+ InitBlock();
+ this.directory = directory;
+ this.writer = writer;
+ this.similarity = writer.Similarity;
+ flushedDocCount = writer.MaxDoc();
+
+ consumer = indexingChain.GetChain(this);
+ if (consumer is DocFieldProcessor)
+ {
+ docFieldProcessor = (DocFieldProcessor) consumer;
+ }
+ }
+
+ /// <summary>Returns true if any of the fields in the current
+ /// buffered docs have omitTermFreqAndPositions==false
+ /// </summary>
+ internal bool HasProx()
+ {
+ return (docFieldProcessor != null) ? docFieldProcessor.fieldInfos.HasProx() : true;
+ }
+
+ /// <summary>If non-null, various details of indexing are printed
+ /// here.
+ /// </summary>
+ internal void SetInfoStream(System.IO.StreamWriter infoStream)
+ {
+ lock (this)
+ {
+ this.infoStream = infoStream;
+ for (int i = 0; i < threadStates.Length; i++)
+ threadStates[i].docState.infoStream = infoStream;
+ }
+ }
+
+ internal void SetMaxFieldLength(int maxFieldLength)
+ {
+ lock (this)
+ {
+ this.maxFieldLength = maxFieldLength;
+ for (int i = 0; i < threadStates.Length; i++)
+ threadStates[i].docState.maxFieldLength = maxFieldLength;
+ }
+ }
+
+ internal void SetSimilarity(Similarity similarity)
+ {
+ lock (this)
+ {
+ this.similarity = similarity;
+ for (int i = 0; i < threadStates.Length; i++)
+ threadStates[i].docState.similarity = similarity;
+ }
+ }
+
+ /// <summary>Set how much RAM we can use before flushing. </summary>
+ internal void SetRAMBufferSizeMB(double mb)
+ {
+ lock (this)
+ {
+ if (mb == IndexWriter.DISABLE_AUTO_FLUSH)
+ {
+ ramBufferSize = IndexWriter.DISABLE_AUTO_FLUSH;
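+ // RAM-based flushing is off, so use fixed wait-queue thresholds:
+ // pause incoming threads at 4 MB of queued docs, resume at 2 MB.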
+ waitQueuePauseBytes = 4 * 1024 * 1024;
+ waitQueueResumeBytes = 2 * 1024 * 1024;
+ }
+ else
+ {
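+ // Derive thresholds from the RAM budget: pause the wait queue at 10%
+ // of the buffer, resume at 5%, start freeing pooled blocks once
+ // allocations reach 105%, and free down to 95%.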
+ ramBufferSize = (long) (mb * 1024 * 1024);
+ waitQueuePauseBytes = (long) (ramBufferSize * 0.1);
+ waitQueueResumeBytes = (long) (ramBufferSize * 0.05);
+ freeTrigger = (long) (1.05 * ramBufferSize);
+ freeLevel = (long) (0.95 * ramBufferSize);
+ }
+ }
+ }
+
+ internal double GetRAMBufferSizeMB()
+ {
+ lock (this)
+ {
+ if (ramBufferSize == IndexWriter.DISABLE_AUTO_FLUSH)
+ {
+ return ramBufferSize;
+ }
+ else
+ {
+ return ramBufferSize / 1024.0 / 1024.0;
+ }
+ }
+ }
+
+ /// <summary>Gets or sets max buffered docs, which means we will flush by
+ /// doc count instead of by RAM usage.
+ /// </summary>
+ internal int MaxBufferedDocs
+ {
+ get { return maxBufferedDocs; }
+ set { maxBufferedDocs = value; }
+ }
+
+ /// <summary>Get current segment name we are writing. </summary>
+ internal string Segment
+ {
+ get { return segment; }
+ }
+
+ /// <summary>Returns how many docs are currently buffered in RAM. </summary>
+ internal int NumDocsInRAM
+ {
+ get { return numDocsInRAM; }
+ }
+
+ /// <summary>Returns the current doc store segment we are writing
+ /// to.
+ /// </summary>
+ internal string DocStoreSegment
+ {
+ get
+ {
+ lock (this)
+ {
+ return docStoreSegment;
+ }
+ }
+ }
+
+ /// <summary>Returns the doc offset into the shared doc store for
+ /// the current buffered docs.
+ /// </summary>
+ internal int DocStoreOffset
+ {
+ get { return docStoreOffset; }
+ }
+
+ /// <summary>Closes the currently open doc stores and returns the doc
+ /// store segment name. This returns null if there are
+ /// no buffered documents.
+ /// </summary>
+ internal System.String CloseDocStore()
+ {
+ lock (this)
+ {
+
+ System.Diagnostics.Debug.Assert(AllThreadsIdle());
+
+ if (infoStream != null)
+ Message("closeDocStore: " + openFiles.Count + " files to flush to segment " + docStoreSegment + " numDocs=" + numDocsInStore);
+
+ bool success = false;
+
+ try
+ {
+ InitFlushState(true);
+ closedFiles.Clear();
+
+ consumer.CloseDocStore(flushState);
+ System.Diagnostics.Debug.Assert(0 == openFiles.Count);
+
+ System.String s = docStoreSegment;
+ docStoreSegment = null;
+ docStoreOffset = 0;
+ numDocsInStore = 0;
+ success = true;
+ return s;
+ }
+ finally
+ {
+ if (!success)
+ {
+ Abort();
+ }
+ }
+ }
+ }
+
+ private ICollection<string> abortedFiles; // List of files that were written before last abort()
+
+ private SegmentWriteState flushState;
+
+ internal ICollection<string> AbortedFiles()
+ {
+ return abortedFiles;
+ }
+
+ internal void Message(System.String message)
+ {
+ if (infoStream != null)
+ writer.Message("DW: " + message);
+ }
+
+ internal IList<string> openFiles = new List<string>();
+ internal IList<string> closedFiles = new List<string>();
+
+ /* Returns Collection of files in use by this instance,
+ * including any flushed segments. */
+ internal IList<string> OpenFiles()
+ {
+ lock (this)
+ {
+ // ToArray returns a copy
+ return openFiles.ToArray();
+ }
+ }
+
+ internal IList<string> ClosedFiles()
+ {
+ lock (this)
+ {
+ // ToArray returns a copy
+ return closedFiles.ToArray();
+ }
+ }
+
+ internal void AddOpenFile(System.String name)
+ {
+ lock (this)
+ {
+ System.Diagnostics.Debug.Assert(!openFiles.Contains(name));
+ openFiles.Add(name);
+ }
+ }
+
+ internal void RemoveOpenFile(System.String name)
+ {
+ lock (this)
+ {
+ System.Diagnostics.Debug.Assert(openFiles.Contains(name));
+ openFiles.Remove(name);
+ closedFiles.Add(name);
+ }
+ }
+
+ internal void SetAborting()
+ {
+ lock (this)
+ {
+ aborting = true;
+ }
+ }
+
+ /// <summary>Called if we hit an exception at a bad time (when
+ /// updating the index files) and must discard all
+ /// currently buffered docs. This resets our state,
+ /// discarding any docs added since last flush.
+ /// </summary>
+ internal void Abort()
+ {
+ lock (this)
+ {
+ try
+ {
+ if (infoStream != null)
+ {
+ Message("docWriter: now abort");
+ }
+
+ // Forcefully remove waiting ThreadStates from line
+ waitQueue.Abort();
+
+ // Wait for all other threads to finish with
+ // DocumentsWriter:
+ PauseAllThreads();
+
+ try
+ {
+
+ System.Diagnostics.Debug.Assert(0 == waitQueue.numWaiting);
+
+ waitQueue.waitingBytes = 0;
+
+ try
+ {
+ abortedFiles = OpenFiles();
+ }
+ catch (System.Exception)
+ {
+ abortedFiles = null;
+ }
+
+ deletesInRAM.Clear();
+ deletesFlushed.Clear();
+ openFiles.Clear();
+
+ for (int i = 0; i < threadStates.Length; i++)
+ try
+ {
+ threadStates[i].consumer.Abort();
+ }
+ catch (System.Exception)
+ {
+ }
+
+ try
+ {
+ consumer.Abort();
+ }
+ catch (System.Exception)
+ {
+ }
+
+ docStoreSegment = null;
+ numDocsInStore = 0;
+ docStoreOffset = 0;
+
+ // Reset all postings data
+ DoAfterFlush();
+ }
+ finally
+ {
+ ResumeAllThreads();
+ }
+ }
+ finally
+ {
+ aborting = false;
+ System.Threading.Monitor.PulseAll(this);
+ if (infoStream != null)
+ {
+ Message("docWriter: done abort; abortedFiles=" + abortedFiles);
+ }
+ }
+ }
+ }
+
+ /// <summary>Reset after a flush </summary>
+ private void DoAfterFlush()
+ {
+ // All ThreadStates should be idle when we are called
+ System.Diagnostics.Debug.Assert(AllThreadsIdle());
+ threadBindings.Clear();
+ waitQueue.Reset();
+ segment = null;
+ numDocsInRAM = 0;
+ nextDocID = 0;
+ bufferIsFull = false;
+ flushPending = false;
+ for (int i = 0; i < threadStates.Length; i++)
+ threadStates[i].DoAfterFlush();
+ numBytesUsed = 0;
+ }
+
+ // Returns true if an abort is in progress
+ internal bool PauseAllThreads()
+ {
+ lock (this)
+ {
+ pauseThreads++;
+ while (!AllThreadsIdle())
+ {
+ System.Threading.Monitor.Wait(this);
+ }
+
+ return aborting;
+ }
+ }
+
+ internal void ResumeAllThreads()
+ {
+ lock (this)
+ {
+ pauseThreads--;
+ System.Diagnostics.Debug.Assert(pauseThreads >= 0);
+ if (0 == pauseThreads)
+ System.Threading.Monitor.PulseAll(this);
+ }
+ }
+
+ private bool AllThreadsIdle()
+ {
+ lock (this)
+ {
+ for (int i = 0; i < threadStates.Length; i++)
+ if (!threadStates[i].isIdle)
+ return false;
+ return true;
+ }
+ }
+
+ internal bool AnyChanges
+ {
+ get
+ {
+ lock (this)
+ {
+ return numDocsInRAM != 0 || deletesInRAM.numTerms != 0 || deletesInRAM.docIDs.Count != 0 ||
+ deletesInRAM.queries.Count != 0;
+ }
+ }
+ }
+
+ private void InitFlushState(bool onlyDocStore)
+ {
+ lock (this)
+ {
+ InitSegmentName(onlyDocStore);
+ flushState = new SegmentWriteState(this, directory, segment, docStoreSegment, numDocsInRAM, numDocsInStore, writer.TermIndexInterval);
+ }
+ }
+
+ /// <summary>Flush all pending docs to a new segment </summary>
+ internal int Flush(bool closeDocStore)
+ {
+ lock (this)
+ {
+
+ System.Diagnostics.Debug.Assert(AllThreadsIdle());
+
+ System.Diagnostics.Debug.Assert(numDocsInRAM > 0);
+
+ System.Diagnostics.Debug.Assert(nextDocID == numDocsInRAM);
+ System.Diagnostics.Debug.Assert(waitQueue.numWaiting == 0);
+ System.Diagnostics.Debug.Assert(waitQueue.waitingBytes == 0);
+
+ InitFlushState(false);
+
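+ // Remember where this segment's documents begin within the shared doc store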
+ docStoreOffset = numDocsInStore;
+
+ if (infoStream != null)
+ Message("flush postings as segment " + flushState.segmentName + " numDocs=" + numDocsInRAM);
+
+ bool success = false;
+
+ try
+ {
+
+ if (closeDocStore)
+ {
+ System.Diagnostics.Debug.Assert(flushState.docStoreSegmentName != null);
+ System.Diagnostics.Debug.Assert(flushState.docStoreSegmentName.Equals(flushState.segmentName));
+ CloseDocStore();
+ flushState.numDocsInStore = 0;
+ }
+
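+ // Gather every thread's consumer so the indexing chain can merge
+ // their buffered postings into the new segment: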
+ ICollection<DocConsumerPerThread> threads = new HashSet<DocConsumerPerThread>();
+ for (int i = 0; i < threadStates.Length; i++)
+ threads.Add(threadStates[i].consumer);
+ consumer.Flush(threads, flushState);
+
+ if (infoStream != null)
+ {
+ SegmentInfo si = new SegmentInfo(flushState.segmentName, flushState.numDocs, directory);
+ long newSegmentSize = si.SizeInBytes();
+ System.String message = System.String.Format(nf, " oldRAMSize={0:d} newFlushedSize={1:d} docs/MB={2:f} new/old={3:%}",
+ new System.Object[] { numBytesUsed, newSegmentSize, (numDocsInRAM / (newSegmentSize / 1024.0 / 1024.0)), (100.0 * newSegmentSize / numBytesUsed) });
+ Message(message);
+ }
+
+ flushedDocCount += flushState.numDocs;
+
+ DoAfterFlush();
+
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ Abort();
+ }
+ }
+
+ System.Diagnostics.Debug.Assert(waitQueue.waitingBytes == 0);
+
+ return flushState.numDocs;
+ }
+ }
+
+ internal ICollection<string> GetFlushedFiles()
+ {
+ return flushState.flushedFiles;
+ }
+
+ /// <summary>Build compound file for the segment we just flushed </summary>
+ internal void CreateCompoundFile(System.String segment)
+ {
+
+ CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
+ foreach(string flushedFile in flushState.flushedFiles)
+ {
+ cfsWriter.AddFile(flushedFile);
+ }
+
+ // Perform the merge
+ cfsWriter.Close();
+ }
+
+ /// <summary>Set flushPending if it is not already set and returns
+ /// whether it was set. This is used by IndexWriter to
+ /// trigger a single flush even when multiple threads are
+ /// trying to do so.
+ /// </summary>
+ internal bool SetFlushPending()
+ {
+ lock (this)
+ {
+ if (flushPending)
+ return false;
+ else
+ {
+ flushPending = true;
+ return true;
+ }
+ }
+ }
+
+ internal void ClearFlushPending()
+ {
+ lock (this)
+ {
+ flushPending = false;
+ }
+ }
+
+ internal void PushDeletes()
+ {
+ lock (this)
+ {
+ deletesFlushed.Update(deletesInRAM);
+ }
+ }
+
+ public void Dispose()
+ {
+ // Move to protected method if class becomes unsealed
+ lock (this)
+ {
+ closed = true;
+ System.Threading.Monitor.PulseAll(this);
+ }
+ }
+
+ internal void InitSegmentName(bool onlyDocStore)
+ {
+ lock (this)
+ {
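+ // Lazily assign segment names: the main segment the first time a doc
+ // arrives after a flush, and the shared doc store segment if none is open yet.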
+ if (segment == null && (!onlyDocStore || docStoreSegment == null))
+ {
+ segment = writer.NewSegmentName();
+ System.Diagnostics.Debug.Assert(numDocsInRAM == 0);
+ }
+ if (docStoreSegment == null)
+ {
+ docStoreSegment = segment;
+ System.Diagnostics.Debug.Assert(numDocsInStore == 0);
+ }
+ }
+ }
+
+ /// <summary>Returns a free (idle) ThreadState that may be used for
+ /// indexing this one document. This call also pauses if a
+ /// flush is pending. If delTerm is non-null then we
+ /// buffer this deleted term after the thread state has
+ /// been acquired.
+ /// </summary>
+ internal DocumentsWriterThreadState GetThreadState(Document doc, Term delTerm)
+ {
+ lock (this)
+ {
+
+ // First, find a thread state. If this thread already
+ // has affinity to a specific ThreadState, use that one
+ // again.
+ DocumentsWriterThreadState state = threadBindings[ThreadClass.Current()];
+ if (state == null)
+ {
+
+ // First time this thread has called us since last
+ // flush. Find the least loaded thread state:
+ DocumentsWriterThreadState minThreadState = null;
+ for (int i = 0; i < threadStates.Length; i++)
+ {
+ DocumentsWriterThreadState ts = threadStates[i];
+ if (minThreadState == null || ts.numThreads < minThreadState.numThreads)
+ minThreadState = ts;
+ }
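+ // Reuse the least-loaded state if it is unassigned or we've already
+ // hit MAX_THREAD_STATE; otherwise create a new private state for this thread: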
+ if (minThreadState != null && (minThreadState.numThreads == 0 || threadStates.Length >= MAX_THREAD_STATE))
+ {
+ state = minThreadState;
+ state.numThreads++;
+ }
+ else
+ {
+ // Just create a new "private" thread state
+ DocumentsWriterThreadState[] newArray = new DocumentsWriterThreadState[1 + threadStates.Length];
+ if (threadStates.Length > 0)
+ Array.Copy(threadStates, 0, newArray, 0, threadStates.Length);
+ state = newArray[threadStates.Length] = new DocumentsWriterThreadState(this);
+ threadStates = newArray;
+ }
+ threadBindings[ThreadClass.Current()] = state;
+ }
+
+ // Next, wait until my thread state is idle (in case
+ // it's shared with other threads) and for threads to
+ // not be paused nor a flush pending:
+ WaitReady(state);
+
+ // Allocate segment name if this is the first doc since
+ // last flush:
+ InitSegmentName(false);
+
+ state.isIdle = false;
+
+ bool success = false;
+ try
+ {
+ state.docState.docID = nextDocID;
+
+ System.Diagnostics.Debug.Assert(writer.TestPoint("DocumentsWriter.ThreadState.init start"));
+
+ if (delTerm != null)
+ {
+ AddDeleteTerm(delTerm, state.docState.docID);
+ state.doFlushAfter = TimeToFlushDeletes();
+ }
+
+ System.Diagnostics.Debug.Assert(writer.TestPoint("DocumentsWriter.ThreadState.init after delTerm"));
+
+ nextDocID++;
+ numDocsInRAM++;
+
+ // We must at this point commit to flushing to ensure we
+ // always get N docs when we flush by doc count, even if
+ // > 1 thread is adding documents:
+ if (!flushPending && maxBufferedDocs != IndexWriter.DISABLE_AUTO_FLUSH && numDocsInRAM >= maxBufferedDocs)
+ {
+ flushPending = true;
+ state.doFlushAfter = true;
+ }
+
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ // Forcefully idle this ThreadState:
+ state.isIdle = true;
+ System.Threading.Monitor.PulseAll(this);
+ if (state.doFlushAfter)
+ {
+ state.doFlushAfter = false;
+ flushPending = false;
+ }
+ }
+ }
+
+ return state;
+ }
+ }
+
+ /// <summary>Returns true if the caller (IndexWriter) should now
+ /// flush.
+ /// </summary>
+ internal bool AddDocument(Document doc, Analyzer analyzer)
+ {
+ return UpdateDocument(doc, analyzer, null);
+ }
+
+ internal bool UpdateDocument(Term t, Document doc, Analyzer analyzer)
+ {
+ return UpdateDocument(doc, analyzer, t);
+ }
+
+ internal bool UpdateDocument(Document doc, Analyzer analyzer, Term delTerm)
+ {
+
+ // This call is synchronized but fast
+ DocumentsWriterThreadState state = GetThreadState(doc, delTerm);
+
+ DocState docState = state.docState;
+ docState.doc = doc;
+ docState.analyzer = analyzer;
+
+ bool doReturnFalse = false; // {{Aroush-2.9}} to handle return from finally clause
+
+ bool success = false;
+ try
+ {
+ // This call is not synchronized and does all the
+ // work
+ DocWriter perDoc;
+ try
+ {
+ perDoc = state.consumer.ProcessDocument();
+ }
+ finally
+ {
+ docState.Clear();
+ }
+ // This call is synchronized but fast
+ FinishDocument(state, perDoc);
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ lock (this)
+ {
+
+ if (aborting)
+ {
+ state.isIdle = true;
+ System.Threading.Monitor.PulseAll(this);
+ Abort();
+ }
+ else
+ {
+ skipDocWriter.docID = docState.docID;
+ bool success2 = false;
+ try
+ {
+ waitQueue.Add(skipDocWriter);
+ success2 = true;
+ }
+ finally
+ {
+ if (!success2)
+ {
+ state.isIdle = true;
+ System.Threading.Monitor.PulseAll(this);
+ Abort();
+ // return false; // {{Aroush-2.9}} this 'return false' is move to outside finally
+ doReturnFalse = true;
+ }
+ }
+
+ if (!doReturnFalse) // {{Aroush-2.9}} added because of the above 'return false' removal
+ {
+ state.isIdle = true;
+ System.Threading.Monitor.PulseAll(this);
+
+ // If this thread state had decided to flush, we
+ // must clear it so another thread can flush
+ if (state.doFlushAfter)
+ {
+ state.doFlushAfter = false;
+ flushPending = false;
+ System.Threading.Monitor.PulseAll(this);
+ }
+
+ // Immediately mark this document as deleted
+ // since likely it was partially added. This
+ // keeps indexing as "all or none" (atomic) when
+ // adding a document:
+ AddDeleteDocID(state.docState.docID);
+ }
+ }
+ }
+ }
+ }
+
+ if (doReturnFalse) // {{Aroush-2.9}} see comment above
+ {
+ return false;
+ }
+
+ return state.doFlushAfter || TimeToFlushDeletes();
+ }
+
+ // for testing
+ internal int GetNumBufferedDeleteTerms()
+ {
+ lock (this)
+ {
+ return deletesInRAM.numTerms;
+ }
+ }
+
+ // for testing
+ internal IDictionary<Term, BufferedDeletes.Num> GetBufferedDeleteTerms()
+ {
+ lock (this)
+ {
+ return deletesInRAM.terms;
+ }
+ }
+
+ /// <summary>Called whenever a merge has completed and the merged segments had deletions </summary>
+ internal void RemapDeletes(SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergeDocCount)
+ {
+ lock (this)
+ {
+ if (docMaps == null)
+ // The merged segments had no deletes so docIDs did not change and we have nothing to do
+ return ;
+ MergeDocIDRemapper mapper = new MergeDocIDRemapper(infos, docMaps, delCounts, merge, mergeDocCount);
+ deletesInRAM.Remap(mapper, infos, docMaps, delCounts, merge, mergeDocCount);
+ deletesFlushed.Remap(mapper, infos, docMaps, delCounts, merge, mergeDocCount);
+ flushedDocCount -= mapper.docShift;
+ }
+ }
+
+ private void WaitReady(DocumentsWriterThreadState state)
+ {
+ lock (this)
+ {
+
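+ // Block until this state (if any) is idle and no pause, pending flush,
+ // or abort is in progress; bail out if the writer was closed meanwhile.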
+ while (!closed && ((state != null && !state.isIdle) || pauseThreads != 0 || flushPending || aborting))
+ {
+ System.Threading.Monitor.Wait(this);
+ }
+
+ if (closed)
+ throw new AlreadyClosedException("this IndexWriter is closed");
+ }
+ }
+
+ internal bool BufferDeleteTerms(Term[] terms)
+ {
+ lock (this)
+ {
+ WaitReady(null);
+ for (int i = 0; i < terms.Length; i++)
+ AddDeleteTerm(terms[i], numDocsInRAM);
+ return TimeToFlushDeletes();
+ }
+ }
+
+ internal bool BufferDeleteTerm(Term term)
+ {
+ lock (this)
+ {
+ WaitReady(null);
+ AddDeleteTerm(term, numDocsInRAM);
+ return TimeToFlushDeletes();
+ }
+ }
+
+ internal bool BufferDeleteQueries(Query[] queries)
+ {
+ lock (this)
+ {
+ WaitReady(null);
+ for (int i = 0; i < queries.Length; i++)
+ AddDeleteQuery(queries[i], numDocsInRAM);
+ return TimeToFlushDeletes();
+ }
+ }
+
+ internal bool BufferDeleteQuery(Query query)
+ {
+ lock (this)
+ {
+ WaitReady(null);
+ AddDeleteQuery(query, numDocsInRAM);
+ return TimeToFlushDeletes();
+ }
+ }
+
+ internal bool DeletesFull()
+ {
+ lock (this)
+ {
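+ // Deletes are "full" when buffered docs plus buffered deletes exceed the
+ // RAM budget, or the buffered delete count exceeds maxBufferedDeleteTerms.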
+ return (ramBufferSize != IndexWriter.DISABLE_AUTO_FLUSH && (deletesInRAM.bytesUsed + deletesFlushed.bytesUsed + numBytesUsed) >= ramBufferSize) || (maxBufferedDeleteTerms != IndexWriter.DISABLE_AUTO_FLUSH && ((deletesInRAM.Size() + deletesFlushed.Size()) >= maxBufferedDeleteTerms));
+ }
+ }
+
+ internal bool DoApplyDeletes()
+ {
+ lock (this)
+ {
+ // Very similar to deletesFull(), except we don't count
+ // numBytesAlloc, because we are checking whether
+ // deletes (alone) are consuming too many resources now
+ // and thus should be applied. We apply deletes if RAM
+ // usage is > 1/2 of our allowed RAM buffer, to prevent
+ // too-frequent flushing of a long tail of tiny segments
+ // when merges (which always apply deletes) are
+ // infrequent.
+ return (ramBufferSize != IndexWriter.DISABLE_AUTO_FLUSH && (deletesInRAM.bytesUsed + deletesFlushed.bytesUsed) >= ramBufferSize / 2) || (maxBufferedDeleteTerms != IndexWriter.DISABLE_AUTO_FLUSH && ((deletesInRAM.Size() + deletesFlushed.Size()) >= maxBufferedDeleteTerms));
+ }
+ }
+
+ private bool TimeToFlushDeletes()
+ {
+ lock (this)
+ {
+ return (bufferIsFull || DeletesFull()) && SetFlushPending();
+ }
+ }
+
+ internal int MaxBufferedDeleteTerms
+ {
+ set { this.maxBufferedDeleteTerms = value; }
+ get { return maxBufferedDeleteTerms; }
+ }
+
+ internal bool HasDeletes()
+ {
+ lock (this)
+ {
+ return deletesFlushed.Any();
+ }
+ }
+
+ internal bool ApplyDeletes(SegmentInfos infos)
+ {
+ lock (this)
+ {
+ if (!HasDeletes())
+ return false;
+
+ if (infoStream != null)
+ Message("apply " + deletesFlushed.numTerms + " buffered deleted terms and " + deletesFlushed.docIDs.Count + " deleted docIDs and " + deletesFlushed.queries.Count + " deleted queries on " + (+ infos.Count) + " segments.");
+
+ int infosEnd = infos.Count;
+
+ int docStart = 0;
+ bool any = false;
+ for (int i = 0; i < infosEnd; i++)
+ {
+
+ // Make sure we never attempt to apply deletes to
+ // segment in external dir
+ System.Diagnostics.Debug.Assert(infos.Info(i).dir == directory);
+
+ SegmentReader reader = writer.readerPool.Get(infos.Info(i), false);
+ try
+ {
+ any |= ApplyDeletes(reader, docStart);
+ docStart += reader.MaxDoc;
+ }
+ finally
+ {
+ writer.readerPool.Release(reader);
+ }
+ }
+
+ deletesFlushed.Clear();
+
+ return any;
+ }
+ }
+
+ // used only by assert
+ private Term lastDeleteTerm;
+
+ // used only by assert
+ private bool CheckDeleteTerm(Term term)
+ {
+ if (term != null) {
+ System.Diagnostics.Debug.Assert(lastDeleteTerm == null || term.CompareTo(lastDeleteTerm) > 0, "lastTerm=" + lastDeleteTerm + " vs term=" + term);
+ }
+ lastDeleteTerm = term;
+ return true;
+ }
+
+ // Apply buffered delete terms, queries and docIDs to the
+ // provided reader
+ private bool ApplyDeletes(IndexReader reader, int docIDStart)
+ {
+ lock (this)
+ {
+ int docEnd = docIDStart + reader.MaxDoc;
+ bool any = false;
+
+ System.Diagnostics.Debug.Assert(CheckDeleteTerm(null));
+
+ // Delete by term
+ TermDocs docs = reader.TermDocs();
+ try
+ {
+ foreach(KeyValuePair<Term, BufferedDeletes.Num> entry in deletesFlushed.terms)
+ {
+ Term term = entry.Key;
+ // LUCENE-2086: we should be iterating a TreeMap,
+ // here, so terms better be in order:
+ System.Diagnostics.Debug.Assert(CheckDeleteTerm(term));
+ docs.Seek(term);
+ int limit = entry.Value.GetNum();
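+ // Only delete documents that were buffered before this delete term was
+ // added; docs at or beyond the recorded limit are left alone.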
+ while (docs.Next())
+ {
+ int docID = docs.Doc;
+ if (docIDStart + docID >= limit)
+ break;
+ reader.DeleteDocument(docID);
+ any = true;
+ }
+ }
+ }
+ finally
+ {
+ docs.Close();
+ }
+
+ // Delete by docID
+ foreach(int docIdInt in deletesFlushed.docIDs)
+ {
+ int docID = docIdInt;
+ if (docID >= docIDStart && docID < docEnd)
+ {
+ reader.DeleteDocument(docID - docIDStart);
+ any = true;
+ }
+ }
+
+ // Delete by query
+ IndexSearcher searcher = new IndexSearcher(reader);
+ foreach(KeyValuePair<Query, int> entry in deletesFlushed.queries)
+ {
+ Query query = (Query) entry.Key;
+ int limit = (int)entry.Value;
+ Weight weight = query.Weight(searcher);
+ Scorer scorer = weight.Scorer(reader, true, false);
+ if (scorer != null)
+ {
+ while (true)
+ {
+ int doc = scorer.NextDoc();
+ if (((long) docIDStart) + doc >= limit)
+ break;
+ reader.DeleteDocument(doc);
+ any = true;
+ }
+ }
+ }
+ searcher.Close();
+ return any;
+ }
+ }
+
+ // Buffer a term in bufferedDeleteTerms, which records the
+ // current number of documents buffered in ram so that the
+ // delete term will be applied to those documents as well
+ // as the disk segments.
+ private void AddDeleteTerm(Term term, int docCount)
+ {
+ lock (this)
+ {
+ BufferedDeletes.Num num = deletesInRAM.terms[term];
+ int docIDUpto = flushedDocCount + docCount;
+ if (num == null)
+ deletesInRAM.terms[term] = new BufferedDeletes.Num(docIDUpto);
+ else
+ num.SetNum(docIDUpto);
+ deletesInRAM.numTerms++;
+
+ deletesInRAM.AddBytesUsed(BYTES_PER_DEL_TERM + term.Text.Length * CHAR_NUM_BYTE);
+ }
+ }
+
+ // Buffer a specific docID for deletion. Currently only
+ // used when we hit an exception while adding a document
+ private void AddDeleteDocID(int docID)
+ {
+ lock (this)
+ {
+ deletesInRAM.docIDs.Add(flushedDocCount + docID);
+ deletesInRAM.AddBytesUsed(BYTES_PER_DEL_DOCID);
+ }
+ }
+
+ private void AddDeleteQuery(Query query, int docID)
+ {
+ lock (this)
+ {
+ deletesInRAM.queries[query] = flushedDocCount + docID;
+ deletesInRAM.AddBytesUsed(BYTES_PER_DEL_QUERY);
+ }
+ }
+
+ internal bool DoBalanceRAM()
+ {
+ lock (this)
+ {
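+ // Balance only when RAM-based flushing is enabled, the buffer isn't already
+ // marked full, and either used RAM (docs + deletes) or total allocations
+ // have crossed their triggers.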
+ return ramBufferSize != IndexWriter.DISABLE_AUTO_FLUSH && !bufferIsFull && (numBytesUsed + deletesInRAM.bytesUsed + deletesFlushed.bytesUsed >= ramBufferSize || numBytesAlloc >= freeTrigger);
+ }
+ }
+
+ /// <summary>Does the synchronized work to finish/flush the
+ /// inverted document.
+ /// </summary>
+ private void FinishDocument(DocumentsWriterThreadState perThread, DocWriter docWriter)
+ {
+
+ if (DoBalanceRAM())
+ // Must call this w/o holding synchronized(this) else
+ // we'll hit deadlock:
+ BalanceRAM();
+
+ lock (this)
+ {
+
+ System.Diagnostics.Debug.Assert(docWriter == null || docWriter.docID == perThread.docState.docID);
+
+ if (aborting)
+ {
+
+ // We are currently aborting, and another thread is
+ // waiting for me to become idle. We just forcefully
+ // idle this threadState; it will be fully reset by
+ // abort()
+ if (docWriter != null)
+ try
+ {
+ docWriter.Abort();
+ }
+ catch (System.Exception)
+ {
+ }
+
+ perThread.isIdle = true;
+ System.Threading.Monitor.PulseAll(this);
+ return ;
+ }
+
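+ // Hand the finished doc (or the shared skip placeholder if it produced
+ // nothing) to the wait queue, which writes it once all earlier docIDs
+ // have been written: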
+ bool doPause;
+
+ if (docWriter != null)
+ doPause = waitQueue.Add(docWriter);
+ else
+ {
+ skipDocWriter.docID = perThread.docState.docID;
+ doPause = waitQueue.Add(skipDocWriter);
+ }
+
+ if (doPause)
+ WaitForWaitQueue();
+
+ if (bufferIsFull && !flushPending)
+ {
+ flushPending = true;
+ perThread.doFlushAfter = true;
+ }
+
+ perThread.isIdle = true;
+ System.Threading.Monitor.PulseAll(this);
+ }
+ }
+
+ internal void WaitForWaitQueue()
+ {
+ lock (this)
+ {
+ do
+ {
+ System.Threading.Monitor.Wait(this);
+ }
+ while (!waitQueue.DoResume());
+ }
+ }
+
+ internal class SkipDocWriter:DocWriter
+ {
+ public override void Finish()
+ {
+ }
+ public override void Abort()
+ {
+ }
+ public override long SizeInBytes()
+ {
+ return 0;
+ }
+ }
+ internal SkipDocWriter skipDocWriter;
+
+ internal long GetRAMUsed()
+ {
+ return numBytesUsed + deletesInRAM.bytesUsed + deletesFlushed.bytesUsed;
+ }
+
+ internal long numBytesAlloc;
+ internal long numBytesUsed;
+
+ internal System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
+
+ // Coarse estimates used to measure RAM usage of buffered deletes
+ internal const int OBJECT_HEADER_BYTES = 8;
+ internal static readonly int POINTER_NUM_BYTE;
+ internal const int INT_NUM_BYTE = 4;
+ internal const int CHAR_NUM_BYTE = 2;
+
+ /* Rough logic: HashMap has an array[Entry] w/ varying
+ load factor (say 2 * POINTER). Entry is object w/ Term
+ key, BufferedDeletes.Num val, int hash, Entry next
+ (OBJ_HEADER + 3*POINTER + INT). Term is object w/
+ String field and String text (OBJ_HEADER + 2*POINTER).
+ We don't count Term's field since it's interned.
+ Term's text is String (OBJ_HEADER + 4*INT + POINTER +
+ OBJ_HEADER + string.length*CHAR). BufferedDeletes.num is
+ OBJ_HEADER + INT. */
+
+ internal static readonly int BYTES_PER_DEL_TERM = 8 * POINTER_NUM_BYTE + 5 * OBJECT_HEADER_BYTES + 6 * INT_NUM_BYTE;
+
+ /* Rough logic: del docIDs are List<Integer>. Say list
+ allocates ~2X size (2*POINTER). Integer is OBJ_HEADER
+ + int */
+ internal static readonly int BYTES_PER_DEL_DOCID = 2 * POINTER_NUM_BYTE + OBJECT_HEADER_BYTES + INT_NUM_BYTE;
+
+ /* Rough logic: HashMap has an array[Entry] w/ varying
+ load factor (say 2 * POINTER). Entry is object w/
+ Query key, Integer val, int hash, Entry next
+ (OBJ_HEADER + 3*POINTER + INT). Query we often
+ undercount (say 24 bytes). Integer is OBJ_HEADER + INT. */
+ internal static readonly int BYTES_PER_DEL_QUERY = 5 * POINTER_NUM_BYTE + 2 * OBJECT_HEADER_BYTES + 2 * INT_NUM_BYTE + 24;
+
+ /* Initial chunks size of the shared byte[] blocks used to
+ store postings data */
+ internal const int BYTE_BLOCK_SHIFT = 15;
+ internal static readonly int BYTE_BLOCK_SIZE = 1 << BYTE_BLOCK_SHIFT;
+ internal static readonly int BYTE_BLOCK_MASK = BYTE_BLOCK_SIZE - 1;
+ internal static readonly int BYTE_BLOCK_NOT_MASK = ~ BYTE_BLOCK_MASK;
+
+ internal class ByteBlockAllocator : ByteBlockPool.Allocator
+ {
+ public ByteBlockAllocator(DocumentsWriter enclosingInstance, int blockSize)
+ {
+ this.blockSize = blockSize;
+ InitBlock(enclosingInstance);
+ }
+ private void InitBlock(DocumentsWriter enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private DocumentsWriter enclosingInstance;
+ public DocumentsWriter Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+
+ int blockSize;
+ internal List<byte[]> freeByteBlocks = new List<byte[]>();
+
+ /* Allocate another byte[] from the shared pool */
+ public /*internal*/ override byte[] GetByteBlock(bool trackAllocations)
+ {
+ lock (Enclosing_Instance)
+ {
+ int size = freeByteBlocks.Count;
+ byte[] b;
+ if (0 == size)
+ {
+ // Always record a block allocated, even if
+ // trackAllocations is false. This is necessary
+ // because this block will be shared between
+ // things that don't track allocations (term
+ // vectors) and things that do (freq/prox
+ // postings).
+ Enclosing_Instance.numBytesAlloc += blockSize;
+ b = new byte[blockSize];
+ }
+ else
+ {
+ b = freeByteBlocks[size - 1];
+ freeByteBlocks.RemoveAt(size - 1);
+ }
+ if (trackAllocations)
+ Enclosing_Instance.numBytesUsed += blockSize;
+ System.Diagnostics.Debug.Assert(Enclosing_Instance.numBytesUsed <= Enclosing_Instance.numBytesAlloc);
+ return b;
+ }
+ }
+
+ /* Return byte[]'s to the pool */
+ public /*internal*/ override void RecycleByteBlocks(byte[][] blocks, int start, int end)
+ {
+ lock (Enclosing_Instance)
+ {
+ for (int i = start; i < end; i++)
+ {
+ freeByteBlocks.Add(blocks[i]);
+ blocks[i] = null;
+ }
+ }
+ }
+
+ public /*internal*/ override void RecycleByteBlocks(IList<byte[]> blocks)
+ {
+ lock (Enclosing_Instance)
+ {
+ int size = blocks.Count;
+ for(int i=0;i<size;i++)
+ freeByteBlocks.Add(blocks[i]);
+ }
+ }
+ }
+
+ /* Initial chunks size of the shared int[] blocks used to
+ store postings data */
+ internal const int INT_BLOCK_SHIFT = 13;
+ internal static readonly int INT_BLOCK_SIZE = 1 << INT_BLOCK_SHIFT;
+ internal static readonly int INT_BLOCK_MASK = INT_BLOCK_SIZE - 1;
+
+ private List<int[]> freeIntBlocks = new List<int[]>();
+
+ /* Allocate another int[] from the shared pool */
+ internal int[] GetIntBlock(bool trackAllocations)
+ {
+ lock (this)
+ {
+ int size = freeIntBlocks.Count;
+ int[] b;
+ if (0 == size)
+ {
+ // Always record a block allocated, even if
+ // trackAllocations is false. This is necessary
+ // because this block will be shared between
+ // things that don't track allocations (term
+ // vectors) and things that do (freq/prox
+ // postings).
+ numBytesAlloc += INT_BLOCK_SIZE * INT_NUM_BYTE;
+ b = new int[INT_BLOCK_SIZE];
+ }
+ else
+ {
+ b = freeIntBlocks[size - 1];
+ freeIntBlocks.RemoveAt(size - 1);
+ }
+ if (trackAllocations)
+ numBytesUsed += INT_BLOCK_SIZE * INT_NUM_BYTE;
+ System.Diagnostics.Debug.Assert(numBytesUsed <= numBytesAlloc);
+ return b;
+ }
+ }
+
+ internal void BytesAllocated(long numBytes)
+ {
+ lock (this)
+ {
+ numBytesAlloc += numBytes;
+ }
+ }
+
+ internal void BytesUsed(long numBytes)
+ {
+ lock (this)
+ {
+ numBytesUsed += numBytes;
+ System.Diagnostics.Debug.Assert(numBytesUsed <= numBytesAlloc);
+ }
+ }
+
+ /* Return int[]s to the pool */
+ internal void RecycleIntBlocks(int[][] blocks, int start, int end)
+ {
+ lock (this)
+ {
+ for (int i = start; i < end; i++)
+ {
+ freeIntBlocks.Add(blocks[i]);
+ blocks[i] = null;
+ }
+ }
+ }
+
+ internal ByteBlockAllocator byteBlockAllocator;
+
+ internal static int PER_DOC_BLOCK_SIZE = 1024;
+
+ ByteBlockAllocator perDocAllocator;
+
+ /* Initial chunk size of the shared char[] blocks used to
+ store term text */
+ internal const int CHAR_BLOCK_SHIFT = 14;
+ internal static readonly int CHAR_BLOCK_SIZE = 1 << CHAR_BLOCK_SHIFT;
+ internal static readonly int CHAR_BLOCK_MASK = CHAR_BLOCK_SIZE - 1;
+
+ internal static readonly int MAX_TERM_LENGTH = CHAR_BLOCK_SIZE - 1;
+
+ private List<char[]> freeCharBlocks = new List<char[]>();
+
+ /* Allocate another char[] from the shared pool */
+ internal char[] GetCharBlock()
+ {
+ lock (this)
+ {
+ int size = freeCharBlocks.Count;
+ char[] c;
+ if (0 == size)
+ {
+ numBytesAlloc += CHAR_BLOCK_SIZE * CHAR_NUM_BYTE;
+ c = new char[CHAR_BLOCK_SIZE];
+ }
+ else
+ {
+ c = freeCharBlocks[size - 1];
+ freeCharBlocks.RemoveAt(size - 1);
+ }
+ // We always track allocations of char blocks, for now,
+ // because nothing that skips allocation tracking
+ // (currently only term vectors) uses its own char
+ // blocks.
+ numBytesUsed += CHAR_BLOCK_SIZE * CHAR_NUM_BYTE;
+ System.Diagnostics.Debug.Assert(numBytesUsed <= numBytesAlloc);
+ return c;
+ }
+ }
+
+ /* Return char[]s to the pool */
+ internal void RecycleCharBlocks(char[][] blocks, int numBlocks)
+ {
+ lock (this)
+ {
+ for (int i = 0; i < numBlocks; i++)
+ {
+ freeCharBlocks.Add(blocks[i]);
+ blocks[i] = null;
+ }
+ }
+ }
+
+ internal System.String ToMB(long v)
+ {
+ return System.String.Format(nf, "{0:f}", new System.Object[] { (v / 1024F / 1024F) });
+ }
+
+
+ /* We have four pools of RAM: Postings, byte blocks
+ * (holds freq/prox posting data), char blocks (holds
+ * characters in the term) and per-doc buffers (stored fields/term vectors).
+ * Different docs require varying amount of storage from
+ * these four classes.
+ *
+ * For example, docs with many unique single-occurrence
+ * short terms will use up the Postings RAM and hardly any
+ * of the others, whereas docs with very large terms
+ * will use a lot of char block RAM and relatively little of
+ * the others. This method just frees allocations from
+ * the pools once we are over-budget, which balances the
+ * pools to match the current docs. */
+ internal void BalanceRAM()
+ {
+
+ // We flush when we've used our target usage
+ long flushTrigger = ramBufferSize;
+
+ long deletesRAMUsed = deletesInRAM.bytesUsed + deletesFlushed.bytesUsed;
+
+ if (numBytesAlloc + deletesRAMUsed > freeTrigger)
+ {
+
+ if (infoStream != null)
+ Message(
+ " RAM: now balance allocations: usedMB=" + ToMB(numBytesUsed) +
+ " vs trigger=" + ToMB(flushTrigger) +
+ " allocMB=" + ToMB(numBytesAlloc) +
+ " deletesMB=" + ToMB(deletesRAMUsed) +
+ " vs trigger=" + ToMB(freeTrigger) +
+ " byteBlockFree=" + ToMB(byteBlockAllocator.freeByteBlocks.Count * BYTE_BLOCK_SIZE) +
+ " perDocFree=" + ToMB(perDocAllocator.freeByteBlocks.Count * PER_DOC_BLOCK_SIZE) +
+ " charBlockFree=" + ToMB(freeCharBlocks.Count * CHAR_BLOCK_SIZE * CHAR_NUM_BYTE));
+
+ long startBytesAlloc = numBytesAlloc + deletesRAMUsed;
+
+ int iter = 0;
+
+ // We free equally from each pool in 32 KB
+ // chunks until we are below our threshold
+ // (freeLevel)
+
+ bool any = true;
+
+ while (numBytesAlloc + deletesRAMUsed > freeLevel)
+ {
+
+ lock (this)
+ {
+ if (0 == perDocAllocator.freeByteBlocks.Count
+ && 0 == byteBlockAllocator.freeByteBlocks.Count
+ && 0 == freeCharBlocks.Count
+ && 0 == freeIntBlocks.Count
+ && !any)
+ {
+ // Nothing else to free -- must flush now.
+ bufferIsFull = numBytesUsed + deletesRAMUsed > flushTrigger;
+ if (infoStream != null)
+ {
+ if (bufferIsFull)
+ Message(" nothing to free; now set bufferIsFull");
+ else
+ Message(" nothing to free");
+ }
+ System.Diagnostics.Debug.Assert(numBytesUsed <= numBytesAlloc);
+ break;
+ }
+
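+ // Round-robin over the pools: each group of five iterations frees one
+ // byte block, one char block, one int block, up to 32 per-doc blocks,
+ // and finally asks the consumer to free any recycled state.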
+ if ((0 == iter % 5) && byteBlockAllocator.freeByteBlocks.Count > 0)
+ {
+ byteBlockAllocator.freeByteBlocks.RemoveAt(byteBlockAllocator.freeByteBlocks.Count - 1);
+ numBytesAlloc -= BYTE_BLOCK_SIZE;
+ }
+
+ if ((1 == iter % 5) && freeCharBlocks.Count > 0)
+ {
+ freeCharBlocks.RemoveAt(freeCharBlocks.Count - 1);
+ numBytesAlloc -= CHAR_BLOCK_SIZE * CHAR_NUM_BYTE;
+ }
+
+ if ((2 == iter % 5) && freeIntBlocks.Count > 0)
+ {
+ freeIntBlocks.RemoveAt(freeIntBlocks.Count - 1);
+ numBytesAlloc -= INT_BLOCK_SIZE * INT_NUM_BYTE;
+ }
+
+ if ((3 == iter % 5) && perDocAllocator.freeByteBlocks.Count > 0)
+ {
+ // Remove upwards of 32 blocks (each block is 1K)
+ for (int i = 0; i < 32; ++i)
+ {
+ perDocAllocator.freeByteBlocks.RemoveAt(perDocAllocator.freeByteBlocks.Count - 1);
+ numBytesAlloc -= PER_DOC_BLOCK_SIZE;
+ if (perDocAllocator.freeByteBlocks.Count == 0)
+ {
+ break;
+ }
+ }
+ }
+ }
+
+ if ((4 == iter % 5) && any)
+ // Ask consumer to free any recycled state
+ any = consumer.FreeRAM();
+
+ iter++;
+ }
+
+ if (infoStream != null)
+ Message(System.String.Format(nf, " after free: freedMB={0:f} usedMB={1:f} allocMB={2:f}",
+ new System.Object[] { ((startBytesAlloc - numBytesAlloc) / 1024.0 / 1024.0), (numBytesUsed / 1024.0 / 1024.0), (numBytesAlloc / 1024.0 / 1024.0) }));
+ }
+ else
+ {
+ // If we have not crossed the 100% mark, but have
+ // crossed the 95% mark of RAM we are actually
+ // using, go ahead and flush. This prevents
+ // over-allocating and then freeing, with every
+ // flush.
+ lock (this)
+ {
+
+ if (numBytesUsed + deletesRAMUsed > flushTrigger)
+ {
+ if (infoStream != null)
+ Message(System.String.Format(nf, " RAM: now flush @ usedMB={0:f} allocMB={1:f} triggerMB={2:f}",
+ new object[] { (numBytesUsed / 1024.0 / 1024.0), (numBytesAlloc / 1024.0 / 1024.0), (flushTrigger / 1024.0 / 1024.0) }));
+
+ bufferIsFull = true;
+ }
+ }
+ }
+ }
+
+ internal WaitQueue waitQueue;
+
+ internal class WaitQueue
+ {
+ private void InitBlock(DocumentsWriter enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private DocumentsWriter enclosingInstance;
+ public DocumentsWriter Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ internal DocWriter[] waiting;
+ internal int nextWriteDocID;
+ internal int nextWriteLoc;
+ internal int numWaiting;
+ internal long waitingBytes;
+
+ public WaitQueue(DocumentsWriter enclosingInstance)
+ {
+ InitBlock(enclosingInstance);
+ waiting = new DocWriter[10];
+ }
+
+ internal void Reset()
+ {
+ lock (this)
+ {
+ // NOTE: nextWriteLoc doesn't need to be reset
+ System.Diagnostics.Debug.Assert(numWaiting == 0);
+ System.Diagnostics.Debug.Assert(waitingBytes == 0);
+ nextWriteDocID = 0;
+ }
+ }
+
+ internal bool DoResume()
+ {
+ lock (this)
+ {
+ return waitingBytes <= Enclosing_Instance.waitQueueResumeBytes;
+ }
+ }
+
+ internal bool DoPause()
+ {
+ lock (this)
+ {
+ return waitingBytes > Enclosing_Instance.waitQueuePauseBytes;
+ }
+ }
+
+ internal void Abort()
+ {
+ lock (this)
+ {
+ int count = 0;
+ for (int i = 0; i < waiting.Length; i++)
+ {
+ DocWriter doc = waiting[i];
+ if (doc != null)
+ {
+ doc.Abort();
+ waiting[i] = null;
+ count++;
+ }
+ }
+ waitingBytes = 0;
+ System.Diagnostics.Debug.Assert(count == numWaiting);
+ numWaiting = 0;
+ }
+ }
+
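+ // Writes one finished doc to the doc store; docs must arrive here in
+ // docID order (or be the skipDocWriter placeholder for skipped docs).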
+ private void WriteDocument(DocWriter doc)
+ {
+ System.Diagnostics.Debug.Assert(doc == Enclosing_Instance.skipDocWriter || nextWriteDocID == doc.docID);
+ bool success = false;
+ try
+ {
+ doc.Finish();
+ nextWriteDocID++;
+ Enclosing_Instance.numDocsInStore++;
+ nextWriteLoc++;
+ System.Diagnostics.Debug.Assert(nextWriteLoc <= waiting.Length);
+ if (nextWriteLoc == waiting.Length)
+ nextWriteLoc = 0;
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ Enclosing_Instance.SetAborting();
+ }
+ }
+
+ public bool Add(DocWriter doc)
+ {
+ lock (this)
+ {
+
+ System.Diagnostics.Debug.Assert(doc.docID >= nextWriteDocID);
+
+ if (doc.docID == nextWriteDocID)
+ {
+ WriteDocument(doc);
+ while (true)
+ {
+ doc = waiting[nextWriteLoc];
+ if (doc != null)
+ {
+ numWaiting--;
+ waiting[nextWriteLoc] = null;
+ waitingBytes -= doc.SizeInBytes();
+ WriteDocument(doc);
+ }
+ else
+ break;
+ }
+ }
+ else
+ {
+
+ // I finished before documents that were added
+ // before me. This can easily happen when I am a
+ // small doc and the docs before me were large, or,
+ // just due to luck in the thread scheduling. Just
+ // add myself to the queue and when that large doc
+ // finishes, it will flush me:
+ int gap = doc.docID - nextWriteDocID;
+ if (gap >= waiting.Length)
+ {
+ // Grow queue
+ DocWriter[] newArray = new DocWriter[ArrayUtil.GetNextSize(gap)];
+ System.Diagnostics.Debug.Assert(nextWriteLoc >= 0);
+ Array.Copy(waiting, nextWriteLoc, newArray, 0, waiting.Length - nextWriteLoc);
+ Array.Copy(waiting, 0, newArray, waiting.Length - nextWriteLoc, nextWriteLoc);
+ nextWriteLoc = 0;
+ waiting = newArray;
+ gap = doc.docID - nextWriteDocID;
+ }
+
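+ // waiting is a circular buffer indexed relative to nextWriteLoc;
+ // compute and wrap the slot where this doc should park.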
+ int loc = nextWriteLoc + gap;
+ if (loc >= waiting.Length)
+ loc -= waiting.Length;
+
+ // We should only wrap one time
+ System.Diagnostics.Debug.Assert(loc < waiting.Length);
+
+ // Nobody should be in my spot!
+ System.Diagnostics.Debug.Assert(waiting [loc] == null);
+ waiting[loc] = doc;
+ numWaiting++;
+ waitingBytes += doc.SizeInBytes();
+ }
+
+ return DoPause();
+ }
+ }
+ }
+ static DocumentsWriter()
+ {
+ DefaultIndexingChain = new AnonymousClassIndexingChain();
+ POINTER_NUM_BYTE = Constants.JRE_IS_64BIT ? 8 : 4;
+ }
+
+ public static int BYTE_BLOCK_SIZE_ForNUnit
+ {
+ get { return BYTE_BLOCK_SIZE; }
+ }
+
+ public static int CHAR_BLOCK_SIZE_ForNUnit
+ {
+ get { return CHAR_BLOCK_SIZE; }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/DocumentsWriterThreadState.cs b/src/core/Index/DocumentsWriterThreadState.cs
new file mode 100644
index 0000000..e20fbee
--- /dev/null
+++ b/src/core/Index/DocumentsWriterThreadState.cs
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary>Used by DocumentsWriter to maintain per-thread state.
+ /// We keep a separate Posting hash and other state for each
+ /// thread and then merge postings hashes from all threads
+ /// when writing the segment.
+ /// </summary>
+ sealed class DocumentsWriterThreadState
+ {
+
+ internal bool isIdle = true; // false if this is currently in use by a thread
+ internal int numThreads = 1; // Number of threads that share this instance
+ internal bool doFlushAfter; // true if we should flush after processing current doc
+ internal DocConsumerPerThread consumer;
+ internal DocumentsWriter.DocState docState;
+
+ internal DocumentsWriter docWriter;
+
+ public DocumentsWriterThreadState(DocumentsWriter docWriter)
+ {
+ this.docWriter = docWriter;
+ docState = new DocumentsWriter.DocState();
+ docState.maxFieldLength = docWriter.maxFieldLength;
+ docState.infoStream = docWriter.infoStream;
+ docState.similarity = docWriter.similarity;
+ docState.docWriter = docWriter;
+ consumer = docWriter.consumer.AddThread(this);
+ }
+
+ internal void DoAfterFlush()
+ {
+ numThreads = 0;
+ doFlushAfter = false;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/FieldInfo.cs b/src/core/Index/FieldInfo.cs
new file mode 100644
index 0000000..bfca8af
--- /dev/null
+++ b/src/core/Index/FieldInfo.cs
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ public sealed class FieldInfo : System.ICloneable
+ {
+ internal System.String name;
+ internal bool isIndexed;
+ internal int number;
+
+ // true if term vector for this field should be stored
+ internal bool storeTermVector;
+ internal bool storeOffsetWithTermVector;
+ internal bool storePositionWithTermVector;
+
+ internal bool omitNorms; // omit norms associated with indexed fields
+ internal bool omitTermFreqAndPositions;
+
+ internal bool storePayloads; // whether this field stores payloads together with term positions
+
+ internal FieldInfo(System.String na, bool tk, int nu, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions)
+ {
+ name = na;
+ isIndexed = tk;
+ number = nu;
+ if (isIndexed)
+ {
+ this.storeTermVector = storeTermVector;
+ this.storeOffsetWithTermVector = storeOffsetWithTermVector;
+ this.storePositionWithTermVector = storePositionWithTermVector;
+ this.storePayloads = storePayloads;
+ this.omitNorms = omitNorms;
+ this.omitTermFreqAndPositions = omitTermFreqAndPositions;
+ }
+ else
+ {
+ // for non-indexed fields, leave defaults
+ this.storeTermVector = false;
+ this.storeOffsetWithTermVector = false;
+ this.storePositionWithTermVector = false;
+ this.storePayloads = false;
+ this.omitNorms = true;
+ this.omitTermFreqAndPositions = false;
+ }
+ }
+
+ public System.Object Clone()
+ {
+ return new FieldInfo(name, isIndexed, number, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
+ }
+
+ internal void Update(bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions)
+ {
+ if (this.isIndexed != isIndexed)
+ {
+ this.isIndexed = true; // once indexed, always indexed
+ }
+ if (isIndexed)
+ {
+ // if updated field data is not for indexing, leave the updates out
+ if (this.storeTermVector != storeTermVector)
+ {
+ this.storeTermVector = true; // once vector, always vector
+ }
+ if (this.storePositionWithTermVector != storePositionWithTermVector)
+ {
+ this.storePositionWithTermVector = true; // once vector, always vector
+ }
+ if (this.storeOffsetWithTermVector != storeOffsetWithTermVector)
+ {
+ this.storeOffsetWithTermVector = true; // once vector, always vector
+ }
+ if (this.storePayloads != storePayloads)
+ {
+ this.storePayloads = true;
+ }
+ if (this.omitNorms != omitNorms)
+ {
+ this.omitNorms = false; // once norms are stored, always store
+ }
+ if (this.omitTermFreqAndPositions != omitTermFreqAndPositions)
+ {
+ this.omitTermFreqAndPositions = true; // once omitTermFreqAndPositions is requested, term freqs and positions stay omitted for the life of the field
+ }
+ }
+ }
+
+ public bool storePayloads_ForNUnit
+ {
+ get { return storePayloads; }
+ }
+
+ public System.String name_ForNUnit
+ {
+ get { return name; }
+ }
+
+ public bool isIndexed_ForNUnit
+ {
+ get { return isIndexed; }
+ }
+
+ public bool omitNorms_ForNUnit
+ {
+ get { return omitNorms; }
+ }
+
+ public bool omitTermFreqAndPositions_ForNUnit
+ {
+ get { return omitTermFreqAndPositions; }
+ }
+
+ public bool storeTermVector_ForNUnit
+ {
+ get { return storeTermVector; }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/FieldInfos.cs b/src/core/Index/FieldInfos.cs
new file mode 100644
index 0000000..8c9cae6
--- /dev/null
+++ b/src/core/Index/FieldInfos.cs
@@ -0,0 +1,491 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Documents;
+using Lucene.Net.Support;
+using Document = Lucene.Net.Documents.Document;
+using Directory = Lucene.Net.Store.Directory;
+using IndexInput = Lucene.Net.Store.IndexInput;
+using IndexOutput = Lucene.Net.Store.IndexOutput;
+using StringHelper = Lucene.Net.Util.StringHelper;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary>Access to the Fieldable Info file that describes document fields and whether or
+ /// not they are indexed. Each segment has a separate Fieldable Info file. Objects
+ /// of this class are thread-safe for multiple readers, but only one thread can
+ /// be adding documents at a time, with no other reader or writer threads
+ /// accessing this object.
+ /// </summary>
+ public sealed class FieldInfos : ICloneable
+ {
+
+ // Used internally (ie not written to *.fnm files) for pre-2.9 files
+ public const int FORMAT_PRE = - 1;
+
+ // First used in 2.9; prior to 2.9 there was no format header
+ public const int FORMAT_START = - 2;
+
+ internal static readonly int CURRENT_FORMAT = FORMAT_START;
+
+ internal const byte IS_INDEXED = (0x1);
+ internal const byte STORE_TERMVECTOR = (0x2);
+ internal const byte STORE_POSITIONS_WITH_TERMVECTOR =(0x4);
+ internal const byte STORE_OFFSET_WITH_TERMVECTOR = (0x8);
+ internal const byte OMIT_NORMS = (0x10);
+ internal const byte STORE_PAYLOADS = (0x20);
+ internal const byte OMIT_TERM_FREQ_AND_POSITIONS = (0x40);
+
+ private readonly System.Collections.Generic.List<FieldInfo> byNumber = new System.Collections.Generic.List<FieldInfo>();
+ private readonly HashMap<string, FieldInfo> byName = new HashMap<string, FieldInfo>();
+ private int format;
+
+ public /*internal*/ FieldInfos()
+ {
+ }
+
+ /// <summary> Construct a FieldInfos object by reading the named file from the
+ /// given Directory via an IndexInput.
+ /// </summary>
+ /// <param name="d">The directory to open the IndexInput from
+ /// </param>
+ /// <param name="name">The name of the file to open the IndexInput from in the Directory
+ /// </param>
+ /// <throws> IOException </throws>
+ public /*internal*/ FieldInfos(Directory d, String name)
+ {
+ IndexInput input = d.OpenInput(name);
+ try
+ {
+ try
+ {
+ Read(input, name);
+ }
+ catch (System.IO.IOException)
+ {
+ if (format == FORMAT_PRE)
+ {
+ // LUCENE-1623: FORMAT_PRE (before there was a
+ // format) may be 2.3.2 (pre-utf8) or 2.4.x (utf8)
+ // encoding; retry with input set to pre-utf8
+ input.Seek(0);
+ input.SetModifiedUTF8StringsMode();
+ byNumber.Clear();
+ byName.Clear();
+
+ bool rethrow = false;
+ try
+ {
+ Read(input, name);
+ }
+ catch (Exception)
+ {
+ // Ignore any new exception & set to throw original IOE
+ rethrow = true;
+ }
+ if(rethrow)
+ {
+ // Preserve stack trace
+ throw;
+ }
+ }
+ else
+ {
+ // The IOException cannot be caused by
+ // LUCENE-1623, so re-throw it
+ throw;
+ }
+ }
+ }
+ finally
+ {
+ input.Close();
+ }
+ }
+
+ /// <summary> Returns a deep clone of this FieldInfos instance.</summary>
+ public Object Clone()
+ {
+ lock (this)
+ {
+ var fis = new FieldInfos();
+ int numField = byNumber.Count;
+ for (int i = 0; i < numField; i++)
+ {
+ var fi = (FieldInfo)byNumber[i].Clone();
+ fis.byNumber.Add(fi);
+ fis.byName[fi.name] = fi;
+ }
+ return fis;
+ }
+ }
+
+ /// <summary>Adds field info for a Document. </summary>
+ public void Add(Document doc)
+ {
+ lock (this)
+ {
+ System.Collections.Generic.IList<IFieldable> fields = doc.GetFields();
+ foreach(IFieldable field in fields)
+ {
+ Add(field.Name, field.IsIndexed, field.IsTermVectorStored,
+ field.IsStorePositionWithTermVector, field.IsStoreOffsetWithTermVector, field.OmitNorms,
+ false, field.OmitTermFreqAndPositions);
+ }
+ }
+ }
+
+ /// <summary>Returns true if any fields do not omitTermFreqAndPositions </summary>
+ internal bool HasProx()
+ {
+ int numFields = byNumber.Count;
+ for (int i = 0; i < numFields; i++)
+ {
+ FieldInfo fi = FieldInfo(i);
+ if (fi.isIndexed && !fi.omitTermFreqAndPositions)
+ {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /// <summary> Add fields that are indexed. Whether they have termvectors has to be specified.
+ ///
+ /// </summary>
+ /// <param name="names">The names of the fields
+ /// </param>
+ /// <param name="storeTermVectors">Whether the fields store term vectors or not
+ /// </param>
+ /// <param name="storePositionWithTermVector">true if positions should be stored.
+ /// </param>
+ /// <param name="storeOffsetWithTermVector">true if offsets should be stored
+ /// </param>
+ public void AddIndexed(System.Collections.Generic.ICollection<string> names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector)
+ {
+ lock (this)
+ {
+ foreach(string name in names)
+ {
+ Add(name, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector);
+ }
+ }
+ }
+
+ /// <summary> Assumes the fields are not storing term vectors.
+ ///
+ /// </summary>
+ /// <param name="names">The names of the fields
+ /// </param>
+ /// <param name="isIndexed">Whether the fields are indexed or not
+ ///
+ /// </param>
+ /// <seealso cref="Add(String, bool)">
+ /// </seealso>
+ public void Add(System.Collections.Generic.ICollection<string> names, bool isIndexed)
+ {
+ lock (this)
+ {
+ foreach(string name in names)
+ {
+ Add(name, isIndexed);
+ }
+ }
+ }
+
+ /// <summary> Calls the five-parameter Add with false for all term vector parameters.
+ ///
+ /// </summary>
+ /// <param name="name">The name of the Fieldable
+ /// </param>
+ /// <param name="isIndexed">true if the field is indexed
+ /// </param>
+ /// <seealso cref="Add(String, bool, bool, bool, bool)">
+ /// </seealso>
+ public void Add(String name, bool isIndexed)
+ {
+ lock (this)
+ {
+ Add(name, isIndexed, false, false, false, false);
+ }
+ }
+
+ /// <summary> Calls the five-parameter Add with false for term vector positions and offsets.
+ ///
+ /// </summary>
+ /// <param name="name">The name of the field
+ /// </param>
+ /// <param name="isIndexed"> true if the field is indexed
+ /// </param>
+ /// <param name="storeTermVector">true if the term vector should be stored
+ /// </param>
+ public void Add(System.String name, bool isIndexed, bool storeTermVector)
+ {
+ lock (this)
+ {
+ Add(name, isIndexed, storeTermVector, false, false, false);
+ }
+ }
+
+ /// <summary>If the field is not yet known, adds it. If it is known, checks to make
+ /// sure that the isIndexed flag is the same as was given previously for this
+ /// field. If not - marks it as being indexed. Same goes for the TermVector
+ /// parameters.
+ ///
+ /// </summary>
+ /// <param name="name">The name of the field
+ /// </param>
+ /// <param name="isIndexed">true if the field is indexed
+ /// </param>
+ /// <param name="storeTermVector">true if the term vector should be stored
+ /// </param>
+ /// <param name="storePositionWithTermVector">true if the term vector with positions should be stored
+ /// </param>
+ /// <param name="storeOffsetWithTermVector">true if the term vector with offsets should be stored
+ /// </param>
+ public void Add(System.String name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector)
+ {
+ lock (this)
+ {
+
+ Add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, false);
+ }
+ }
+
+ /// <summary>If the field is not yet known, adds it. If it is known, checks to make
+ /// sure that the isIndexed flag is the same as was given previously for this
+ /// field. If not, marks it as being indexed. The same applies to the TermVector
+ /// parameters.
+ ///
+ /// </summary>
+ /// <param name="name">The name of the field
+ /// </param>
+ /// <param name="isIndexed">true if the field is indexed
+ /// </param>
+ /// <param name="storeTermVector">true if the term vector should be stored
+ /// </param>
+ /// <param name="storePositionWithTermVector">true if the term vector with positions should be stored
+ /// </param>
+ /// <param name="storeOffsetWithTermVector">true if the term vector with offsets should be stored
+ /// </param>
+ /// <param name="omitNorms">true if the norms for the indexed field should be omitted
+ /// </param>
+ public void Add(System.String name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms)
+ {
+ lock (this)
+ {
+ Add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, false, false);
+ }
+ }
+
+ /// <summary>If the field is not yet known, adds it. If it is known, checks to make
+ /// sure that the isIndexed flag is the same as was given previously for this
+ /// field. If not, marks it as being indexed. The same applies to the TermVector
+ /// parameters.
+ ///
+ /// </summary>
+ /// <param name="name">The name of the field
+ /// </param>
+ /// <param name="isIndexed">true if the field is indexed
+ /// </param>
+ /// <param name="storeTermVector">true if the term vector should be stored
+ /// </param>
+ /// <param name="storePositionWithTermVector">true if the term vector with positions should be stored
+ /// </param>
+ /// <param name="storeOffsetWithTermVector">true if the term vector with offsets should be stored
+ /// </param>
+ /// <param name="omitNorms">true if the norms for the indexed field should be omitted
+ /// </param>
+ /// <param name="storePayloads">true if payloads should be stored for this field
+ /// </param>
+ /// <param name="omitTermFreqAndPositions">true if term freqs should be omitted for this field
+ /// </param>
+ public FieldInfo Add(System.String name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions)
+ {
+ lock (this)
+ {
+ FieldInfo fi = FieldInfo(name);
+ if (fi == null)
+ {
+ return AddInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
+ }
+ else
+ {
+ fi.Update(isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
+ }
+ return fi;
+ }
+ }
+
+ private FieldInfo AddInternal(String name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions)
+ {
+ name = StringHelper.Intern(name);
+ var fi = new FieldInfo(name, isIndexed, byNumber.Count, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
+ byNumber.Add(fi);
+ byName[name] = fi;
+ return fi;
+ }
+
+ public int FieldNumber(System.String fieldName)
+ {
+ FieldInfo fi = FieldInfo(fieldName);
+ return (fi != null) ? fi.number : -1;
+ }
+
+ public FieldInfo FieldInfo(System.String fieldName)
+ {
+ return byName[fieldName];
+ }
+
+ /// <summary> Return the fieldName identified by its number.
+ ///
+ /// </summary>
+ /// <param name="fieldNumber">
+ /// </param>
+ /// <returns> the fieldName or an empty string when the field
+ /// with the given number doesn't exist.
+ /// </returns>
+ public System.String FieldName(int fieldNumber)
+ {
+ FieldInfo fi = FieldInfo(fieldNumber);
+ return (fi != null) ? fi.name : "";
+ }
+
+ /// <summary> Return the fieldinfo object referenced by the fieldNumber.</summary>
+ /// <param name="fieldNumber">
+ /// </param>
+ /// <returns> the FieldInfo object or null when the given fieldNumber
+ /// doesn't exist.
+ /// </returns>
+ public FieldInfo FieldInfo(int fieldNumber)
+ {
+ return (fieldNumber >= 0) ? byNumber[fieldNumber] : null;
+ }
+
+ public int Size()
+ {
+ return byNumber.Count;
+ }
+
+ public bool HasVectors()
+ {
+ bool hasVectors = false;
+ for (int i = 0; i < Size(); i++)
+ {
+ if (FieldInfo(i).storeTermVector)
+ {
+ hasVectors = true;
+ break;
+ }
+ }
+ return hasVectors;
+ }
+
+ public void Write(Directory d, System.String name)
+ {
+ IndexOutput output = d.CreateOutput(name);
+ try
+ {
+ Write(output);
+ }
+ finally
+ {
+ output.Close();
+ }
+ }
+
+ public void Write(IndexOutput output)
+ {
+ output.WriteVInt(CURRENT_FORMAT);
+ output.WriteVInt(Size());
+ for (int i = 0; i < Size(); i++)
+ {
+ FieldInfo fi = FieldInfo(i);
+ var bits = (byte) (0x0);
+ if (fi.isIndexed)
+ bits |= IS_INDEXED;
+ if (fi.storeTermVector)
+ bits |= STORE_TERMVECTOR;
+ if (fi.storePositionWithTermVector)
+ bits |= STORE_POSITIONS_WITH_TERMVECTOR;
+ if (fi.storeOffsetWithTermVector)
+ bits |= STORE_OFFSET_WITH_TERMVECTOR;
+ if (fi.omitNorms)
+ bits |= OMIT_NORMS;
+ if (fi.storePayloads)
+ bits |= STORE_PAYLOADS;
+ if (fi.omitTermFreqAndPositions)
+ bits |= OMIT_TERM_FREQ_AND_POSITIONS;
+
+ output.WriteString(fi.name);
+ output.WriteByte(bits);
+ }
+ }
+
+ private void Read(IndexInput input, String fileName)
+ {
+ int firstInt = input.ReadVInt();
+
+ if (firstInt < 0)
+ {
+ // This is a real format
+ format = firstInt;
+ }
+ else
+ {
+ format = FORMAT_PRE;
+ }
+
+ if (format != FORMAT_PRE && format != FORMAT_START)
+ {
+ throw new CorruptIndexException("unrecognized format " + format + " in file \"" + fileName + "\"");
+ }
+
+ int size;
+ if (format == FORMAT_PRE)
+ {
+ size = firstInt;
+ }
+ else
+ {
+ size = input.ReadVInt(); //read in the size
+ }
+
+ for (int i = 0; i < size; i++)
+ {
+ String name = StringHelper.Intern(input.ReadString());
+ byte bits = input.ReadByte();
+ bool isIndexed = (bits & IS_INDEXED) != 0;
+ bool storeTermVector = (bits & STORE_TERMVECTOR) != 0;
+ bool storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
+ bool storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
+ bool omitNorms = (bits & OMIT_NORMS) != 0;
+ bool storePayloads = (bits & STORE_PAYLOADS) != 0;
+ bool omitTermFreqAndPositions = (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0;
+
+ AddInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
+ }
+
+ if (input.FilePointer != input.Length())
+ {
+ throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.FilePointer + " vs size " + input.Length());
+ }
+ }
+ }
+} \ No newline at end of file
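All of the Add overloads above funnel into the eight-parameter Add, which registers a new FieldInfo or updates the flags of an existing one. A hedged usage sketch (field names are illustrative, and it assumes the parameterless FieldInfos constructor exposed elsewhere in this class):

    var infos = new FieldInfos();
    infos.Add("title", true, true, false, false);   // indexed, term vectors, no positions/offsets
    infos.Add("body", true);                        // indexed, no term vectors
    int number = infos.FieldNumber("body");         // numbers follow insertion order, so 1 here
    bool vectors = infos.HasVectors();              // true, because "title" stores term vectors

Write then persists one flag byte per field (IS_INDEXED, STORE_TERMVECTOR, and so on), and Read decodes the same bits back when a segment is opened.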
diff --git a/src/core/Index/FieldInvertState.cs b/src/core/Index/FieldInvertState.cs
new file mode 100644
index 0000000..96d6c83
--- /dev/null
+++ b/src/core/Index/FieldInvertState.cs
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using AttributeSource = Lucene.Net.Util.AttributeSource;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> This class tracks the number and position / offset parameters of terms
+ /// being added to the index. The information collected in this class is
+ /// also used to calculate the normalization factor for a field.
+ ///
+ /// <p/><b>WARNING</b>: This API is new and experimental, and may suddenly
+ /// change.<p/>
+ /// </summary>
+ public sealed class FieldInvertState
+ {
+ internal int position;
+ internal int length;
+ internal int numOverlap;
+ internal int offset;
+ internal float boost;
+ internal AttributeSource attributeSource;
+
+ public FieldInvertState()
+ {
+ }
+
+ public FieldInvertState(int position, int length, int numOverlap, int offset, float boost)
+ {
+ this.position = position;
+ this.length = length;
+ this.numOverlap = numOverlap;
+ this.offset = offset;
+ this.boost = boost;
+ }
+
+ /// <summary> Re-initialize the state, using this boost value.</summary>
+ /// <param name="docBoost">boost value to use.
+ /// </param>
+ internal void Reset(float docBoost)
+ {
+ position = 0;
+ length = 0;
+ numOverlap = 0;
+ offset = 0;
+ boost = docBoost;
+ attributeSource = null;
+ }
+
+ /// <summary> Get the last processed term position.</summary>
+ /// <value> the position </value>
+ public int Position
+ {
+ get { return position; }
+ }
+
+ /// <summary> Get total number of terms in this field.</summary>
+ /// <value> the length </value>
+ public int Length
+ {
+ get { return length; }
+ }
+
+ /// <summary> Get the number of terms with <c>positionIncrement == 0</c>.</summary>
+ /// <value> the numOverlap </value>
+ public int NumOverlap
+ {
+ get { return numOverlap; }
+ }
+
+ /// <summary> Get end offset of the last processed term.</summary>
+ /// <value> the offset </value>
+ public int Offset
+ {
+ get { return offset; }
+ }
+
+ /// <summary> Get boost value. This is the cumulative product of
+ /// document boost and field boost for all field instances
+ /// sharing the same field name.
+ /// </summary>
+ /// <value> the boost </value>
+ public float Boost
+ {
+ get { return boost; }
+ }
+
+ public AttributeSource AttributeSource
+ {
+ get { return attributeSource; }
+ }
+ }
+} \ No newline at end of file
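FieldInvertState is populated by the indexing chain and is mainly read when computing field norms. A hedged sketch of inspecting it (the constructor arguments and the overlap discount below are illustrative, not part of this file's contract):

    var state = new FieldInvertState(5, 12, 2, 48, 1.0f);     // position, length, numOverlap, offset, boost
    int distinctPositions = state.Length - state.NumOverlap;  // tokens with positionIncrement == 0 counted once
    float fieldBoost = state.Boost;                           // cumulative document boost * field boost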
diff --git a/src/core/Index/FieldReaderException.cs b/src/core/Index/FieldReaderException.cs
new file mode 100644
index 0000000..7654fb4
--- /dev/null
+++ b/src/core/Index/FieldReaderException.cs
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary>
+ ///
+ ///
+ /// </summary>
+ [Serializable]
+ public class FieldReaderException:System.SystemException
+ {
+ /// <summary> Constructs a new runtime exception with <c>null</c> as its
+ /// detail message. The cause is not initialized, and may subsequently be
+ /// initialized by a call to <see cref="Exception.InnerException" />.
+ /// </summary>
+ public FieldReaderException()
+ {
+ }
+
+ /// <summary> Constructs a new runtime exception with the specified cause and a
+ /// detail message of <tt>(cause==null ? null : cause.toString())</tt>
+ /// (which typically contains the class and detail message of
+ /// <tt>cause</tt>).
+ /// <p/>
+ /// This constructor is useful for runtime exceptions
+ /// that are little more than wrappers for other throwables.
+ ///
+ /// </summary>
+ /// <param name="cause">the cause (which is saved for later retrieval by the
+ /// <see cref="Exception.InnerException" />). (A <tt>null</tt> value is
+ /// permitted, and indicates that the cause is nonexistent or
+ /// unknown.)
+ /// </param>
+ /// <since> 1.4
+ /// </since>
+ public FieldReaderException(System.Exception cause) : base((cause == null) ? null : cause.Message, cause)
+ {
+ }
+
+ /// <summary> Constructs a new runtime exception with the specified detail message.
+ /// The cause is not initialized, and may subsequently be initialized by a
+ /// call to <see cref="Exception.InnerException" />.
+ ///
+ /// </summary>
+ /// <param name="message">the detail message. The detail message is saved for
+ /// later retrieval by the <see cref="Exception.Message" /> method.
+ /// </param>
+ public FieldReaderException(System.String message):base(message)
+ {
+ }
+
+ /// <summary> Constructs a new runtime exception with the specified detail message and
+ /// cause. <p/>Note that the detail message associated with
+ /// <c>cause</c> is <i>not</i> automatically incorporated in
+ /// this runtime exception's detail message.
+ ///
+ /// </summary>
+ /// <param name="message">the detail message (which is saved for later retrieval
+ /// by the <see cref="Exception.Message" /> method).
+ /// </param>
+ /// <param name="cause"> the cause (which is saved for later retrieval by the
+ /// <see cref="Exception.InnerException" /> method). (A <tt>null</tt> value is
+ /// permitted, and indicates that the cause is nonexistent or
+ /// unknown.)
+ /// </param>
+ /// <since> 1.4
+ /// </since>
+ public FieldReaderException(System.String message, System.Exception cause):base(message, cause)
+ {
+ }
+ }
+} \ No newline at end of file
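The cause-wrapping constructor is the one FieldsReader (later in this patch) relies on when a lazy field hits an I/O failure; a hedged sketch of that pattern:

    try
    {
        // read the field bytes from the .fdt stream here
    }
    catch (System.IO.IOException e)
    {
        throw new FieldReaderException(e);   // keeps e as InnerException and reuses e.Message
    }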
diff --git a/src/core/Index/FieldSortedTermVectorMapper.cs b/src/core/Index/FieldSortedTermVectorMapper.cs
new file mode 100644
index 0000000..6c1915e
--- /dev/null
+++ b/src/core/Index/FieldSortedTermVectorMapper.cs
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.Collections.Generic;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> For each Field, store a sorted collection of <see cref="TermVectorEntry" />s
+ /// <p/>
+ /// This is not thread-safe.
+ /// </summary>
+ public class FieldSortedTermVectorMapper:TermVectorMapper
+ {
+ private readonly IDictionary<string, SortedSet<TermVectorEntry>> fieldToTerms = new HashMap<string, SortedSet<TermVectorEntry>>();
+ private SortedSet<TermVectorEntry> currentSet;
+ private System.String currentField;
+ private readonly IComparer<TermVectorEntry> comparator;
+
+ /// <summary> </summary>
+ /// <param name="comparator">A Comparator for sorting <see cref="TermVectorEntry" />s
+ /// </param>
+ public FieldSortedTermVectorMapper(IComparer<TermVectorEntry> comparator)
+ : this(false, false, comparator)
+ {
+ }
+
+
+ public FieldSortedTermVectorMapper(bool ignoringPositions, bool ignoringOffsets, IComparer<TermVectorEntry> comparator)
+ : base(ignoringPositions, ignoringOffsets)
+ {
+ this.comparator = comparator;
+ }
+
+ public override void Map(System.String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions)
+ {
+ var entry = new TermVectorEntry(currentField, term, frequency, offsets, positions);
+ currentSet.Add(entry);
+ }
+
+ public override void SetExpectations(System.String field, int numTerms, bool storeOffsets, bool storePositions)
+ {
+ currentSet = new SortedSet<TermVectorEntry>(comparator);
+ currentField = field;
+ fieldToTerms[field] = currentSet;
+ }
+
+ /// <summary> Get the mapping between fields and terms, sorted by the comparator
+ ///
+ /// </summary>
+ /// <value> A map between field names and <see cref="System.Collections.Generic.SortedSet{T}" />s per field. SortedSet entries are <see cref="TermVectorEntry" />. </value>
+ public virtual IDictionary<string, SortedSet<TermVectorEntry>> FieldToTerms
+ {
+ get { return fieldToTerms; }
+ }
+
+
+ public virtual IComparer<TermVectorEntry> Comparator
+ {
+ get { return comparator; }
+ }
+ }
+} \ No newline at end of file
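A hedged usage sketch: build an IComparer<TermVectorEntry>, then pass the mapper to IndexReader.GetTermFreqVector (the overload visible later in this patch). The comparer, the document number, and the assumption that TermVectorEntry exposes Term and Frequency properties are all illustrative:

    using System;
    using System.Collections.Generic;
    using Lucene.Net.Index;

    class FreqThenTermComparer : IComparer<TermVectorEntry>
    {
        public int Compare(TermVectorEntry x, TermVectorEntry y)
        {
            int byFreq = y.Frequency.CompareTo(x.Frequency);   // descending frequency first
            return byFreq != 0 ? byFreq : string.CompareOrdinal(x.Term, y.Term);
        }
    }

    static class TermVectorDump
    {
        internal static void Dump(IndexReader reader, int docNumber)
        {
            var mapper = new FieldSortedTermVectorMapper(new FreqThenTermComparer());
            reader.GetTermFreqVector(docNumber, mapper);       // fills mapper.FieldToTerms
            foreach (var field in mapper.FieldToTerms)
                Console.WriteLine(field.Key + ": " + field.Value.Count + " distinct terms");
        }
    }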
diff --git a/src/core/Index/FieldsReader.cs b/src/core/Index/FieldsReader.cs
new file mode 100644
index 0000000..8fa351d
--- /dev/null
+++ b/src/core/Index/FieldsReader.cs
@@ -0,0 +1,641 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.IO;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using TokenStream = Lucene.Net.Analysis.TokenStream;
+using Lucene.Net.Documents;
+using AlreadyClosedException = Lucene.Net.Store.AlreadyClosedException;
+using BufferedIndexInput = Lucene.Net.Store.BufferedIndexInput;
+using Directory = Lucene.Net.Store.Directory;
+using IndexInput = Lucene.Net.Store.IndexInput;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> Class responsible for access to stored document fields.
+ /// <p/>
+ /// It uses &lt;segment&gt;.fdt and &lt;segment&gt;.fdx files.
+ ///
+ /// </summary>
+ public sealed class FieldsReader : ICloneable, IDisposable
+ {
+ private readonly FieldInfos fieldInfos;
+
+ // The main fieldStream, used only for cloning.
+ private readonly IndexInput cloneableFieldsStream;
+
+ // This is a clone of cloneableFieldsStream used for reading documents.
+ // It should not be cloned outside of a synchronized context.
+ private readonly IndexInput fieldsStream;
+
+ private readonly IndexInput cloneableIndexStream;
+ private readonly IndexInput indexStream;
+ private readonly int numTotalDocs;
+ private readonly int size;
+ private bool closed;
+ private readonly int format;
+ private readonly int formatSize;
+
+ // The docID offset where our docs begin in the index
+ // file. This will be 0 if we have our own private file.
+ private readonly int docStoreOffset;
+
+ private readonly CloseableThreadLocal<IndexInput> fieldsStreamTL = new CloseableThreadLocal<IndexInput>();
+ private readonly bool isOriginal = false;
+
+ /// <summary>Returns a cloned FieldsReader that shares open
+ /// IndexInputs with the original one. It is the caller's
+ /// job not to close the original FieldsReader until all
+ /// clones are closed (e.g., currently SegmentReader manages
+ /// this logic).
+ /// </summary>
+ public System.Object Clone()
+ {
+ EnsureOpen();
+ return new FieldsReader(fieldInfos, numTotalDocs, size, format, formatSize, docStoreOffset, cloneableFieldsStream, cloneableIndexStream);
+ }
+
+ // Used only by clone
+ private FieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, int format, int formatSize, int docStoreOffset, IndexInput cloneableFieldsStream, IndexInput cloneableIndexStream)
+ {
+ this.fieldInfos = fieldInfos;
+ this.numTotalDocs = numTotalDocs;
+ this.size = size;
+ this.format = format;
+ this.formatSize = formatSize;
+ this.docStoreOffset = docStoreOffset;
+ this.cloneableFieldsStream = cloneableFieldsStream;
+ this.cloneableIndexStream = cloneableIndexStream;
+ fieldsStream = (IndexInput) cloneableFieldsStream.Clone();
+ indexStream = (IndexInput) cloneableIndexStream.Clone();
+ }
+
+ public /*internal*/ FieldsReader(Directory d, String segment, FieldInfos fn):this(d, segment, fn, BufferedIndexInput.BUFFER_SIZE, - 1, 0)
+ {
+ }
+
+ internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize):this(d, segment, fn, readBufferSize, - 1, 0)
+ {
+ }
+
+ internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size)
+ {
+ bool success = false;
+ isOriginal = true;
+ try
+ {
+ fieldInfos = fn;
+
+ cloneableFieldsStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_EXTENSION, readBufferSize);
+ cloneableIndexStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION, readBufferSize);
+
+ // First version of fdx did not include a format
+ // header, but, the first int will always be 0 in that
+ // case
+ int firstInt = cloneableIndexStream.ReadInt();
+ format = firstInt == 0 ? 0 : firstInt;
+
+ if (format > FieldsWriter.FORMAT_CURRENT)
+ throw new CorruptIndexException("Incompatible format version: " + format + " expected " + FieldsWriter.FORMAT_CURRENT + " or lower");
+
+ formatSize = format > FieldsWriter.FORMAT ? 4 : 0;
+
+ if (format < FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
+ cloneableFieldsStream.SetModifiedUTF8StringsMode();
+
+ fieldsStream = (IndexInput) cloneableFieldsStream.Clone();
+
+ long indexSize = cloneableIndexStream.Length() - formatSize;
+
+ if (docStoreOffset != - 1)
+ {
+ // We read only a slice out of this shared fields file
+ this.docStoreOffset = docStoreOffset;
+ this.size = size;
+
+ // Verify the file is long enough to hold all of our
+ // docs
+ System.Diagnostics.Debug.Assert(((int)(indexSize / 8)) >= size + this.docStoreOffset, "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset);
+ }
+ else
+ {
+ this.docStoreOffset = 0;
+ this.size = (int) (indexSize >> 3);
+ }
+
+ indexStream = (IndexInput) cloneableIndexStream.Clone();
+ numTotalDocs = (int) (indexSize >> 3);
+ success = true;
+ }
+ finally
+ {
+ // With lock-less commits, it's entirely possible (and
+ // fine) to hit a FileNotFound exception above. In
+ // this case, we want to explicitly close any subset
+ // of things that were opened so that we don't have to
+ // wait for a GC to do so.
+ if (!success)
+ {
+ Dispose();
+ }
+ }
+ }
+
+ /// <throws> AlreadyClosedException if this FieldsReader is closed </throws>
+ internal void EnsureOpen()
+ {
+ if (closed)
+ {
+ throw new AlreadyClosedException("this FieldsReader is closed");
+ }
+ }
+
+ /// <summary> Closes the underlying <see cref="Lucene.Net.Store.IndexInput" /> streams, including any ones associated with a
+ /// lazy implementation of a Field. This means that the Fields values will not be accessible.
+ ///
+ /// </summary>
+ /// <throws> IOException </throws>
+ public void Dispose()
+ {
+ // Move to protected method if class becomes unsealed
+ if (!closed)
+ {
+ if (fieldsStream != null)
+ {
+ fieldsStream.Close();
+ }
+ if (isOriginal)
+ {
+ if (cloneableFieldsStream != null)
+ {
+ cloneableFieldsStream.Close();
+ }
+ if (cloneableIndexStream != null)
+ {
+ cloneableIndexStream.Close();
+ }
+ }
+ if (indexStream != null)
+ {
+ indexStream.Close();
+ }
+ fieldsStreamTL.Close();
+ closed = true;
+ }
+ }
+
+ public /*internal*/ int Size()
+ {
+ return size;
+ }
+
+ private void SeekIndex(int docID)
+ {
+ indexStream.Seek(formatSize + (docID + docStoreOffset) * 8L);
+ }
+
+ internal bool CanReadRawDocs()
+ {
+ // Disable reading raw docs in 2.x format, because of the removal of compressed
+ // fields in 3.0. We don't want rawDocs() to decode field bits to figure out
+ // if a field was compressed, hence we enforce ordinary (non-raw) stored field merges
+ // for <3.0 indexes.
+ return format >= FieldsWriter.FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS;
+ }
+
+ public /*internal*/ Document Doc(int n, FieldSelector fieldSelector)
+ {
+ SeekIndex(n);
+ long position = indexStream.ReadLong();
+ fieldsStream.Seek(position);
+
+ var doc = new Document();
+ int numFields = fieldsStream.ReadVInt();
+ for (int i = 0; i < numFields; i++)
+ {
+ int fieldNumber = fieldsStream.ReadVInt();
+ FieldInfo fi = fieldInfos.FieldInfo(fieldNumber);
+ FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.Accept(fi.name);
+
+ byte bits = fieldsStream.ReadByte();
+ System.Diagnostics.Debug.Assert(bits <= FieldsWriter.FIELD_IS_COMPRESSED + FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY);
+
+ bool compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
+ System.Diagnostics.Debug.Assert(
+ (!compressed || (format < FieldsWriter.FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS)),
+ "compressed fields are only allowed in indexes of version <= 2.9");
+ bool tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
+ bool binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
+ //TODO: Find an alternative approach here if this list continues to grow beyond the
+ //list of 5 or 6 currently here. See Lucene 762 for discussion
+ if (acceptField.Equals(FieldSelectorResult.LOAD))
+ {
+ AddField(doc, fi, binary, compressed, tokenize);
+ }
+ else if (acceptField.Equals(FieldSelectorResult.LOAD_AND_BREAK))
+ {
+ AddField(doc, fi, binary, compressed, tokenize);
+ break; //Get out of this loop
+ }
+ else if (acceptField.Equals(FieldSelectorResult.LAZY_LOAD))
+ {
+ AddFieldLazy(doc, fi, binary, compressed, tokenize);
+ }
+ else if (acceptField.Equals(FieldSelectorResult.SIZE))
+ {
+ SkipField(binary, compressed, AddFieldSize(doc, fi, binary, compressed));
+ }
+ else if (acceptField.Equals(FieldSelectorResult.SIZE_AND_BREAK))
+ {
+ AddFieldSize(doc, fi, binary, compressed);
+ break;
+ }
+ else
+ {
+ SkipField(binary, compressed);
+ }
+ }
+
+ return doc;
+ }
+
+ /// <summary>Returns the length in bytes of each raw document in a
+ /// contiguous range of length numDocs starting with
+ /// startDocID. Returns the IndexInput (the fieldStream),
+ /// already seeked to the starting point for startDocID.
+ /// </summary>
+ internal IndexInput RawDocs(int[] lengths, int startDocID, int numDocs)
+ {
+ SeekIndex(startDocID);
+ long startOffset = indexStream.ReadLong();
+ long lastOffset = startOffset;
+ int count = 0;
+ while (count < numDocs)
+ {
+ long offset;
+ int docID = docStoreOffset + startDocID + count + 1;
+ System.Diagnostics.Debug.Assert(docID <= numTotalDocs);
+ if (docID < numTotalDocs)
+ offset = indexStream.ReadLong();
+ else
+ offset = fieldsStream.Length();
+ lengths[count++] = (int) (offset - lastOffset);
+ lastOffset = offset;
+ }
+
+ fieldsStream.Seek(startOffset);
+
+ return fieldsStream;
+ }
+
+ /// <summary> Skip the field. We still have to read some of the information about the field, but can skip past the actual content.
+ /// This will have the most payoff on large fields.
+ /// </summary>
+ private void SkipField(bool binary, bool compressed)
+ {
+ SkipField(binary, compressed, fieldsStream.ReadVInt());
+ }
+
+ private void SkipField(bool binary, bool compressed, int toRead)
+ {
+ if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES || binary || compressed)
+ {
+ fieldsStream.Seek(fieldsStream.FilePointer + toRead);
+ }
+ else
+ {
+ // We need to skip chars; this is slower, but still better than reading the data only to discard it
+ fieldsStream.SkipChars(toRead);
+ }
+ }
+
+ private void AddFieldLazy(Document doc, FieldInfo fi, bool binary, bool compressed, bool tokenize)
+ {
+ if (binary)
+ {
+ int toRead = fieldsStream.ReadVInt();
+ long pointer = fieldsStream.FilePointer;
+ //was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES));
+ doc.Add(new LazyField(this, fi.name, Field.Store.YES, toRead, pointer, binary, compressed));
+
+ //Need to move the pointer ahead by toRead positions
+ fieldsStream.Seek(pointer + toRead);
+ }
+ else
+ {
+ const Field.Store store = Field.Store.YES;
+ Field.Index index = FieldExtensions.ToIndex(fi.isIndexed, tokenize);
+ Field.TermVector termVector = FieldExtensions.ToTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);
+
+ AbstractField f;
+ if (compressed)
+ {
+ int toRead = fieldsStream.ReadVInt();
+ long pointer = fieldsStream.FilePointer;
+ f = new LazyField(this, fi.name, store, toRead, pointer, binary, compressed);
+ //skip over the part that we aren't loading
+ fieldsStream.Seek(pointer + toRead);
+ f.OmitNorms = fi.omitNorms;
+ f.OmitTermFreqAndPositions = fi.omitTermFreqAndPositions;
+ }
+ else
+ {
+ int length = fieldsStream.ReadVInt();
+ long pointer = fieldsStream.FilePointer;
+ //Skip ahead of where we are by the length of what is stored
+ if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
+ {
+ fieldsStream.Seek(pointer + length);
+ }
+ else
+ {
+ fieldsStream.SkipChars(length);
+ }
+ f = new LazyField(this, fi.name, store, index, termVector, length, pointer, binary, compressed)
+ {OmitNorms = fi.omitNorms, OmitTermFreqAndPositions = fi.omitTermFreqAndPositions};
+ }
+
+ doc.Add(f);
+ }
+ }
+
+ private void AddField(Document doc, FieldInfo fi, bool binary, bool compressed, bool tokenize)
+ {
+ //we have a binary stored field, and it may be compressed
+ if (binary)
+ {
+ int toRead = fieldsStream.ReadVInt();
+ var b = new byte[toRead];
+ fieldsStream.ReadBytes(b, 0, b.Length);
+ doc.Add(compressed ? new Field(fi.name, Uncompress(b), Field.Store.YES) : new Field(fi.name, b, Field.Store.YES));
+ }
+ else
+ {
+ const Field.Store store = Field.Store.YES;
+ Field.Index index = FieldExtensions.ToIndex(fi.isIndexed, tokenize);
+ Field.TermVector termVector = FieldExtensions.ToTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);
+
+ AbstractField f;
+ if (compressed)
+ {
+ int toRead = fieldsStream.ReadVInt();
+
+ var b = new byte[toRead];
+ fieldsStream.ReadBytes(b, 0, b.Length);
+ f = new Field(fi.name, false, System.Text.Encoding.GetEncoding("UTF-8").GetString(Uncompress(b)), store, index,
+ termVector) {OmitTermFreqAndPositions = fi.omitTermFreqAndPositions, OmitNorms = fi.omitNorms};
+ }
+ else
+ {
+ f = new Field(fi.name, false, fieldsStream.ReadString(), store, index, termVector)
+ {OmitTermFreqAndPositions = fi.omitTermFreqAndPositions, OmitNorms = fi.omitNorms};
+ }
+
+ doc.Add(f);
+ }
+ }
+
+ // Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes)
+ // Read just the size -- caller must skip the field content to continue reading fields
+ // Return the size in bytes or chars, depending on field type
+ private int AddFieldSize(Document doc, FieldInfo fi, bool binary, bool compressed)
+ {
+ int size = fieldsStream.ReadVInt(), bytesize = binary || compressed ? size : 2 * size;
+ var sizebytes = new byte[4];
+ sizebytes[0] = (byte) (Number.URShift(bytesize, 24));
+ sizebytes[1] = (byte) (Number.URShift(bytesize, 16));
+ sizebytes[2] = (byte) (Number.URShift(bytesize, 8));
+ sizebytes[3] = (byte) bytesize;
+ doc.Add(new Field(fi.name, sizebytes, Field.Store.YES));
+ return size;
+ }
+
+ /// <summary> A lazy implementation of Fieldable that defers loading of fields until asked for, instead of when the Document is
+ /// loaded.
+ /// </summary>
+ [Serializable]
+ private sealed class LazyField : AbstractField
+ {
+ private void InitBlock(FieldsReader enclosingInstance)
+ {
+ this.Enclosing_Instance = enclosingInstance;
+ }
+
+ private FieldsReader Enclosing_Instance { get; set; }
+
+ private int toRead;
+ private long pointer;
+ [Obsolete("Only kept for backward-compatbility with <3.0 indexes. Will be removed in 4.0.")]
+ private readonly Boolean isCompressed;
+
+ public LazyField(FieldsReader enclosingInstance, System.String name, Field.Store store, int toRead, long pointer, bool isBinary, bool isCompressed):base(name, store, Field.Index.NO, Field.TermVector.NO)
+ {
+ InitBlock(enclosingInstance);
+ this.toRead = toRead;
+ this.pointer = pointer;
+ this.internalIsBinary = isBinary;
+ if (isBinary)
+ internalBinaryLength = toRead;
+ lazy = true;
+ this.isCompressed = isCompressed;
+ }
+
+ public LazyField(FieldsReader enclosingInstance, System.String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer, bool isBinary, bool isCompressed):base(name, store, index, termVector)
+ {
+ InitBlock(enclosingInstance);
+ this.toRead = toRead;
+ this.pointer = pointer;
+ this.internalIsBinary = isBinary;
+ if (isBinary)
+ internalBinaryLength = toRead;
+ lazy = true;
+ this.isCompressed = isCompressed;
+ }
+
+ private IndexInput GetFieldStream()
+ {
+ IndexInput localFieldsStream = Enclosing_Instance.fieldsStreamTL.Get();
+ if (localFieldsStream == null)
+ {
+ localFieldsStream = (IndexInput) Enclosing_Instance.cloneableFieldsStream.Clone();
+ Enclosing_Instance.fieldsStreamTL.Set(localFieldsStream);
+ }
+ return localFieldsStream;
+ }
+
+ /// <summary>The value of the field as a Reader, or null. If null, the String value,
+ /// binary value, or TokenStream value is used. Exactly one of StringValue(),
+ /// ReaderValue(), GetBinaryValue(), and TokenStreamValue() must be set.
+ /// </summary>
+ public override TextReader ReaderValue
+ {
+ get
+ {
+ Enclosing_Instance.EnsureOpen();
+ return null;
+ }
+ }
+
+ /// <summary>The value of the field as a TokenStream, or null. If null, the Reader value,
+ /// String value, or binary value is used. Exactly one of StringValue(),
+ /// ReaderValue(), GetBinaryValue(), and TokenStreamValue() must be set.
+ /// </summary>
+ public override TokenStream TokenStreamValue
+ {
+ get
+ {
+ Enclosing_Instance.EnsureOpen();
+ return null;
+ }
+ }
+
+ /// <summary>The value of the field as a String, or null. If null, the Reader value,
+ /// binary value, or TokenStream value is used. Exactly one of StringValue(),
+ /// ReaderValue(), GetBinaryValue(), and TokenStreamValue() must be set.
+ /// </summary>
+ public override string StringValue
+ {
+ get
+ {
+ Enclosing_Instance.EnsureOpen();
+ if (internalIsBinary)
+ return null;
+
+ if (fieldsData == null)
+ {
+ IndexInput localFieldsStream = GetFieldStream();
+ try
+ {
+ localFieldsStream.Seek(pointer);
+ if (isCompressed)
+ {
+ var b = new byte[toRead];
+ localFieldsStream.ReadBytes(b, 0, b.Length);
+ fieldsData =
+ System.Text.Encoding.GetEncoding("UTF-8").GetString(Enclosing_Instance.Uncompress(b));
+ }
+ else
+ {
+ if (Enclosing_Instance.format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
+ {
+ var bytes = new byte[toRead];
+ localFieldsStream.ReadBytes(bytes, 0, toRead);
+ fieldsData = System.Text.Encoding.GetEncoding("UTF-8").GetString(bytes);
+ }
+ else
+ {
+ //read in chars b/c we already know the length we need to read
+ var chars = new char[toRead];
+ localFieldsStream.ReadChars(chars, 0, toRead);
+ fieldsData = new System.String(chars);
+ }
+ }
+ }
+ catch (System.IO.IOException e)
+ {
+ throw new FieldReaderException(e);
+ }
+ }
+ return (System.String) fieldsData;
+ }
+ }
+
+ public long Pointer
+ {
+ get
+ {
+ Enclosing_Instance.EnsureOpen();
+ return pointer;
+ }
+ set
+ {
+ Enclosing_Instance.EnsureOpen();
+ this.pointer = value;
+ }
+ }
+
+ public int ToRead
+ {
+ get
+ {
+ Enclosing_Instance.EnsureOpen();
+ return toRead;
+ }
+ set
+ {
+ Enclosing_Instance.EnsureOpen();
+ this.toRead = value;
+ }
+ }
+
+ public override byte[] GetBinaryValue(byte[] result)
+ {
+ Enclosing_Instance.EnsureOpen();
+
+ if (internalIsBinary)
+ {
+ if (fieldsData == null)
+ {
+ // Allocate new buffer if result is null or too small
+ byte[] b;
+ if (result == null || result.Length < toRead)
+ b = new byte[toRead];
+ else
+ b = result;
+
+ IndexInput localFieldsStream = GetFieldStream();
+
+ // Throw this IOException since IndexReader.document does so anyway, so probably not that big of a change for people
+ // since they are already handling this exception when getting the document
+ try
+ {
+ localFieldsStream.Seek(pointer);
+ localFieldsStream.ReadBytes(b, 0, toRead);
+ fieldsData = isCompressed ? Enclosing_Instance.Uncompress(b) : b;
+ }
+ catch (IOException e)
+ {
+ throw new FieldReaderException(e);
+ }
+
+ internalbinaryOffset = 0;
+ internalBinaryLength = toRead;
+ }
+
+ return (byte[]) fieldsData;
+ }
+ return null;
+ }
+ }
+
+ private byte[] Uncompress(byte[] b)
+ {
+ try
+ {
+ return CompressionTools.Decompress(b);
+ }
+ catch (Exception e)
+ {
+ // this will happen if the field is not compressed
+ throw new CorruptIndexException("field data are in wrong format: " + e, e);
+ }
+ }
+ }
+} \ No newline at end of file
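FieldsReader is normally driven by SegmentReader, but the FieldSelector branches inside Doc are easiest to see with a sketch. The selector, field names, and document number below are illustrative; FieldSelector and FieldSelectorResult come from Lucene.Net.Documents as listed in this commit:

    using Lucene.Net.Documents;

    class TitleEagerBodyLazySelector : FieldSelector
    {
        public FieldSelectorResult Accept(string fieldName)
        {
            if (fieldName == "title") return FieldSelectorResult.LOAD;        // materialize now
            if (fieldName == "body")  return FieldSelectorResult.LAZY_LOAD;   // wrap in a LazyField
            return FieldSelectorResult.NO_LOAD;                               // skip everything else
        }
    }

    // somewhere with access to an open FieldsReader:
    Document doc = fieldsReader.Doc(42, new TitleEagerBodyLazySelector());
    string title = doc.Get("title");                // already read from the .fdt stream
    IFieldable body = doc.GetFieldable("body");     // a LazyField holding only a file pointer
    string bodyText = body.StringValue;             // the stored bytes are read only at this point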
diff --git a/src/core/Index/FieldsWriter.cs b/src/core/Index/FieldsWriter.cs
new file mode 100644
index 0000000..9244195
--- /dev/null
+++ b/src/core/Index/FieldsWriter.cs
@@ -0,0 +1,290 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Linq;
+using Lucene.Net.Documents;
+using Document = Lucene.Net.Documents.Document;
+using Directory = Lucene.Net.Store.Directory;
+using IndexInput = Lucene.Net.Store.IndexInput;
+using IndexOutput = Lucene.Net.Store.IndexOutput;
+using RAMOutputStream = Lucene.Net.Store.RAMOutputStream;
+
+namespace Lucene.Net.Index
+{
+
+ sealed class FieldsWriter : IDisposable
+ {
+ internal const byte FIELD_IS_TOKENIZED = (0x1);
+ internal const byte FIELD_IS_BINARY = (0x2);
+ [Obsolete("Kept for backwards-compatibility with <3.0 indexes; will be removed in 4.0")]
+ internal const byte FIELD_IS_COMPRESSED = (0x4);
+
+ // Original format
+ internal const int FORMAT = 0;
+
+ // Changed strings to UTF8
+ internal const int FORMAT_VERSION_UTF8_LENGTH_IN_BYTES = 1;
+
+ // Lucene 3.0: Removal of compressed fields
+ internal static int FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2;
+
+ // NOTE: if you introduce a new format, make it 1 higher
+ // than the current one, and always change this if you
+ // switch to a new format!
+ internal static readonly int FORMAT_CURRENT = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS;
+
+ private readonly FieldInfos fieldInfos;
+
+ private IndexOutput fieldsStream;
+
+ private IndexOutput indexStream;
+
+ private readonly bool doClose;
+
+ internal FieldsWriter(Directory d, System.String segment, FieldInfos fn)
+ {
+ fieldInfos = fn;
+
+ bool success = false;
+ String fieldsName = segment + "." + IndexFileNames.FIELDS_EXTENSION;
+ try
+ {
+ fieldsStream = d.CreateOutput(fieldsName);
+ fieldsStream.WriteInt(FORMAT_CURRENT);
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ try
+ {
+ Dispose();
+ }
+ catch (System.Exception)
+ {
+ // Suppress so we keep throwing the original exception
+ }
+ try
+ {
+ d.DeleteFile(fieldsName);
+ }
+ catch (System.Exception)
+ {
+ // Suppress so we keep throwing the original exception
+ }
+ }
+ }
+
+ success = false;
+ String indexName = segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;
+ try
+ {
+ indexStream = d.CreateOutput(indexName);
+ indexStream.WriteInt(FORMAT_CURRENT);
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ try
+ {
+ Dispose();
+ }
+ catch (System.IO.IOException)
+ {
+ }
+ try
+ {
+ d.DeleteFile(fieldsName);
+ }
+ catch (System.Exception)
+ {
+ // Suppress so we keep throwing the original exception
+ }
+ try
+ {
+ d.DeleteFile(indexName);
+ }
+ catch (System.Exception)
+ {
+ // Suppress so we keep throwing the original exception
+ }
+ }
+ }
+
+ doClose = true;
+ }
+
+ internal FieldsWriter(IndexOutput fdx, IndexOutput fdt, FieldInfos fn)
+ {
+ fieldInfos = fn;
+ fieldsStream = fdt;
+ indexStream = fdx;
+ doClose = false;
+ }
+
+ internal void SetFieldsStream(IndexOutput stream)
+ {
+ this.fieldsStream = stream;
+ }
+
+ // Writes the contents of buffer into the fields stream
+ // and adds a new entry for this document into the index
+ // stream. This assumes the buffer was already written
+ // in the correct fields format.
+ internal void FlushDocument(int numStoredFields, RAMOutputStream buffer)
+ {
+ indexStream.WriteLong(fieldsStream.FilePointer);
+ fieldsStream.WriteVInt(numStoredFields);
+ buffer.WriteTo(fieldsStream);
+ }
+
+ internal void SkipDocument()
+ {
+ indexStream.WriteLong(fieldsStream.FilePointer);
+ fieldsStream.WriteVInt(0);
+ }
+
+ internal void Flush()
+ {
+ indexStream.Flush();
+ fieldsStream.Flush();
+ }
+
+ public void Dispose()
+ {
+ // Move to protected method if class becomes unsealed
+ if (doClose)
+ {
+ try
+ {
+ if (fieldsStream != null)
+ {
+ try
+ {
+ fieldsStream.Close();
+ }
+ finally
+ {
+ fieldsStream = null;
+ }
+ }
+ }
+ catch (System.IO.IOException)
+ {
+ try
+ {
+ if (indexStream != null)
+ {
+ try
+ {
+ indexStream.Close();
+ }
+ finally
+ {
+ indexStream = null;
+ }
+ }
+ }
+ catch (System.IO.IOException)
+ {
+ // Ignore so we throw only first IOException hit
+ }
+ throw;
+ }
+ finally
+ {
+ if (indexStream != null)
+ {
+ try
+ {
+ indexStream.Close();
+ }
+ finally
+ {
+ indexStream = null;
+ }
+ }
+ }
+ }
+ }
+
+ internal void WriteField(FieldInfo fi, IFieldable field)
+ {
+ fieldsStream.WriteVInt(fi.number);
+ byte bits = 0;
+ if (field.IsTokenized)
+ bits |= FieldsWriter.FIELD_IS_TOKENIZED;
+ if (field.IsBinary)
+ bits |= FieldsWriter.FIELD_IS_BINARY;
+
+ fieldsStream.WriteByte(bits);
+
+ // compression is disabled for the current field
+ if (field.IsBinary)
+ {
+ byte[] data = field.GetBinaryValue();
+ int len = field.BinaryLength;
+ int offset = field.BinaryOffset;
+
+ fieldsStream.WriteVInt(len);
+ fieldsStream.WriteBytes(data, offset, len);
+ }
+ else
+ {
+ fieldsStream.WriteString(field.StringValue);
+ }
+ }
+
+ /// <summary>Bulk write a contiguous series of documents. The
+ /// lengths array is the length (in bytes) of each raw
+ /// document. The stream IndexInput is the
+ /// fieldsStream from which we should bulk-copy all
+ /// bytes.
+ /// </summary>
+ internal void AddRawDocuments(IndexInput stream, int[] lengths, int numDocs)
+ {
+ long position = fieldsStream.FilePointer;
+ long start = position;
+ for (int i = 0; i < numDocs; i++)
+ {
+ indexStream.WriteLong(position);
+ position += lengths[i];
+ }
+ fieldsStream.CopyBytes(stream, position - start);
+ System.Diagnostics.Debug.Assert(fieldsStream.FilePointer == position);
+ }
+
+ internal void AddDocument(Document doc)
+ {
+ indexStream.WriteLong(fieldsStream.FilePointer);
+
+ System.Collections.Generic.IList<IFieldable> fields = doc.GetFields();
+ int storedCount = fields.Count(field => field.IsStored);
+ fieldsStream.WriteVInt(storedCount);
+
+ foreach(IFieldable field in fields)
+ {
+ if (field.IsStored)
+ WriteField(fieldInfos.FieldInfo(field.Name), field);
+ }
+ }
+ }
+} \ No newline at end of file
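For orientation, a hedged sketch of what AddDocument and WriteField put on disk per stored document, reconstructed from the writer code above (the reader side is FieldsReader, earlier in this patch); "field" stands for any IFieldable being stored:

    // .fdx : one long per document  -> absolute pointer into the .fdt stream
    // .fdt : VInt numStoredFields, then per stored field:
    //          VInt  field number (from FieldInfos)
    //          byte  flag bits (composed as below)
    //          VInt  length followed by raw bytes    (binary fields)
    //          or    WriteString of StringValue      (text fields)

    byte bits = 0;
    if (field.IsTokenized) bits |= FieldsWriter.FIELD_IS_TOKENIZED;   // 0x1
    if (field.IsBinary)    bits |= FieldsWriter.FIELD_IS_BINARY;      // 0x2
    // FIELD_IS_COMPRESSED (0x4) is only honored when reading pre-3.0 segments; it is never written here.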
diff --git a/src/core/Index/FilterIndexReader.cs b/src/core/Index/FilterIndexReader.cs
new file mode 100644
index 0000000..dc61613
--- /dev/null
+++ b/src/core/Index/FilterIndexReader.cs
@@ -0,0 +1,388 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using Document = Lucene.Net.Documents.Document;
+using FieldSelector = Lucene.Net.Documents.FieldSelector;
+using Directory = Lucene.Net.Store.Directory;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary>A <c>FilterIndexReader</c> contains another IndexReader, which it
+ /// uses as its basic source of data, possibly transforming the data along the
+ /// way or providing additional functionality. The class
+ /// <c>FilterIndexReader</c> itself simply implements all abstract methods
+ /// of <c>IndexReader</c> with versions that pass all requests to the
+ /// contained index reader. Subclasses of <c>FilterIndexReader</c> may
+ /// further override some of these methods and may also provide additional
+ /// methods and fields.
+ /// </summary>
+ public class FilterIndexReader:IndexReader
+ {
+
+ /// <summary>Base class for filtering <see cref="Lucene.Net.Index.TermDocs" /> implementations. </summary>
+ public class FilterTermDocs : TermDocs
+ {
+ protected internal TermDocs in_Renamed;
+
+ public FilterTermDocs(TermDocs in_Renamed)
+ {
+ this.in_Renamed = in_Renamed;
+ }
+
+ public virtual void Seek(Term term)
+ {
+ in_Renamed.Seek(term);
+ }
+ public virtual void Seek(TermEnum termEnum)
+ {
+ in_Renamed.Seek(termEnum);
+ }
+
+ public virtual int Doc
+ {
+ get { return in_Renamed.Doc; }
+ }
+
+ public virtual int Freq
+ {
+ get { return in_Renamed.Freq; }
+ }
+
+ public virtual bool Next()
+ {
+ return in_Renamed.Next();
+ }
+ public virtual int Read(int[] docs, int[] freqs)
+ {
+ return in_Renamed.Read(docs, freqs);
+ }
+ public virtual bool SkipTo(int i)
+ {
+ return in_Renamed.SkipTo(i);
+ }
+
+ public void Close()
+ {
+ Dispose();
+ }
+
+ public void Dispose()
+ {
+ Dispose(true);
+ }
+
+ protected virtual void Dispose(bool disposing)
+ {
+ if (disposing)
+ {
+ in_Renamed.Close();
+ }
+ }
+ }
+
+ /// <summary>Base class for filtering <see cref="TermPositions" /> implementations. </summary>
+ public class FilterTermPositions:FilterTermDocs, TermPositions
+ {
+
+ public FilterTermPositions(TermPositions in_Renamed):base(in_Renamed)
+ {
+ }
+
+ public virtual int NextPosition()
+ {
+ return ((TermPositions) this.in_Renamed).NextPosition();
+ }
+
+ public virtual int PayloadLength
+ {
+ get { return ((TermPositions) this.in_Renamed).PayloadLength; }
+ }
+
+ public virtual byte[] GetPayload(byte[] data, int offset)
+ {
+ return ((TermPositions) this.in_Renamed).GetPayload(data, offset);
+ }
+
+
+ // TODO: Remove warning after API has been finalized
+
+ public virtual bool IsPayloadAvailable
+ {
+ get { return ((TermPositions) this.in_Renamed).IsPayloadAvailable; }
+ }
+ }
+
+ /// <summary>Base class for filtering <see cref="TermEnum" /> implementations. </summary>
+ public class FilterTermEnum:TermEnum
+ {
+ protected internal TermEnum in_Renamed;
+
+ public FilterTermEnum(TermEnum in_Renamed)
+ {
+ this.in_Renamed = in_Renamed;
+ }
+
+ public override bool Next()
+ {
+ return in_Renamed.Next();
+ }
+
+ public override Term Term
+ {
+ get { return in_Renamed.Term; }
+ }
+
+ public override int DocFreq()
+ {
+ return in_Renamed.DocFreq();
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ if (disposing)
+ {
+ in_Renamed.Close();
+ }
+ }
+ }
+
+ protected internal IndexReader in_Renamed;
+
+ /// <summary> <p/>Construct a FilterIndexReader based on the specified base reader.
+ /// Directory locking for delete, undeleteAll, and setNorm operations is
+ /// left to the base reader.<p/>
+ /// <p/>Note that base reader is closed if this FilterIndexReader is closed.<p/>
+ /// </summary>
+ /// <param name="in_Renamed">specified base reader.
+ /// </param>
+ public FilterIndexReader(IndexReader in_Renamed):base()
+ {
+ this.in_Renamed = in_Renamed;
+ }
+
+ public override Directory Directory()
+ {
+ return in_Renamed.Directory();
+ }
+
+ public override ITermFreqVector[] GetTermFreqVectors(int docNumber)
+ {
+ EnsureOpen();
+ return in_Renamed.GetTermFreqVectors(docNumber);
+ }
+
+ public override ITermFreqVector GetTermFreqVector(int docNumber, System.String field)
+ {
+ EnsureOpen();
+ return in_Renamed.GetTermFreqVector(docNumber, field);
+ }
+
+
+ public override void GetTermFreqVector(int docNumber, System.String field, TermVectorMapper mapper)
+ {
+ EnsureOpen();
+ in_Renamed.GetTermFreqVector(docNumber, field, mapper);
+ }
+
+ public override void GetTermFreqVector(int docNumber, TermVectorMapper mapper)
+ {
+ EnsureOpen();
+ in_Renamed.GetTermFreqVector(docNumber, mapper);
+ }
+
+ public override int NumDocs()
+ {
+ // Don't call ensureOpen() here (it could affect performance)
+ return in_Renamed.NumDocs();
+ }
+
+ public override int MaxDoc
+ {
+ get
+ {
+ // Don't call ensureOpen() here (it could affect performance)
+ return in_Renamed.MaxDoc;
+ }
+ }
+
+ public override Document Document(int n, FieldSelector fieldSelector)
+ {
+ EnsureOpen();
+ return in_Renamed.Document(n, fieldSelector);
+ }
+
+ public override bool IsDeleted(int n)
+ {
+ // Don't call ensureOpen() here (it could affect performance)
+ return in_Renamed.IsDeleted(n);
+ }
+
+ public override bool HasDeletions
+ {
+ get
+ {
+ // Don't call ensureOpen() here (it could affect performance)
+ return in_Renamed.HasDeletions;
+ }
+ }
+
+ protected internal override void DoUndeleteAll()
+ {
+ in_Renamed.UndeleteAll();
+ }
+
+ public override bool HasNorms(System.String field)
+ {
+ EnsureOpen();
+ return in_Renamed.HasNorms(field);
+ }
+
+ public override byte[] Norms(System.String f)
+ {
+ EnsureOpen();
+ return in_Renamed.Norms(f);
+ }
+
+ public override void Norms(System.String f, byte[] bytes, int offset)
+ {
+ EnsureOpen();
+ in_Renamed.Norms(f, bytes, offset);
+ }
+
+ protected internal override void DoSetNorm(int d, System.String f, byte b)
+ {
+ in_Renamed.SetNorm(d, f, b);
+ }
+
+ public override TermEnum Terms()
+ {
+ EnsureOpen();
+ return in_Renamed.Terms();
+ }
+
+ public override TermEnum Terms(Term t)
+ {
+ EnsureOpen();
+ return in_Renamed.Terms(t);
+ }
+
+ public override int DocFreq(Term t)
+ {
+ EnsureOpen();
+ return in_Renamed.DocFreq(t);
+ }
+
+ public override TermDocs TermDocs()
+ {
+ EnsureOpen();
+ return in_Renamed.TermDocs();
+ }
+
+ public override TermDocs TermDocs(Term term)
+ {
+ EnsureOpen();
+ return in_Renamed.TermDocs(term);
+ }
+
+ public override TermPositions TermPositions()
+ {
+ EnsureOpen();
+ return in_Renamed.TermPositions();
+ }
+
+ protected internal override void DoDelete(int n)
+ {
+ in_Renamed.DeleteDocument(n);
+ }
+
+ protected internal override void DoCommit(System.Collections.Generic.IDictionary<string, string> commitUserData)
+ {
+ in_Renamed.Commit(commitUserData);
+ }
+
+ protected internal override void DoClose()
+ {
+ in_Renamed.Close();
+ // NOTE: only needed in case someone had asked for
+ // FieldCache for top-level reader (which is generally
+ // not a good idea):
+ Lucene.Net.Search.FieldCache_Fields.DEFAULT.Purge(this);
+ }
+
+
+ public override System.Collections.Generic.ICollection<string> GetFieldNames(IndexReader.FieldOption fieldNames)
+ {
+ EnsureOpen();
+ return in_Renamed.GetFieldNames(fieldNames);
+ }
+
+ public override long Version
+ {
+ get
+ {
+ EnsureOpen();
+ return in_Renamed.Version;
+ }
+ }
+
+ public override bool IsCurrent()
+ {
+ EnsureOpen();
+ return in_Renamed.IsCurrent();
+ }
+
+ public override bool IsOptimized()
+ {
+ EnsureOpen();
+ return in_Renamed.IsOptimized();
+ }
+
+ public override IndexReader[] GetSequentialSubReaders()
+ {
+ return in_Renamed.GetSequentialSubReaders();
+ }
+
+ override public System.Object Clone()
+ {
+ System.Diagnostics.Debug.Fail("Port issue:", "Lets see if we need this FilterIndexReader.Clone()"); // {{Aroush-2.9}}
+ return null;
+ }
+
+ /// <summary>
+ /// If the subclass of FilterIndexReader modifies the
+ /// contents of the FieldCache, you must override this
+ /// method to provide a different key.
+ /// </summary>
+ public override object FieldCacheKey
+ {
+ get { return in_Renamed.FieldCacheKey; }
+ }
+
+ /// <summary>
+ /// If the subclass of FilterIndexReader modifies the
+ /// deleted docs, you must override this method to provide
+ /// a different key.
+ /// </summary>
+ public override object DeletesCacheKey
+ {
+ get { return in_Renamed.DeletesCacheKey; }
+ }
+ }
+} \ No newline at end of file
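The intended use of FilterIndexReader is to subclass it and override only the behavior that should change; everything else delegates to the wrapped reader. A hedged sketch of a reader that hides deletions:

    using Lucene.Net.Index;

    // Illustrative subclass: reports every document as live, delegates all other calls.
    class NoDeletionsIndexReader : FilterIndexReader
    {
        public NoDeletionsIndexReader(IndexReader inner) : base(inner) { }

        public override bool IsDeleted(int n) { return false; }
        public override bool HasDeletions { get { return false; } }
        public override int NumDocs() { return MaxDoc; }   // pretend nothing was deleted
    }

Per the comments above, a subclass that changes deleted docs or FieldCache contents should also override DeletesCacheKey or FieldCacheKey respectively.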
diff --git a/src/core/Index/FormatPostingsDocsConsumer.cs b/src/core/Index/FormatPostingsDocsConsumer.cs
new file mode 100644
index 0000000..29c0558
--- /dev/null
+++ b/src/core/Index/FormatPostingsDocsConsumer.cs
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> NOTE: this API is experimental and will likely change</summary>
+
+ abstract class FormatPostingsDocsConsumer
+ {
+
+ /// <summary>Adds a new doc in this term. If this returns null
+ /// then we just skip consuming positions/payloads.
+ /// </summary>
+ internal abstract FormatPostingsPositionsConsumer AddDoc(int docID, int termDocFreq);
+
+ /// <summary>Called when we are done adding docs to this term </summary>
+ internal abstract void Finish();
+ }
+} \ No newline at end of file
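The consumer contract is one AddDoc call per document containing the current term, consuming the returned positions consumer when it is non-null, then a single Finish. A hedged sketch of how the indexing chain drives it (docsConsumer, the doc ID, and the frequency are illustrative):

    // a term that appears twice in document 3:
    FormatPostingsPositionsConsumer positions = docsConsumer.AddDoc(3, 2);
    if (positions != null)
    {
        // add the two positions (and any payloads) for doc 3 here
    }
    docsConsumer.Finish();   // called once this term's postings are complete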
diff --git a/src/core/Index/FormatPostingsDocsWriter.cs b/src/core/Index/FormatPostingsDocsWriter.cs
new file mode 100644
index 0000000..82a7398
--- /dev/null
+++ b/src/core/Index/FormatPostingsDocsWriter.cs
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using IndexOutput = Lucene.Net.Store.IndexOutput;
+using UnicodeUtil = Lucene.Net.Util.UnicodeUtil;
+
+namespace Lucene.Net.Index
+{
+ /// <summary>Consumes doc and freq, writing them using the current
+ /// index file format
+ /// </summary>
+ sealed class FormatPostingsDocsWriter : FormatPostingsDocsConsumer, IDisposable
+ {
+
+ internal IndexOutput out_Renamed;
+ internal FormatPostingsTermsWriter parent;
+ internal FormatPostingsPositionsWriter posWriter;
+ internal DefaultSkipListWriter skipListWriter;
+ internal int skipInterval;
+ internal int totalNumDocs;
+
+ internal bool omitTermFreqAndPositions;
+ internal bool storePayloads;
+ internal long freqStart;
+ internal FieldInfo fieldInfo;
+
+ internal FormatPostingsDocsWriter(SegmentWriteState state, FormatPostingsTermsWriter parent):base()
+ {
+ this.parent = parent;
+ System.String fileName = IndexFileNames.SegmentFileName(parent.parent.segment, IndexFileNames.FREQ_EXTENSION);
+ state.flushedFiles.Add(fileName);
+ out_Renamed = parent.parent.dir.CreateOutput(fileName);
+ totalNumDocs = parent.parent.totalNumDocs;
+
+ // TODO: abstraction violation
+ skipInterval = parent.parent.termsOut.skipInterval;
+ skipListWriter = parent.parent.skipListWriter;
+ skipListWriter.SetFreqOutput(out_Renamed);
+
+ posWriter = new FormatPostingsPositionsWriter(state, this);
+ }
+
+ internal void SetField(FieldInfo fieldInfo)
+ {
+ this.fieldInfo = fieldInfo;
+ omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
+ storePayloads = fieldInfo.storePayloads;
+ posWriter.SetField(fieldInfo);
+ }
+
+ internal int lastDocID;
+ internal int df;
+
+ /// <summary>Adds a new doc in this term. If this returns null
+ /// then we just skip consuming positions/payloads.
+ /// </summary>
+ internal override FormatPostingsPositionsConsumer AddDoc(int docID, int termDocFreq)
+ {
+
+ int delta = docID - lastDocID;
+
+ if (docID < 0 || (df > 0 && delta <= 0))
+ throw new CorruptIndexException("docs out of order (" + docID + " <= " + lastDocID + " )");
+
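+ // Every skipInterval-th document, buffer a skip entry so readers can
+ // jump ahead in this postings list without decoding every doc.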
+ if ((++df % skipInterval) == 0)
+ {
+ // TODO: abstraction violation
+ skipListWriter.SetSkipData(lastDocID, storePayloads, posWriter.lastPayloadLength);
+ skipListWriter.BufferSkip(df);
+ }
+
+ System.Diagnostics.Debug.Assert(docID < totalNumDocs, "docID=" + docID + " totalNumDocs=" + totalNumDocs);
+
+ lastDocID = docID;
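+ // Delta-encode the docID: with freqs enabled the delta is shifted left one
+ // bit and the low bit flags the common termDocFreq == 1 case, so the freq
+ // VInt can be omitted; otherwise the freq follows as a separate VInt.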
+ if (omitTermFreqAndPositions)
+ out_Renamed.WriteVInt(delta);
+ else if (1 == termDocFreq)
+ out_Renamed.WriteVInt((delta << 1) | 1);
+ else
+ {
+ out_Renamed.WriteVInt(delta << 1);
+ out_Renamed.WriteVInt(termDocFreq);
+ }
+
+ return posWriter;
+ }
+
+ private TermInfo termInfo = new TermInfo(); // minimize consing
+ internal UnicodeUtil.UTF8Result utf8 = new UnicodeUtil.UTF8Result();
+
+ /// <summary>Called when we are done adding docs to this term </summary>
+ internal override void Finish()
+ {
+ long skipPointer = skipListWriter.WriteSkip(out_Renamed);
+
+ // TODO: this is an abstraction violation -- we should not
+ // peek up into the parent's terms encoding format
+ termInfo.Set(df, parent.freqStart, parent.proxStart, (int) (skipPointer - parent.freqStart));
+
+ // TODO: we could do this incrementally
+ UnicodeUtil.UTF16toUTF8(parent.currentTerm, parent.currentTermStart, utf8);
+
+ if (df > 0)
+ {
+ parent.termsOut.Add(fieldInfo.number, utf8.result, utf8.length, termInfo);
+ }
+
+ lastDocID = 0;
+ df = 0;
+ }
+
+ public void Dispose()
+ {
+ // Move to protected method if class becomes unsealed
+ out_Renamed.Dispose();
+ posWriter.Dispose();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/FormatPostingsFieldsConsumer.cs b/src/core/Index/FormatPostingsFieldsConsumer.cs
new file mode 100644
index 0000000..a3f86ec
--- /dev/null
+++ b/src/core/Index/FormatPostingsFieldsConsumer.cs
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary>Abstract API that consumes terms, doc, freq, prox and
+ /// payloads postings. Concrete implementations of this
+ /// actually do "something" with the postings (write it into
+ /// the index in a specific format).
+ ///
+ /// NOTE: this API is experimental and will likely change
+ /// </summary>
+ abstract class FormatPostingsFieldsConsumer
+ {
+
+ /// <summary>Add a new field </summary>
+ internal abstract FormatPostingsTermsConsumer AddField(FieldInfo field);
+
+ /// <summary>Called when we are done adding everything. </summary>
+ internal abstract void Finish();
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/FormatPostingsFieldsWriter.cs b/src/core/Index/FormatPostingsFieldsWriter.cs
new file mode 100644
index 0000000..40ef619
--- /dev/null
+++ b/src/core/Index/FormatPostingsFieldsWriter.cs
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using Directory = Lucene.Net.Store.Directory;
+
+namespace Lucene.Net.Index
+{
+
+ sealed class FormatPostingsFieldsWriter:FormatPostingsFieldsConsumer
+ {
+
+ internal Directory dir;
+ internal System.String segment;
+ internal TermInfosWriter termsOut;
+ internal FieldInfos fieldInfos;
+ internal FormatPostingsTermsWriter termsWriter;
+ internal DefaultSkipListWriter skipListWriter;
+ internal int totalNumDocs;
+
+ public FormatPostingsFieldsWriter(SegmentWriteState state, FieldInfos fieldInfos):base()
+ {
+
+ dir = state.directory;
+ segment = state.segmentName;
+ totalNumDocs = state.numDocs;
+ this.fieldInfos = fieldInfos;
+ termsOut = new TermInfosWriter(dir, segment, fieldInfos, state.termIndexInterval);
+
+ // TODO: this is a nasty abstraction violation (that we
+ // peek down to find freqOut/proxOut) -- we need a
+ // better abstraction here whereby these child consumers
+ // can provide skip data or not
+ skipListWriter = new DefaultSkipListWriter(termsOut.skipInterval, termsOut.maxSkipLevels, totalNumDocs, null, null);
+
+ state.flushedFiles.Add(state.SegmentFileName(IndexFileNames.TERMS_EXTENSION));
+ state.flushedFiles.Add(state.SegmentFileName(IndexFileNames.TERMS_INDEX_EXTENSION));
+
+ termsWriter = new FormatPostingsTermsWriter(state, this);
+ }
+
+ /// <summary>Add a new field </summary>
+ internal override FormatPostingsTermsConsumer AddField(FieldInfo field)
+ {
+ termsWriter.SetField(field);
+ return termsWriter;
+ }
+
+ /// <summary>Called when we are done adding everything. </summary>
+ internal override void Finish()
+ {
+ termsOut.Dispose();
+ termsWriter.Dispose();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/FormatPostingsPositionsConsumer.cs b/src/core/Index/FormatPostingsPositionsConsumer.cs
new file mode 100644
index 0000000..f5bc440
--- /dev/null
+++ b/src/core/Index/FormatPostingsPositionsConsumer.cs
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Lucene.Net.Index
+{
+
+ abstract class FormatPostingsPositionsConsumer
+ {
+
+ /// <summary>Add a new position &amp; payload. If payloadLength > 0
+ /// you must read those bytes from the IndexInput.
+ /// </summary>
+ internal abstract void AddPosition(int position, byte[] payload, int payloadOffset, int payloadLength);
+
+ /// <summary>Called when we are done adding positions &amp; payloads </summary>
+ internal abstract void Finish();
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/FormatPostingsPositionsWriter.cs b/src/core/Index/FormatPostingsPositionsWriter.cs
new file mode 100644
index 0000000..8b70fcc
--- /dev/null
+++ b/src/core/Index/FormatPostingsPositionsWriter.cs
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using IndexInput = Lucene.Net.Store.IndexInput;
+using IndexOutput = Lucene.Net.Store.IndexOutput;
+
+namespace Lucene.Net.Index
+{
+
+ sealed class FormatPostingsPositionsWriter:FormatPostingsPositionsConsumer
+ {
+ internal FormatPostingsDocsWriter parent;
+ internal IndexOutput out_Renamed;
+
+ internal bool omitTermFreqAndPositions;
+ internal bool storePayloads;
+ internal int lastPayloadLength = - 1;
+
+ internal FormatPostingsPositionsWriter(SegmentWriteState state, FormatPostingsDocsWriter parent)
+ {
+ this.parent = parent;
+ omitTermFreqAndPositions = parent.omitTermFreqAndPositions;
+ if (parent.parent.parent.fieldInfos.HasProx())
+ {
+ // At least one field does not omit TF, so create the
+ // prox file
+ System.String fileName = IndexFileNames.SegmentFileName(parent.parent.parent.segment, IndexFileNames.PROX_EXTENSION);
+ state.flushedFiles.Add(fileName);
+ out_Renamed = parent.parent.parent.dir.CreateOutput(fileName);
+ parent.skipListWriter.SetProxOutput(out_Renamed);
+ }
+ // Every field omits TF so we will write no prox file
+ else
+ out_Renamed = null;
+ }
+
+ internal int lastPosition;
+
+ /// <summary>Add a new position &amp; payload </summary>
+ internal override void AddPosition(int position, byte[] payload, int payloadOffset, int payloadLength)
+ {
+ System.Diagnostics.Debug.Assert(!omitTermFreqAndPositions, "omitTermFreqAndPositions is true");
+ System.Diagnostics.Debug.Assert(out_Renamed != null);
+
+ int delta = position - lastPosition;
+ lastPosition = position;
+
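+ // When payloads are stored, the position delta is shifted left one bit and
+ // the low bit flags that the payload length changed and follows as a VInt;
+ // the payload bytes themselves are written whenever the length > 0.
+ // Without payloads, the raw position delta is written.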
+ if (storePayloads)
+ {
+ if (payloadLength != lastPayloadLength)
+ {
+ lastPayloadLength = payloadLength;
+ out_Renamed.WriteVInt((delta << 1) | 1);
+ out_Renamed.WriteVInt(payloadLength);
+ }
+ else
+ out_Renamed.WriteVInt(delta << 1);
+ if (payloadLength > 0)
+ out_Renamed.WriteBytes(payload, payloadLength);
+ }
+ else
+ out_Renamed.WriteVInt(delta);
+ }
+
+ internal void SetField(FieldInfo fieldInfo)
+ {
+ omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
+ storePayloads = omitTermFreqAndPositions ? false : fieldInfo.storePayloads;
+ }
+
+ /// <summary>Called when we are done adding positions &amp; payloads </summary>
+ internal override void Finish()
+ {
+ lastPosition = 0;
+ lastPayloadLength = - 1;
+ }
+
+ public void Dispose()
+ {
+ // Move to protected method if class becomes unsealed
+ if (out_Renamed != null)
+ out_Renamed.Dispose();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/FormatPostingsTermsConsumer.cs b/src/core/Index/FormatPostingsTermsConsumer.cs
new file mode 100644
index 0000000..637ecff
--- /dev/null
+++ b/src/core/Index/FormatPostingsTermsConsumer.cs
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using ArrayUtil = Lucene.Net.Util.ArrayUtil;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> NOTE: this API is experimental and will likely change</summary>
+
+ abstract class FormatPostingsTermsConsumer
+ {
+
+ /// <summary>Adds a new term in this field; term ends with U+FFFF
+ /// char
+ /// </summary>
+ internal abstract FormatPostingsDocsConsumer AddTerm(char[] text, int start);
+
+ internal char[] termBuffer;
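+ // Convenience overload: copies the string into a reusable char buffer and
+ // terminates it with the 0xFFFF sentinel expected by AddTerm(char[], int).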
+ internal virtual FormatPostingsDocsConsumer AddTerm(System.String text)
+ {
+ int len = text.Length;
+ if (termBuffer == null || termBuffer.Length < 1 + len)
+ termBuffer = new char[ArrayUtil.GetNextSize(1 + len)];
+ for (int i = 0; i < len; i++)
+ {
+ termBuffer[i] = (char) text[i];
+ }
+ termBuffer[len] = (char) (0xffff);
+ return AddTerm(termBuffer, 0);
+ }
+
+ /// <summary>Called when we are done adding terms to this field </summary>
+ internal abstract void Finish();
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/FormatPostingsTermsWriter.cs b/src/core/Index/FormatPostingsTermsWriter.cs
new file mode 100644
index 0000000..87d2026
--- /dev/null
+++ b/src/core/Index/FormatPostingsTermsWriter.cs
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ sealed class FormatPostingsTermsWriter : FormatPostingsTermsConsumer, IDisposable
+ {
+ internal FormatPostingsFieldsWriter parent;
+ internal FormatPostingsDocsWriter docsWriter;
+ internal TermInfosWriter termsOut;
+ internal FieldInfo fieldInfo;
+
+ internal FormatPostingsTermsWriter(SegmentWriteState state, FormatPostingsFieldsWriter parent):base()
+ {
+ this.parent = parent;
+ termsOut = parent.termsOut;
+ docsWriter = new FormatPostingsDocsWriter(state, this);
+ }
+
+ internal void SetField(FieldInfo fieldInfo)
+ {
+ this.fieldInfo = fieldInfo;
+ docsWriter.SetField(fieldInfo);
+ }
+
+ internal char[] currentTerm;
+ internal int currentTermStart;
+
+ internal long freqStart;
+ internal long proxStart;
+
+ /// <summary>Adds a new term in this field </summary>
+ internal override FormatPostingsDocsConsumer AddTerm(char[] text, int start)
+ {
+ currentTerm = text;
+ currentTermStart = start;
+
+ // TODO: this is an abstraction violation -- ideally this
+ // terms writer is not so "invasive", looking for file
+ // pointers in its child consumers.
+ freqStart = docsWriter.out_Renamed.FilePointer;
+ if (docsWriter.posWriter.out_Renamed != null)
+ proxStart = docsWriter.posWriter.out_Renamed.FilePointer;
+
+ parent.skipListWriter.ResetSkip();
+
+ return docsWriter;
+ }
+
+ /// <summary>Called when we are done adding terms to this field </summary>
+ internal override void Finish()
+ {
+ }
+
+ public void Dispose()
+ {
+ docsWriter.Dispose();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/FreqProxFieldMergeState.cs b/src/core/Index/FreqProxFieldMergeState.cs
new file mode 100644
index 0000000..5306918
--- /dev/null
+++ b/src/core/Index/FreqProxFieldMergeState.cs
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Index
+{
+
+ // TODO FI: some of this is "generic" to TermsHash* so we
+ // should factor it out so other consumers don't have to
+ // duplicate this code
+
+ /// <summary>Used by DocumentsWriter to merge the postings from
+ /// multiple ThreadStates when creating a segment
+ /// </summary>
+ sealed class FreqProxFieldMergeState
+ {
+
+ internal FreqProxTermsWriterPerField field;
+ internal int numPostings;
+ internal CharBlockPool charPool;
+ internal RawPostingList[] postings;
+
+ private FreqProxTermsWriter.PostingList p;
+ internal char[] text;
+ internal int textOffset;
+
+ private int postingUpto = - 1;
+
+ internal ByteSliceReader freq = new ByteSliceReader();
+ internal ByteSliceReader prox = new ByteSliceReader();
+
+ internal int docID;
+ internal int termFreq;
+
+ public FreqProxFieldMergeState(FreqProxTermsWriterPerField field)
+ {
+ this.field = field;
+ this.charPool = field.perThread.termsHashPerThread.charPool;
+ this.numPostings = field.termsHashPerField.numPostings;
+ this.postings = field.termsHashPerField.SortPostings();
+ }
+
+ internal bool NextTerm()
+ {
+ postingUpto++;
+ if (postingUpto == numPostings)
+ return false;
+
+ p = (FreqProxTermsWriter.PostingList) postings[postingUpto];
+ docID = 0;
+
+ text = charPool.buffers[p.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
+ textOffset = p.textStart & DocumentsWriter.CHAR_BLOCK_MASK;
+
+ field.termsHashPerField.InitReader(freq, p, 0);
+ if (!field.fieldInfo.omitTermFreqAndPositions)
+ field.termsHashPerField.InitReader(prox, p, 1);
+
+ // Should always be true
+ bool result = NextDoc();
+ System.Diagnostics.Debug.Assert(result);
+
+ return true;
+ }
+
+ public bool NextDoc()
+ {
+ if (freq.Eof())
+ {
+ if (p.lastDocCode != - 1)
+ {
+ // Return last doc
+ docID = p.lastDocID;
+ if (!field.omitTermFreqAndPositions)
+ termFreq = p.docFreq;
+ p.lastDocCode = - 1;
+ return true;
+ }
+ // EOF
+ else
+ return false;
+ }
+
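+ // Decode the in-RAM freq stream: without freqs the VInt is the raw doc
+ // delta; with freqs the delta sits in the high bits and a set low bit
+ // means termFreq == 1 (otherwise the freq follows as its own VInt).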
+ int code = freq.ReadVInt();
+ if (field.omitTermFreqAndPositions)
+ docID += code;
+ else
+ {
+ docID += Number.URShift(code, 1);
+ if ((code & 1) != 0)
+ termFreq = 1;
+ else
+ termFreq = freq.ReadVInt();
+ }
+
+ System.Diagnostics.Debug.Assert(docID != p.lastDocID);
+
+ return true;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/FreqProxTermsWriter.cs b/src/core/Index/FreqProxTermsWriter.cs
new file mode 100644
index 0000000..f98d646
--- /dev/null
+++ b/src/core/Index/FreqProxTermsWriter.cs
@@ -0,0 +1,303 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using UnicodeUtil = Lucene.Net.Util.UnicodeUtil;
+
+namespace Lucene.Net.Index
+{
+ sealed class FreqProxTermsWriter : TermsHashConsumer
+ {
+ public override TermsHashConsumerPerThread AddThread(TermsHashPerThread perThread)
+ {
+ return new FreqProxTermsWriterPerThread(perThread);
+ }
+
+ internal override void CreatePostings(RawPostingList[] postings, int start, int count)
+ {
+ int end = start + count;
+ for (int i = start; i < end; i++)
+ postings[i] = new PostingList();
+ }
+
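+ // Lexicographically compares two terms stored in char[] blocks; the
+ // 0xFFFF char acts as an end-of-term sentinel, so a term that is a
+ // prefix of another sorts first.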
+ private static int compareText(char[] text1, int pos1, char[] text2, int pos2)
+ {
+ while (true)
+ {
+ char c1 = text1[pos1++];
+ char c2 = text2[pos2++];
+ if (c1 != c2)
+ {
+ if (0xffff == c2)
+ return 1;
+ else if (0xffff == c1)
+ return - 1;
+ else
+ return c1 - c2;
+ }
+ else if (0xffff == c1)
+ return 0;
+ }
+ }
+
+ internal override void CloseDocStore(SegmentWriteState state)
+ {
+ }
+ public override void Abort()
+ {
+ }
+
+
+ // TODO: would be nice to factor out more of this, eg the
+ // FreqProxFieldMergeState, and code to visit all Fields
+ // under the same FieldInfo together, up into TermsHash*.
+ // Other writers would presumably share a lot of this...
+ public override void Flush(IDictionary<TermsHashConsumerPerThread, ICollection<TermsHashConsumerPerField>> threadsAndFields, SegmentWriteState state)
+ {
+
+ // Gather all FieldData's that have postings, across all
+ // ThreadStates
+ var allFields = new List<FreqProxTermsWriterPerField>();
+
+ foreach(var entry in threadsAndFields)
+ {
+ var fields = entry.Value;
+
+ foreach(var i in fields)
+ {
+ FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField)i;
+ if (perField.termsHashPerField.numPostings > 0)
+ allFields.Add(perField);
+ }
+ }
+
+ // Sort by field name
+ allFields.Sort();
+ int numAllFields = allFields.Count;
+
+ // TODO: allow Lucene user to customize this consumer:
+ FormatPostingsFieldsConsumer consumer = new FormatPostingsFieldsWriter(state, fieldInfos);
+ /*
+ Current writer chain:
+ FormatPostingsFieldsConsumer
+ -> IMPL: FormatPostingsFieldsWriter
+ -> FormatPostingsTermsConsumer
+ -> IMPL: FormatPostingsTermsWriter
+ -> FormatPostingsDocsConsumer
+ -> IMPL: FormatPostingsDocsWriter
+ -> FormatPostingsPositionsConsumer
+ -> IMPL: FormatPostingsPositionsWriter
+ */
+
+ int start = 0;
+ while (start < numAllFields)
+ {
+ FieldInfo fieldInfo = allFields[start].fieldInfo;
+ System.String fieldName = fieldInfo.name;
+
+ int end = start + 1;
+ while (end < numAllFields && allFields[end].fieldInfo.name.Equals(fieldName))
+ end++;
+
+ FreqProxTermsWriterPerField[] fields = new FreqProxTermsWriterPerField[end - start];
+ for (int i = start; i < end; i++)
+ {
+ fields[i - start] = allFields[i];
+
+ // Aggregate the storePayload as seen by the same
+ // field across multiple threads
+ fieldInfo.storePayloads |= fields[i - start].hasPayloads;
+ }
+
+ // If this field has postings then add them to the
+ // segment
+ AppendPostings(fields, consumer);
+
+ for (int i = 0; i < fields.Length; i++)
+ {
+ TermsHashPerField perField = fields[i].termsHashPerField;
+ int numPostings = perField.numPostings;
+ perField.Reset();
+ perField.ShrinkHash(numPostings);
+ fields[i].Reset();
+ }
+
+ start = end;
+ }
+
+ foreach(var entry in threadsAndFields)
+ {
+ FreqProxTermsWriterPerThread perThread = (FreqProxTermsWriterPerThread) entry.Key;
+ perThread.termsHashPerThread.Reset(true);
+ }
+
+ consumer.Finish();
+ }
+
+ private byte[] payloadBuffer;
+
+ /* Walk through all unique text tokens (Posting
+ * instances) found in this field and serialize them
+ * into a single RAM segment. */
+ internal void AppendPostings(FreqProxTermsWriterPerField[] fields, FormatPostingsFieldsConsumer consumer)
+ {
+
+ int numFields = fields.Length;
+
+ FreqProxFieldMergeState[] mergeStates = new FreqProxFieldMergeState[numFields];
+
+ for (int i = 0; i < numFields; i++)
+ {
+ FreqProxFieldMergeState fms = mergeStates[i] = new FreqProxFieldMergeState(fields[i]);
+
+ System.Diagnostics.Debug.Assert(fms.field.fieldInfo == fields [0].fieldInfo);
+
+ // Should always be true
+ bool result = fms.NextTerm();
+ System.Diagnostics.Debug.Assert(result);
+ }
+
+ FormatPostingsTermsConsumer termsConsumer = consumer.AddField(fields[0].fieldInfo);
+
+ FreqProxFieldMergeState[] termStates = new FreqProxFieldMergeState[numFields];
+
+ bool currentFieldOmitTermFreqAndPositions = fields[0].fieldInfo.omitTermFreqAndPositions;
+
+ while (numFields > 0)
+ {
+
+ // Get the next term to merge
+ termStates[0] = mergeStates[0];
+ int numToMerge = 1;
+
+ for (int i = 1; i < numFields; i++)
+ {
+ char[] text = mergeStates[i].text;
+ int textOffset = mergeStates[i].textOffset;
+ int cmp = compareText(text, textOffset, termStates[0].text, termStates[0].textOffset);
+
+ if (cmp < 0)
+ {
+ termStates[0] = mergeStates[i];
+ numToMerge = 1;
+ }
+ else if (cmp == 0)
+ termStates[numToMerge++] = mergeStates[i];
+ }
+
+ FormatPostingsDocsConsumer docConsumer = termsConsumer.AddTerm(termStates[0].text, termStates[0].textOffset);
+
+ // Now termStates has numToMerge FieldMergeStates
+ // which all share the same term. Now we must
+ // interleave the docID streams.
+ while (numToMerge > 0)
+ {
+
+ FreqProxFieldMergeState minState = termStates[0];
+ for (int i = 1; i < numToMerge; i++)
+ if (termStates[i].docID < minState.docID)
+ minState = termStates[i];
+
+ int termDocFreq = minState.termFreq;
+
+ FormatPostingsPositionsConsumer posConsumer = docConsumer.AddDoc(minState.docID, termDocFreq);
+
+ ByteSliceReader prox = minState.prox;
+
+ // Carefully copy over the prox + payload info,
+ // changing the format to match Lucene's segment
+ // format.
+ if (!currentFieldOmitTermFreqAndPositions)
+ {
+ // omitTermFreqAndPositions == false so we do write positions &
+ // payload
+ int position = 0;
+ for (int j = 0; j < termDocFreq; j++)
+ {
+ int code = prox.ReadVInt();
+ position += (code >> 1);
+
+ int payloadLength;
+ if ((code & 1) != 0)
+ {
+ // This position has a payload
+ payloadLength = prox.ReadVInt();
+
+ if (payloadBuffer == null || payloadBuffer.Length < payloadLength)
+ payloadBuffer = new byte[payloadLength];
+
+ prox.ReadBytes(payloadBuffer, 0, payloadLength);
+ }
+ else
+ payloadLength = 0;
+
+ posConsumer.AddPosition(position, payloadBuffer, 0, payloadLength);
+ } //End for
+
+ posConsumer.Finish();
+ }
+
+ if (!minState.NextDoc())
+ {
+
+ // Remove from termStates
+ int upto = 0;
+ for (int i = 0; i < numToMerge; i++)
+ if (termStates[i] != minState)
+ termStates[upto++] = termStates[i];
+ numToMerge--;
+ System.Diagnostics.Debug.Assert(upto == numToMerge);
+
+ // Advance this state to the next term
+
+ if (!minState.NextTerm())
+ {
+ // OK, no more terms, so remove from mergeStates
+ // as well
+ upto = 0;
+ for (int i = 0; i < numFields; i++)
+ if (mergeStates[i] != minState)
+ mergeStates[upto++] = mergeStates[i];
+ numFields--;
+ System.Diagnostics.Debug.Assert(upto == numFields);
+ }
+ }
+ }
+
+ docConsumer.Finish();
+ }
+
+ termsConsumer.Finish();
+ }
+
+ internal UnicodeUtil.UTF8Result termsUTF8 = new UnicodeUtil.UTF8Result();
+
+ internal sealed class PostingList:RawPostingList
+ {
+ internal int docFreq; // # times this term occurs in the current doc
+ internal int lastDocID; // Last docID where this term occurred
+ internal int lastDocCode; // Code for prior doc
+ internal int lastPosition; // Last position where this term occurred
+ }
+
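+ // PostingList adds four ints (docFreq, lastDocID, lastDocCode, lastPosition)
+ // on top of the base RawPostingList, which is what this size accounts for.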
+ internal override int BytesPerPosting()
+ {
+ return RawPostingList.BYTES_SIZE + 4 * DocumentsWriter.INT_NUM_BYTE;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/FreqProxTermsWriterPerField.cs b/src/core/Index/FreqProxTermsWriterPerField.cs
new file mode 100644
index 0000000..c654b48
--- /dev/null
+++ b/src/core/Index/FreqProxTermsWriterPerField.cs
@@ -0,0 +1,196 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Documents;
+
+namespace Lucene.Net.Index
+{
+
+ // TODO: break into separate freq and prox writers as
+ // codecs; make separate container (tii/tis/skip/*) that can
+ // be configured as any number of files 1..N
+ sealed class FreqProxTermsWriterPerField:TermsHashConsumerPerField, System.IComparable<FreqProxTermsWriterPerField>
+ {
+
+ internal FreqProxTermsWriterPerThread perThread;
+ internal TermsHashPerField termsHashPerField;
+ internal FieldInfo fieldInfo;
+ internal DocumentsWriter.DocState docState;
+ internal FieldInvertState fieldState;
+ internal bool omitTermFreqAndPositions;
+ internal IPayloadAttribute payloadAttribute;
+
+ public FreqProxTermsWriterPerField(TermsHashPerField termsHashPerField, FreqProxTermsWriterPerThread perThread, FieldInfo fieldInfo)
+ {
+ this.termsHashPerField = termsHashPerField;
+ this.perThread = perThread;
+ this.fieldInfo = fieldInfo;
+ docState = termsHashPerField.docState;
+ fieldState = termsHashPerField.fieldState;
+ omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
+ }
+
+ internal override int GetStreamCount()
+ {
+ if (fieldInfo.omitTermFreqAndPositions)
+ return 1;
+ else
+ return 2;
+ }
+
+ internal override void Finish()
+ {
+ }
+
+ internal bool hasPayloads;
+
+ internal override void SkippingLongTerm()
+ {
+ }
+
+ public int CompareTo(FreqProxTermsWriterPerField other)
+ {
+ return String.CompareOrdinal(fieldInfo.name, other.fieldInfo.name);
+ }
+
+ internal void Reset()
+ {
+ // Record, up front, whether our in-RAM format will be
+ // with or without term freqs:
+ omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
+ payloadAttribute = null;
+ }
+
+ internal override bool Start(IFieldable[] fields, int count)
+ {
+ for (int i = 0; i < count; i++)
+ if (fields[i].IsIndexed)
+ return true;
+ return false;
+ }
+
+ internal override void Start(IFieldable f)
+ {
+ if (fieldState.attributeSource.HasAttribute<IPayloadAttribute>())
+ {
+ payloadAttribute = fieldState.attributeSource.GetAttribute<IPayloadAttribute>();
+ }
+ else
+ {
+ payloadAttribute = null;
+ }
+ }
+
+ internal void WriteProx(FreqProxTermsWriter.PostingList p, int proxCode)
+ {
+ Payload payload;
+ if (payloadAttribute == null)
+ {
+ payload = null;
+ }
+ else
+ {
+ payload = payloadAttribute.Payload;
+ }
+
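+ // Position deltas are shifted left one bit; the low bit flags whether a
+ // payload (length VInt + bytes) follows in stream 1 of the in-RAM postings.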
+ if (payload != null && payload.internalLength > 0)
+ {
+ termsHashPerField.WriteVInt(1, (proxCode << 1) | 1);
+ termsHashPerField.WriteVInt(1, payload.internalLength);
+ termsHashPerField.WriteBytes(1, payload.data, payload.internalOffset, payload.internalLength);
+ hasPayloads = true;
+ }
+ else
+ termsHashPerField.WriteVInt(1, proxCode << 1);
+ p.lastPosition = fieldState.position;
+ }
+
+ internal override void NewTerm(RawPostingList p0)
+ {
+ // First time we're seeing this term since the last
+ // flush
+ System.Diagnostics.Debug.Assert(docState.TestPoint("FreqProxTermsWriterPerField.newTerm start"));
+ FreqProxTermsWriter.PostingList p = (FreqProxTermsWriter.PostingList) p0;
+ p.lastDocID = docState.docID;
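+ // lastDocCode holds the delta-coded docID for this doc; it is not written
+ // until the next doc (or the flush) reveals the final term freq.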
+ if (omitTermFreqAndPositions)
+ {
+ p.lastDocCode = docState.docID;
+ }
+ else
+ {
+ p.lastDocCode = docState.docID << 1;
+ p.docFreq = 1;
+ WriteProx(p, fieldState.position);
+ }
+ }
+
+ internal override void AddTerm(RawPostingList p0)
+ {
+
+ System.Diagnostics.Debug.Assert(docState.TestPoint("FreqProxTermsWriterPerField.addTerm start"));
+
+ FreqProxTermsWriter.PostingList p = (FreqProxTermsWriter.PostingList) p0;
+
+ System.Diagnostics.Debug.Assert(omitTermFreqAndPositions || p.docFreq > 0);
+
+ if (omitTermFreqAndPositions)
+ {
+ if (docState.docID != p.lastDocID)
+ {
+ System.Diagnostics.Debug.Assert(docState.docID > p.lastDocID);
+ termsHashPerField.WriteVInt(0, p.lastDocCode);
+ p.lastDocCode = docState.docID - p.lastDocID;
+ p.lastDocID = docState.docID;
+ }
+ }
+ else
+ {
+ if (docState.docID != p.lastDocID)
+ {
+ System.Diagnostics.Debug.Assert(docState.docID > p.lastDocID);
+ // Term not yet seen in the current doc but previously
+ // seen in other doc(s) since the last flush
+
+ // Now that we know doc freq for previous doc,
+ // write it & lastDocCode
+ if (1 == p.docFreq)
+ termsHashPerField.WriteVInt(0, p.lastDocCode | 1);
+ else
+ {
+ termsHashPerField.WriteVInt(0, p.lastDocCode);
+ termsHashPerField.WriteVInt(0, p.docFreq);
+ }
+ p.docFreq = 1;
+ p.lastDocCode = (docState.docID - p.lastDocID) << 1;
+ p.lastDocID = docState.docID;
+ WriteProx(p, fieldState.position);
+ }
+ else
+ {
+ p.docFreq++;
+ WriteProx(p, fieldState.position - p.lastPosition);
+ }
+ }
+ }
+
+ public void Abort()
+ {
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/FreqProxTermsWriterPerThread.cs b/src/core/Index/FreqProxTermsWriterPerThread.cs
new file mode 100644
index 0000000..01f1ae9
--- /dev/null
+++ b/src/core/Index/FreqProxTermsWriterPerThread.cs
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ sealed class FreqProxTermsWriterPerThread:TermsHashConsumerPerThread
+ {
+ internal TermsHashPerThread termsHashPerThread;
+ internal DocumentsWriter.DocState docState;
+
+ public FreqProxTermsWriterPerThread(TermsHashPerThread perThread)
+ {
+ docState = perThread.docState;
+ termsHashPerThread = perThread;
+ }
+
+ public override TermsHashConsumerPerField AddField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo)
+ {
+ return new FreqProxTermsWriterPerField(termsHashPerField, this, fieldInfo);
+ }
+
+ public override void StartDocument()
+ {
+ }
+
+ public override DocumentsWriter.DocWriter FinishDocument()
+ {
+ return null;
+ }
+
+ public override void Abort()
+ {
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/IndexCommit.cs b/src/core/Index/IndexCommit.cs
new file mode 100644
index 0000000..306d7f1
--- /dev/null
+++ b/src/core/Index/IndexCommit.cs
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Store;
+using Directory = Lucene.Net.Store.Directory;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> <p/>Expert: represents a single commit into an index as seen by the
+ /// <see cref="IndexDeletionPolicy" /> or <see cref="IndexReader" />.<p/>
+ ///
+ /// <p/> Changes to the content of an index are made visible
+ /// only after the writer who made that change commits by
+ /// writing a new segments file
+ /// (<c>segments_N</c>). This point in time, when the
+ /// action of writing of a new segments file to the directory
+ /// is completed, is an index commit.<p/>
+ ///
+ /// <p/>Each index commit point has a unique segments file
+ /// associated with it. The segments file associated with a
+ /// later index commit point would have a larger N.<p/>
+ ///
+ /// <p/><b>WARNING</b>: This API is new and experimental and
+ /// may suddenly change. <p/>
+ /// </summary>
+
+ public abstract class IndexCommit
+ {
+ /// <summary> Get the segments file (<c>segments_N</c>) associated
+ /// with this commit point.
+ /// </summary>
+ public abstract string SegmentsFileName { get; }
+
+ /// <summary> Returns all index files referenced by this commit point.</summary>
+ public abstract ICollection<string> FileNames { get; }
+
+ /// <summary> Returns the <see cref="Store.Directory" /> for the index.</summary>
+ public abstract Directory Directory { get; }
+
+ /// <summary> Delete this commit point. This only applies when using
+ /// the commit point in the context of IndexWriter's
+ /// IndexDeletionPolicy.
+ /// <p/>
+ /// Upon calling this, the writer is notified that this commit
+ /// point should be deleted.
+ /// <p/>
+ /// The decision to delete a commit point is made by the <see cref="IndexDeletionPolicy" /> in effect,
+ /// and therefore this should only be called from its <see cref="IndexDeletionPolicy.OnInit{T}(IList{T})" /> or
+ /// <see cref="IndexDeletionPolicy.OnCommit{T}(IList{T})" /> methods.
+ /// </summary>
+ public abstract void Delete();
+
+ public abstract bool IsDeleted { get; }
+
+ /// <summary> Returns true if this commit is an optimized index.</summary>
+ public abstract bool IsOptimized { get; }
+
+ /// <summary> Two IndexCommits are equal if their Directory and Version are equal.</summary>
+ public override bool Equals(System.Object other)
+ {
+ if (other is IndexCommit)
+ {
+ IndexCommit otherCommit = (IndexCommit) other;
+ return otherCommit.Directory.Equals(Directory) && otherCommit.Version == Version;
+ }
+ else
+ return false;
+ }
+
+ public override int GetHashCode()
+ {
+ return (int)(Directory.GetHashCode() + Version);
+ }
+
+ /// <summary>Returns the version for this IndexCommit. This is the
+ /// same value that <see cref="IndexReader.Version" /> would
+ /// return if it were opened on this commit.
+ /// </summary>
+ public abstract long Version { get; }
+
+ /// <summary>Returns the generation (the _N in segments_N) for this
+ /// IndexCommit
+ /// </summary>
+ public abstract long Generation { get; }
+
+ /// <summary>Convenience method that returns the last modified time
+ /// of the segments_N file corresponding to this index
+ /// commit, equivalent to
+ /// Directory.FileModified(SegmentsFileName).
+ /// </summary>
+ public virtual long Timestamp
+ {
+ get { return Directory.FileModified(SegmentsFileName); }
+ }
+
+ /// <summary>Returns userData, previously passed to
+ /// <see cref="IndexWriter.Commit(System.Collections.Generic.IDictionary{string, string})" />
+ /// for this commit. IDictionary is String -> String.
+ /// </summary>
+ public abstract IDictionary<string, string> UserData { get; }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/IndexDeletionPolicy.cs b/src/core/Index/IndexDeletionPolicy.cs
new file mode 100644
index 0000000..bef9924
--- /dev/null
+++ b/src/core/Index/IndexDeletionPolicy.cs
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> <p/>Expert: policy for deletion of stale <see cref="IndexCommit">index commits</see>.
+ ///
+ /// <p/>Implement this interface, and pass it to one
+ /// of the <see cref="IndexWriter" /> or <see cref="IndexReader" />
+ /// constructors, to customize when older
+ /// <see cref="IndexCommit">point-in-time commits</see>
+ /// are deleted from the index directory. The default deletion policy
+ /// is <see cref="KeepOnlyLastCommitDeletionPolicy" />, which always
+ /// removes old commits as soon as a new commit is done (this
+ /// matches the behavior before 2.2).<p/>
+ ///
+ /// <p/>One expected use case for this (and the reason why it
+ /// was first created) is to work around problems with an
+ /// index directory accessed via filesystems like NFS because
+ /// NFS does not provide the "delete on last close" semantics
+ /// that Lucene's "point in time" search normally relies on.
+ /// By implementing a custom deletion policy, such as "a
+ /// commit is only removed once it has been stale for more
+ /// than X minutes", you can give your readers time to
+ /// refresh to the new commit before <see cref="IndexWriter" />
+ /// removes the old commits. Note that doing so will
+ /// increase the storage requirements of the index. See <a
+ /// target="top"
+ /// href="http://issues.apache.org/jira/browse/LUCENE-710">LUCENE-710</a>
+ /// for details.<p/>
+ /// </summary>
+
+ public interface IndexDeletionPolicy
+ {
+
+ /// <summary> <p/>This is called once when a writer is first
+ /// instantiated to give the policy a chance to remove old
+ /// commit points.<p/>
+ ///
+ /// <p/>The writer locates all index commits present in the
+ /// index directory and calls this method. The policy may
+ /// choose to delete some of the commit points, doing so by
+ /// calling method <see cref="IndexCommit.Delete()" />
+ /// of <see cref="IndexCommit" />.<p/>
+ ///
+ /// <p/><u>Note:</u> the last CommitPoint is the most recent one,
+ /// i.e. the "front index state". Be careful not to delete it,
+ /// unless you know for sure what you are doing, and unless
+ /// you can afford to lose the index content while doing that.
+ ///
+ /// </summary>
+ /// <param name="commits">List of current
+ /// <see cref="IndexCommit">point-in-time commits</see>,
+ /// sorted by age (the 0th one is the oldest commit).
+ /// </param>
+ void OnInit<T>(IList<T> commits) where T : IndexCommit;
+
+ /// <summary>
+ /// <p>This is called each time the writer completed a commit.
+ /// This gives the policy a chance to remove old commit points
+ /// with each commit.</p>
+ ///
+ /// <p>The policy may now choose to delete old commit points
+ /// by calling method <see cref="IndexCommit.Delete()"/>
+ /// of <see cref="IndexCommit" />.</p>
+ ///
+ /// <p>This method is only called when <see cref="IndexWriter.Commit()"/>
+ /// or <see cref="IndexWriter.Close()"/> is called, or possibly not at
+ /// all if <see cref="IndexWriter.Rollback()"/> is called.</p>
+ ///
+ /// <p><u>Note:</u> the last CommitPoint is the most recent one,
+ /// i.e. the "front index state". Be careful not to delete it,
+ /// unless you know for sure what you are doing, and unless
+ /// you can afford to lose the index content while doing that.</p>
+ /// </summary>
+ /// <param name="commits">
+ /// List of <see cref="IndexCommit" />, sorted by age (the 0th one is the oldest commit).
+ /// </param>
+ void OnCommit<T>(IList<T> commits) where T : IndexCommit;
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/IndexFileDeleter.cs b/src/core/Index/IndexFileDeleter.cs
new file mode 100644
index 0000000..3ac815d
--- /dev/null
+++ b/src/core/Index/IndexFileDeleter.cs
@@ -0,0 +1,808 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Support;
+using Directory = Lucene.Net.Store.Directory;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary>
+ /// <para>This class keeps track of each SegmentInfos instance that
+ /// is still "live", either because it corresponds to a
+ /// segments_N file in the Directory (a "commit", i.e. a
+ /// committed SegmentInfos) or because it's an in-memory
+ /// SegmentInfos that a writer is actively updating but has
+ /// not yet committed. This class uses simple reference
+ /// counting to map the live SegmentInfos instances to
+ /// individual files in the Directory.</para>
+ ///
+ /// <para>The same directory file may be referenced by more than
+ /// one IndexCommit, i.e. more than one SegmentInfos.
+ /// Therefore we count how many commits reference each file.
+ /// When all the commits referencing a certain file have been
+ /// deleted, the refcount for that file becomes zero, and the
+ /// file is deleted.</para>
+ ///
+ /// <para>A separate deletion policy interface
+ /// (IndexDeletionPolicy) is consulted on creation (onInit)
+ /// and once per commit (onCommit), to decide when a commit
+ /// should be removed.</para>
+ ///
+ /// <para>It is the business of the IndexDeletionPolicy to choose
+ /// when to delete commit points. The actual mechanics of
+ /// file deletion, retrying, etc, derived from the deletion
+ /// of commit points is the business of the IndexFileDeleter.</para>
+ ///
+ /// <para>The current default deletion policy is
+ /// <see cref="KeepOnlyLastCommitDeletionPolicy"/>, which removes all
+ /// prior commits when a new commit has completed. This
+ /// matches the behavior before 2.2.</para>
+ ///
+ /// <para>Note that you must hold the write.lock before
+ /// instantiating this class. It opens segments_N file(s)
+ /// directly with no retry logic.</para>
+ /// </summary>
+
+ public sealed class IndexFileDeleter : IDisposable
+ {
+
+ //// Files that we tried to delete but failed (likely
+ /// because they are open and we are running on Windows),
+ /// so we will retry them again later: ////
+ private IList<string> deletable;
+
+ //// Reference count for all files in the index.
+ /// Counts how many existing commits reference a file.
+ /// Maps String to RefCount (class below) instances: ////
+ private IDictionary<string, RefCount> refCounts = new HashMap<string, RefCount>();
+
+ //// Holds all commits (segments_N) currently in the index.
+ /// This will have just 1 commit if you are using the
+ /// default delete policy (KeepOnlyLastCommitDeletionPolicy).
+ /// Other policies may leave commit points live for longer
+ /// in which case this list would be longer than 1: ////
+ private List<CommitPoint> commits = new List<CommitPoint>();
+
+ //// Holds files we had incref'd from the previous
+ /// non-commit checkpoint: ////
+ private List<ICollection<string>> lastFiles = new List<ICollection<string>>();
+
+ //// Commits that the IndexDeletionPolicy have decided to delete: ////
+ private List<CommitPoint> commitsToDelete = new List<CommitPoint>();
+
+ private System.IO.StreamWriter infoStream;
+ private Directory directory;
+ private IndexDeletionPolicy policy;
+ private DocumentsWriter docWriter;
+
+ internal bool startingCommitDeleted;
+ private SegmentInfos lastSegmentInfos;
+
+ private HashSet<string> synced;
+
+ /// <summary>Change to true to see details of reference counts when
+ /// infoStream != null
+ /// </summary>
+ public static bool VERBOSE_REF_COUNTS = false;
+
+ internal void SetInfoStream(System.IO.StreamWriter infoStream)
+ {
+ this.infoStream = infoStream;
+ if (infoStream != null)
+ {
+ Message("setInfoStream deletionPolicy=" + policy);
+ }
+ }
+
+ private void Message(System.String message)
+ {
+ infoStream.WriteLine("IFD [" + DateTime.Now.ToString() + "; " + ThreadClass.Current().Name + "]: " + message);
+ }
+
+ /// <summary> Initialize the deleter: find all previous commits in
+ /// the Directory, incref the files they reference, call
+ /// the policy to let it delete commits. This will remove
+ /// any files not referenced by any of the commits.
+ /// </summary>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
+ public IndexFileDeleter(Directory directory, IndexDeletionPolicy policy, SegmentInfos segmentInfos, System.IO.StreamWriter infoStream, DocumentsWriter docWriter, HashSet<string> synced)
+ {
+
+ this.docWriter = docWriter;
+ this.infoStream = infoStream;
+ this.synced = synced;
+
+ if (infoStream != null)
+ {
+ Message("init: current segments file is \"" + segmentInfos.GetCurrentSegmentFileName() + "\"; deletionPolicy=" + policy);
+ }
+
+ this.policy = policy;
+ this.directory = directory;
+
+ // First pass: walk the files and initialize our ref
+ // counts:
+ long currentGen = segmentInfos.Generation;
+ IndexFileNameFilter filter = IndexFileNameFilter.Filter;
+
+ System.String[] files = directory.ListAll();
+
+ CommitPoint currentCommitPoint = null;
+
+ for (int i = 0; i < files.Length; i++)
+ {
+
+ System.String fileName = files[i];
+
+ if (filter.Accept(null, fileName) && !fileName.Equals(IndexFileNames.SEGMENTS_GEN))
+ {
+
+ // Add this file to refCounts with initial count 0:
+ GetRefCount(fileName);
+
+ if (fileName.StartsWith(IndexFileNames.SEGMENTS))
+ {
+
+ // This is a commit (segments or segments_N), and
+ // it's valid (<= the max gen). Load it, then
+ // incref all files it refers to:
+ if (infoStream != null)
+ {
+ Message("init: load commit \"" + fileName + "\"");
+ }
+ SegmentInfos sis = new SegmentInfos();
+ try
+ {
+ sis.Read(directory, fileName);
+ }
+ catch (System.IO.FileNotFoundException)
+ {
+ // LUCENE-948: on NFS (and maybe others), if
+ // you have writers switching back and forth
+ // between machines, it's very likely that the
+ // dir listing will be stale and will claim a
+ // file segments_X exists when in fact it
+ // doesn't. So, we catch this and handle it
+ // as if the file does not exist
+ if (infoStream != null)
+ {
+ Message("init: hit FileNotFoundException when loading commit \"" + fileName + "\"; skipping this commit point");
+ }
+ sis = null;
+ }
+ catch (System.IO.IOException)
+ {
+ if (SegmentInfos.GenerationFromSegmentsFileName(fileName) <= currentGen)
+ {
+ throw;
+ }
+ else
+ {
+ // Most likely we are opening an index that
+ // has an aborted "future" commit, so suppress
+ // exc in this case
+ sis = null;
+ }
+ }
+ if (sis != null)
+ {
+ CommitPoint commitPoint = new CommitPoint(this, commitsToDelete, directory, sis);
+ if (sis.Generation == segmentInfos.Generation)
+ {
+ currentCommitPoint = commitPoint;
+ }
+ commits.Add(commitPoint);
+ IncRef(sis, true);
+
+ if (lastSegmentInfos == null || sis.Generation > lastSegmentInfos.Generation)
+ {
+ lastSegmentInfos = sis;
+ }
+ }
+ }
+ }
+ }
+
+ if (currentCommitPoint == null)
+ {
+ // We did not in fact see the segments_N file
+ // corresponding to the segmentInfos that was passed
+ // in. Yet, it must exist, because our caller holds
+ // the write lock. This can happen when the directory
+ // listing was stale (eg when index accessed via NFS
+ // client with stale directory listing cache). So we
+ // try now to explicitly open this commit point:
+ SegmentInfos sis = new SegmentInfos();
+ try
+ {
+ sis.Read(directory, segmentInfos.GetCurrentSegmentFileName());
+ }
+ catch (System.IO.IOException)
+ {
+ throw new CorruptIndexException("failed to locate current segments_N file");
+ }
+ if (infoStream != null)
+ Message("forced open of current segments file " + segmentInfos.GetCurrentSegmentFileName());
+ currentCommitPoint = new CommitPoint(this, commitsToDelete, directory, sis);
+ commits.Add(currentCommitPoint);
+ IncRef(sis, true);
+ }
+
+ // We keep commits list in sorted order (oldest to newest):
+ commits.Sort();
+
+ // Now delete anything with ref count at 0. These are
+ // presumably abandoned files eg due to crash of
+ // IndexWriter.
+ foreach(KeyValuePair<string, RefCount> entry in refCounts)
+ {
+ string fileName = entry.Key;
+ RefCount rc = refCounts[fileName];
+ if (0 == rc.count)
+ {
+ if (infoStream != null)
+ {
+ Message("init: removing unreferenced file \"" + fileName + "\"");
+ }
+ DeleteFile(fileName);
+ }
+ }
+
+ // Finally, give policy a chance to remove things on
+ // startup:
+ policy.OnInit(commits);
+
+ // Always protect the incoming segmentInfos since
+ // sometime it may not be the most recent commit
+ Checkpoint(segmentInfos, false);
+
+ startingCommitDeleted = currentCommitPoint.IsDeleted;
+
+ DeleteCommits();
+ }
+
+ public SegmentInfos LastSegmentInfos
+ {
+ get { return lastSegmentInfos; }
+ }
+
+ /// <summary> Remove the CommitPoints in the commitsToDelete List by
+ /// DecRef'ing all files from each SegmentInfos.
+ /// </summary>
+ private void DeleteCommits()
+ {
+
+ int size = commitsToDelete.Count;
+
+ if (size > 0)
+ {
+
+ // First decref all files that had been referred to by
+ // the now-deleted commits:
+ for (int i = 0; i < size; i++)
+ {
+ CommitPoint commit = commitsToDelete[i];
+ if (infoStream != null)
+ {
+ Message("deleteCommits: now decRef commit \"" + commit.SegmentsFileName + "\"");
+ }
+ foreach(string file in commit.files)
+ {
+ DecRef(file);
+ }
+ }
+ commitsToDelete.Clear();
+
+ // Now compact commits to remove deleted ones (preserving the sort):
+ size = commits.Count;
+ int readFrom = 0;
+ int writeTo = 0;
+ while (readFrom < size)
+ {
+ CommitPoint commit = commits[readFrom];
+ if (!commit.deleted)
+ {
+ if (writeTo != readFrom)
+ {
+ commits[writeTo] = commits[readFrom];
+ }
+ writeTo++;
+ }
+ readFrom++;
+ }
+
+ while (size > writeTo)
+ {
+ commits.RemoveAt(size - 1);
+ size--;
+ }
+ }
+ }
+
+ /// <summary> Writer calls this when it has hit an error and had to
+ /// roll back, to tell us that there may now be
+ /// unreferenced files in the filesystem. So we re-list
+ /// the filesystem and delete such files. If segmentName
+ /// is non-null, we will only delete files corresponding to
+ /// that segment.
+ /// </summary>
+ public void Refresh(System.String segmentName)
+ {
+ System.String[] files = directory.ListAll();
+ IndexFileNameFilter filter = IndexFileNameFilter.Filter;
+ System.String segmentPrefix1;
+ System.String segmentPrefix2;
+ if (segmentName != null)
+ {
+ segmentPrefix1 = segmentName + ".";
+ segmentPrefix2 = segmentName + "_";
+ }
+ else
+ {
+ segmentPrefix1 = null;
+ segmentPrefix2 = null;
+ }
+
+ for (int i = 0; i < files.Length; i++)
+ {
+ System.String fileName = files[i];
+ if (filter.Accept(null, fileName) && (segmentName == null || fileName.StartsWith(segmentPrefix1) || fileName.StartsWith(segmentPrefix2)) && !refCounts.ContainsKey(fileName) && !fileName.Equals(IndexFileNames.SEGMENTS_GEN))
+ {
+ // Unreferenced file, so remove it
+ if (infoStream != null)
+ {
+ Message("refresh [prefix=" + segmentName + "]: removing newly created unreferenced file \"" + fileName + "\"");
+ }
+ DeleteFile(fileName);
+ }
+ }
+ }
+
+ public void Refresh()
+ {
+ Refresh(null);
+ }
+
+ public void Dispose()
+ {
+ // Move to protected method if class becomes unsealed
+ // DecRef old files from the last checkpoint, if any:
+ int size = lastFiles.Count;
+ if (size > 0)
+ {
+ for (int i = 0; i < size; i++)
+ DecRef(lastFiles[i]);
+ lastFiles.Clear();
+ }
+
+ DeletePendingFiles();
+ }
+
+ private void DeletePendingFiles()
+ {
+ if (deletable != null)
+ {
+ IList<string> oldDeletable = deletable;
+ deletable = null;
+ int size = oldDeletable.Count;
+ for (int i = 0; i < size; i++)
+ {
+ if (infoStream != null)
+ {
+ Message("delete pending file " + oldDeletable[i]);
+ }
+ DeleteFile(oldDeletable[i]);
+ }
+ }
+ }
+
+ /// <summary> For definition of "check point" see IndexWriter comments:
+ /// "Clarification: Check Points (and commits)".
+ ///
+ /// Writer calls this when it has made a "consistent
+ /// change" to the index, meaning new files are written to
+ /// the index and the in-memory SegmentInfos have been
+ /// modified to point to those files.
+ ///
+ /// This may or may not be a commit (segments_N may or may
+ /// not have been written).
+ ///
+ /// We simply incref the files referenced by the new
+ /// SegmentInfos and decref the files we had previously
+ /// seen (if any).
+ ///
+ /// If this is a commit, we also call the policy to give it
+ /// a chance to remove other commits. If any commits are
+ /// removed, we decref their files as well.
+ /// </summary>
+ public void Checkpoint(SegmentInfos segmentInfos, bool isCommit)
+ {
+
+ if (infoStream != null)
+ {
+ Message("now checkpoint \"" + segmentInfos.GetCurrentSegmentFileName() + "\" [" + segmentInfos.Count + " segments " + "; isCommit = " + isCommit + "]");
+ }
+
+ // Try again now to delete any previously un-deletable
+ // files (because they were in use, on Windows):
+ DeletePendingFiles();
+
+ // Incref the files:
+ IncRef(segmentInfos, isCommit);
+
+ if (isCommit)
+ {
+ // Append to our commits list:
+ commits.Add(new CommitPoint(this, commitsToDelete, directory, segmentInfos));
+
+ // Tell policy so it can remove commits:
+ policy.OnCommit(commits);
+
+ // Decref files for commits that were deleted by the policy:
+ DeleteCommits();
+ }
+ else
+ {
+
+ IList<string> docWriterFiles;
+ if (docWriter != null)
+ {
+ docWriterFiles = docWriter.OpenFiles();
+ if (docWriterFiles != null)
+ // We must incRef these files before decRef'ing
+ // last files to make sure we don't accidentally
+ // delete them:
+ IncRef(docWriterFiles);
+ }
+ else
+ docWriterFiles = null;
+
+ // DecRef old files from the last checkpoint, if any:
+ int size = lastFiles.Count;
+ if (size > 0)
+ {
+ for (int i = 0; i < size; i++)
+ DecRef(lastFiles[i]);
+ lastFiles.Clear();
+ }
+
+ // Save files so we can decr on next checkpoint/commit:
+ lastFiles.Add(segmentInfos.Files(directory, false));
+
+ if (docWriterFiles != null)
+ {
+ lastFiles.Add(docWriterFiles);
+ }
+ }
+ }
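+
+ // Conceptual call pattern (illustrative sketch, not part of the original
+ // source): per the summary above, the owning IndexWriter drives this method
+ // in two ways.
+ //
+ //   deleter.Checkpoint(segmentInfos, true);   // a commit: segments_N was written,
+ //                                             // so the deletion policy is consulted
+ //   deleter.Checkpoint(segmentInfos, false);  // a non-commit change: only the
+ //                                             // file ref counts are rolled forward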
+
+ internal void IncRef(SegmentInfos segmentInfos, bool isCommit)
+ {
+ // If this is a commit point, also incRef the
+ // segments_N file:
+ foreach(string fileName in segmentInfos.Files(directory, isCommit))
+ {
+ IncRef(fileName);
+ }
+ }
+
+ internal void IncRef(ICollection<string> files)
+ {
+ foreach(string file in files)
+ {
+ IncRef(file);
+ }
+ }
+
+ internal void IncRef(string fileName)
+ {
+ RefCount rc = GetRefCount(fileName);
+ if (infoStream != null && VERBOSE_REF_COUNTS)
+ {
+ Message(" IncRef \"" + fileName + "\": pre-incr count is " + rc.count);
+ }
+ rc.IncRef();
+ }
+
+ internal void DecRef(ICollection<string> files)
+ {
+ foreach(string file in files)
+ {
+ DecRef(file);
+ }
+ }
+
+ internal void DecRef(System.String fileName)
+ {
+ RefCount rc = GetRefCount(fileName);
+ if (infoStream != null && VERBOSE_REF_COUNTS)
+ {
+ Message(" DecRef \"" + fileName + "\": pre-decr count is " + rc.count);
+ }
+ if (0 == rc.DecRef())
+ {
+ // This file is no longer referenced by any past
+ // commit points nor by the in-memory SegmentInfos:
+ DeleteFile(fileName);
+ refCounts.Remove(fileName);
+
+ if (synced != null) {
+ lock(synced)
+ {
+ synced.Remove(fileName);
+ }
+ }
+ }
+ }
+
+ internal void DecRef(SegmentInfos segmentInfos)
+ {
+ foreach(string file in segmentInfos.Files(directory, false))
+ {
+ DecRef(file);
+ }
+ }
+
+ public bool Exists(String fileName)
+ {
+ if (!refCounts.ContainsKey(fileName))
+ {
+ return false;
+ }
+ else
+ {
+ return GetRefCount(fileName).count > 0;
+ }
+ }
+
+ private RefCount GetRefCount(System.String fileName)
+ {
+ RefCount rc;
+ if (!refCounts.ContainsKey(fileName))
+ {
+ rc = new RefCount(fileName);
+ refCounts[fileName] = rc;
+ }
+ else
+ {
+ rc = refCounts[fileName];
+ }
+ return rc;
+ }
+
+ internal void DeleteFiles(System.Collections.Generic.IList<string> files)
+ {
+ foreach(string file in files)
+ DeleteFile(file);
+ }
+
+ /// <summary>Deletes the specified files, but only if they are new
+ /// (have not yet been incref'd).
+ /// </summary>
+ internal void DeleteNewFiles(System.Collections.Generic.ICollection<string> files)
+ {
+ foreach(string fileName in files)
+ {
+ if (!refCounts.ContainsKey(fileName))
+ {
+ if (infoStream != null)
+ {
+ Message("delete new file \"" + fileName + "\"");
+ }
+ DeleteFile(fileName);
+ }
+ }
+ }
+
+ internal void DeleteFile(System.String fileName)
+ {
+ try
+ {
+ if (infoStream != null)
+ {
+ Message("delete \"" + fileName + "\"");
+ }
+ directory.DeleteFile(fileName);
+ }
+ catch (System.IO.IOException e)
+ {
+ // if delete fails
+ if (directory.FileExists(fileName))
+ {
+
+ // Some operating systems (e.g. Windows) don't
+ // permit a file to be deleted while it is opened
+ // for read (e.g. by another process or thread). So
+ // we assume that when a delete fails it is because
+ // the file is open in another process, and queue
+ // the file for subsequent deletion.
+
+ if (infoStream != null)
+ {
+ Message("IndexFileDeleter: unable to remove file \"" + fileName + "\": " + e.ToString() + "; Will re-try later.");
+ }
+ if (deletable == null)
+ {
+ deletable = new List<string>();
+ }
+ deletable.Add(fileName); // add to deletable
+ }
+ }
+ }
+
+ /// <summary> Tracks the reference count for a single index file:</summary>
+ sealed private class RefCount
+ {
+
+ // fileName used only for better assert error messages
+ internal System.String fileName;
+ internal bool initDone;
+ internal RefCount(System.String fileName)
+ {
+ this.fileName = fileName;
+ }
+
+ internal int count;
+
+ public int IncRef()
+ {
+ if (!initDone)
+ {
+ initDone = true;
+ }
+ else
+ {
+ System.Diagnostics.Debug.Assert(count > 0, "RefCount is 0 pre-increment for file " + fileName);
+ }
+ return ++count;
+ }
+
+ public int DecRef()
+ {
+ System.Diagnostics.Debug.Assert(count > 0, "RefCount is 0 pre-decrement for file " + fileName);
+ return --count;
+ }
+ }
+
+ /// <summary> Holds details for each commit point. This class is
+ /// also passed to the deletion policy. Note: this class
+ /// has a natural ordering that is inconsistent with
+ /// equals.
+ /// </summary>
+
+ sealed private class CommitPoint:IndexCommit, System.IComparable<CommitPoint>
+ {
+ private void InitBlock(IndexFileDeleter enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private IndexFileDeleter enclosingInstance;
+ public IndexFileDeleter Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+
+ internal long gen;
+ internal ICollection<string> files;
+ internal string segmentsFileName;
+ internal bool deleted;
+ internal Directory directory;
+ internal ICollection<CommitPoint> commitsToDelete;
+ internal long version;
+ internal long generation;
+ internal bool isOptimized;
+ internal IDictionary<string, string> userData;
+
+ public CommitPoint(IndexFileDeleter enclosingInstance, ICollection<CommitPoint> commitsToDelete, Directory directory, SegmentInfos segmentInfos)
+ {
+ InitBlock(enclosingInstance);
+ this.directory = directory;
+ this.commitsToDelete = commitsToDelete;
+ userData = segmentInfos.UserData;
+ segmentsFileName = segmentInfos.GetCurrentSegmentFileName();
+ version = segmentInfos.Version;
+ generation = segmentInfos.Generation;
+ files = segmentInfos.Files(directory, true);
+ gen = segmentInfos.Generation;
+ isOptimized = segmentInfos.Count == 1 && !segmentInfos.Info(0).HasDeletions();
+
+ System.Diagnostics.Debug.Assert(!segmentInfos.HasExternalSegments(directory));
+ }
+
+ public override string ToString()
+ {
+ return "IndexFileDeleter.CommitPoint(" + segmentsFileName + ")";
+ }
+
+ public override bool IsOptimized
+ {
+ get { return isOptimized; }
+ }
+
+ public override string SegmentsFileName
+ {
+ get { return segmentsFileName; }
+ }
+
+ public override ICollection<string> FileNames
+ {
+ get { return files; }
+ }
+
+ public override Directory Directory
+ {
+ get { return directory; }
+ }
+
+ public override long Version
+ {
+ get { return version; }
+ }
+
+ public override long Generation
+ {
+ get { return generation; }
+ }
+
+ public override IDictionary<string, string> UserData
+ {
+ get { return userData; }
+ }
+
+ /// <summary> Called only by the deletion policy, to remove this
+ /// commit point from the index.
+ /// </summary>
+ public override void Delete()
+ {
+ if (!deleted)
+ {
+ deleted = true;
+ Enclosing_Instance.commitsToDelete.Add(this);
+ }
+ }
+
+ public override bool IsDeleted
+ {
+ get { return deleted; }
+ }
+
+ public int CompareTo(CommitPoint commit)
+ {
+ if (gen < commit.gen)
+ {
+ return - 1;
+ }
+ else if (gen > commit.gen)
+ {
+ return 1;
+ }
+ else
+ {
+ return 0;
+ }
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/IndexFileNameFilter.cs b/src/core/Index/IndexFileNameFilter.cs
new file mode 100644
index 0000000..474381f
--- /dev/null
+++ b/src/core/Index/IndexFileNameFilter.cs
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary>Filename filter that accepts only filenames and extensions created by Lucene. </summary>
+ public class IndexFileNameFilter
+ {
+
+ private static IndexFileNameFilter singleton = new IndexFileNameFilter();
+ private HashSet<String> extensions;
+ private HashSet<String> extensionsInCFS;
+
+ // Prevent instantiation.
+ private IndexFileNameFilter()
+ {
+ extensions = new HashSet<String>();
+ for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS.Length; i++)
+ {
+ extensions.Add(IndexFileNames.INDEX_EXTENSIONS[i]);
+ }
+ extensionsInCFS = new HashSet<String>();
+ for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS_IN_COMPOUND_FILE.Length; i++)
+ {
+ extensionsInCFS.Add(IndexFileNames.INDEX_EXTENSIONS_IN_COMPOUND_FILE[i]);
+ }
+ }
+
+ /* (non-Javadoc)
+ * <see cref="java.io.FilenameFilter.accept(java.io.File, java.lang.String)"/>
+ */
+ public virtual bool Accept(System.IO.FileInfo dir, System.String name)
+ {
+ int i = name.LastIndexOf((System.Char) '.');
+ if (i != - 1)
+ {
+ System.String extension = name.Substring(1 + i);
+ if (extensions.Contains(extension))
+ {
+ return true;
+ }
+ else if (extension.StartsWith("f") && (new System.Text.RegularExpressions.Regex("f\\d+")).Match(extension).Success)
+ {
+ return true;
+ }
+ else if (extension.StartsWith("s") && (new System.Text.RegularExpressions.Regex("s\\d+")).Match(extension).Success)
+ {
+ return true;
+ }
+ }
+ else
+ {
+ if (name.Equals(IndexFileNames.DELETABLE))
+ return true;
+ else if (name.StartsWith(IndexFileNames.SEGMENTS))
+ return true;
+ }
+ return false;
+ }
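+
+ // Rough behavior sketch (added for illustration; the file names below are
+ // hypothetical examples, not taken from this repository):
+ //
+ //   var filter = IndexFileNameFilter.Filter;
+ //   filter.Accept(null, "_0.cfs");     // true  - "cfs" is a known Lucene extension
+ //   filter.Accept(null, "segments_3"); // true  - no extension, but starts with "segments"
+ //   filter.Accept(null, "readme.txt"); // false - not a Lucene index file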
+
+ /// <summary> Returns true if this is a file that would be contained
+ /// in a CFS file. This function should only be called on
+ /// files that pass the above "accept" (ie, are already
+ /// known to be Lucene index files).
+ /// </summary>
+ public virtual bool IsCFSFile(System.String name)
+ {
+ int i = name.LastIndexOf((System.Char) '.');
+ if (i != - 1)
+ {
+ System.String extension = name.Substring(1 + i);
+ if (extensionsInCFS.Contains(extension))
+ {
+ return true;
+ }
+ if (extension.StartsWith("f") && (new System.Text.RegularExpressions.Regex("f\\d+")).Match(extension).Success)
+ {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ public static IndexFileNameFilter Filter
+ {
+ get { return singleton; }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/IndexFileNames.cs b/src/core/Index/IndexFileNames.cs
new file mode 100644
index 0000000..ef50119
--- /dev/null
+++ b/src/core/Index/IndexFileNames.cs
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary>Useful constants representing filenames and extensions used by lucene</summary>
+ public sealed class IndexFileNames
+ {
+
+ /// <summary>Name of the index segment file </summary>
+ public /*internal*/ const System.String SEGMENTS = "segments";
+
+ /// <summary>Name of the generation reference file </summary>
+ public /*internal*/ const System.String SEGMENTS_GEN = "segments.gen";
+
+ /// <summary>Name of the index deletable file (only used in
+ /// pre-lockless indices)
+ /// </summary>
+ public /*internal*/ const System.String DELETABLE = "deletable";
+
+ /// <summary>Extension of norms file </summary>
+ public /*internal*/ const System.String NORMS_EXTENSION = "nrm";
+
+ /// <summary>Extension of freq postings file </summary>
+ public /*internal*/ const System.String FREQ_EXTENSION = "frq";
+
+ /// <summary>Extension of prox postings file </summary>
+ public /*internal*/ const System.String PROX_EXTENSION = "prx";
+
+ /// <summary>Extension of terms file </summary>
+ public /*internal*/ const System.String TERMS_EXTENSION = "tis";
+
+ /// <summary>Extension of terms index file </summary>
+ public /*internal*/ const System.String TERMS_INDEX_EXTENSION = "tii";
+
+ /// <summary>Extension of stored fields index file </summary>
+ public /*internal*/ const System.String FIELDS_INDEX_EXTENSION = "fdx";
+
+ /// <summary>Extension of stored fields file </summary>
+ public /*internal*/ const System.String FIELDS_EXTENSION = "fdt";
+
+ /// <summary>Extension of vectors fields file </summary>
+ public /*internal*/ const System.String VECTORS_FIELDS_EXTENSION = "tvf";
+
+ /// <summary>Extension of vectors documents file </summary>
+ public /*internal*/ const System.String VECTORS_DOCUMENTS_EXTENSION = "tvd";
+
+ /// <summary>Extension of vectors index file </summary>
+ public /*internal*/ const System.String VECTORS_INDEX_EXTENSION = "tvx";
+
+ /// <summary>Extension of compound file </summary>
+ public /*internal*/ const System.String COMPOUND_FILE_EXTENSION = "cfs";
+
+ /// <summary>Extension of compound file for doc store files</summary>
+ public /*internal*/ const System.String COMPOUND_FILE_STORE_EXTENSION = "cfx";
+
+ /// <summary>Extension of deletes </summary>
+ internal const System.String DELETES_EXTENSION = "del";
+
+ /// <summary>Extension of field infos </summary>
+ public /*internal*/ const System.String FIELD_INFOS_EXTENSION = "fnm";
+
+ /// <summary>Extension of plain norms </summary>
+ public /*internal*/ const System.String PLAIN_NORMS_EXTENSION = "f";
+
+ /// <summary>Extension of separate norms </summary>
+ public /*internal*/ const System.String SEPARATE_NORMS_EXTENSION = "s";
+
+ /// <summary>Extension of gen file </summary>
+ public /*internal*/ const System.String GEN_EXTENSION = "gen";
+
+ /// <summary> This array contains all filename extensions used by
+ /// Lucene's index files, with two exceptions, namely the
+ /// extension made up from <c>.f</c> + a number and
+ /// from <c>.s</c> + a number. Also note that
+ /// Lucene's <c>segments_N</c> files do not have any
+ /// filename extension.
+ /// </summary>
+ public /*internal*/ static readonly System.String[] INDEX_EXTENSIONS = new System.String[]{COMPOUND_FILE_EXTENSION, FIELD_INFOS_EXTENSION, FIELDS_INDEX_EXTENSION, FIELDS_EXTENSION, TERMS_INDEX_EXTENSION, TERMS_EXTENSION, FREQ_EXTENSION, PROX_EXTENSION, DELETES_EXTENSION, VECTORS_INDEX_EXTENSION, VECTORS_DOCUMENTS_EXTENSION, VECTORS_FIELDS_EXTENSION, GEN_EXTENSION, NORMS_EXTENSION, COMPOUND_FILE_STORE_EXTENSION};
+
+ /// <summary>File extensions that are added to a compound file
+ /// (same as above, minus "del", "gen", "cfs").
+ /// </summary>
+ public /*internal*/ static readonly System.String[] INDEX_EXTENSIONS_IN_COMPOUND_FILE = new System.String[]{FIELD_INFOS_EXTENSION, FIELDS_INDEX_EXTENSION, FIELDS_EXTENSION, TERMS_INDEX_EXTENSION, TERMS_EXTENSION, FREQ_EXTENSION, PROX_EXTENSION, VECTORS_INDEX_EXTENSION, VECTORS_DOCUMENTS_EXTENSION, VECTORS_FIELDS_EXTENSION, NORMS_EXTENSION};
+
+ public /*internal*/ static readonly System.String[] STORE_INDEX_EXTENSIONS = new System.String[]{VECTORS_INDEX_EXTENSION, VECTORS_FIELDS_EXTENSION, VECTORS_DOCUMENTS_EXTENSION, FIELDS_INDEX_EXTENSION, FIELDS_EXTENSION};
+
+ public /*internal*/ static readonly System.String[] NON_STORE_INDEX_EXTENSIONS = new System.String[]{FIELD_INFOS_EXTENSION, FREQ_EXTENSION, PROX_EXTENSION, TERMS_EXTENSION, TERMS_INDEX_EXTENSION, NORMS_EXTENSION};
+
+ /// <summary>File extensions of old-style index files </summary>
+ public /*internal*/ static readonly System.String[] COMPOUND_EXTENSIONS = new System.String[]{FIELD_INFOS_EXTENSION, FREQ_EXTENSION, PROX_EXTENSION, FIELDS_INDEX_EXTENSION, FIELDS_EXTENSION, TERMS_INDEX_EXTENSION, TERMS_EXTENSION};
+
+ /// <summary>File extensions for term vector support </summary>
+ public /*internal*/ static readonly System.String[] VECTOR_EXTENSIONS = new System.String[]{VECTORS_INDEX_EXTENSION, VECTORS_DOCUMENTS_EXTENSION, VECTORS_FIELDS_EXTENSION};
+
+ /// <summary> Computes the full file name from base, extension and
+ /// generation. If the generation is -1, the file name is
+ /// null. If it's 0, the file name is the base followed by the
+ /// extension. If it's > 0, the file name is the base, an
+ /// underscore, the generation and the extension.
+ /// </summary>
+ /// <param name="base_Renamed">-- main part of the file name
+ /// </param>
+ /// <param name="extension">-- extension of the filename (including .)
+ /// </param>
+ /// <param name="gen">-- generation
+ /// </param>
+ public /*internal*/ static System.String FileNameFromGeneration(System.String base_Renamed, System.String extension, long gen)
+ {
+ if (gen == SegmentInfo.NO)
+ {
+ return null;
+ }
+ else if (gen == SegmentInfo.WITHOUT_GEN)
+ {
+ return base_Renamed + extension;
+ }
+ else
+ {
+#if !PRE_LUCENE_NET_2_0_0_COMPATIBLE
+ return base_Renamed + "_" + Number.ToString(gen) + extension;
+#else
+ return base_Renamed + "_" + System.Convert.ToString(gen, 16) + extension;
+#endif
+ }
+ }
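+
+ // Illustrative results (assuming Number.ToString emits the base-36 generation
+ // string used by the Java original; these literals are examples, not tests):
+ //
+ //   FileNameFromGeneration("segments", "", SegmentInfo.NO)          -> null
+ //   FileNameFromGeneration("segments", "", SegmentInfo.WITHOUT_GEN) -> "segments"
+ //   FileNameFromGeneration("_0", ".del", 1)                         -> "_0_1.del"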
+
+ /// <summary> Returns true if the provided filename is one of the doc
+ /// store files (ends with an extension in
+ /// STORE_INDEX_EXTENSIONS).
+ /// </summary>
+ internal static bool IsDocStoreFile(System.String fileName)
+ {
+ if (fileName.EndsWith(COMPOUND_FILE_STORE_EXTENSION))
+ return true;
+ for (int i = 0; i < STORE_INDEX_EXTENSIONS.Length; i++)
+ if (fileName.EndsWith(STORE_INDEX_EXTENSIONS[i]))
+ return true;
+ return false;
+ }
+
+ internal static System.String SegmentFileName(System.String segmentName, System.String ext)
+ {
+ return segmentName + "." + ext;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/IndexReader.cs b/src/core/Index/IndexReader.cs
new file mode 100644
index 0000000..5c3bd9b
--- /dev/null
+++ b/src/core/Index/IndexReader.cs
@@ -0,0 +1,1374 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Documents;
+using Document = Lucene.Net.Documents.Document;
+using FieldSelector = Lucene.Net.Documents.FieldSelector;
+using Lucene.Net.Store;
+using Similarity = Lucene.Net.Search.Similarity;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary>IndexReader is an abstract class, providing an interface for accessing an
+ /// index. Search of an index is done entirely through this abstract interface,
+ /// so that any subclass which implements it is searchable.
+ /// <p/> Concrete subclasses of IndexReader are usually constructed with a call to
+ /// one of the static <c>open()</c> methods, e.g. <see cref="Open(Lucene.Net.Store.Directory, bool)" />
+ ///.
+ /// <p/> For efficiency, in this API documents are often referred to via
+ /// <i>document numbers</i>, non-negative integers which each name a unique
+ /// document in the index. These document numbers are ephemeral--they may change
+ /// as documents are added to and deleted from an index. Clients should thus not
+ /// rely on a given document having the same number between sessions.
+ /// <p/> An IndexReader can be opened on a directory for which an IndexWriter is
+ /// opened already, but it cannot be used to delete documents from the index then.
+ /// <p/>
+ /// <b>NOTE</b>: for backwards API compatibility, several methods are not listed
+ /// as abstract, but have no useful implementations in this base class and
+ /// instead always throw UnsupportedOperationException. Subclasses are
+ /// strongly encouraged to override these methods, but in many cases may not
+ /// need to.
+ /// <p/>
+ /// <p/>
+ /// <b>NOTE</b>: as of 2.4, it's possible to open a read-only
+ /// IndexReader using the static open methods that accept the
+ /// boolean readOnly parameter. Such a reader has better
+ /// concurrency as it's not necessary to synchronize on the
+ /// isDeleted method. You must explicitly specify false
+ /// if you want to make changes with the resulting IndexReader.
+ /// <p/>
+ /// <a name="thread-safety"></a><p/><b>NOTE</b>: <see cref="IndexReader" />
+ /// instances are completely thread
+ /// safe, meaning multiple threads can call any of its methods,
+ /// concurrently. If your application requires external
+ /// synchronization, you should <b>not</b> synchronize on the
+ /// <c>IndexReader</c> instance; use your own
+ /// (non-Lucene) objects instead.
+ /// </summary>
+ public abstract class IndexReader : System.ICloneable, System.IDisposable
+ {
+ private class AnonymousClassFindSegmentsFile : SegmentInfos.FindSegmentsFile
+ {
+ private void InitBlock(Lucene.Net.Store.Directory directory2)
+ {
+ this.directory2 = directory2;
+ }
+ private Lucene.Net.Store.Directory directory2;
+ internal AnonymousClassFindSegmentsFile(Lucene.Net.Store.Directory directory2, Lucene.Net.Store.Directory Param1):base(Param1)
+ {
+ InitBlock(directory2);
+ }
+ public override System.Object DoBody(System.String segmentFileName)
+ {
+ return (long) directory2.FileModified(segmentFileName);
+ }
+ }
+
+ /// <summary> Constants describing field properties, for example used for
+ /// <see cref="IndexReader.GetFieldNames(FieldOption)" />.
+ /// </summary>
+ public sealed class FieldOption
+ {
+ private readonly System.String option;
+ internal FieldOption()
+ {
+ }
+ internal FieldOption(System.String option)
+ {
+ this.option = option;
+ }
+ public override System.String ToString()
+ {
+ return this.option;
+ }
+ /// <summary>All fields </summary>
+ public static readonly FieldOption ALL = new FieldOption("ALL");
+ /// <summary>All indexed fields </summary>
+ public static readonly FieldOption INDEXED = new FieldOption("INDEXED");
+ /// <summary>All fields that store payloads </summary>
+ public static readonly FieldOption STORES_PAYLOADS = new FieldOption("STORES_PAYLOADS");
+ /// <summary>All fields that omit tf </summary>
+ public static readonly FieldOption OMIT_TERM_FREQ_AND_POSITIONS = new FieldOption("OMIT_TERM_FREQ_AND_POSITIONS");
+ /// <summary>All fields which are not indexed </summary>
+ public static readonly FieldOption UNINDEXED = new FieldOption("UNINDEXED");
+ /// <summary>All fields which are indexed with termvectors enabled </summary>
+ public static readonly FieldOption INDEXED_WITH_TERMVECTOR = new FieldOption("INDEXED_WITH_TERMVECTOR");
+ /// <summary>All fields which are indexed but don't have termvectors enabled </summary>
+ public static readonly FieldOption INDEXED_NO_TERMVECTOR = new FieldOption("INDEXED_NO_TERMVECTOR");
+ /// <summary>All fields with termvectors enabled. Please note that only standard termvector fields are returned </summary>
+ public static readonly FieldOption TERMVECTOR = new FieldOption("TERMVECTOR");
+ /// <summary>All fields with termvectors with position values enabled </summary>
+ public static readonly FieldOption TERMVECTOR_WITH_POSITION = new FieldOption("TERMVECTOR_WITH_POSITION");
+ /// <summary>All fields with termvectors with offset values enabled </summary>
+ public static readonly FieldOption TERMVECTOR_WITH_OFFSET = new FieldOption("TERMVECTOR_WITH_OFFSET");
+ /// <summary>All fields with termvectors with offset values and position values enabled </summary>
+ public static readonly FieldOption TERMVECTOR_WITH_POSITION_OFFSET = new FieldOption("TERMVECTOR_WITH_POSITION_OFFSET");
+ }
+
+ private bool closed;
+ protected internal bool hasChanges;
+
+ private int refCount;
+
+ protected internal static int DEFAULT_TERMS_INDEX_DIVISOR = 1;
+
+ /// <summary>Expert: returns the current refCount for this reader </summary>
+ public virtual int RefCount
+ {
+ get
+ {
+ lock (this)
+ {
+ return refCount;
+ }
+ }
+ }
+
+ /// <summary> Expert: increments the refCount of this IndexReader
+ /// instance. RefCounts are used to determine when a
+ /// reader can be closed safely, i.e. as soon as there are
+ /// no more references. Be sure to always call a
+ /// corresponding <see cref="DecRef" />, in a finally clause;
+ /// otherwise the reader may never be closed. Note that
+ /// <see cref="Close" /> simply calls decRef(), which means that
+ /// the IndexReader will not really be closed until <see cref="DecRef" />
+ /// has been called for all outstanding
+ /// references.
+ ///
+ /// </summary>
+ /// <seealso cref="DecRef">
+ /// </seealso>
+ public virtual void IncRef()
+ {
+ lock (this)
+ {
+ System.Diagnostics.Debug.Assert(refCount > 0);
+ EnsureOpen();
+ refCount++;
+ }
+ }
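+
+ // A minimal usage sketch (not part of the original file): callers that share
+ // this reader pair IncRef with DecRef in a finally clause, as the summary
+ // above recommends, so the reader is eventually closed even if the work
+ // throws. "DoSearch" is a hypothetical placeholder for the caller's logic.
+ //
+ //   reader.IncRef();
+ //   try
+ //   {
+ //       DoSearch(reader);
+ //   }
+ //   finally
+ //   {
+ //       reader.DecRef();
+ //   }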
+
+ /// <summary> Expert: decreases the refCount of this IndexReader
+ /// instance. If the refCount drops to 0, then pending
+ /// changes (if any) are committed to the index and this
+ /// reader is closed.
+ ///
+ /// </summary>
+ /// <throws> IOException in case an IOException occurs in commit() or doClose() </throws>
+ /// <seealso cref="IncRef">
+ /// </seealso>
+ public virtual void DecRef()
+ {
+ lock (this)
+ {
+ System.Diagnostics.Debug.Assert(refCount > 0);
+ EnsureOpen();
+ if (refCount == 1)
+ {
+ Commit();
+ DoClose();
+ }
+ refCount--;
+ }
+ }
+
+ protected internal IndexReader()
+ {
+ refCount = 1;
+ }
+
+ /// <throws> AlreadyClosedException if this IndexReader is closed </throws>
+ protected internal void EnsureOpen()
+ {
+ if (refCount <= 0)
+ {
+ throw new AlreadyClosedException("this IndexReader is closed");
+ }
+ }
+
+ /// <summary>Returns an IndexReader reading the index in the given
+ /// Directory. You should pass readOnly=true, since it
+ /// gives much better concurrent performance, unless you
+ /// intend to do write operations (delete documents or
+ /// change norms) with the reader.
+ /// </summary>
+ /// <param name="directory">the index directory</param>
+ /// <param name="readOnly">true if no changes (deletions, norms) will be made with this IndexReader</param>
+ /// <exception cref="CorruptIndexException">CorruptIndexException if the index is corrupt</exception>
+ /// <exception cref="System.IO.IOException">IOException if there is a low-level IO error</exception>
+ public static IndexReader Open(Directory directory, bool readOnly)
+ {
+ return Open(directory, null, null, readOnly, DEFAULT_TERMS_INDEX_DIVISOR);
+ }
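+
+ // Typical read-only open (illustrative sketch; "dir" stands for any
+ // Lucene.Net.Store.Directory obtained elsewhere):
+ //
+ //   IndexReader reader = IndexReader.Open(dir, true);
+ //   try
+ //   {
+ //       // search, enumerate terms, read stored fields, ...
+ //   }
+ //   finally
+ //   {
+ //       reader.Dispose();
+ //   }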
+
+ /// <summary>Expert: returns an IndexReader reading the index in the given
+ /// <see cref="IndexCommit" />. You should pass readOnly=true, since it
+ /// gives much better concurrent performance, unless you
+ /// intend to do write operations (delete documents or
+ /// change norms) with the reader.
+ /// </summary>
+ /// <param name="commit">the commit point to open
+ /// </param>
+ /// <param name="readOnly">true if no changes (deletions, norms) will be made with this IndexReader
+ /// </param>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
+ public static IndexReader Open(IndexCommit commit, bool readOnly)
+ {
+ return Open(commit.Directory, null, commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR);
+ }
+
+ /// <summary>Expert: returns an IndexReader reading the index in
+ /// the given Directory, with a custom <see cref="IndexDeletionPolicy" />
+ ///. You should pass readOnly=true,
+ /// since it gives much better concurrent performance,
+ /// unless you intend to do write operations (delete
+ /// documents or change norms) with the reader.
+ /// </summary>
+ /// <param name="directory">the index directory
+ /// </param>
+ /// <param name="deletionPolicy">a custom deletion policy (only used
+ /// if you use this reader to perform deletes or to set
+ /// norms); see <see cref="IndexWriter" /> for details.
+ /// </param>
+ /// <param name="readOnly">true if no changes (deletions, norms) will be made with this IndexReader
+ /// </param>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
+ public static IndexReader Open(Directory directory, IndexDeletionPolicy deletionPolicy, bool readOnly)
+ {
+ return Open(directory, deletionPolicy, null, readOnly, DEFAULT_TERMS_INDEX_DIVISOR);
+ }
+
+ /// <summary>Expert: returns an IndexReader reading the index in
+ /// the given Directory, with a custom <see cref="IndexDeletionPolicy" />
+ ///. You should pass readOnly=true,
+ /// since it gives much better concurrent performance,
+ /// unless you intend to do write operations (delete
+ /// documents or change norms) with the reader.
+ /// </summary>
+ /// <param name="directory">the index directory
+ /// </param>
+ /// <param name="deletionPolicy">a custom deletion policy (only used
+ /// if you use this reader to perform deletes or to set
+ /// norms); see <see cref="IndexWriter" /> for details.
+ /// </param>
+ /// <param name="readOnly">true if no changes (deletions, norms) will be made with this IndexReader
+ /// </param>
+ /// <param name="termInfosIndexDivisor">Subsamples which indexed
+ /// terms are loaded into RAM. This has the same effect as <see>
+ /// <cref>IndexWriter.SetTermIndexInterval</cref>
+ /// </see> except that setting
+ /// must be done at indexing time while this setting can be
+ /// set per reader. When set to N, then one in every
+ /// N*termIndexInterval terms in the index is loaded into
+ /// memory. By setting this to a value > 1 you can reduce
+ /// memory usage, at the expense of higher latency when
+ /// loading a TermInfo. The default value is 1. Set this
+ /// to -1 to skip loading the terms index entirely.
+ /// </param>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
+ public static IndexReader Open(Directory directory, IndexDeletionPolicy deletionPolicy, bool readOnly, int termInfosIndexDivisor)
+ {
+ return Open(directory, deletionPolicy, null, readOnly, termInfosIndexDivisor);
+ }
+
+ /// <summary>Expert: returns an IndexReader reading the index in
+ /// the given Directory, using a specific commit and with
+ /// a custom <see cref="IndexDeletionPolicy" />. You should pass
+ /// readOnly=true, since it gives much better concurrent
+ /// performance, unless you intend to do write operations
+ /// (delete documents or change norms) with the reader.
+ /// </summary>
+ /// <param name="commit">the specific <see cref="IndexCommit" /> to open;
+ /// see <see cref="IndexReader.ListCommits" /> to list all commits
+ /// in a directory
+ /// </param>
+ /// <param name="deletionPolicy">a custom deletion policy (only used
+ /// if you use this reader to perform deletes or to set
+ /// norms); see <see cref="IndexWriter" /> for details.
+ /// </param>
+ /// <param name="readOnly">true if no changes (deletions, norms) will be made with this IndexReader
+ /// </param>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
+ public static IndexReader Open(IndexCommit commit, IndexDeletionPolicy deletionPolicy, bool readOnly)
+ {
+ return Open(commit.Directory, deletionPolicy, commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR);
+ }
+
+ /// <summary>Expert: returns an IndexReader reading the index in
+ /// the given Directory, using a specific commit and with
+ /// a custom <see cref="IndexDeletionPolicy" />. You should pass
+ /// readOnly=true, since it gives much better concurrent
+ /// performance, unless you intend to do write operations
+ /// (delete documents or change norms) with the reader.
+ /// </summary>
+ /// <param name="commit">the specific <see cref="IndexCommit" /> to open;
+ /// see <see cref="IndexReader.ListCommits" /> to list all commits
+ /// in a directory
+ /// </param>
+ /// <param name="deletionPolicy">a custom deletion policy (only used
+ /// if you use this reader to perform deletes or to set
+ /// norms); see <see cref="IndexWriter" /> for details.
+ /// </param>
+ /// <param name="readOnly">true if no changes (deletions, norms) will be made with this IndexReader
+ /// </param>
+ /// <param name="termInfosIndexDivisor">Subsambles which indexed
+ /// terms are loaded into RAM. This has the same effect as <see>
+ /// <cref>IndexWriter.SetTermIndexInterval</cref>
+ /// </see> except that setting
+ /// must be done at indexing time while this setting can be
+ /// set per reader. When set to N, then one in every
+ /// N*termIndexInterval terms in the index is loaded into
+ /// memory. By setting this to a value > 1 you can reduce
+ /// memory usage, at the expense of higher latency when
+ /// loading a TermInfo. The default value is 1. Set this
+ /// to -1 to skip loading the terms index entirely.
+ /// </param>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
+ public static IndexReader Open(IndexCommit commit, IndexDeletionPolicy deletionPolicy, bool readOnly, int termInfosIndexDivisor)
+ {
+ return Open(commit.Directory, deletionPolicy, commit, readOnly, termInfosIndexDivisor);
+ }
+
+ private static IndexReader Open(Directory directory, IndexDeletionPolicy deletionPolicy, IndexCommit commit, bool readOnly, int termInfosIndexDivisor)
+ {
+ return DirectoryReader.Open(directory, deletionPolicy, commit, readOnly, termInfosIndexDivisor);
+ }
+
+ /// <summary> Refreshes an IndexReader if the index has changed since this instance
+ /// was (re)opened.
+ /// <p/>
+ /// Opening an IndexReader is an expensive operation. This method can be used
+ /// to refresh an existing IndexReader to reduce these costs. This method
+ /// tries to only load segments that have changed or were created after the
+ /// IndexReader was (re)opened.
+ /// <p/>
+ /// If the index has not changed since this instance was (re)opened, then this
+ /// call is a NOOP and returns this instance. Otherwise, a new instance is
+ /// returned. The old instance is <b>not</b> closed and remains usable.<br/>
+ /// <p/>
+ /// If the reader is reopened, even though they share
+ /// resources internally, it's safe to make changes
+ /// (deletions, norms) with the new reader. All shared
+ /// mutable state obeys "copy on write" semantics to ensure
+ /// the changes are not seen by other readers.
+ /// <p/>
+ /// You can determine whether a reader was actually reopened by comparing the
+ /// old instance with the instance returned by this method:
+ /// <code>
+ /// IndexReader reader = ...
+ /// ...
+ /// IndexReader newReader = r.reopen();
+ /// if (newReader != reader) {
+ /// ... // reader was reopened
+ /// reader.close();
+ /// }
+ /// reader = newReader;
+ /// ...
+ /// </code>
+ ///
+ /// Be sure to synchronize that code so that other threads,
+ /// if present, can never use reader after it has been
+ /// closed and before it's switched to newReader.
+ ///
+ /// <p/><b>NOTE</b>: If this reader is a near real-time
+ /// reader (obtained from <see cref="IndexWriter.GetReader()" />),
+ /// reopen() will simply call writer.getReader() again for
+ /// you, though this may change in the future.
+ ///
+ /// </summary>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
+ public virtual IndexReader Reopen()
+ {
+ lock (this)
+ {
+ throw new NotSupportedException("This reader does not support reopen().");
+ }
+ }
+
+
+ /// <summary>Just like <see cref="Reopen()" />, except you can change the
+ /// readOnly setting of the original reader. If the index is
+ /// unchanged but readOnly is different then a new reader
+ /// will be returned.
+ /// </summary>
+ public virtual IndexReader Reopen(bool openReadOnly)
+ {
+ lock (this)
+ {
+ throw new NotSupportedException("This reader does not support reopen().");
+ }
+ }
+
+ /// <summary>Expert: reopen this reader on a specific commit point.
+ /// This always returns a readOnly reader. If the
+ /// specified commit point matches what this reader is
+ /// already on, and this reader is already readOnly, then
+ /// this same instance is returned; if it is not already
+ /// readOnly, a readOnly clone is returned.
+ /// </summary>
+ public virtual IndexReader Reopen(IndexCommit commit)
+ {
+ lock (this)
+ {
+ throw new NotSupportedException("This reader does not support reopen(IndexCommit).");
+ }
+ }
+
+ /// <summary> Efficiently clones the IndexReader (sharing most
+ /// internal state).
+ /// <p/>
+ /// On cloning a reader with pending changes (deletions,
+ /// norms), the original reader transfers its write lock to
+ /// the cloned reader. This means only the cloned reader
+ /// may make further changes to the index, and commit the
+ /// changes to the index on close, but the old reader still
+ /// reflects all changes made up until it was cloned.
+ /// <p/>
+ /// Like <see cref="Reopen()" />, it's safe to make changes to
+ /// either the original or the cloned reader: all shared
+ /// mutable state obeys "copy on write" semantics to ensure
+ /// the changes are not seen by other readers.
+ /// <p/>
+ /// </summary>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
+ public virtual System.Object Clone()
+ {
+ throw new System.NotSupportedException("This reader does not implement clone()");
+ }
+
+ /// <summary> Clones the IndexReader and optionally changes readOnly. A readOnly
+ /// reader cannot open a writeable reader.
+ /// </summary>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
+ public virtual IndexReader Clone(bool openReadOnly)
+ {
+ lock (this)
+ {
+ throw new System.NotSupportedException("This reader does not implement clone()");
+ }
+ }
+
+ /// <summary> Returns the directory associated with this index. The default
+ /// implementation returns the directory specified by subclasses when
+ /// delegating to the IndexReader(Directory) constructor, or throws an
+ /// UnsupportedOperationException if one was not specified.
+ /// </summary>
+ /// <throws> UnsupportedOperationException if no directory </throws>
+ public virtual Directory Directory()
+ {
+ EnsureOpen();
+ throw new NotSupportedException("This reader does not support this method.");
+ }
+
+ /// <summary> Returns the time the index in the named directory was last modified.
+ /// Do not use this to check whether the reader is still up-to-date, use
+ /// <see cref="IsCurrent()" /> instead.
+ /// </summary>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
+ public static long LastModified(Directory directory2)
+ {
+ return (long) ((System.Int64) new AnonymousClassFindSegmentsFile(directory2, directory2).Run());
+ }
+
+ /// <summary> Reads version number from segments files. The version number is
+ /// initialized with a timestamp and then increased by one for each change of
+ /// the index.
+ ///
+ /// </summary>
+ /// <param name="directory">where the index resides.
+ /// </param>
+ /// <returns> version number.
+ /// </returns>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
+ public static long GetCurrentVersion(Directory directory)
+ {
+ return SegmentInfos.ReadCurrentVersion(directory);
+ }
+
+ /// <summary> Reads commitUserData, previously passed to
+ /// <see cref="IndexWriter.Commit(System.Collections.Generic.IDictionary{string, string})" />,
+ /// from current index segments file. This will return null if
+ /// <see cref="IndexWriter.Commit(System.Collections.Generic.IDictionary{string, string})" />
+ /// has never been called for this index.
+ /// </summary>
+ /// <param name="directory">where the index resides.
+ /// </param>
+ /// <returns> commit userData.
+ /// </returns>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
+ /// <summary>
+ /// </summary>
+ /// <seealso cref="GetCommitUserData(Store.Directory)">
+ /// </seealso>
+ public static System.Collections.Generic.IDictionary<string, string> GetCommitUserData(Directory directory)
+ {
+ return SegmentInfos.ReadCurrentUserData(directory);
+ }
+
+ /// <summary> Version number when this IndexReader was opened. Not implemented in the
+ /// IndexReader base class.
+ ///
+ /// <p/>
+ /// If this reader is based on a Directory (ie, was created by calling
+ /// <see cref="Open(Lucene.Net.Store.Directory, bool)" />, or <see cref="Reopen()" />
+ /// on a reader based on a Directory), then
+ /// this method returns the version recorded in the commit that the reader
+ /// opened. This version is advanced every time <see cref="IndexWriter.Commit()" /> is
+ /// called.
+ /// <p/>
+ ///
+ /// <p/>
+ /// If instead this reader is a near real-time reader (ie, obtained by a call
+ /// to <see cref="IndexWriter.GetReader()" />, or by calling <see cref="Reopen()" /> on a near
+ /// real-time reader), then this method returns the version of the last
+ /// commit done by the writer. Note that even as further changes are made
+ /// with the writer, the version will not change until a commit is
+ /// completed. Thus, you should not rely on this method to determine when a
+ /// near real-time reader should be opened. Use <see cref="IsCurrent" /> instead.
+ /// <p/>
+ ///
+ /// </summary>
+ /// <throws> UnsupportedOperationException unless overridden in subclass </throws>
+ public virtual long Version
+ {
+ get { throw new System.NotSupportedException("This reader does not support this method."); }
+ }
+
+ /// <summary> Retrieve the String userData optionally passed to
+ /// <see cref="IndexWriter.Commit(System.Collections.Generic.IDictionary{string, string})" />.
+ /// This will return null if
+ /// <see cref="IndexWriter.Commit(System.Collections.Generic.IDictionary{string, string})" />
+ /// has never been called for this index.
+ /// </summary>
+ /// <seealso cref="GetCommitUserData(Store.Directory)">
+ /// </seealso>
+ public virtual IDictionary<string, string> CommitUserData
+ {
+ get { throw new System.NotSupportedException("This reader does not support this method."); }
+ }
+
+ /// <summary> Check whether any new changes have occurred to the index since this
+ /// reader was opened.
+ ///
+ /// <p/>
+ /// If this reader is based on a Directory (ie, was created by calling
+ /// <see>
+ /// <cref>Open(Store.Directory)</cref>
+ /// </see> , or <see cref="Reopen()" /> on a reader based on a Directory), then
+ /// this method checks if any further commits (see <see cref="IndexWriter.Commit()" />)
+ /// have occurred in that directory.
+ /// <p/>
+ ///
+ /// <p/>
+ /// If instead this reader is a near real-time reader (ie, obtained by a call
+ /// to <see cref="IndexWriter.GetReader()" />, or by calling <see cref="Reopen()" /> on a near
+ /// real-time reader), then this method checks if either a new commit has
+ /// occurred, or any new uncommitted changes have taken place via the writer.
+ /// Note that even if the writer has only performed merging, this method will
+ /// still return false.
+ /// <p/>
+ ///
+ /// <p/>
+ /// In any event, if this returns false, you should call <see cref="Reopen()" /> to
+ /// get a new reader that sees the changes.
+ /// <p/>
+ ///
+ /// </summary>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
+ /// <throws> UnsupportedOperationException unless overridden in subclass </throws>
+ public virtual bool IsCurrent()
+ {
+ throw new NotSupportedException("This reader does not support this method.");
+ }
+
+ /// <summary> Checks whether the index is optimized (if it has a single segment and
+ /// no deletions). Not implemented in the IndexReader base class.
+ /// </summary>
+ /// <returns> <c>true</c> if the index is optimized; <c>false</c> otherwise </returns>
+ /// <throws> UnsupportedOperationException unless overridden in subclass </throws>
+ public virtual bool IsOptimized()
+ {
+ throw new NotSupportedException("This reader does not support this method.");
+ }
+
+ /// <summary> Return an array of term frequency vectors for the specified document.
+ /// The array contains a vector for each vectorized field in the document.
+ /// Each vector contains terms and frequencies for all terms in a given vectorized field.
+ /// If no such fields existed, the method returns null. The term vectors that are
+ /// returned may either be of type <see cref="ITermFreqVector" />
+ /// or of type <see cref="TermPositionVector" /> if
+ /// positions or offsets have been stored.
+ ///
+ /// </summary>
+ /// <param name="docNumber">document for which term frequency vectors are returned
+ /// </param>
+ /// <returns> array of term frequency vectors. May be null if no term vectors have been
+ /// stored for the specified document.
+ /// </returns>
+ /// <throws> IOException if index cannot be accessed </throws>
+ /// <seealso cref="Lucene.Net.Documents.Field.TermVector">
+ /// </seealso>
+ abstract public ITermFreqVector[] GetTermFreqVectors(int docNumber);
+
+
+ /// <summary> Return a term frequency vector for the specified document and field. The
+ /// returned vector contains terms and frequencies for the terms in
+ /// the specified field of this document, if the field had the storeTermVector
+ /// flag set. If termvectors had been stored with positions or offsets, a
+ /// <see cref="TermPositionVector" /> is returned.
+ ///
+ /// </summary>
+ /// <param name="docNumber">document for which the term frequency vector is returned
+ /// </param>
+ /// <param name="field">field for which the term frequency vector is returned.
+ /// </param>
+ /// <returns> term frequency vector May be null if field does not exist in the specified
+ /// document or term vector was not stored.
+ /// </returns>
+ /// <throws> IOException if index cannot be accessed </throws>
+ /// <seealso cref="Lucene.Net.Documents.Field.TermVector">
+ /// </seealso>
+ abstract public ITermFreqVector GetTermFreqVector(int docNumber, String field);
+
+ /// <summary> Load the Term Vector into a user-defined data structure instead of relying on the parallel arrays of
+ /// the <see cref="ITermFreqVector" />.
+ /// </summary>
+ /// <param name="docNumber">The number of the document to load the vector for
+ /// </param>
+ /// <param name="field">The name of the field to load
+ /// </param>
+ /// <param name="mapper">The <see cref="TermVectorMapper" /> to process the vector. Must not be null
+ /// </param>
+ /// <throws> IOException if term vectors cannot be accessed or if they do not exist on the field and doc. specified. </throws>
+ /// <summary>
+ /// </summary>
+ abstract public void GetTermFreqVector(int docNumber, String field, TermVectorMapper mapper);
+
+ /// <summary> Map all the term vectors for all fields in a Document</summary>
+ /// <param name="docNumber">The number of the document to load the vector for
+ /// </param>
+ /// <param name="mapper">The <see cref="TermVectorMapper" /> to process the vector. Must not be null
+ /// </param>
+ /// <throws> IOException if term vectors cannot be accessed or if they do not exist on the field and doc. specified. </throws>
+ abstract public void GetTermFreqVector(int docNumber, TermVectorMapper mapper);
+
+ /// <summary> Returns <c>true</c> if an index exists at the specified directory;
+ /// <c>false</c> if the directory does not exist or there is no index in it.
+ /// </summary>
+ /// <param name="directory">the directory to check for an index
+ /// </param>
+ /// <returns> <c>true</c> if an index exists; <c>false</c> otherwise
+ /// </returns>
+ /// <throws> IOException if there is a problem with accessing the index </throws>
+ public static bool IndexExists(Directory directory)
+ {
+ return SegmentInfos.GetCurrentSegmentGeneration(directory) != - 1;
+ }
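+
+ // Sketch (not from the original source; "dir" is any Directory, e.g. an
+ // FSDirectory opened over the index folder):
+ //
+ //   if (IndexReader.IndexExists(dir))
+ //   {
+ //       // safe to open a reader here
+ //   }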
+
+ /// <summary>Returns the number of documents in this index. </summary>
+ [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
+ public abstract int NumDocs();
+
+ /// <summary>Returns one greater than the largest possible document number.
+ /// This may be used to, e.g., determine how big to allocate an array which
+ /// will have an element for every document number in an index.
+ /// </summary>
+ public abstract int MaxDoc { get; }
+
+ /// <summary>Returns the number of deleted documents. </summary>
+ public virtual int NumDeletedDocs
+ {
+ get { return MaxDoc - NumDocs(); }
+ }
+
+ /// <summary> Returns the stored fields of the <c>n</c><sup>th</sup>
+ /// <c>Document</c> in this index.
+ /// <p/>
+ /// <b>NOTE:</b> for performance reasons, this method does not check if the
+ /// requested document is deleted, and therefore asking for a deleted document
+ /// may yield unspecified results. Usually this is not required, however you
+ /// can call <see cref="IsDeleted(int)" /> with the requested document ID to verify
+ /// the document is not deleted.
+ ///
+ /// </summary>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
+ public virtual Document Document(int n)
+ {
+ EnsureOpen();
+ return Document(n, null);
+ }
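+
+ // Example of retrieving stored fields (illustrative; "title" is a
+ // hypothetical field name, and docId is assumed to refer to a live document):
+ //
+ //   Document doc = reader.Document(docId);
+ //   string title = doc.Get("title");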
+
+ /// <summary> Returns the stored fields of the <c>n</c><sup>th</sup>
+ /// <c>Document</c> in this index.
+ /// <p/>
+ /// <b>NOTE:</b> for performance reasons, this method does not check if the
+ /// requested document is deleted, and therefore asking for a deleted document
+ /// may yield unspecified results. Usually this is not required, however you
+ /// can call <see cref="IsDeleted(int)" /> with the requested document ID to verify
+ /// the document is not deleted.
+ ///
+ /// </summary>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
+ public Document this[int doc]
+ {
+ get { return Document(doc); }
+ }
+
+ /// <summary> Get the <see cref="Lucene.Net.Documents.Document" /> at the <c>n</c>
+ /// <sup>th</sup> position. The <see cref="FieldSelector" /> may be used to determine
+ /// what <see cref="Lucene.Net.Documents.Field" />s to load and how they should
+ /// be loaded. <b>NOTE:</b> If this Reader (more specifically, the underlying
+ /// <c>FieldsReader</c>) is closed before the lazy
+ /// <see cref="Lucene.Net.Documents.Field" /> is loaded an exception may be
+ /// thrown. If you want the value of a lazy
+ /// <see cref="Lucene.Net.Documents.Field" /> to be available after closing you
+ /// must explicitly load it or fetch the Document again with a new loader.
+ /// <p/>
+ /// <b>NOTE:</b> for performance reasons, this method does not check if the
+ /// requested document is deleted, and therefore asking for a deleted document
+ /// may yield unspecified results. Usually this is not required, however you
+ /// can call <see cref="IsDeleted(int)" /> with the requested document ID to verify
+ /// the document is not deleted.
+ ///
+ /// </summary>
+ /// <param name="n">Get the document at the <c>n</c><sup>th</sup> position
+ /// </param>
+ /// <param name="fieldSelector">The <see cref="FieldSelector" /> to use to determine what
+ /// Fields should be loaded on the Document. May be null, in which case
+ /// all Fields will be loaded.
+ /// </param>
+ /// <returns> The stored fields of the
+ /// <see cref="Lucene.Net.Documents.Document" /> at the nth position
+ /// </returns>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
+ /// <seealso cref="IFieldable">
+ /// </seealso>
+ /// <seealso cref="Lucene.Net.Documents.FieldSelector">
+ /// </seealso>
+ /// <seealso cref="Lucene.Net.Documents.SetBasedFieldSelector">
+ /// </seealso>
+ /// <seealso cref="Lucene.Net.Documents.LoadFirstFieldSelector">
+ /// </seealso>
+ // TODO (1.5): When we convert to JDK 1.5 make this Set<String>
+ public abstract Document Document(int n, FieldSelector fieldSelector);
+
+ /// <summary>Returns true if document <i>n</i> has been deleted </summary>
+ public abstract bool IsDeleted(int n);
+
+ /// <summary>Returns true if any documents have been deleted </summary>
+ public abstract bool HasDeletions { get; }
+
+ /// <summary>Returns true if there are norms stored for this field. </summary>
+ public virtual bool HasNorms(System.String field)
+ {
+ // backward compatible implementation.
+ // SegmentReader has an efficient implementation.
+ EnsureOpen();
+ return Norms(field) != null;
+ }
+
+ /// <summary>
+ /// Returns the byte-encoded normalization factor for the named field of
+ /// every document. This is used by the search code to score documents.
+ /// </summary>
+ /// <seealso cref="Lucene.Net.Documents.AbstractField.Boost" />
+ public abstract byte[] Norms(System.String field);
+
+ /// <summary>
+ /// Reads the byte-encoded normalization factor for the named field of every
+ /// document. This is used by the search code to score documents.
+ /// </summary>
+ /// <seealso cref="Lucene.Net.Documents.AbstractField.Boost" />
+ public abstract void Norms(System.String field, byte[] bytes, int offset);
+
+ /// <summary>Expert: Resets the normalization factor for the named field of the named
+ /// document. The norm represents the product of the field's <see cref="IFieldable.Boost">boost</see>
+ /// and its <see cref="Similarity.LengthNorm(String,int)">length normalization</see>. Thus, to preserve the length normalization
+ /// values when resetting this, one should base the new value upon the old.
+ ///
+ /// <b>NOTE:</b> If this field does not store norms, then
+ /// this method call will silently do nothing.
+ /// </summary>
+ /// <seealso cref="Norms(String)" />
+ /// <seealso cref="Similarity.DecodeNorm(byte)" />
+ /// <exception cref="StaleReaderException">
+ /// If the index has changed since this reader was opened
+ /// </exception>
+ /// <exception cref="CorruptIndexException">
+ /// If the index is corrupt
+ /// </exception>
+ /// <exception cref="LockObtainFailedException">
+ /// If another writer has this index open (<c>write.lock</c> could not be obtained)
+ /// </exception>
+ /// <exception cref="System.IO.IOException">
+ /// If there is a low-level IO error
+ /// </exception>
+ public virtual void SetNorm(int doc, String field, byte value)
+ {
+ lock (this)
+ {
+ EnsureOpen();
+ AcquireWriteLock();
+ hasChanges = true;
+ DoSetNorm(doc, field, value);
+ }
+ }
+
+ /// <summary>Implements setNorm in subclass.</summary>
+ protected internal abstract void DoSetNorm(int doc, System.String field, byte value_Renamed);
+
+ /// <summary>
+ /// Expert: Resets the normalization factor for the named field of the named document.
+ /// </summary>
+ /// <seealso cref="Norms(String)" />
+ /// <seealso cref="Similarity.DecodeNorm(byte)" />
+ /// <exception cref="StaleReaderException">
+ /// If the index has changed since this reader was opened
+ /// </exception>
+ /// <exception cref="CorruptIndexException">
+ /// If the index is corrupt
+ /// </exception>
+ /// <exception cref="LockObtainFailedException">
+ /// If another writer has this index open (<c>write.lock</c> could not be obtained)
+ /// </exception>
+ /// <exception cref="System.IO.IOException">
+ /// If there is a low-level IO error
+ /// </exception>
+ public virtual void SetNorm(int doc, System.String field, float value)
+ {
+ EnsureOpen();
+ SetNorm(doc, field, Similarity.EncodeNorm(value));
+ }
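+
+ /* Illustrative usage sketch (not part of the original source): adjusting a field's
+ * norm for one document and reading it back. "reader", "docId" and the field name
+ * are placeholders; note the float value is quantized into a single encoded byte.
+ *
+ * reader.SetNorm(docId, "title", 2.0f);                 // encoded via Similarity.EncodeNorm
+ * byte[] norms = reader.Norms("title");                 // one byte per document
+ * float decoded = Similarity.DecodeNorm(norms[docId]);  // roughly 2.0f after quantization
+ */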
+
+ /// <summary>Returns an enumeration of all the terms in the index. The
+ /// enumeration is ordered by Term.compareTo(). Each term is greater
+ /// than all that precede it in the enumeration. Note that after
+ /// calling terms(), <see cref="TermEnum.Next()" /> must be called
+ /// on the resulting enumeration before calling other methods such as
+ /// <see cref="TermEnum.Term" />.
+ /// </summary>
+ /// <exception cref="System.IO.IOException">
+ /// If there is a low-level IO error
+ /// </exception>
+ public abstract TermEnum Terms();
+
+ /// <summary>Returns an enumeration of all terms starting at a given term. If
+ /// the given term does not exist, the enumeration is positioned at the
+ /// first term greater than the supplied term. The enumeration is
+ /// ordered by Term.compareTo(). Each term is greater than all that
+ /// precede it in the enumeration.
+ /// </summary>
+ /// <exception cref="System.IO.IOException">
+ /// If there is a low-level IO error
+ /// </exception>
+ public abstract TermEnum Terms(Term t);
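+
+ /* Illustrative usage sketch (not part of the original source): enumerating the terms
+ * of a single field, starting from that field's first term. The field name is a
+ * placeholder; unlike Terms(), the enumerator returned by Terms(Term) is already
+ * positioned on a term, so Term is read before the first Next().
+ *
+ * TermEnum te = reader.Terms(new Term("contents", ""));
+ * try
+ * {
+ *     do
+ *     {
+ *         Term t = te.Term;
+ *         if (t == null || t.Field != "contents")
+ *             break;
+ *         // ... use t.Text here ...
+ *     } while (te.Next());
+ * }
+ * finally
+ * {
+ *     te.Close();
+ * }
+ */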
+
+ /// <summary>Returns the number of documents containing the term <c>t</c>.</summary>
+ /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
+ public abstract int DocFreq(Term t);
+
+ /// <summary>Returns an enumeration of all the documents which contain
+ /// <c>term</c>. For each document, the document number and the frequency of
+ /// the term in that document are provided, for use in
+ /// search scoring. If <c>term</c> is null, then all non-deleted
+ /// docs are returned with freq=1.
+ /// Thus, this method implements the mapping:
+ /// <p/><list>
+ /// Term &#160;&#160; =&gt; &#160;&#160; &lt;docNum, freq&gt;<sup>*</sup>
+ /// </list>
+ /// <p/>The enumeration is ordered by document number. Each document number
+ /// is greater than all that precede it in the enumeration.
+ /// </summary>
+ /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
+ public virtual TermDocs TermDocs(Term term)
+ {
+ EnsureOpen();
+ TermDocs termDocs = TermDocs();
+ termDocs.Seek(term);
+ return termDocs;
+ }
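+
+ /* Illustrative usage sketch (not part of the original source): walking the postings
+ * of one term. "reader" is an open IndexReader; the field and term text are
+ * placeholders.
+ *
+ * TermDocs td = reader.TermDocs(new Term("contents", "lucene"));
+ * try
+ * {
+ *     while (td.Next())
+ *     {
+ *         int docId = td.Doc;   // document number
+ *         int freq = td.Freq;   // frequency of the term in that document
+ *     }
+ * }
+ * finally
+ * {
+ *     td.Close();
+ * }
+ */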
+
+ /// <summary>Returns an unpositioned <see cref="Lucene.Net.Index.TermDocs" /> enumerator.</summary>
+ /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
+ public abstract TermDocs TermDocs();
+
+ /// <summary>Returns an enumeration of all the documents which contain
+ /// <c>term</c>. For each document, in addition to the document number
+ /// and frequency of the term in that document, a list of all of the ordinal
+ /// positions of the term in the document is available. Thus, this method
+ /// implements the mapping:
+ ///
+ /// <p/><list>
+ /// Term &#160;&#160; =&gt; &#160;&#160; &lt;docNum, freq,
+ /// &lt;pos<sub>1</sub>, pos<sub>2</sub>, ...
+ /// pos<sub>freq-1</sub>&gt;
+ /// &gt;<sup>*</sup>
+ /// </list>
+ /// <p/> This positional information facilitates phrase and proximity searching.
+ /// <p/>The enumeration is ordered by document number. Each document number is
+ /// greater than all that precede it in the enumeration.
+ /// </summary>
+ /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
+ public virtual TermPositions TermPositions(Term term)
+ {
+ EnsureOpen();
+ TermPositions termPositions = TermPositions();
+ termPositions.Seek(term);
+ return termPositions;
+ }
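+
+ /* Illustrative usage sketch (not part of the original source): reading the ordinal
+ * positions of a term within each matching document, e.g. as the basis of phrase or
+ * proximity matching. Field and term text are placeholders.
+ *
+ * TermPositions tp = reader.TermPositions(new Term("contents", "lucene"));
+ * try
+ * {
+ *     while (tp.Next())
+ *     {
+ *         for (int i = 0; i < tp.Freq; i++)
+ *         {
+ *             int position = tp.NextPosition();
+ *         }
+ *     }
+ * }
+ * finally
+ * {
+ *     tp.Close();
+ * }
+ */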
+
+ /// <summary>Returns an unpositioned <see cref="Lucene.Net.Index.TermPositions" /> enumerator.</summary>
+ /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
+ public abstract TermPositions TermPositions();
+
+
+
+ /// <summary>
+ /// Deletes the document numbered <c>docNum</c>. Once a document is
+ /// deleted it will not appear in TermDocs or TermPositions enumerations.
+ /// Attempts to read its fields with the <see cref="Document(int)" />
+ /// method will result in an error. The presence of this document may still be
+ /// reflected in the <see cref="DocFreq" /> statistic, though
+ /// this will be corrected eventually as the index is further modified.
+ /// </summary>
+ /// <exception cref="StaleReaderException">
+ /// If the index has changed since this reader was opened
+ /// </exception>
+ /// <exception cref="CorruptIndexException">If the index is corrupt</exception>
+ /// <exception cref="LockObtainFailedException">
+ /// If another writer has this index open (<c>write.lock</c> could not be obtained)
+ /// </exception>
+ /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
+ public virtual void DeleteDocument(int docNum)
+ {
+ lock (this)
+ {
+ EnsureOpen();
+ AcquireWriteLock();
+ hasChanges = true;
+ DoDelete(docNum);
+ }
+ }
+
+
+ /// <summary>Implements deletion of the document numbered <c>docNum</c>.
+ /// Applications should call <see cref="DeleteDocument(int)" /> or <see cref="DeleteDocuments(Term)" />.
+ /// </summary>
+ protected internal abstract void DoDelete(int docNum);
+
+
+ /// <summary>
+ /// Deletes all documents that have a given <c>term</c> indexed.
+ /// This is useful if one uses a document field to hold a unique ID string for
+ /// the document. Then to delete such a document, one merely constructs a
+ /// term with the appropriate field and the unique ID string as its text and
+ /// passes it to this method.
+ /// See <see cref="DeleteDocument(int)" /> for information about when this deletion will
+ /// become effective.
+ /// </summary>
+ /// <returns>The number of documents deleted</returns>
+ /// <exception cref="StaleReaderException">
+ /// If the index has changed since this reader was opened
+ /// </exception>
+ /// <exception cref="CorruptIndexException">If the index is corrupt</exception>
+ /// <exception cref="LockObtainFailedException">
+ /// If another writer has this index open (<c>write.lock</c> could not be obtained)
+ /// </exception>
+ /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
+ public virtual int DeleteDocuments(Term term)
+ {
+ EnsureOpen();
+ TermDocs docs = TermDocs(term);
+ if (docs == null)
+ return 0;
+ int n = 0;
+ try
+ {
+ while (docs.Next())
+ {
+ DeleteDocument(docs.Doc);
+ n++;
+ }
+ }
+ finally
+ {
+ docs.Close();
+ }
+ return n;
+ }
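+
+ /* Illustrative usage sketch (not part of the original source): deleting documents by
+ * a unique-ID term through a writable reader and committing the change. The field
+ * name, ID value and "directory" are placeholders; the reader must have been opened
+ * with readOnly=false.
+ *
+ * IndexReader reader = IndexReader.Open(directory, false);
+ * try
+ * {
+ *     int deleted = reader.DeleteDocuments(new Term("id", "doc-42"));
+ *     reader.Commit();
+ * }
+ * finally
+ * {
+ *     reader.Dispose();
+ * }
+ */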
+
+ /// <summary>Undeletes all documents currently marked as deleted in this index.
+ ///
+ /// </summary>
+ /// <exception cref="StaleReaderException">
+ /// If the index has changed since this reader was opened
+ /// </exception>
+ /// <exception cref="CorruptIndexException">If the index is corrupt</exception>
+ /// <exception cref="LockObtainFailedException">
+ /// If another writer has this index open (<c>write.lock</c> could not be obtained)
+ /// </exception>
+ /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
+ public virtual void UndeleteAll()
+ {
+ lock (this)
+ {
+ EnsureOpen();
+ AcquireWriteLock();
+ hasChanges = true;
+ DoUndeleteAll();
+ }
+ }
+
+ /// <summary>Implements actual undeleteAll() in subclass. </summary>
+ protected internal abstract void DoUndeleteAll();
+
+ /// <summary>
+ /// Does nothing by default. Subclasses that require a write lock for
+ /// index modifications must implement this method.
+ /// </summary>
+ protected internal virtual void AcquireWriteLock()
+ {
+ lock (this)
+ {
+ /* NOOP */
+ }
+ }
+
+ /// <summary>Commits any pending changes to the index (see <see cref="Commit()" />).</summary>
+ /// <exception cref="System.IO.IOException" />
+ public void Flush()
+ {
+ lock (this)
+ {
+ EnsureOpen();
+ Commit();
+ }
+ }
+
+ /// <param name="commitUserData">Opaque Map (String -> String)
+ /// that's recorded into the segments file in the index,
+ /// and retrievable by <see cref="IndexReader.GetCommitUserData" />
+ /// </param>
+ /// <exception cref="System.IO.IOException" />
+ public void Flush(IDictionary<string, string> commitUserData)
+ {
+ lock (this)
+ {
+ EnsureOpen();
+ Commit(commitUserData);
+ }
+ }
+
+ /// <summary> Commit changes resulting from delete, undeleteAll, or
+ /// setNorm operations
+ ///
+ /// If an exception is hit, then either no changes or all
+ /// changes will have been committed to the index
+ /// (transactional semantics).
+ /// </summary>
+ /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
+ public /*protected internal*/ void Commit()
+ {
+ lock (this)
+ {
+ Commit(null);
+ }
+ }
+
+ /// <summary> Commit changes resulting from delete, undeleteAll, or
+ /// setNorm operations
+ ///
+ /// If an exception is hit, then either no changes or all
+ /// changes will have been committed to the index
+ /// (transactional semantics).
+ /// </summary>
+ /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
+ public void Commit(IDictionary<string, string> commitUserData)
+ {
+ lock (this)
+ {
+ if (hasChanges)
+ {
+ DoCommit(commitUserData);
+ }
+ hasChanges = false;
+ }
+ }
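+
+ /* Illustrative usage sketch (not part of the original source): attaching opaque user
+ * data to a commit so it can later be read back via IndexReader.GetCommitUserData.
+ * The key/value pair is a placeholder.
+ *
+ * var userData = new System.Collections.Generic.Dictionary<string, string>
+ * {
+ *     { "source", "nightly-rebuild" }
+ * };
+ * reader.Flush(userData); // or reader.Commit(userData) after deletes/norm changes
+ */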
+
+ /// <summary>Implements commit.</summary>
+ protected internal abstract void DoCommit(IDictionary<string, string> commitUserData);
+
+ [Obsolete("Use Dispose() instead")]
+ public void Close()
+ {
+ Dispose();
+ }
+
+ /// <summary> Closes files associated with this index.
+ /// Also saves any new deletions to disk.
+ /// No other methods should be called after this has been called.
+ /// </summary>
+ /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
+ public void Dispose()
+ {
+ Dispose(true);
+ }
+
+ protected virtual void Dispose(bool disposing)
+ {
+ if (disposing)
+ {
+ lock (this)
+ {
+ if (!closed)
+ {
+ DecRef();
+ closed = true;
+ }
+ }
+ }
+ }
+
+ /// <summary>Implements close. </summary>
+ protected internal abstract void DoClose();
+
+
+ /// <summary> Get a list of unique field names that exist in this index and have the specified
+ /// field option information.
+ /// </summary>
+ /// <param name="fldOption">specifies which field option should be available for the returned fields
+ /// </param>
+ /// <returns> Collection of Strings indicating the names of the fields.
+ /// </returns>
+ /// <seealso cref="IndexReader.FieldOption">
+ /// </seealso>
+ public abstract ICollection<string> GetFieldNames(FieldOption fldOption);
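+
+ /* Illustrative usage sketch (not part of the original source): listing every field
+ * name that is indexed in this reader; FieldOption.INDEXED is assumed to be one of
+ * the standard <see cref="IndexReader.FieldOption" /> values.
+ *
+ * foreach (string name in reader.GetFieldNames(IndexReader.FieldOption.INDEXED))
+ * {
+ *     System.Console.Out.WriteLine(name);
+ * }
+ */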
+
+ /// <summary> Expert: return the IndexCommit that this reader has
+ /// opened. This method is only implemented by those
+ /// readers that correspond to a Directory with its own
+ /// segments_N file.
+ ///
+ /// <p/><b>WARNING</b>: this API is new and experimental and
+ /// may suddenly change.<p/>
+ /// </summary>
+ public virtual IndexCommit IndexCommit
+ {
+ get { throw new NotSupportedException("This reader does not support this method."); }
+ }
+
+ /// <summary> Prints the filename and size of each file within a given compound file.
+ /// Add the -extract flag to extract files to the current working directory.
+ /// In order to make the extracted version of the index work, you have to copy
+ /// the segments file from the compound index into the directory where the extracted files are stored.
+ /// </summary>
+ /// <param name="args">Usage: Lucene.Net.Index.IndexReader [-extract] &lt;cfsfile&gt;
+ /// </param>
+ [STAThread]
+ public static void Main(String[] args)
+ {
+ System.String filename = null;
+ bool extract = false;
+
+ foreach (string t in args)
+ {
+ if (t.Equals("-extract"))
+ {
+ extract = true;
+ }
+ else if (filename == null)
+ {
+ filename = t;
+ }
+ }
+
+ if (filename == null)
+ {
+ System.Console.Out.WriteLine("Usage: Lucene.Net.Index.IndexReader [-extract] <cfsfile>");
+ return ;
+ }
+
+ Directory dir = null;
+ CompoundFileReader cfr = null;
+
+ try
+ {
+ var file = new System.IO.FileInfo(filename);
+ System.String dirname = file.DirectoryName;
+ filename = file.Name;
+ dir = FSDirectory.Open(new System.IO.DirectoryInfo(dirname));
+ cfr = new CompoundFileReader(dir, filename);
+
+ System.String[] files = cfr.ListAll();
+ System.Array.Sort(files); // sort the array of filenames so that the output is more readable
+
+ foreach (string t in files)
+ {
+ long len = cfr.FileLength(t);
+
+ if (extract)
+ {
+ System.Console.Out.WriteLine("extract " + t + " with " + len + " bytes to local directory...");
+ IndexInput ii = cfr.OpenInput(t);
+
+ var f = new System.IO.FileStream(t, System.IO.FileMode.Create);
+
+ // read and write with a small buffer, which is more effective than reading byte by byte
+ var buffer = new byte[1024];
+ int chunk = buffer.Length;
+ while (len > 0)
+ {
+ var bufLen = (int) System.Math.Min(chunk, len);
+ ii.ReadBytes(buffer, 0, bufLen);
+ f.Write(buffer, 0, bufLen);
+ len -= bufLen;
+ }
+
+ f.Close();
+ ii.Close();
+ }
+ else
+ System.Console.Out.WriteLine(t + ": " + len + " bytes");
+ }
+ }
+ catch (System.IO.IOException ioe)
+ {
+ System.Console.Error.WriteLine(ioe.StackTrace);
+ }
+ finally
+ {
+ try
+ {
+ if (dir != null)
+ dir.Close();
+ if (cfr != null)
+ cfr.Close();
+ }
+ catch (System.IO.IOException ioe)
+ {
+ System.Console.Error.WriteLine(ioe.StackTrace);
+ }
+ }
+ }
+
+ /// <summary>Returns all commit points that exist in the Directory.
+ /// Normally, because the default is <see cref="KeepOnlyLastCommitDeletionPolicy" />
+ ///, there would be only
+ /// one commit point. But if you're using a custom <see cref="IndexDeletionPolicy" />
+ /// then there could be many commits.
+ /// Once you have a given commit, you can open a reader on
+ /// it by calling <see cref="IndexReader.Open(IndexCommit,bool)" />
+ /// There must be at least one commit in
+ /// the Directory, else this method throws <see cref="System.IO.IOException" />.
+ /// Note that if a commit is in
+ /// progress while this method is running, that commit
+ /// may or may not be returned array.
+ /// </summary>
+ public static System.Collections.Generic.ICollection<IndexCommit> ListCommits(Directory dir)
+ {
+ return DirectoryReader.ListCommits(dir);
+ }
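+
+ /* Illustrative usage sketch (not part of the original source): enumerating the commit
+ * points of a Directory and opening a read-only, point-in-time reader on each one.
+ * "directory" is a placeholder.
+ *
+ * foreach (IndexCommit commit in IndexReader.ListCommits(directory))
+ * {
+ *     IndexReader snapshot = IndexReader.Open(commit, true);
+ *     try
+ *     {
+ *         // ... search the point-in-time snapshot ...
+ *     }
+ *     finally
+ *     {
+ *         snapshot.Dispose();
+ *     }
+ * }
+ */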
+
+ /// <summary>Expert: returns the sequential sub readers that this
+ /// reader is logically composed of. For example,
+ /// IndexSearcher uses this API to drive searching by one
+ /// sub reader at a time. If this reader is not composed
+ /// of sequential child readers, it should return null.
+ /// If this method returns an empty array, that means this
+ /// reader is a null reader (for example a MultiReader
+ /// that has no sub readers).
+ /// <p/>
+ /// NOTE: You should not try using sub-readers returned by
+ /// this method to make any changes (setNorm, deleteDocument,
+ /// etc.). While this might succeed for one composite reader
+ /// (like MultiReader), it will most likely lead to index
+ /// corruption for other readers (like DirectoryReader obtained
+ /// through <see cref="IndexReader.Open(Lucene.Net.Store.Directory,bool)" />. Use the parent reader directly.
+ /// </summary>
+ public virtual IndexReader[] GetSequentialSubReaders()
+ {
+ return null;
+ }
+
+ /// <summary>Expert</summary>
+ public virtual object FieldCacheKey
+ {
+ get { return this; }
+ }
+
+ /* Expert. Warning: this returns null if the reader has
+ * no deletions
+ */
+
+ public virtual object DeletesCacheKey
+ {
+ get { return this; }
+ }
+
+ /// <summary>Returns the number of unique terms (across all fields)
+ /// in this reader.
+ ///
+ /// This method returns long, even though internally
+ /// Lucene cannot handle more than 2^31 unique terms, for
+ /// a possible future when this limitation is removed.
+ ///
+ /// </summary>
+ /// <exception cref="System.NotSupportedException">
+ /// If this count cannot be easily determined (eg Multi*Readers).
+ /// Instead, you should call <see cref="GetSequentialSubReaders" />
+ /// and ask each sub reader for its unique term count.
+ /// </exception>
+ public virtual long UniqueTermCount
+ {
+ get { throw new System.NotSupportedException("this reader does not implement getUniqueTermCount()"); }
+ }
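+
+ /* Illustrative usage sketch (not part of the original source): summing the unique
+ * term counts of the sequential sub readers, as suggested above for composite
+ * readers that do not support UniqueTermCount directly.
+ *
+ * long total = 0;
+ * IndexReader[] subs = reader.GetSequentialSubReaders();
+ * if (subs != null)
+ * {
+ *     foreach (IndexReader sub in subs)
+ *         total += sub.UniqueTermCount;
+ * }
+ */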
+
+ /// <summary>
+ /// For IndexReader implementations that use
+ /// TermInfosReader to read terms, this returns the
+ /// current indexDivisor as specified when the reader was
+ /// opened.
+ /// </summary>
+ public virtual int TermInfosIndexDivisor
+ {
+ get { throw new NotSupportedException("This reader does not support this method."); }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/IndexWriter.cs b/src/core/Index/IndexWriter.cs
new file mode 100644
index 0000000..dda1738
--- /dev/null
+++ b/src/core/Index/IndexWriter.cs
@@ -0,0 +1,5928 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+using Lucene.Net.Support;
+using Analyzer = Lucene.Net.Analysis.Analyzer;
+using Document = Lucene.Net.Documents.Document;
+using IndexingChain = Lucene.Net.Index.DocumentsWriter.IndexingChain;
+using AlreadyClosedException = Lucene.Net.Store.AlreadyClosedException;
+using BufferedIndexInput = Lucene.Net.Store.BufferedIndexInput;
+using Directory = Lucene.Net.Store.Directory;
+using Lock = Lucene.Net.Store.Lock;
+using LockObtainFailedException = Lucene.Net.Store.LockObtainFailedException;
+using Constants = Lucene.Net.Util.Constants;
+using Query = Lucene.Net.Search.Query;
+using Similarity = Lucene.Net.Search.Similarity;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary>An <c>IndexWriter</c> creates and maintains an index.
+ /// <p/>The <c>create</c> argument to the
+ /// <see cref="IndexWriter(Directory, Analyzer, bool, MaxFieldLength)">constructor</see> determines
+ /// whether a new index is created, or whether an existing index is
+ /// opened. Note that you can open an index with <c>create=true</c>
+ /// even while readers are using the index. The old readers will
+ /// continue to search the "point in time" snapshot they had opened,
+ /// and won't see the newly created index until they re-open. There are
+ /// also <see cref="IndexWriter(Directory, Analyzer, MaxFieldLength)">constructors</see>
+ /// with no <c>create</c> argument which will create a new index
+ /// if there is not already an index at the provided path and otherwise
+ /// open the existing index.<p/>
+ /// <p/>In either case, documents are added with <see cref="AddDocument(Document)" />
+ /// and removed with <see cref="DeleteDocuments(Term)" /> or
+ /// <see cref="DeleteDocuments(Query)" />. A document can be updated with
+ /// <see cref="UpdateDocument(Term, Document)" /> (which just deletes
+ /// and then adds the entire document). When finished adding, deleting
+ /// and updating documents, <see cref="Close()" /> should be called.<p/>
+ /// <a name="flush"></a>
+ /// <p/>These changes are buffered in memory and periodically
+ /// flushed to the <see cref="Directory" /> (during the above method
+ /// calls). A flush is triggered when there are enough
+ /// buffered deletes (see <see cref="SetMaxBufferedDeleteTerms" />)
+ /// or enough added documents since the last flush, whichever
+ /// is sooner. For the added documents, flushing is triggered
+ /// either by RAM usage of the documents (see
+ /// <see cref="SetRAMBufferSizeMB" />) or the number of added documents.
+ /// The default is to flush when RAM usage hits 16 MB. For
+ /// best indexing speed you should flush by RAM usage with a
+ /// large RAM buffer. Note that flushing just moves the
+ /// internal buffered state in IndexWriter into the index, but
+ /// these changes are not visible to IndexReader until either
+ /// <see cref="Commit()" /> or <see cref="Close()" /> is called. A flush may
+ /// also trigger one or more segment merges which by default
+ /// run with a background thread so as not to block the
+ /// addDocument calls (see <a href="#mergePolicy">below</a>
+ /// for changing the <see cref="MergeScheduler" />).
+ /// <p/>
+ /// If an index will not have more documents added for a while and optimal search
+ /// performance is desired, then either the full <see cref="Optimize()" />
+ /// method or partial <see cref="Optimize(int)" /> method should be
+ /// called before the index is closed.
+ /// <p/>
+ /// Opening an <c>IndexWriter</c> creates a lock file for the directory in use. Trying to open
+ /// another <c>IndexWriter</c> on the same directory will lead to a
+ /// <see cref="LockObtainFailedException" />. The <see cref="LockObtainFailedException" />
+ /// is also thrown if an IndexReader on the same directory is used to delete documents
+ /// from the index.<p/>
+ /// <a name="deletionPolicy"></a>
+ /// <p/>Expert: <c>IndexWriter</c> allows an optional
+ /// <see cref="IndexDeletionPolicy" /> implementation to be
+ /// specified. You can use this to control when prior commits
+ /// are deleted from the index. The default policy is <see cref="KeepOnlyLastCommitDeletionPolicy" />
+ /// which removes all prior
+ /// commits as soon as a new commit is done (this matches
+ /// behavior before 2.2). Creating your own policy can allow
+ /// you to explicitly keep previous "point in time" commits
+ /// alive in the index for some time, to allow readers to
+ /// refresh to the new commit without having the old commit
+ /// deleted out from under them. This is necessary on
+ /// filesystems like NFS that do not support "delete on last
+ /// close" semantics, which Lucene's "point in time" search
+ /// normally relies on. <p/>
+ /// <a name="mergePolicy"></a> <p/>Expert:
+ /// <c>IndexWriter</c> allows you to separately change
+ /// the <see cref="MergePolicy" /> and the <see cref="MergeScheduler" />.
+ /// The <see cref="MergePolicy" /> is invoked whenever there are
+ /// changes to the segments in the index. Its role is to
+ /// select which merges to do, if any, and return a <see cref="Index.MergePolicy.MergeSpecification" />
+ /// describing the merges. It
+ /// also selects merges to do for optimize(). (The default is
+ /// <see cref="LogByteSizeMergePolicy" />. Then, the <see cref="MergeScheduler" />
+ /// is invoked with the requested merges and
+ /// it decides when and how to run the merges. The default is
+ /// <see cref="ConcurrentMergeScheduler" />. <p/>
+ /// <a name="OOME"></a><p/><b>NOTE</b>: if you hit an
+ /// OutOfMemoryError then IndexWriter will quietly record this
+ /// fact and block all future segment commits. This is a
+ /// defensive measure in case any internal state (buffered
+ /// documents and deletions) were corrupted. Any subsequent
+ /// calls to <see cref="Commit()" /> will throw an
+ /// IllegalStateException. The only course of action is to
+ /// call <see cref="Close()" />, which internally will call <see cref="Rollback()" />
+ ///, to undo any changes to the index since the
+ /// last commit. You can also just call <see cref="Rollback()" />
+ /// directly.<p/>
+ /// <a name="thread-safety"></a><p/><b>NOTE</b>:
+ /// <see cref="IndexWriter" /> instances are completely thread
+ /// safe, meaning multiple threads can call any of its
+ /// methods, concurrently. If your application requires
+ /// external synchronization, you should <b>not</b>
+ /// synchronize on the <c>IndexWriter</c> instance as
+ /// this may cause deadlock; use your own (non-Lucene) objects
+ /// instead. <p/>
+ /// <b>NOTE:</b> if you call
+ /// <c>Thread.Interrupt()</c> on a thread that's within
+ /// IndexWriter, IndexWriter will try to catch this (eg, if
+ /// it's in a Wait() or Thread.Sleep()), and will then throw
+ /// the unchecked exception <see cref="System.Threading.ThreadInterruptedException"/>
+ /// and <b>clear</b> the interrupt status on the thread<p/>
+ /// </summary>
+
+ /*
+ * Clarification: Check Points (and commits)
+ * IndexWriter writes new index files to the directory without writing a new segments_N
+ * file which references these new files. It also means that the state of
+ * the in memory SegmentInfos object is different than the most recent
+ * segments_N file written to the directory.
+ *
+ * Each time the SegmentInfos is changed, and matches the (possibly
+ * modified) directory files, we have a new "check point".
+ * If the modified/new SegmentInfos is written to disk - as a new
+ * (generation of) segments_N file - this check point is also an
+ * IndexCommit.
+ *
+ * A new checkpoint always replaces the previous checkpoint and
+ * becomes the new "front" of the index. This allows the IndexFileDeleter
+ * to delete files that are referenced only by stale checkpoints.
+ * (files that were created since the last commit, but are no longer
+ * referenced by the "front" of the index). For this, IndexFileDeleter
+ * keeps track of the last non commit checkpoint.
+ */
+ public class IndexWriter : System.IDisposable
+ {
+ private void InitBlock()
+ {
+ similarity = Search.Similarity.Default;
+ mergePolicy = new LogByteSizeMergePolicy(this);
+ readerPool = new ReaderPool(this);
+ }
+
+ /// <summary> Default value for the write lock timeout (1,000).</summary>
+ /// <seealso cref="DefaultWriteLockTimeout">
+ /// </seealso>
+ public static long WRITE_LOCK_TIMEOUT = 1000;
+
+ private long writeLockTimeout = WRITE_LOCK_TIMEOUT;
+
+ /// <summary> Name of the write lock in the index.</summary>
+ public const System.String WRITE_LOCK_NAME = "write.lock";
+
+ /// <summary> Value to denote a flush trigger is disabled</summary>
+ public const int DISABLE_AUTO_FLUSH = - 1;
+
+ /// <summary> Disabled by default (because IndexWriter flushes by RAM usage
+ /// by default). Change using <see cref="SetMaxBufferedDocs(int)" />.
+ /// </summary>
+ public static readonly int DEFAULT_MAX_BUFFERED_DOCS = DISABLE_AUTO_FLUSH;
+
+ /// <summary> Default value is 16 MB (which means flush when buffered
+ /// docs consume 16 MB RAM). Change using <see cref="SetRAMBufferSizeMB" />.
+ /// </summary>
+ public const double DEFAULT_RAM_BUFFER_SIZE_MB = 16.0;
+
+ /// <summary> Disabled by default (because IndexWriter flushes by RAM usage
+ /// by default). Change using <see cref="SetMaxBufferedDeleteTerms(int)" />.
+ /// </summary>
+ public static readonly int DEFAULT_MAX_BUFFERED_DELETE_TERMS = DISABLE_AUTO_FLUSH;
+
+ /// <summary> Default value is 10,000. Change using <see cref="SetMaxFieldLength(int)" />.</summary>
+ public const int DEFAULT_MAX_FIELD_LENGTH = 10000;
+
+ /// <summary> Default value is 128. Change using <see cref="TermIndexInterval" />.</summary>
+ public const int DEFAULT_TERM_INDEX_INTERVAL = 128;
+
+ /// <summary> Absolute hard maximum length for a term. If a term
+ /// arrives from the analyzer longer than this length, it
+ /// is skipped and a message is printed to infoStream, if
+ /// set (see <see cref="SetInfoStream" />).
+ /// </summary>
+ public static readonly int MAX_TERM_LENGTH;
+
+ // The normal read buffer size defaults to 1024, but
+ // increasing this during merging seems to yield
+ // performance gains. However we don't want to increase
+ // it too much because there are quite a few
+ // BufferedIndexInputs created during merging. See
+ // LUCENE-888 for details.
+ private const int MERGE_READ_BUFFER_SIZE = 4096;
+
+ // Used for printing messages
+ private static System.Object MESSAGE_ID_LOCK = new System.Object();
+ private static int MESSAGE_ID = 0;
+ private int messageID = - 1;
+ private volatile bool hitOOM;
+
+ private Directory directory; // where this index resides
+ private Analyzer analyzer; // how to analyze text
+
+ private Similarity similarity; // how to normalize
+
+ private volatile uint changeCount; // increments every time a change is completed
+ private long lastCommitChangeCount; // last changeCount that was committed
+
+ private SegmentInfos rollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails
+ private HashMap<SegmentInfo, int?> rollbackSegments;
+
+ internal volatile SegmentInfos pendingCommit; // set when a commit is pending (after prepareCommit() & before commit())
+ internal volatile uint pendingCommitChangeCount;
+
+ private SegmentInfos localRollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails
+ private int localFlushedDocCount; // saved docWriter.getFlushedDocCount during local transaction
+
+ private SegmentInfos segmentInfos = new SegmentInfos(); // the segments
+ private int optimizeMaxNumSegments;
+
+ private DocumentsWriter docWriter;
+ private IndexFileDeleter deleter;
+
+ private ISet<SegmentInfo> segmentsToOptimize = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet<SegmentInfo>(); // used by optimize to note those needing optimization
+
+ private Lock writeLock;
+
+ private int termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL;
+
+ private bool closed;
+ private bool closing;
+
+ // Holds all SegmentInfo instances currently involved in
+ // merges
+ private HashSet<SegmentInfo> mergingSegments = new HashSet<SegmentInfo>();
+
+ private MergePolicy mergePolicy;
+ private MergeScheduler mergeScheduler = new ConcurrentMergeScheduler();
+ private LinkedList<MergePolicy.OneMerge> pendingMerges = new LinkedList<MergePolicy.OneMerge>();
+ private ISet<MergePolicy.OneMerge> runningMerges = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet<MergePolicy.OneMerge>();
+ private IList<MergePolicy.OneMerge> mergeExceptions = new List<MergePolicy.OneMerge>();
+ private long mergeGen;
+ private bool stopMerges;
+
+ private int flushCount;
+ private int flushDeletesCount;
+
+ // Used to only allow one addIndexes to proceed at once
+ // TODO: use ReadWriteLock once we are on 5.0
+ private int readCount; // count of how many threads are holding read lock
+ private ThreadClass writeThread; // non-null if any thread holds write lock
+ internal ReaderPool readerPool;
+ private int upgradeCount;
+
+ private int readerTermsIndexDivisor = IndexReader.DEFAULT_TERMS_INDEX_DIVISOR;
+
+ // This is a "write once" variable (like the organic dye
+ // on a DVD-R that may or may not be heated by a laser and
+ // then cooled to permanently record the event): it's
+ // false, until getReader() is called for the first time,
+ // at which point it's switched to true and never changes
+ // back to false. Once this is true, we hold open and
+ // reuse SegmentReader instances internally for applying
+ // deletes, doing merges, and reopening near real-time
+ // readers.
+ private volatile bool poolReaders;
+
+ /// <summary> Expert: returns a readonly reader, covering all committed as well as
+ /// un-committed changes to the index. This provides "near real-time"
+ /// searching, in that changes made during an IndexWriter session can be
+ /// quickly made available for searching without closing the writer nor
+ /// calling <see cref="Commit()" />.
+ ///
+ /// <p/>
+ /// Note that this is functionally equivalent to calling <see cref="Commit()" /> and then
+ /// using <see cref="IndexReader.Open(Lucene.Net.Store.Directory, bool)" /> to open a new reader. But the turnaround
+ /// time of this method should be faster since it avoids the potentially
+ /// costly <see cref="Commit()" />.
+ /// <p/>
+ ///
+ /// You must close the <see cref="IndexReader" /> returned by this method once you are done using it.
+ ///
+ /// <p/>
+ /// It's <i>near</i> real-time because there is no hard
+ /// guarantee on how quickly you can get a new reader after
+ /// making changes with IndexWriter. You'll have to
+ /// experiment in your situation to determine if it's
+ /// fast enough. As this is a new and experimental
+ /// feature, please report back on your findings so we can
+ /// learn, improve and iterate.<p/>
+ ///
+ /// <p/>The resulting reader supports <see cref="IndexReader.Reopen()" />
+ ///, but that call will simply forward
+ /// back to this method (though this may change in the
+ /// future).<p/>
+ ///
+ /// <p/>The very first time this method is called, this
+ /// writer instance will make every effort to pool the
+ /// readers that it opens for doing merges, applying
+ /// deletes, etc. This means additional resources (RAM,
+ /// file descriptors, CPU time) will be consumed.<p/>
+ ///
+ /// <p/>For lower latency on reopening a reader, you should use <see cref="MergedSegmentWarmer" /> to
+ /// pre-warm a newly merged segment before it's committed
+ /// to the index. This is important for minimizing index-to-search
+ /// delay after a large merge.
+ ///
+ /// <p/>If an addIndexes* call is running in another thread,
+ /// then this reader will only search those segments from
+ /// the foreign index that have been successfully copied
+ /// over so far.<p/>
+ ///
+ /// <p/><b>NOTE</b>: Once the writer is closed, any
+ /// outstanding readers may continue to be used. However,
+ /// if you attempt to reopen any of those readers, you'll
+ /// hit an <see cref="AlreadyClosedException" />.<p/>
+ ///
+ /// <p/><b>NOTE:</b> This API is experimental and might
+ /// change in incompatible ways in the next release.<p/>
+ ///
+ /// </summary>
+ /// <returns> IndexReader that covers entire index plus all
+ /// changes made so far by this IndexWriter instance
+ ///
+ /// </returns>
+ /// <throws> IOException </throws>
+ public virtual IndexReader GetReader()
+ {
+ return GetReader(readerTermsIndexDivisor);
+ }
+
+ /// <summary>Expert: like <see cref="GetReader()" />, except you can
+ /// specify which termInfosIndexDivisor should be used for
+ /// any newly opened readers.
+ /// </summary>
+ /// <param name="termInfosIndexDivisor">Subsambles which indexed
+ /// terms are loaded into RAM. This has the same effect as <see cref="IndexWriter.TermIndexInterval" />
+ /// except that setting
+ /// must be done at indexing time while this setting can be
+ /// set per reader. When set to N, then one in every
+ /// N*termIndexInterval terms in the index is loaded into
+ /// memory. By setting this to a value > 1 you can reduce
+ /// memory usage, at the expense of higher latency when
+ /// loading a TermInfo. The default value is 1. Set this
+ /// to -1 to skip loading the terms index entirely.
+ /// </param>
+ public virtual IndexReader GetReader(int termInfosIndexDivisor)
+ {
+ EnsureOpen();
+
+ if (infoStream != null)
+ {
+ Message("flush at getReader");
+ }
+
+ // Do this up front before flushing so that the readers
+ // obtained during this flush are pooled, the first time
+ // this method is called:
+ poolReaders = true;
+
+ // Prevent segmentInfos from changing while opening the
+ // reader; in theory we could do similar retry logic,
+ // just like we do when loading segments_N
+ IndexReader r;
+ lock (this)
+ {
+ Flush(false, true, true);
+ r = new ReadOnlyDirectoryReader(this, segmentInfos, termInfosIndexDivisor);
+ }
+ MaybeMerge();
+ return r;
+ }
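+
+ /* Illustrative usage sketch (not part of the original source): near real-time search.
+ * A document added by the writer becomes visible to a reader obtained from GetReader()
+ * without a full Commit(). "writer" and "doc" are placeholders.
+ *
+ * writer.AddDocument(doc);
+ * IndexReader nrtReader = writer.GetReader(); // already sees the uncommitted document
+ * try
+ * {
+ *     var searcher = new Lucene.Net.Search.IndexSearcher(nrtReader);
+ *     // ... run queries against the searcher ...
+ * }
+ * finally
+ * {
+ *     nrtReader.Dispose();
+ * }
+ */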
+
+ /// <summary>Holds shared SegmentReader instances. IndexWriter uses
+ /// SegmentReaders for 1) applying deletes, 2) doing
+ /// merges, 3) handing out a real-time reader. This pool
+ /// reuses instances of the SegmentReaders in all these
+ /// places if it is in "near real-time mode" (getReader()
+ /// has been called on this instance).
+ /// </summary>
+
+ internal class ReaderPool : IDisposable
+ {
+ public ReaderPool(IndexWriter enclosingInstance)
+ {
+ InitBlock(enclosingInstance);
+ }
+ private void InitBlock(IndexWriter enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private IndexWriter enclosingInstance;
+ public IndexWriter Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+
+ private IDictionary<SegmentInfo, SegmentReader> readerMap = new HashMap<SegmentInfo, SegmentReader>();
+
+ /// <summary>Forcefully clear changes for the specified segments,
+ /// and remove from the pool. This is called on successful merge.
+ /// </summary>
+ internal virtual void Clear(SegmentInfos infos)
+ {
+ lock (this)
+ {
+ if (infos == null)
+ {
+ foreach(KeyValuePair<SegmentInfo, SegmentReader> ent in readerMap)
+ {
+ ent.Value.hasChanges = false;
+ }
+ }
+ else
+ {
+ foreach(SegmentInfo info in infos)
+ {
+ if (readerMap.ContainsKey(info))
+ {
+ readerMap[info].hasChanges = false;
+ }
+ }
+ }
+ }
+ }
+
+ // used only by asserts
+ public virtual bool InfoIsLive(SegmentInfo info)
+ {
+ lock (this)
+ {
+ int idx = Enclosing_Instance.segmentInfos.IndexOf(info);
+ System.Diagnostics.Debug.Assert(idx != -1);
+ System.Diagnostics.Debug.Assert(Enclosing_Instance.segmentInfos[idx] == info);
+ return true;
+ }
+ }
+
+ public virtual SegmentInfo MapToLive(SegmentInfo info)
+ {
+ lock (this)
+ {
+ int idx = Enclosing_Instance.segmentInfos.IndexOf(info);
+ if (idx != - 1)
+ {
+ info = Enclosing_Instance.segmentInfos[idx];
+ }
+ return info;
+ }
+ }
+
+ /// <summary> Release the segment reader (i.e. decRef it and close if there
+ /// are no more references).
+ /// </summary>
+ /// <param name="sr">
+ /// </param>
+ /// <throws> IOException </throws>
+ public virtual void Release(SegmentReader sr)
+ {
+ lock (this)
+ {
+ Release(sr, false);
+ }
+ }
+
+ /// <summary> Release the segment reader (i.e. decRef it and close if there
+ /// are no more references).
+ /// </summary>
+ /// <param name="sr">
+ /// </param>
+ /// <param name="drop"></param>
+ /// <throws> IOException </throws>
+ public virtual void Release(SegmentReader sr, bool drop)
+ {
+ lock (this)
+ {
+
+ bool pooled = readerMap.ContainsKey(sr.SegmentInfo);
+
+ System.Diagnostics.Debug.Assert(!pooled || readerMap[sr.SegmentInfo] == sr);
+
+ // Drop caller's ref; for an external reader (not
+ // pooled), this decRef will close it
+ sr.DecRef();
+
+ if (pooled && (drop || (!Enclosing_Instance.poolReaders && sr.RefCount == 1)))
+ {
+
+ // We invoke deleter.checkpoint below, so we must be
+ // sync'd on IW if there are changes:
+
+ // TODO: Java 1.5 has this, .NET can't.
+ // System.Diagnostics.Debug.Assert(!sr.hasChanges || Thread.holdsLock(enclosingInstance));
+
+ // Discard (don't save) changes when we are dropping
+ // the reader; this is used only on the sub-readers
+ // after a successful merge.
+ sr.hasChanges &= !drop;
+
+ bool hasChanges = sr.hasChanges;
+
+ // Drop our ref -- this will commit any pending
+ // changes to the dir
+ sr.Close();
+
+ // We are the last ref to this reader; since we're
+ // not pooling readers, we release it:
+ readerMap.Remove(sr.SegmentInfo);
+
+ if (hasChanges)
+ {
+ // Must checkpoint w/ deleter, because this
+ // segment reader will have created new _X_N.del
+ // file.
+ enclosingInstance.deleter.Checkpoint(enclosingInstance.segmentInfos, false);
+ }
+ }
+ }
+ }
+
+ /// <summary>Removes all our references to readers, and commits
+ /// any pending changes.
+ /// </summary>
+ public void Dispose()
+ {
+ Dispose(true);
+ }
+
+ protected void Dispose(bool disposing)
+ {
+ if (disposing)
+ {
+ // We invoke deleter.checkpoint below, so we must be
+ // sync'd on IW:
+ // TODO: assert Thread.holdsLock(IndexWriter.this);
+ // TODO: Should this class have bool _isDisposed?
+ lock (this)
+ {
+ //var toRemove = new List<SegmentInfo>();
+ foreach (var ent in readerMap)
+ {
+ SegmentReader sr = ent.Value;
+ if (sr.hasChanges)
+ {
+ System.Diagnostics.Debug.Assert(InfoIsLive(sr.SegmentInfo));
+ sr.DoCommit(null);
+ // Must checkpoint w/ deleter, because this
+ // segment reader will have created new _X_N.del
+ // file.
+ enclosingInstance.deleter.Checkpoint(enclosingInstance.segmentInfos, false);
+ }
+
+ //toRemove.Add(ent.Key);
+
+ // NOTE: it is allowed that this decRef does not
+ // actually close the SR; this can happen when a
+ // near real-time reader is kept open after the
+ // IndexWriter instance is closed
+ sr.DecRef();
+ }
+
+ //foreach (var key in toRemove)
+ // readerMap.Remove(key);
+ readerMap.Clear();
+ }
+ }
+ }
+
+ /// <summary> Commit all segment readers in the pool.</summary>
+ /// <throws> IOException </throws>
+ internal virtual void Commit()
+ {
+ // We invoke deleter.checkpoint below, so we must be
+ // sync'd on IW:
+ // TODO: assert Thread.holdsLock(IndexWriter.this);
+ lock (this)
+ {
+ foreach(KeyValuePair<SegmentInfo,SegmentReader> ent in readerMap)
+ {
+ SegmentReader sr = ent.Value;
+ if (sr.hasChanges)
+ {
+ System.Diagnostics.Debug.Assert(InfoIsLive(sr.SegmentInfo));
+ sr.DoCommit(null);
+ // Must checkpoint w/ deleter, because this
+ // segment reader will have created new _X_N.del
+ // file.
+ enclosingInstance.deleter.Checkpoint(enclosingInstance.segmentInfos, false);
+ }
+ }
+ }
+ }
+
+ /// <summary> Returns a ref to a clone. NOTE: this clone is not
+ /// enrolled in the pool, so you should simply close()
+ /// it when you're done (ie, do not call release()).
+ /// </summary>
+ public virtual SegmentReader GetReadOnlyClone(SegmentInfo info, bool doOpenStores, int termInfosIndexDivisor)
+ {
+ lock (this)
+ {
+ SegmentReader sr = Get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE, termInfosIndexDivisor);
+ try
+ {
+ return (SegmentReader) sr.Clone(true);
+ }
+ finally
+ {
+ sr.DecRef();
+ }
+ }
+ }
+
+ /// <summary> Obtain a SegmentReader from the readerPool. The reader
+ /// must be returned by calling <see cref="Release(SegmentReader)" />
+ /// </summary>
+ /// <seealso cref="Release(SegmentReader)">
+ /// </seealso>
+ /// <param name="info">
+ /// </param>
+ /// <param name="doOpenStores">
+ /// </param>
+ /// <throws> IOException </throws>
+ public virtual SegmentReader Get(SegmentInfo info, bool doOpenStores)
+ {
+ lock (this)
+ {
+ return Get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE, enclosingInstance.readerTermsIndexDivisor);
+ }
+ }
+ /// <summary> Obtain a SegmentReader from the readerPool. The reader
+ /// must be returned by calling <see cref="Release(SegmentReader)" />
+ ///
+ /// </summary>
+ /// <seealso cref="Release(SegmentReader)">
+ /// </seealso>
+ /// <param name="info">
+ /// </param>
+ /// <param name="doOpenStores">
+ /// </param>
+ /// <param name="readBufferSize">
+ /// </param>
+ /// <param name="termsIndexDivisor">
+ /// </param>
+ /// <throws> IOException </throws>
+ public virtual SegmentReader Get(SegmentInfo info, bool doOpenStores, int readBufferSize, int termsIndexDivisor)
+ {
+ lock (this)
+ {
+ if (Enclosing_Instance.poolReaders)
+ {
+ readBufferSize = BufferedIndexInput.BUFFER_SIZE;
+ }
+
+ SegmentReader sr = readerMap[info];
+ if (sr == null)
+ {
+ // TODO: we may want to avoid doing this while
+ // synchronized
+ // Returns a ref, which we xfer to readerMap:
+ sr = SegmentReader.Get(false, info.dir, info, readBufferSize, doOpenStores, termsIndexDivisor);
+ if (info.dir == enclosingInstance.directory)
+ {
+ // Only pool if reader is not external
+ readerMap[info]=sr;
+ }
+ }
+ else
+ {
+ if (doOpenStores)
+ {
+ sr.OpenDocStores();
+ }
+ if (termsIndexDivisor != - 1 && !sr.TermsIndexLoaded())
+ {
+ // If this reader was originally opened because we
+ // needed to merge it, we didn't load the terms
+ // index. But now, if the caller wants the terms
+ // index (eg because it's doing deletes, or an NRT
+ // reader is being opened) we ask the reader to
+ // load its terms index.
+ sr.LoadTermsIndex(termsIndexDivisor);
+ }
+ }
+
+ // Return a ref to our caller
+ if (info.dir == enclosingInstance.directory)
+ {
+ // Only incRef if we pooled (reader is not external)
+ sr.IncRef();
+ }
+ return sr;
+ }
+ }
+
+ // Returns a ref
+ public virtual SegmentReader GetIfExists(SegmentInfo info)
+ {
+ lock (this)
+ {
+ SegmentReader sr = readerMap[info];
+ if (sr != null)
+ {
+ sr.IncRef();
+ }
+ return sr;
+ }
+ }
+ }
+
+ /// <summary> Obtain the number of deleted docs for a pooled reader.
+ /// If the reader isn't being pooled, the segmentInfo's
+ /// delCount is returned.
+ /// </summary>
+ public virtual int NumDeletedDocs(SegmentInfo info)
+ {
+ SegmentReader reader = readerPool.GetIfExists(info);
+ try
+ {
+ if (reader != null)
+ {
+ return reader.NumDeletedDocs;
+ }
+ else
+ {
+ return info.GetDelCount();
+ }
+ }
+ finally
+ {
+ if (reader != null)
+ {
+ readerPool.Release(reader);
+ }
+ }
+ }
+
+ internal virtual void AcquireWrite()
+ {
+ lock (this)
+ {
+ System.Diagnostics.Debug.Assert(writeThread != ThreadClass.Current());
+ while (writeThread != null || readCount > 0)
+ DoWait();
+
+ // We could have been closed while we were waiting:
+ EnsureOpen();
+
+ writeThread = ThreadClass.Current();
+ }
+ }
+
+ internal virtual void ReleaseWrite()
+ {
+ lock (this)
+ {
+ System.Diagnostics.Debug.Assert(ThreadClass.Current() == writeThread);
+ writeThread = null;
+ System.Threading.Monitor.PulseAll(this);
+ }
+ }
+
+ internal virtual void AcquireRead()
+ {
+ lock (this)
+ {
+ ThreadClass current = ThreadClass.Current();
+ while (writeThread != null && writeThread != current)
+ DoWait();
+
+ readCount++;
+ }
+ }
+
+ // Allows one readLock to upgrade to a writeLock even if
+ // there are other readLocks as long as all other
+ // readLocks are also blocked in this method:
+ internal virtual void UpgradeReadToWrite()
+ {
+ lock (this)
+ {
+ System.Diagnostics.Debug.Assert(readCount > 0);
+ upgradeCount++;
+ while (readCount > upgradeCount || writeThread != null)
+ {
+ DoWait();
+ }
+
+ writeThread = ThreadClass.Current();
+ readCount--;
+ upgradeCount--;
+ }
+ }
+
+ internal virtual void ReleaseRead()
+ {
+ lock (this)
+ {
+ readCount--;
+ System.Diagnostics.Debug.Assert(readCount >= 0);
+ System.Threading.Monitor.PulseAll(this);
+ }
+ }
+
+ internal bool IsOpen(bool includePendingClose)
+ {
+ lock (this)
+ {
+ return !(closed || (includePendingClose && closing));
+ }
+ }
+
+ /// <summary> Used internally to throw an <see cref="AlreadyClosedException" />
+ /// if this IndexWriter has been
+ /// closed.
+ /// </summary>
+ /// <exception cref="AlreadyClosedException">If this IndexWriter is closed</exception>
+ protected internal void EnsureOpen(bool includePendingClose)
+ {
+ lock (this)
+ {
+ if (!IsOpen(includePendingClose))
+ {
+ throw new AlreadyClosedException("this IndexWriter is closed");
+ }
+ }
+ }
+
+ protected internal void EnsureOpen()
+ {
+ lock (this)
+ {
+ EnsureOpen(true);
+ }
+ }
+
+ /// <summary> Prints a message to the infoStream (if non-null),
+ /// prefixed with the identifying information for this
+ /// writer and the thread that's calling it.
+ /// </summary>
+ public virtual void Message(System.String message)
+ {
+ if (infoStream != null)
+ infoStream.WriteLine("IW " + messageID + " [" + DateTime.Now.ToString() + "; " + ThreadClass.Current().Name + "]: " + message);
+ }
+
+ private void SetMessageID(System.IO.StreamWriter infoStream)
+ {
+ lock (this)
+ {
+ if (infoStream != null && messageID == - 1)
+ {
+ lock (MESSAGE_ID_LOCK)
+ {
+ messageID = MESSAGE_ID++;
+ }
+ }
+ this.infoStream = infoStream;
+ }
+ }
+
+ /// <summary> Casts current mergePolicy to LogMergePolicy, and throws
+ /// an exception if the mergePolicy is not a LogMergePolicy.
+ /// </summary>
+ private LogMergePolicy LogMergePolicy
+ {
+ get
+ {
+ if (mergePolicy is LogMergePolicy)
+ return (LogMergePolicy) mergePolicy;
+
+ throw new System.ArgumentException(
+ "this method can only be called when the merge policy is the default LogMergePolicy");
+ }
+ }
+
+ /// <summary><p/>Gets or sets the current setting of whether newly flushed
+ /// segments will use the compound file format. Note that
+ /// this just returns the value previously set with
+ /// setUseCompoundFile(boolean), or the default value
+ /// (true). You cannot use this to query the status of
+ /// previously flushed segments.<p/>
+ ///
+ /// <p/>Note that this method is a convenience method: it
+ /// just calls mergePolicy.getUseCompoundFile as long as
+ /// mergePolicy is an instance of <see cref="LogMergePolicy" />.
+ /// Otherwise an IllegalArgumentException is thrown.<p/>
+ ///
+ /// </summary>
+ public virtual bool UseCompoundFile
+ {
+ get { return LogMergePolicy.GetUseCompoundFile(); }
+ set
+ {
+ LogMergePolicy.SetUseCompoundFile(value);
+ LogMergePolicy.SetUseCompoundDocStore(value);
+ }
+ }
+
+ /// <summary>Expert: Set the Similarity implementation used by this IndexWriter.
+ /// </summary>
+ public virtual void SetSimilarity(Similarity similarity)
+ {
+ EnsureOpen();
+ this.similarity = similarity;
+ docWriter.SetSimilarity(similarity);
+ }
+
+ /// <summary>Expert: Return the Similarity implementation used by this IndexWriter.
+ ///
+ /// <p/>This defaults to the current value of <see cref="Search.Similarity.Default" />.
+ /// </summary>
+ public virtual Similarity Similarity
+ {
+ get
+ {
+ EnsureOpen();
+ return this.similarity;
+ }
+ }
+
+
+ /// <summary>Expert: Gets or sets the interval between indexed terms. Large values cause less
+ /// memory to be used by IndexReader, but slow random-access to terms. Small
+ /// values cause more memory to be used by an IndexReader, and speed
+ /// random-access to terms.
+ ///
+ /// This parameter determines the amount of computation required per query
+ /// term, regardless of the number of documents that contain that term. In
+ /// particular, it is the maximum number of other terms that must be
+ /// scanned before a term is located and its frequency and position information
+ /// may be processed. In a large index with user-entered query terms, query
+ /// processing time is likely to be dominated not by term lookup but rather
+ /// by the processing of frequency and positional data. In a small index
+ /// or when many uncommon query terms are generated (e.g., by wildcard
+ /// queries) term lookup may become a dominant cost.
+ ///
+ /// In particular, <c>numUniqueTerms/interval</c> terms are read into
+ /// memory by an IndexReader, and, on average, <c>interval/2</c> terms
+ /// must be scanned for each random term access.
+ ///
+ /// </summary>
+ /// <seealso cref="DEFAULT_TERM_INDEX_INTERVAL">
+ /// </seealso>
+ public virtual int TermIndexInterval
+ {
+ get
+ {
+ // We pass false because this method is called by SegmentMerger while we are in the process of closing
+ EnsureOpen(false);
+ return termIndexInterval;
+ }
+ set
+ {
+ EnsureOpen();
+ this.termIndexInterval = value;
+ }
+ }
+
+ /// <summary> Constructs an IndexWriter for the index in <c>d</c>.
+ /// Text will be analyzed with <c>a</c>. If <c>create</c>
+ /// is true, then a new, empty index will be created in
+ /// <c>d</c>, replacing the index already there, if any.
+ ///
+ /// </summary>
+ /// <param name="d">the index directory
+ /// </param>
+ /// <param name="a">the analyzer to use
+ /// </param>
+ /// <param name="create"><c>true</c> to create the index or overwrite
+ /// the existing one; <c>false</c> to append to the existing
+ /// index
+ /// </param>
+ /// <param name="mfl">Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified
+ /// via the MaxFieldLength constructor.
+ /// </param>
+ /// <exception cref="CorruptIndexException">If the index is corrupt</exception>
+ /// <exception cref="LockObtainFailedException">If another writer
+ /// has this index open (<c>write.lock</c> could not be obtained)
+ /// </exception>
+ /// <exception cref="System.IO.IOException">If the directory cannot be read/written to,
+ /// or if it does not exist and <c>create</c> is <c>false</c>,
+ /// or if there is any other low-level IO error
+ /// </exception>
+ public IndexWriter(Directory d, Analyzer a, bool create, MaxFieldLength mfl)
+ {
+ InitBlock();
+ Init(d, a, create, null, mfl.Limit, null, null);
+ }
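+
+ /* Illustrative usage sketch (not part of the original source): creating a fresh index
+ * and adding one document. The directory path, analyzer choice and field definitions
+ * below are placeholders, not prescribed by this class.
+ *
+ * Directory dir = Lucene.Net.Store.FSDirectory.Open(new System.IO.DirectoryInfo("/tmp/index"));
+ * var analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
+ * var writer = new IndexWriter(dir, analyzer, true, MaxFieldLength.UNLIMITED);
+ *
+ * var doc = new Document();
+ * doc.Add(new Lucene.Net.Documents.Field("id", "doc-42",
+ *     Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.NOT_ANALYZED));
+ * writer.AddDocument(doc);
+ *
+ * writer.Commit();
+ * writer.Dispose();
+ */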
+
+ /// <summary> Constructs an IndexWriter for the index in
+ /// <c>d</c>, first creating it if it does not
+ /// already exist.
+ ///
+ /// </summary>
+ /// <param name="d">the index directory
+ /// </param>
+ /// <param name="a">the analyzer to use
+ /// </param>
+ /// <param name="mfl">Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified
+ /// via the MaxFieldLength constructor.
+ /// </param>
+ /// <exception cref="CorruptIndexException">If the index is corrupt</exception>
+ /// <exception cref="LockObtainFailedException">If another writer
+ /// has this index open (<c>write.lock</c> could not be obtained)
+ /// </exception>
+ /// <exception cref="System.IO.IOException">If the directory cannot be read/written to,
+ /// or if there is any other low-level IO error
+ /// </exception>
+ public IndexWriter(Directory d, Analyzer a, MaxFieldLength mfl)
+ {
+ InitBlock();
+ Init(d, a, null, mfl.Limit, null, null);
+ }
+
+ /// <summary> Expert: constructs an IndexWriter with a custom <see cref="IndexDeletionPolicy" />
+ ///, for the index in <c>d</c>,
+ /// first creating it if it does not already exist. Text
+ /// will be analyzed with <c>a</c>.
+ ///
+ /// </summary>
+ /// <param name="d">the index directory
+ /// </param>
+ /// <param name="a">the analyzer to use
+ /// </param>
+ /// <param name="deletionPolicy">see <a href="#deletionPolicy">above</a>
+ /// </param>
+ /// <param name="mfl">whether or not to limit field lengths
+ /// </param>
+ /// <exception cref="CorruptIndexException">If the index is corrupt</exception>
+ /// <exception cref="LockObtainFailedException">If another writer
+ /// has this index open (<c>write.lock</c> could not be obtained)
+ /// </exception>
+ /// <exception cref="System.IO.IOException">If the directory cannot be read/written to,
+ /// or if there is any other low-level IO error
+ /// </exception>
+ public IndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl)
+ {
+ InitBlock();
+ Init(d, a, deletionPolicy, mfl.Limit, null, null);
+ }
+
+ /// <summary> Expert: constructs an IndexWriter with a custom <see cref="IndexDeletionPolicy" />
+ ///, for the index in <c>d</c>.
+ /// Text will be analyzed with <c>a</c>. If
+ /// <c>create</c> is true, then a new, empty index
+ /// will be created in <c>d</c>, replacing the index
+ /// already there, if any.
+ ///
+ /// </summary>
+ /// <param name="d">the index directory
+ /// </param>
+ /// <param name="a">the analyzer to use
+ /// </param>
+ /// <param name="create"><c>true</c> to create the index or overwrite
+ /// the existing one; <c>false</c> to append to the existing
+ /// index
+ /// </param>
+ /// <param name="deletionPolicy">see <a href="#deletionPolicy">above</a>
+ /// </param>
+ /// <param name="mfl"><see cref="Lucene.Net.Index.IndexWriter.MaxFieldLength" />, whether or not to limit field lengths. Value is in number of terms/tokens
+ /// </param>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> LockObtainFailedException if another writer </throws>
+ /// <summary> has this index open (<c>write.lock</c> could not
+ /// be obtained)
+ /// </summary>
+ /// <throws> IOException if the directory cannot be read/written to, or </throws>
+ /// <summary> if it does not exist and <c>create</c> is
+ /// <c>false</c> or if there is any other low-level
+ /// IO error
+ /// </summary>
+ public IndexWriter(Directory d, Analyzer a, bool create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl)
+ {
+ InitBlock();
+ Init(d, a, create, deletionPolicy, mfl.Limit, null, null);
+ }
+
+ /// <summary> Expert: constructs an IndexWriter with a custom <see cref="IndexDeletionPolicy" />
+ /// and <see cref="IndexingChain" />,
+ /// for the index in <c>d</c>.
+ /// Text will be analyzed with <c>a</c>. If
+ /// <c>create</c> is true, then a new, empty index
+ /// will be created in <c>d</c>, replacing the index
+ /// already there, if any.
+ ///
+ /// </summary>
+ /// <param name="d">the index directory
+ /// </param>
+ /// <param name="a">the analyzer to use
+ /// </param>
+ /// <param name="create"><c>true</c> to create the index or overwrite
+ /// the existing one; <c>false</c> to append to the existing
+ /// index
+ /// </param>
+ /// <param name="deletionPolicy">see <a href="#deletionPolicy">above</a>
+ /// </param>
+ /// <param name="mfl">whether or not to limit field lengths, value is in number of terms/tokens. See <see cref="Lucene.Net.Index.IndexWriter.MaxFieldLength" />.
+ /// </param>
+ /// <param name="indexingChain">the <see cref="DocConsumer" /> chain to be used to
+ /// process documents
+ /// </param>
+ /// <param name="commit">which commit to open
+ /// </param>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> LockObtainFailedException if another writer </throws>
+ /// <summary> has this index open (<c>write.lock</c> could not
+ /// be obtained)
+ /// </summary>
+ /// <throws> IOException if the directory cannot be read/written to, or </throws>
+ /// <summary> if it does not exist and <c>create</c> is
+ /// <c>false</c> or if there is any other low-level
+ /// IO error
+ /// </summary>
+ internal IndexWriter(Directory d, Analyzer a, bool create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, IndexingChain indexingChain, IndexCommit commit)
+ {
+ InitBlock();
+ Init(d, a, create, deletionPolicy, mfl.Limit, indexingChain, commit);
+ }
+
+ /// <summary> Expert: constructs an IndexWriter on specific commit
+ /// point, with a custom <see cref="IndexDeletionPolicy" />, for
+ /// the index in <c>d</c>. Text will be analyzed
+ /// with <c>a</c>.
+ ///
+ /// <p/> This is only meaningful if you've used a <see cref="IndexDeletionPolicy" />
+ /// in the past that keeps more than
+ /// just the last commit.
+ ///
+ /// <p/>This operation is similar to <see cref="Rollback()" />,
+ /// except that method can only rollback what's been done
+ /// with the current instance of IndexWriter since its last
+ /// commit, whereas this method can rollback to an
+ /// arbitrary commit point from the past, assuming the
+ /// <see cref="IndexDeletionPolicy" /> has preserved past
+ /// commits.
+ ///
+ /// </summary>
+ /// <param name="d">the index directory
+ /// </param>
+ /// <param name="a">the analyzer to use
+ /// </param>
+ /// <param name="deletionPolicy">see <a href="#deletionPolicy">above</a>
+ /// </param>
+ /// <param name="mfl">whether or not to limit field lengths, value is in number of terms/tokens. See <see cref="Lucene.Net.Index.IndexWriter.MaxFieldLength" />.
+ /// </param>
+ /// <param name="commit">which commit to open
+ /// </param>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> LockObtainFailedException if another writer </throws>
+ /// <summary> has this index open (<c>write.lock</c> could not
+ /// be obtained)
+ /// </summary>
+ /// <throws> IOException if the directory cannot be read/written to, or </throws>
+ /// <summary> if it does not exist and <c>create</c> is
+ /// <c>false</c> or if there is any other low-level
+ /// IO error
+ /// </summary>
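+ /// <summary> A sketch of opening the writer on an earlier commit (assumes the index was
+ /// written with a deletion policy that preserves past commits, and uses IndexReader.ListCommits
+ /// as provided in this source tree; <c>dir</c>, <c>analyzer</c> and <c>keepAllPolicy</c> are
+ /// illustrative placeholders):
+ /// <code>
+ /// IndexCommit prior = null;
+ /// foreach (IndexCommit c in IndexReader.ListCommits(dir))
+ ///     prior = c;  // pick whichever preserved commit you want to return to
+ /// IndexWriter writer = new IndexWriter(dir, analyzer, keepAllPolicy,
+ ///     IndexWriter.MaxFieldLength.UNLIMITED, prior);
+ /// </code>
+ /// </summary>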
+ public IndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, IndexCommit commit)
+ {
+ InitBlock();
+ Init(d, a, false, deletionPolicy, mfl.Limit, null, commit);
+ }
+
+ private void Init(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, int maxFieldLength, IndexingChain indexingChain, IndexCommit commit)
+ {
+ if (IndexReader.IndexExists(d))
+ {
+ Init(d, a, false, deletionPolicy, maxFieldLength, indexingChain, commit);
+ }
+ else
+ {
+ Init(d, a, true, deletionPolicy, maxFieldLength, indexingChain, commit);
+ }
+ }
+
+ private void Init(Directory d, Analyzer a, bool create, IndexDeletionPolicy deletionPolicy, int maxFieldLength, IndexingChain indexingChain, IndexCommit commit)
+ {
+ directory = d;
+ analyzer = a;
+ SetMessageID(defaultInfoStream);
+ this.maxFieldLength = maxFieldLength;
+
+ if (indexingChain == null)
+ indexingChain = DocumentsWriter.DefaultIndexingChain;
+
+ if (create)
+ {
+ // Clear the write lock in case it's leftover:
+ directory.ClearLock(WRITE_LOCK_NAME);
+ }
+
+ Lock writeLock = directory.MakeLock(WRITE_LOCK_NAME);
+ if (!writeLock.Obtain(writeLockTimeout))
+ // obtain write lock
+ {
+ throw new LockObtainFailedException("Index locked for write: " + writeLock);
+ }
+ this.writeLock = writeLock; // save it
+
+ bool success = false;
+ try
+ {
+ if (create)
+ {
+ // Try to read first. This is to allow create
+ // against an index that's currently open for
+ // searching. In this case we write the next
+ // segments_N file with no segments:
+ bool doCommit;
+ try
+ {
+ segmentInfos.Read(directory);
+ segmentInfos.Clear();
+ doCommit = false;
+ }
+ catch (System.IO.IOException)
+ {
+ // Likely this means it's a fresh directory
+ doCommit = true;
+ }
+
+ if (doCommit)
+ {
+ // Only commit if there is no segments file
+ // in this dir already.
+ segmentInfos.Commit(directory);
+ synced.UnionWith(segmentInfos.Files(directory, true));
+ }
+ else
+ {
+ // Record that we have a change (zero out all
+ // segments) pending:
+ changeCount++;
+ }
+ }
+ else
+ {
+ segmentInfos.Read(directory);
+
+ if (commit != null)
+ {
+ // Swap out all segments, but, keep metadata in
+ // SegmentInfos, like version & generation, to
+ // preserve write-once. This is important if
+ // readers are open against the future commit
+ // points.
+ if (commit.Directory != directory)
+ throw new System.ArgumentException("IndexCommit's directory doesn't match my directory");
+ SegmentInfos oldInfos = new SegmentInfos();
+ oldInfos.Read(directory, commit.SegmentsFileName);
+ segmentInfos.Replace(oldInfos);
+ changeCount++;
+ if (infoStream != null)
+ Message("init: loaded commit \"" + commit.SegmentsFileName + "\"");
+ }
+
+ // We assume that this segments_N was previously
+ // properly sync'd:
+ synced.UnionWith(segmentInfos.Files(directory, true));
+ }
+
+ SetRollbackSegmentInfos(segmentInfos);
+
+ docWriter = new DocumentsWriter(directory, this, indexingChain);
+ docWriter.SetInfoStream(infoStream);
+ docWriter.SetMaxFieldLength(maxFieldLength);
+
+ // Default deleter (for backwards compatibility) is
+ // KeepOnlyLastCommitDeleter:
+ deleter = new IndexFileDeleter(directory, deletionPolicy == null?new KeepOnlyLastCommitDeletionPolicy():deletionPolicy, segmentInfos, infoStream, docWriter, synced);
+
+ if (deleter.startingCommitDeleted)
+ // Deletion policy deleted the "head" commit point.
+ // We have to mark ourself as changed so that if we
+ // are closed w/o any further changes we write a new
+ // segments_N file.
+ changeCount++;
+
+ PushMaxBufferedDocs();
+
+ if (infoStream != null)
+ {
+ Message("init: create=" + create);
+ MessageState();
+ }
+
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ if (infoStream != null)
+ {
+ Message("init: hit exception on init; releasing write lock");
+ }
+ try
+ {
+ writeLock.Release();
+ }
+ catch (Exception)
+ {
+ // don't mask the original exception
+ }
+ writeLock = null;
+ }
+ }
+ }
+
+ private void SetRollbackSegmentInfos(SegmentInfos infos)
+ {
+ lock (this)
+ {
+ rollbackSegmentInfos = (SegmentInfos) infos.Clone();
+ System.Diagnostics.Debug.Assert(!rollbackSegmentInfos.HasExternalSegments(directory));
+ rollbackSegments = new HashMap<SegmentInfo, int?>();
+ int size = rollbackSegmentInfos.Count;
+ for (int i = 0; i < size; i++)
+ rollbackSegments[rollbackSegmentInfos.Info(i)] = i;
+ }
+ }
+
+ /// <summary> Expert: set the merge policy used by this writer.</summary>
+ public virtual void SetMergePolicy(MergePolicy mp)
+ {
+ EnsureOpen();
+ if (mp == null)
+ throw new System.NullReferenceException("MergePolicy must be non-null");
+
+ if (mergePolicy != mp)
+ mergePolicy.Close();
+ mergePolicy = mp;
+ PushMaxBufferedDocs();
+ if (infoStream != null)
+ {
+ Message("setMergePolicy " + mp);
+ }
+ }
+
+ /// <summary> Expert: returns the current MergePolicy in use by this writer.</summary>
+ /// <seealso cref="SetMergePolicy">
+ /// </seealso>
+ public virtual MergePolicy MergePolicy
+ {
+ get
+ {
+ EnsureOpen();
+ return mergePolicy;
+ }
+ }
+
+ /// <summary> Expert: set the merge scheduler used by this writer.</summary>
+ public virtual void SetMergeScheduler(MergeScheduler mergeScheduler)
+ {
+ lock (this)
+ {
+ EnsureOpen();
+ if (mergeScheduler == null)
+ throw new System.NullReferenceException("MergeScheduler must be non-null");
+
+ if (this.mergeScheduler != mergeScheduler)
+ {
+ FinishMerges(true);
+ this.mergeScheduler.Close();
+ }
+ this.mergeScheduler = mergeScheduler;
+ if (infoStream != null)
+ {
+ Message("setMergeScheduler " + mergeScheduler);
+ }
+ }
+ }
+
+ /// <summary> Expert: returns the current MergeScheduler in use by this
+ /// writer.
+ /// </summary>
+ /// <seealso cref="SetMergeScheduler">
+ /// </seealso>
+ public virtual MergeScheduler MergeScheduler
+ {
+ get
+ {
+ EnsureOpen();
+ return mergeScheduler;
+ }
+ }
+
+ /// <summary> <p/>Gets or sets the largest segment (measured by document
+ /// count) that may be merged with other segments.
+ /// <p/>
+ /// Small values (e.g., less than 10,000) are best for
+ /// interactive indexing, as this limits the length of
+ /// pauses while indexing to a few seconds. Larger values
+ /// are best for batched indexing and speedier
+ /// searches.
+ /// <p/>
+ /// The default value is <see cref="int.MaxValue" />.
+ /// <p/>
+ /// Note that this property is a convenience: it simply
+ /// gets or sets MaxMergeDocs on the current merge policy, as long as
+ /// mergePolicy is an instance of <see cref="LogMergePolicy" />.
+ /// Otherwise an IllegalArgumentException is thrown.<p/>
+ ///
+ /// The default merge policy (<see cref="LogByteSizeMergePolicy" />)
+ /// also allows you to set this
+ /// limit by net size (in MB) of the segment, using
+ /// <see cref="LogByteSizeMergePolicy.MaxMergeMB" />.<p/>
+ /// </summary>
+ /// <seealso cref="MaxMergeDocs">
+ /// </seealso>
+ public virtual int MaxMergeDocs
+ {
+ get { return LogMergePolicy.MaxMergeDocs; }
+ set { LogMergePolicy.MaxMergeDocs = value; }
+ }
+
+ /// <summary> The maximum number of terms that will be indexed for a single field in a
+ /// document. This limits the amount of memory required for indexing, so that
+ /// collections with very large files will not crash the indexing process by
+ /// running out of memory. This setting refers to the number of running terms,
+ /// not to the number of different terms.<p/>
+ /// <strong>Note:</strong> this silently truncates large documents, excluding from the
+ /// index all terms that occur further in the document. If you know your source
+ /// documents are large, be sure to set this value high enough to accommodate
+ /// the expected size. If you set it to Integer.MAX_VALUE, then the only limit
+ /// is your memory, but you should anticipate an OutOfMemoryError.<p/>
+ /// By default, no more than <see cref="DEFAULT_MAX_FIELD_LENGTH" /> terms
+ /// will be indexed for a field.
+ /// </summary>
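+ /// <summary> For example (a sketch; int.MaxValue removes the limit and trades memory for completeness):
+ /// <code>
+ /// writer.SetMaxFieldLength(int.MaxValue);  // index arbitrarily long documents
+ /// </code>
+ /// </summary>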
+ public virtual void SetMaxFieldLength(int maxFieldLength)
+ {
+ EnsureOpen();
+ this.maxFieldLength = maxFieldLength;
+ docWriter.SetMaxFieldLength(maxFieldLength);
+ if (infoStream != null)
+ Message("setMaxFieldLength " + maxFieldLength);
+ }
+
+ /// <summary> Returns the maximum number of terms that will be
+ /// indexed for a single field in a document.
+ /// </summary>
+ /// <seealso cref="SetMaxFieldLength">
+ /// </seealso>
+ [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
+ public virtual int GetMaxFieldLength()
+ {
+ EnsureOpen();
+ return maxFieldLength;
+ }
+
+ /// <summary> Gets or sets the termsIndexDivisor passed to any readers that
+ /// IndexWriter opens, for example when applying deletes
+ /// or creating a near-real-time reader in
+ /// <see cref="GetReader()"/>. Default value is
+ /// <see cref="IndexReader.DEFAULT_TERMS_INDEX_DIVISOR"/>.
+ /// </summary>
+ public int ReaderTermsIndexDivisor
+ {
+ get
+ {
+ EnsureOpen();
+ return readerTermsIndexDivisor;
+ }
+ set
+ {
+ EnsureOpen();
+ if (value <= 0)
+ {
+ throw new ArgumentException("divisor must be >= 1 (got " + value + ")");
+ }
+ readerTermsIndexDivisor = value;
+ if (infoStream != null)
+ {
+ Message("setReaderTermsIndexDivisor " + readerTermsIndexDivisor);
+ }
+ }
+ }
+
+ /// <summary>Determines the minimal number of documents required
+ /// before the buffered in-memory documents are flushed as
+ /// a new Segment. Large values generally give faster
+ /// indexing.
+ ///
+ /// <p/>When this is set, the writer will flush every
+ /// maxBufferedDocs added documents. Pass in <see cref="DISABLE_AUTO_FLUSH" />
+ /// to prevent triggering a flush due
+ /// to number of buffered documents. Note that if flushing
+ /// by RAM usage is also enabled, then the flush will be
+ /// triggered by whichever comes first.<p/>
+ ///
+ /// <p/>Disabled by default (writer flushes by RAM usage).<p/>
+ ///
+ /// </summary>
+ /// <throws> IllegalArgumentException if maxBufferedDocs is </throws>
+ /// <summary> enabled but smaller than 2, or if it would disable maxBufferedDocs
+ /// when ramBufferSize is already disabled
+ /// </summary>
+ /// <seealso cref="SetRAMBufferSizeMB">
+ /// </seealso>
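+ /// <summary> For example (a sketch; the value 1000 is arbitrary):
+ /// <code>
+ /// // flush a new segment every 1000 buffered documents instead of by RAM usage
+ /// writer.SetMaxBufferedDocs(1000);
+ /// writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
+ /// </code>
+ /// </summary>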
+ public virtual void SetMaxBufferedDocs(int maxBufferedDocs)
+ {
+ EnsureOpen();
+ if (maxBufferedDocs != DISABLE_AUTO_FLUSH && maxBufferedDocs < 2)
+ throw new ArgumentException("maxBufferedDocs must at least be 2 when enabled");
+
+ if (maxBufferedDocs == DISABLE_AUTO_FLUSH && (int)GetRAMBufferSizeMB() == DISABLE_AUTO_FLUSH)
+ throw new ArgumentException("at least one of ramBufferSize and maxBufferedDocs must be enabled");
+
+ docWriter.MaxBufferedDocs = maxBufferedDocs;
+ PushMaxBufferedDocs();
+ if (infoStream != null)
+ Message("setMaxBufferedDocs " + maxBufferedDocs);
+ }
+
+ /// <summary> If we are flushing by doc count (not by RAM usage), and
+ /// using LogDocMergePolicy then push maxBufferedDocs down
+ /// as its minMergeDocs, to keep backwards compatibility.
+ /// </summary>
+ private void PushMaxBufferedDocs()
+ {
+ if (docWriter.MaxBufferedDocs != DISABLE_AUTO_FLUSH)
+ {
+ MergePolicy mp = mergePolicy;
+ if (mp is LogDocMergePolicy)
+ {
+ LogDocMergePolicy lmp = (LogDocMergePolicy) mp;
+ int maxBufferedDocs = docWriter.MaxBufferedDocs;
+ if (lmp.MinMergeDocs != maxBufferedDocs)
+ {
+ if (infoStream != null)
+ Message("now push maxBufferedDocs " + maxBufferedDocs + " to LogDocMergePolicy");
+ lmp.MinMergeDocs = maxBufferedDocs;
+ }
+ }
+ }
+ }
+
+ /// <summary> Returns the number of buffered added documents that will
+ /// trigger a flush if enabled.
+ /// </summary>
+ /// <seealso cref="SetMaxBufferedDocs">
+ /// </seealso>
+ [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
+ public virtual int GetMaxBufferedDocs()
+ {
+ EnsureOpen();
+ return docWriter.MaxBufferedDocs;
+ }
+
+ /// <summary>Determines the amount of RAM that may be used for
+ /// buffering added documents and deletions before they are
+ /// flushed to the Directory. Generally for faster
+ /// indexing performance it's best to flush by RAM usage
+ /// instead of document count and use as large a RAM buffer
+ /// as you can.
+ ///
+ /// <p/>When this is set, the writer will flush whenever
+ /// buffered documents and deletions use this much RAM.
+ /// Pass in <see cref="DISABLE_AUTO_FLUSH" /> to prevent
+ /// triggering a flush due to RAM usage. Note that if
+ /// flushing by document count is also enabled, then the
+ /// flush will be triggered by whichever comes first.<p/>
+ ///
+ /// <p/> <b>NOTE</b>: the accounting of RAM usage for pending
+ /// deletions is only approximate. Specifically, if you
+ /// delete by Query, Lucene currently has no way to measure
+ /// the RAM usage of individual Queries, so the accounting
+ /// will under-estimate and you should compensate by either
+ /// calling commit() periodically yourself, or by using
+ /// <see cref="SetMaxBufferedDeleteTerms" /> to flush by count
+ /// instead of RAM usage (each buffered delete Query counts
+ /// as one).
+ ///
+ /// <p/>
+ /// <b>NOTE</b>: because IndexWriter uses <c>int</c>s when managing its
+ /// internal storage, the absolute maximum value for this setting is somewhat
+ /// less than 2048 MB. The precise limit depends on various factors, such as
+ /// how large your documents are, how many fields have norms, etc., so it's
+ /// best to set this value comfortably under 2048.
+ /// <p/>
+ ///
+ /// <p/> The default value is <see cref="DEFAULT_RAM_BUFFER_SIZE_MB" />.<p/>
+ ///
+ /// </summary>
+ /// <throws> IllegalArgumentException if ramBufferSize is </throws>
+ /// <summary> enabled but non-positive, or if it would disable ramBufferSize
+ /// when maxBufferedDocs is already disabled
+ /// </summary>
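+ /// <summary> For example (a sketch; 48 MB is an arbitrary value):
+ /// <code>
+ /// // flush whenever buffered documents and deletions use about 48 MB of RAM
+ /// writer.SetRAMBufferSizeMB(48.0);
+ /// </code>
+ /// </summary>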
+ public virtual void SetRAMBufferSizeMB(double mb)
+ {
+ if (mb > 2048.0)
+ {
+ throw new System.ArgumentException("ramBufferSize " + mb + " is too large; should be comfortably less than 2048");
+ }
+ if (mb != DISABLE_AUTO_FLUSH && mb <= 0.0)
+ throw new System.ArgumentException("ramBufferSize should be > 0.0 MB when enabled");
+ if (mb == DISABLE_AUTO_FLUSH && GetMaxBufferedDocs() == DISABLE_AUTO_FLUSH)
+ throw new System.ArgumentException("at least one of ramBufferSize and maxBufferedDocs must be enabled");
+ docWriter.SetRAMBufferSizeMB(mb);
+ if (infoStream != null)
+ Message("setRAMBufferSizeMB " + mb);
+ }
+
+ /// <summary> Returns the value set by <see cref="SetRAMBufferSizeMB" /> if enabled.</summary>
+ [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
+ public virtual double GetRAMBufferSizeMB()
+ {
+ return docWriter.GetRAMBufferSizeMB();
+ }
+
+ /// <summary> <p/>Determines the minimal number of delete terms required before the buffered
+ /// in-memory delete terms are applied and flushed. If there are documents
+ /// buffered in memory at the time, they are merged and a new segment is
+ /// created.<p/>
+ /// <p/>Disabled by default (writer flushes by RAM usage).<p/>
+ ///
+ /// </summary>
+ /// <throws> IllegalArgumentException if maxBufferedDeleteTerms </throws>
+ /// <summary> is enabled but smaller than 1
+ /// </summary>
+ /// <seealso cref="SetRAMBufferSizeMB">
+ /// </seealso>
+ public virtual void SetMaxBufferedDeleteTerms(int maxBufferedDeleteTerms)
+ {
+ EnsureOpen();
+ if (maxBufferedDeleteTerms != DISABLE_AUTO_FLUSH && maxBufferedDeleteTerms < 1)
+ throw new System.ArgumentException("maxBufferedDeleteTerms must at least be 1 when enabled");
+ docWriter.MaxBufferedDeleteTerms = maxBufferedDeleteTerms;
+ if (infoStream != null)
+ Message("setMaxBufferedDeleteTerms " + maxBufferedDeleteTerms);
+ }
+
+ /// <summary> Returns the number of buffered deleted terms that will
+ /// trigger a flush if enabled.
+ /// </summary>
+ /// <seealso cref="SetMaxBufferedDeleteTerms">
+ /// </seealso>
+ [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
+ public virtual int GetMaxBufferedDeleteTerms()
+ {
+ EnsureOpen();
+ return docWriter.MaxBufferedDeleteTerms;
+ }
+
+ /// <summary>Gets or sets the number of segments that are merged at
+ /// once and also controls the total number of segments
+ /// allowed to accumulate in the index.
+ /// <p/>Determines how often segment indices are merged by addDocument(). With
+ /// smaller values, less RAM is used while indexing, and searches on
+ /// unoptimized indices are faster, but indexing speed is slower. With larger
+ /// values, more RAM is used during indexing, and while searches on unoptimized
+ /// indices are slower, indexing is faster. Thus larger values (> 10) are best
+ /// for batch index creation, and smaller values (&lt; 10) for indices that are
+ /// interactively maintained.
+ ///
+ /// <p/>Note that this property is a convenience: it simply
+ /// gets or sets MergeFactor on the current merge policy, as long as
+ /// mergePolicy is an instance of <see cref="LogMergePolicy" />.
+ /// Otherwise an IllegalArgumentException is thrown.<p/>
+ ///
+ /// <p/>This must never be less than 2. The default value is 10.
+ /// </summary>
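+ /// <summary> For example (a sketch; 30 is an arbitrary batch-indexing value and only
+ /// applies while the merge policy is a <see cref="LogMergePolicy" />):
+ /// <code>
+ /// writer.MergeFactor = 30;   // favor indexing throughput during a bulk load
+ /// // ... add many documents ...
+ /// writer.MergeFactor = 10;   // back to the default for interactive use
+ /// </code>
+ /// </summary>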
+ public virtual int MergeFactor
+ {
+ set { LogMergePolicy.MergeFactor = value; }
+ get { return LogMergePolicy.MergeFactor; }
+ }
+
+ /// <summary>Gets or sets the default info stream.
+ /// If non-null, this will be the default infoStream used
+ /// by a newly instantiated IndexWriter.
+ /// </summary>
+ /// <seealso cref="SetInfoStream">
+ /// </seealso>
+ public static StreamWriter DefaultInfoStream
+ {
+ set { IndexWriter.defaultInfoStream = value; }
+ get { return IndexWriter.defaultInfoStream; }
+ }
+
+ /// <summary>If non-null, information about merges, deletes and a
+ /// message when maxFieldLength is reached will be printed
+ /// to this.
+ /// </summary>
+ public virtual void SetInfoStream(System.IO.StreamWriter infoStream)
+ {
+ EnsureOpen();
+ SetMessageID(infoStream);
+ docWriter.SetInfoStream(infoStream);
+ deleter.SetInfoStream(infoStream);
+ if (infoStream != null)
+ MessageState();
+ }
+
+ private void MessageState()
+ {
+ Message("setInfoStream: dir=" + directory +
+ " mergePolicy=" + mergePolicy +
+ " mergeScheduler=" + mergeScheduler +
+ " ramBufferSizeMB=" + docWriter.GetRAMBufferSizeMB() +
+ " maxBufferedDocs=" + docWriter.MaxBufferedDocs +
+ " maxBuffereDeleteTerms=" + docWriter.MaxBufferedDeleteTerms +
+ " maxFieldLength=" + maxFieldLength +
+ " index=" + SegString());
+ }
+
+ /// <summary> Returns the current infoStream in use by this writer.</summary>
+ /// <seealso cref="SetInfoStream">
+ /// </seealso>
+ public virtual StreamWriter InfoStream
+ {
+ get
+ {
+ EnsureOpen();
+ return infoStream;
+ }
+ }
+
+ /// <summary>Returns true if verbose output is enabled (i.e., infoStream != null). </summary>
+ public virtual bool Verbose
+ {
+ get { return infoStream != null; }
+ }
+
+ /// <summary>Gets or sets the allowed timeout (in milliseconds) when acquiring the write lock.</summary>
+ public virtual long WriteLockTimeout
+ {
+ get
+ {
+ EnsureOpen();
+ return writeLockTimeout;
+ }
+ set
+ {
+ EnsureOpen();
+ this.writeLockTimeout = value;
+ }
+ }
+
+ /// <summary> Gets or sets the default (for any instance of IndexWriter) maximum time to wait for a write lock (in
+ /// milliseconds).
+ /// </summary>
+ public static long DefaultWriteLockTimeout
+ {
+ set { IndexWriter.WRITE_LOCK_TIMEOUT = value; }
+ get { return IndexWriter.WRITE_LOCK_TIMEOUT; }
+ }
+
+ /// <summary> Commits all changes to an index and closes all
+ /// associated files. Note that this may be a costly
+ /// operation, so, try to re-use a single writer instead of
+ /// closing and opening a new one. See <see cref="Commit()" /> for
+ /// caveats about write caching done by some IO devices.
+ ///
+ /// <p/> If an Exception is hit during close, eg due to disk
+ /// full or some other reason, then both the on-disk index
+ /// and the internal state of the IndexWriter instance will
+ /// be consistent. However, the close will not be complete
+ /// even though part of it (flushing buffered documents)
+ /// may have succeeded, so the write lock will still be
+ /// held.<p/>
+ ///
+ /// <p/> If you can correct the underlying cause (eg free up
+ /// some disk space) then you can call close() again.
+ /// Failing that, if you want to force the write lock to be
+ /// released (dangerous, because you may then lose buffered
+ /// docs in the IndexWriter instance) then you can do
+ /// something like this:<p/>
+ ///
+ /// <code>
+ /// try {
+ /// writer.close();
+ /// } finally {
+ /// if (IndexWriter.isLocked(directory)) {
+ /// IndexWriter.unlock(directory);
+ /// }
+ /// }
+ /// </code>
+ ///
+ /// after which, you must be certain not to use the writer
+ /// instance anymore.<p/>
+ ///
+ /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
+ /// you should immediately close the writer, again. See <a
+ /// href="#OOME">above</a> for details.<p/>
+ ///
+ /// </summary>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
+ [Obsolete("Use Dispose() instead")]
+ public void Close()
+ {
+ Dispose(true);
+ }
+
+ /// <summary> Commits all changes to an index and closes all
+ /// associated files. Note that this may be a costly
+ /// operation, so, try to re-use a single writer instead of
+ /// closing and opening a new one. See <see cref="Commit()" /> for
+ /// caveats about write caching done by some IO devices.
+ ///
+ /// <p/> If an Exception is hit during close, eg due to disk
+ /// full or some other reason, then both the on-disk index
+ /// and the internal state of the IndexWriter instance will
+ /// be consistent. However, the close will not be complete
+ /// even though part of it (flushing buffered documents)
+ /// may have succeeded, so the write lock will still be
+ /// held.<p/>
+ ///
+ /// <p/> If you can correct the underlying cause (eg free up
+ /// some disk space) then you can call close() again.
+ /// Failing that, if you want to force the write lock to be
+ /// released (dangerous, because you may then lose buffered
+ /// docs in the IndexWriter instance) then you can do
+ /// something like this:<p/>
+ ///
+ /// <code>
+ /// try {
+ /// writer.close();
+ /// } finally {
+ /// if (IndexWriter.isLocked(directory)) {
+ /// IndexWriter.unlock(directory);
+ /// }
+ /// }
+ /// </code>
+ ///
+ /// after which, you must be certain not to use the writer
+ /// instance anymore.<p/>
+ ///
+ /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
+ /// you should immediately close the writer, again. See <a
+ /// href="#OOME">above</a> for details.<p/>
+ ///
+ /// </summary>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
+ public virtual void Dispose()
+ {
+ Dispose(true);
+ }
+
+ /// <summary> Closes the index with or without waiting for currently
+ /// running merges to finish. This is only meaningful when
+ /// using a MergeScheduler that runs merges in background
+ /// threads.
+ ///
+ /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
+ /// you should immediately close the writer, again. See <a
+ /// href="#OOME">above</a> for details.<p/>
+ ///
+ /// <p/><b>NOTE</b>: it is dangerous to always call
+ /// close(false), especially when IndexWriter is not open
+ /// for very long, because this can result in "merge
+ /// starvation" whereby long merges will never have a
+ /// chance to finish. This will cause too many segments in
+ /// your index over time.<p/>
+ ///
+ /// </summary>
+ /// <param name="waitForMerges">if true, this call will block
+ /// until all merges complete; else, it will ask all
+ /// running merges to abort, wait until those merges have
+ /// finished (which should be at most a few seconds), and
+ /// then return.
+ /// </param>
+ public virtual void Dispose(bool waitForMerges)
+ {
+ Dispose(true, waitForMerges);
+ }
+
+ protected virtual void Dispose(bool disposing, bool waitForMerges)
+ {
+ if (disposing)
+ {
+ // Ensure that only one thread actually gets to do the closing:
+ if (ShouldClose())
+ {
+ // If any methods have hit OutOfMemoryError, then abort
+ // on close, in case the internal state of IndexWriter
+ // or DocumentsWriter is corrupt
+ if (hitOOM)
+ RollbackInternal();
+ else
+ CloseInternal(waitForMerges);
+ }
+ }
+ }
+
+ /// <summary> Closes the index with or without waiting for currently
+ /// running merges to finish. This is only meaningful when
+ /// using a MergeScheduler that runs merges in background
+ /// threads.
+ ///
+ /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
+ /// you should immediately close the writer, again. See <a
+ /// href="#OOME">above</a> for details.<p/>
+ ///
+ /// <p/><b>NOTE</b>: it is dangerous to always call
+ /// close(false), especially when IndexWriter is not open
+ /// for very long, because this can result in "merge
+ /// starvation" whereby long merges will never have a
+ /// chance to finish. This will cause too many segments in
+ /// your index over time.<p/>
+ ///
+ /// </summary>
+ /// <param name="waitForMerges">if true, this call will block
+ /// until all merges complete; else, it will ask all
+ /// running merges to abort, wait until those merges have
+ /// finished (which should be at most a few seconds), and
+ /// then return.
+ /// </param>
+ [Obsolete("Use Dispose(bool) instead")]
+ public virtual void Close(bool waitForMerges)
+ {
+ Dispose(waitForMerges);
+ }
+
+ // Returns true if this thread should attempt to close, or
+ // false if IndexWriter is now closed; else, waits until
+ // another thread finishes closing
+ private bool ShouldClose()
+ {
+ lock (this)
+ {
+ while (true)
+ {
+ if (!closed)
+ {
+ if (!closing)
+ {
+ closing = true;
+ return true;
+ }
+ else
+ {
+ // Another thread is presently trying to close;
+ // wait until it finishes one way (closes
+ // successfully) or another (fails to close)
+ DoWait();
+ }
+ }
+ else
+ return false;
+ }
+ }
+ }
+
+ private void CloseInternal(bool waitForMerges)
+ {
+
+ docWriter.PauseAllThreads();
+
+ try
+ {
+ if (infoStream != null)
+ Message("now flush at close");
+
+ docWriter.Dispose();
+
+ // Only allow a new merge to be triggered if we are
+ // going to wait for merges:
+ if (!hitOOM)
+ {
+ Flush(waitForMerges, true, true);
+ }
+
+ if (waitForMerges)
+ // Give merge scheduler last chance to run, in case
+ // any pending merges are waiting:
+ mergeScheduler.Merge(this);
+
+ mergePolicy.Close();
+
+ FinishMerges(waitForMerges);
+ stopMerges = true;
+
+ mergeScheduler.Close();
+
+ if (infoStream != null)
+ Message("now call final commit()");
+
+ if (!hitOOM)
+ {
+ Commit(0);
+ }
+
+ if (infoStream != null)
+ Message("at close: " + SegString());
+
+ lock (this)
+ {
+ readerPool.Dispose();
+ docWriter = null;
+ deleter.Dispose();
+ }
+
+ if (writeLock != null)
+ {
+ writeLock.Release(); // release write lock
+ writeLock = null;
+ }
+ lock (this)
+ {
+ closed = true;
+ }
+ }
+ catch (System.OutOfMemoryException oom)
+ {
+ HandleOOM(oom, "closeInternal");
+ }
+ finally
+ {
+ lock (this)
+ {
+ closing = false;
+ System.Threading.Monitor.PulseAll(this);
+ if (!closed)
+ {
+ if (docWriter != null)
+ docWriter.ResumeAllThreads();
+ if (infoStream != null)
+ Message("hit exception while closing");
+ }
+ }
+ }
+ }
+
+ /// <summary>Tells the docWriter to close its currently open shared
+ /// doc stores (stored fields &amp; vectors files).
+ /// The return value specifies whether new doc store files are compound or not.
+ /// </summary>
+ private bool FlushDocStores()
+ {
+ lock (this)
+ {
+ if (infoStream != null)
+ {
+ Message("flushDocStores segment=" + docWriter.DocStoreSegment);
+ }
+
+ bool useCompoundDocStore = false;
+ if (infoStream != null)
+ {
+ Message("closeDocStores segment=" + docWriter.DocStoreSegment);
+ }
+
+ System.String docStoreSegment;
+
+ bool success = false;
+ try
+ {
+ docStoreSegment = docWriter.CloseDocStore();
+ success = true;
+ }
+ finally
+ {
+ if (!success && infoStream != null)
+ {
+ Message("hit exception closing doc store segment");
+ }
+ }
+
+ if (infoStream != null)
+ {
+ Message("flushDocStores files=" + docWriter.ClosedFiles());
+ }
+
+ useCompoundDocStore = mergePolicy.UseCompoundDocStore(segmentInfos);
+
+ if (useCompoundDocStore && docStoreSegment != null && docWriter.ClosedFiles().Count != 0)
+ {
+ // Now build compound doc store file
+
+ if (infoStream != null)
+ {
+ Message("create compound file " + docStoreSegment + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION);
+ }
+
+ success = false;
+
+ int numSegments = segmentInfos.Count;
+ System.String compoundFileName = docStoreSegment + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION;
+
+ try
+ {
+ CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, compoundFileName);
+ foreach(string file in docWriter.closedFiles)
+ {
+ cfsWriter.AddFile(file);
+ }
+
+ // Perform the merge
+ cfsWriter.Close();
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ if (infoStream != null)
+ Message("hit exception building compound file doc store for segment " + docStoreSegment);
+ deleter.DeleteFile(compoundFileName);
+ docWriter.Abort();
+ }
+ }
+
+ for (int i = 0; i < numSegments; i++)
+ {
+ SegmentInfo si = segmentInfos.Info(i);
+ if (si.DocStoreOffset != - 1 && si.DocStoreSegment.Equals(docStoreSegment))
+ si.DocStoreIsCompoundFile = true;
+ }
+
+ Checkpoint();
+
+ // In case the files we just merged into a CFS were
+ // not previously checkpointed:
+ deleter.DeleteNewFiles(docWriter.ClosedFiles());
+ }
+
+ return useCompoundDocStore;
+ }
+ }
+
+ /// <summary>Returns the Directory used by this index. </summary>
+ public virtual Directory Directory
+ {
+ get
+ {
+ // Pass false because the flush during closing calls getDirectory
+ EnsureOpen(false);
+ return directory;
+ }
+ }
+
+ /// <summary>Returns the analyzer used by this index. </summary>
+ public virtual Analyzer Analyzer
+ {
+ get
+ {
+ EnsureOpen();
+ return analyzer;
+ }
+ }
+
+ /// <summary>Returns total number of docs in this index, including
+ /// docs not yet flushed (still in the RAM buffer),
+ /// not counting deletions.
+ /// </summary>
+ /// <seealso cref="NumDocs">
+ /// </seealso>
+ public virtual int MaxDoc()
+ {
+ lock (this)
+ {
+ int count;
+ if (docWriter != null)
+ count = docWriter.NumDocsInRAM;
+ else
+ count = 0;
+
+ for (int i = 0; i < segmentInfos.Count; i++)
+ count += segmentInfos.Info(i).docCount;
+ return count;
+ }
+ }
+
+ /// <summary>Returns total number of docs in this index, including
+ /// docs not yet flushed (still in the RAM buffer), and
+ /// including deletions. <b>NOTE:</b> buffered deletions
+ /// are not counted. If you really need these to be
+ /// counted you should call <see cref="Commit()" /> first.
+ /// </summary>
+ /// <seealso cref="NumDocs">
+ /// </seealso>
+ public virtual int NumDocs()
+ {
+ lock (this)
+ {
+ int count;
+ if (docWriter != null)
+ count = docWriter.NumDocsInRAM;
+ else
+ count = 0;
+
+ for (int i = 0; i < segmentInfos.Count; i++)
+ {
+ SegmentInfo info = segmentInfos.Info(i);
+ count += info.docCount - info.GetDelCount();
+ }
+ return count;
+ }
+ }
+
+ public virtual bool HasDeletions()
+ {
+ lock (this)
+ {
+ EnsureOpen();
+ if (docWriter.HasDeletes())
+ return true;
+ for (int i = 0; i < segmentInfos.Count; i++)
+ if (segmentInfos.Info(i).HasDeletions())
+ return true;
+ return false;
+ }
+ }
+
+ /// <summary> The maximum number of terms that will be indexed for a single field in a
+ /// document. This limits the amount of memory required for indexing, so that
+ /// collections with very large files will not crash the indexing process by
+ /// running out of memory.<p/>
+ /// Note that this effectively truncates large documents, excluding from the
+ /// index terms that occur further in the document. If you know your source
+ /// documents are large, be sure to set this value high enough to accommodate
+ /// the expected size. If you set it to Integer.MAX_VALUE, then the only limit
+ /// is your memory, but you should anticipate an OutOfMemoryError.<p/>
+ /// By default, no more than 10,000 terms will be indexed for a field.
+ ///
+ /// </summary>
+ /// <seealso cref="MaxFieldLength">
+ /// </seealso>
+ private int maxFieldLength;
+
+ /// <summary> Adds a document to this index. If the document contains more than
+ /// <see cref="SetMaxFieldLength(int)" /> terms for a given field, the remainder are
+ /// discarded.
+ ///
+ /// <p/> Note that if an Exception is hit (for example disk full)
+ /// then the index will be consistent, but this document
+ /// may not have been added. Furthermore, it's possible
+ /// the index will have one segment in non-compound format
+ /// even when using compound files (when a merge has
+ /// partially succeeded).<p/>
+ ///
+ /// <p/> This method periodically flushes pending documents
+ /// to the Directory (see <a href="#flush">above</a>), and
+ /// also periodically triggers segment merges in the index
+ /// according to the <see cref="MergePolicy" /> in use.<p/>
+ ///
+ /// <p/>Merges temporarily consume space in the
+ /// directory. The amount of space required is up to 1X the
+ /// size of all segments being merged, when no
+ /// readers/searchers are open against the index, and up to
+ /// 2X the size of all segments being merged when
+ /// readers/searchers are open against the index (see
+ /// <see cref="Optimize()" /> for details). The sequence of
+ /// primitive merge operations performed is governed by the
+ /// merge policy.
+ ///
+ /// <p/>Note that each term in the document can be no longer
+ /// than 16383 characters, otherwise an
+ /// IllegalArgumentException will be thrown.<p/>
+ ///
+ /// <p/>Note that it's possible to create an invalid Unicode
+ /// string in java if a UTF16 surrogate pair is malformed.
+ /// In this case, the invalid characters are silently
+ /// replaced with the Unicode replacement character
+ /// U+FFFD.<p/>
+ ///
+ /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
+ /// you should immediately close the writer. See <a
+ /// href="#OOME">above</a> for details.<p/>
+ ///
+ /// </summary>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
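+ /// <summary> A minimal sketch (field names, values and field options are illustrative):
+ /// <code>
+ /// Document doc = new Document();
+ /// doc.Add(new Field("id", "42", Field.Store.YES, Field.Index.NOT_ANALYZED));
+ /// doc.Add(new Field("body", "some analyzed text", Field.Store.NO, Field.Index.ANALYZED));
+ /// writer.AddDocument(doc);
+ /// </code>
+ /// </summary>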
+ public virtual void AddDocument(Document doc)
+ {
+ AddDocument(doc, analyzer);
+ }
+
+ /// <summary> Adds a document to this index, using the provided analyzer instead of the
+ /// value of <see cref="Analyzer" />. If the document contains more than
+ /// <see cref="SetMaxFieldLength(int)" /> terms for a given field, the remainder are
+ /// discarded.
+ ///
+ /// <p/>See <see cref="AddDocument(Document)" /> for details on
+ /// index and IndexWriter state after an Exception, and
+ /// flushing/merging temporary free space requirements.<p/>
+ ///
+ /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
+ /// you should immediately close the writer. See <a
+ /// href="#OOME">above</a> for details.<p/>
+ ///
+ /// </summary>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
+ public virtual void AddDocument(Document doc, Analyzer analyzer)
+ {
+ EnsureOpen();
+ bool doFlush = false;
+ bool success = false;
+ try
+ {
+ try
+ {
+ doFlush = docWriter.AddDocument(doc, analyzer);
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+
+ if (infoStream != null)
+ Message("hit exception adding document");
+
+ lock (this)
+ {
+ // If docWriter has some aborted files that were
+ // never incref'd, then we clean them up here
+ if (docWriter != null)
+ {
+ ICollection<string> files = docWriter.AbortedFiles();
+ if (files != null)
+ deleter.DeleteNewFiles(files);
+ }
+ }
+ }
+ }
+ if (doFlush)
+ Flush(true, false, false);
+ }
+ catch (System.OutOfMemoryException oom)
+ {
+ HandleOOM(oom, "addDocument");
+ }
+ }
+
+ /// <summary> Deletes the document(s) containing <c>term</c>.
+ ///
+ /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
+ /// you should immediately close the writer. See <a
+ /// href="#OOME">above</a> for details.<p/>
+ ///
+ /// </summary>
+ /// <param name="term">the term to identify the documents to be deleted
+ /// </param>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
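+ /// <summary> For example (a sketch; the field and value are illustrative):
+ /// <code>
+ /// writer.DeleteDocuments(new Term("id", "42"));
+ /// </code>
+ /// </summary>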
+ public virtual void DeleteDocuments(Term term)
+ {
+ EnsureOpen();
+ try
+ {
+ bool doFlush = docWriter.BufferDeleteTerm(term);
+ if (doFlush)
+ Flush(true, false, false);
+ }
+ catch (System.OutOfMemoryException oom)
+ {
+ HandleOOM(oom, "deleteDocuments(Term)");
+ }
+ }
+
+ /// <summary> Deletes the document(s) containing any of the
+ /// terms. All deletes are flushed at the same time.
+ ///
+ /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
+ /// you should immediately close the writer. See <a
+ /// href="#OOME">above</a> for details.<p/>
+ ///
+ /// </summary>
+ /// <param name="terms">array of terms to identify the documents
+ /// to be deleted
+ /// </param>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
+ public virtual void DeleteDocuments(params Term[] terms)
+ {
+ EnsureOpen();
+ try
+ {
+ bool doFlush = docWriter.BufferDeleteTerms(terms);
+ if (doFlush)
+ Flush(true, false, false);
+ }
+ catch (System.OutOfMemoryException oom)
+ {
+ HandleOOM(oom, "deleteDocuments(params Term[])");
+ }
+ }
+
+ /// <summary> Deletes the document(s) matching the provided query.
+ ///
+ /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
+ /// you should immediately close the writer. See <a
+ /// href="#OOME">above</a> for details.<p/>
+ ///
+ /// </summary>
+ /// <param name="query">the query to identify the documents to be deleted
+ /// </param>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
+ public virtual void DeleteDocuments(Query query)
+ {
+ EnsureOpen();
+ bool doFlush = docWriter.BufferDeleteQuery(query);
+ if (doFlush)
+ Flush(true, false, false);
+ }
+
+ /// <summary> Deletes the document(s) matching any of the provided queries.
+ /// All deletes are flushed at the same time.
+ ///
+ /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
+ /// you should immediately close the writer. See <a
+ /// href="#OOME">above</a> for details.<p/>
+ ///
+ /// </summary>
+ /// <param name="queries">array of queries to identify the documents
+ /// to be deleted
+ /// </param>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
+ public virtual void DeleteDocuments(params Query[] queries)
+ {
+ EnsureOpen();
+ bool doFlush = docWriter.BufferDeleteQueries(queries);
+ if (doFlush)
+ Flush(true, false, false);
+ }
+
+ /// <summary> Updates a document by first deleting the document(s)
+ /// containing <c>term</c> and then adding the new
+ /// document. The delete and then add are atomic as seen
+ /// by a reader on the same index (flush may happen only after
+ /// the add).
+ ///
+ /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
+ /// you should immediately close the writer. See <a
+ /// href="#OOME">above</a> for details.<p/>
+ ///
+ /// </summary>
+ /// <param name="term">the term to identify the document(s) to be
+ /// deleted
+ /// </param>
+ /// <param name="doc">the document to be added
+ /// </param>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
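+ /// <summary> For example (a sketch; assumes "id" is an un-analyzed, unique key field):
+ /// <code>
+ /// // atomically replace the document whose "id" field is "42"
+ /// writer.UpdateDocument(new Term("id", "42"), newDoc);
+ /// </code>
+ /// </summary>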
+ public virtual void UpdateDocument(Term term, Document doc)
+ {
+ EnsureOpen();
+ UpdateDocument(term, doc, Analyzer);
+ }
+
+ /// <summary> Updates a document by first deleting the document(s)
+ /// containing <c>term</c> and then adding the new
+ /// document. The delete and then add are atomic as seen
+ /// by a reader on the same index (flush may happen only after
+ /// the add).
+ ///
+ /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
+ /// you should immediately close the writer. See <a
+ /// href="#OOME">above</a> for details.<p/>
+ ///
+ /// </summary>
+ /// <param name="term">the term to identify the document(s) to be
+ /// deleted
+ /// </param>
+ /// <param name="doc">the document to be added
+ /// </param>
+ /// <param name="analyzer">the analyzer to use when analyzing the document
+ /// </param>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
+ public virtual void UpdateDocument(Term term, Document doc, Analyzer analyzer)
+ {
+ EnsureOpen();
+ try
+ {
+ bool doFlush = false;
+ bool success = false;
+ try
+ {
+ doFlush = docWriter.UpdateDocument(term, doc, analyzer);
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+
+ if (infoStream != null)
+ Message("hit exception updating document");
+
+ lock (this)
+ {
+ // If docWriter has some aborted files that were
+ // never incref'd, then we clean them up here
+ ICollection<string> files = docWriter.AbortedFiles();
+ if (files != null)
+ deleter.DeleteNewFiles(files);
+ }
+ }
+ }
+ if (doFlush)
+ Flush(true, false, false);
+ }
+ catch (System.OutOfMemoryException oom)
+ {
+ HandleOOM(oom, "updateDocument");
+ }
+ }
+
+ // for test purposes
+ internal int GetSegmentCount()
+ {
+ lock (this)
+ {
+ return segmentInfos.Count;
+ }
+ }
+
+ // for test purposes
+ internal int GetNumBufferedDocuments()
+ {
+ lock (this)
+ {
+ return docWriter.NumDocsInRAM;
+ }
+ }
+
+ // for test purposes
+ public /*internal*/ int GetDocCount(int i)
+ {
+ lock (this)
+ {
+ if (i >= 0 && i < segmentInfos.Count)
+ {
+ return segmentInfos.Info(i).docCount;
+ }
+ else
+ {
+ return - 1;
+ }
+ }
+ }
+
+ // for test purposes
+ internal int GetFlushCount()
+ {
+ lock (this)
+ {
+ return flushCount;
+ }
+ }
+
+ // for test purposes
+ internal int GetFlushDeletesCount()
+ {
+ lock (this)
+ {
+ return flushDeletesCount;
+ }
+ }
+
+ internal System.String NewSegmentName()
+ {
+ // Cannot synchronize on IndexWriter because that causes
+ // deadlock
+ lock (segmentInfos)
+ {
+ // Important to increment changeCount so that the
+ // segmentInfos is written on close. Otherwise we
+ // could close, re-open and re-return the same segment
+ // name that was previously returned which can cause
+ // problems at least with ConcurrentMergeScheduler.
+ changeCount++;
+ return "_" + Number.ToString(segmentInfos.counter++);
+ }
+ }
+
+ /// <summary>If non-null, information about merges will be printed to this.</summary>
+ private System.IO.StreamWriter infoStream = null;
+ private static System.IO.StreamWriter defaultInfoStream = null;
+
+ /// <summary> Requests an "optimize" operation on an index, priming the index
+ /// for the fastest available search. Traditionally this has meant
+ /// merging all segments into a single segment as is done in the
+ /// default merge policy, but individual merge policies may implement
+ /// optimize in different ways.
+ ///
+ /// <p/>It is recommended that this method be called upon completion of indexing. In
+ /// environments with frequent updates, optimize is best done during low volume times, if at all.
+ ///
+ /// <p/>
+ /// <p/>See http://www.gossamer-threads.com/lists/lucene/java-dev/47895 for more discussion. <p/>
+ ///
+ /// <p/>Note that optimize requires 2X the index size free
+ /// space in your Directory (3X if you're using compound
+ /// file format). For example, if your index
+ /// size is 10 MB then you need 20 MB free for optimize to
+ /// complete (30 MB if you're using compound file format).<p/>
+ ///
+ /// <p/>If some but not all readers re-open while an
+ /// optimize is underway, this will cause > 2X temporary
+ /// space to be consumed as those new readers will then
+ /// hold open the partially optimized segments at that
+ /// time. It is best not to re-open readers while optimize
+ /// is running.<p/>
+ ///
+ /// <p/>The actual temporary usage could be much less than
+ /// these figures (it depends on many factors).<p/>
+ ///
+ /// <p/>In general, once the optimize completes, the total size of the
+ /// index will be less than the size of the starting index.
+ /// It could be quite a bit smaller (if there were many
+ /// pending deletes) or just slightly smaller.<p/>
+ ///
+ /// <p/>If an Exception is hit during optimize(), for example
+ /// due to disk full, the index will not be corrupt and no
+ /// documents will have been lost. However, it may have
+ /// been partially optimized (some segments were merged but
+ /// not all), and it's possible that one of the segments in
+ /// the index will be in non-compound format even when
+ /// using compound file format. This will occur when the
+ /// Exception is hit during conversion of the segment into
+ /// compound format.<p/>
+ ///
+ /// <p/>This call will optimize those segments present in
+ /// the index when the call started. If other threads are
+ /// still adding documents and flushing segments, those
+ /// newly created segments will not be optimized unless you
+ /// call optimize again.<p/>
+ ///
+ /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
+ /// you should immediately close the writer. See <a
+ /// href="#OOME">above</a> for details.<p/>
+ ///
+ /// </summary>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
+ /// <seealso cref="Index.LogMergePolicy.FindMergesForOptimize">
+ /// </seealso>
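+ /// <summary> A typical call sequence (a sketch):
+ /// <code>
+ /// // after bulk indexing completes:
+ /// writer.Optimize();   // merge down to a single segment
+ /// writer.Dispose();    // commit the changes and release the write lock
+ /// </code>
+ /// </summary>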
+ public virtual void Optimize()
+ {
+ Optimize(true);
+ }
+
+ /// <summary> Optimize the index down to &lt;= maxNumSegments. If
+ /// maxNumSegments==1 then this is the same as <see cref="Optimize()" />
+ ///.
+ ///
+ /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
+ /// you should immediately close the writer. See <a
+ /// href="#OOME">above</a> for details.<p/>
+ ///
+ /// </summary>
+ /// <param name="maxNumSegments">maximum number of segments left
+ /// in the index after optimization finishes
+ /// </param>
+ public virtual void Optimize(int maxNumSegments)
+ {
+ Optimize(maxNumSegments, true);
+ }
+
+ /// <summary>Just like <see cref="Optimize()" />, except you can specify
+ /// whether the call should block until the optimize
+ /// completes. This is only meaningful with a
+ /// <see cref="MergeScheduler" /> that is able to run merges in
+ /// background threads.
+ ///
+ /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
+ /// you should immediately close the writer. See <a
+ /// href="#OOME">above</a> for details.<p/>
+ /// </summary>
+ public virtual void Optimize(bool doWait)
+ {
+ Optimize(1, doWait);
+ }
+
+ /// <summary>Just like <see cref="Optimize(int)" />, except you can
+ /// specify whether the call should block until the
+ /// optimize completes. This is only meaningful with a
+ /// <see cref="MergeScheduler" /> that is able to run merges in
+ /// background threads.
+ ///
+ /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
+ /// you should immediately close the writer. See <a
+ /// href="#OOME">above</a> for details.<p/>
+ /// </summary>
+ public virtual void Optimize(int maxNumSegments, bool doWait)
+ {
+ EnsureOpen();
+
+ if (maxNumSegments < 1)
+ throw new System.ArgumentException("maxNumSegments must be >= 1; got " + maxNumSegments);
+
+ if (infoStream != null)
+ Message("optimize: index now " + SegString());
+
+ Flush(true, false, true);
+
+ lock (this)
+ {
+ ResetMergeExceptions();
+ segmentsToOptimize = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet<SegmentInfo>();
+ optimizeMaxNumSegments = maxNumSegments;
+ int numSegments = segmentInfos.Count;
+ for (int i = 0; i < numSegments; i++)
+ segmentsToOptimize.Add(segmentInfos.Info(i));
+
+ // Now mark all pending & running merges as optimize
+ // merge:
+ foreach(MergePolicy.OneMerge merge in pendingMerges)
+ {
+ merge.optimize = true;
+ merge.maxNumSegmentsOptimize = maxNumSegments;
+ }
+
+ foreach(MergePolicy.OneMerge merge in runningMerges)
+ {
+ merge.optimize = true;
+ merge.maxNumSegmentsOptimize = maxNumSegments;
+ }
+ }
+
+ MaybeMerge(maxNumSegments, true);
+
+ if (doWait)
+ {
+ lock (this)
+ {
+ while (true)
+ {
+
+ if (hitOOM)
+ {
+ throw new System.SystemException("this writer hit an OutOfMemoryError; cannot complete optimize");
+ }
+
+ if (mergeExceptions.Count > 0)
+ {
+ // Forward any exceptions in background merge
+ // threads to the current thread:
+ int size = mergeExceptions.Count;
+ for (int i = 0; i < size; i++)
+ {
+ MergePolicy.OneMerge merge = mergeExceptions[i];
+ if (merge.optimize)
+ {
+ System.IO.IOException err;
+ System.Exception t = merge.GetException();
+ if (t != null)
+ err = new System.IO.IOException("background merge hit exception: " + merge.SegString(directory), t);
+ else
+ err = new System.IO.IOException("background merge hit exception: " + merge.SegString(directory));
+ throw err;
+ }
+ }
+ }
+
+ if (OptimizeMergesPending())
+ DoWait();
+ else
+ break;
+ }
+ }
+
+ // If close is called while we are still
+ // running, throw an exception so the calling
+ // thread will know the optimize did not
+ // complete
+ EnsureOpen();
+ }
+
+ // NOTE: in the ConcurrentMergeScheduler case, when
+ // doWait is false, we can return immediately while
+ // background threads accomplish the optimization
+ }
+
+ /// <summary>Returns true if any merges in pendingMerges or
+ /// runningMerges are optimization merges.
+ /// </summary>
+ private bool OptimizeMergesPending()
+ {
+ lock (this)
+ {
+ foreach (MergePolicy.OneMerge merge in pendingMerges)
+ {
+ if (merge.optimize) return true;
+ }
+
+ foreach(MergePolicy.OneMerge merge in runningMerges)
+ {
+ if (merge.optimize) return true;
+ }
+
+ return false;
+ }
+ }
+
+ /// <summary>Just like <see cref="ExpungeDeletes()" />, except you can
+ /// specify whether the call should block until the
+ /// operation completes. This is only meaningful with a
+ /// <see cref="MergeScheduler" /> that is able to run merges in
+ /// background threads.
+ ///
+ /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
+ /// you should immediately close the writer. See <a
+ /// href="#OOME">above</a> for details.<p/>
+ /// </summary>
+ public virtual void ExpungeDeletes(bool doWait)
+ {
+ EnsureOpen();
+
+ if (infoStream != null)
+ Message("expungeDeletes: index now " + SegString());
+
+ MergePolicy.MergeSpecification spec;
+
+ lock (this)
+ {
+ spec = mergePolicy.FindMergesToExpungeDeletes(segmentInfos);
+ if (spec != null)
+ {
+ int numMerges = spec.merges.Count;
+ for (int i = 0; i < numMerges; i++)
+ RegisterMerge(spec.merges[i]);
+ }
+ }
+
+ mergeScheduler.Merge(this);
+
+ if (spec != null && doWait)
+ {
+ int numMerges = spec.merges.Count;
+ lock (this)
+ {
+ bool running = true;
+ while (running)
+ {
+
+ if (hitOOM)
+ {
+ throw new System.SystemException("this writer hit an OutOfMemoryError; cannot complete expungeDeletes");
+ }
+
+ // Check each merge that MergePolicy asked us to
+ // do, to see if any of them are still running and
+ // if any of them have hit an exception.
+ running = false;
+ for (int i = 0; i < numMerges; i++)
+ {
+ MergePolicy.OneMerge merge = spec.merges[i];
+ if (pendingMerges.Contains(merge) || runningMerges.Contains(merge))
+ running = true;
+ System.Exception t = merge.GetException();
+ if (t != null)
+ {
+ System.IO.IOException ioe = new System.IO.IOException("background merge hit exception: " + merge.SegString(directory), t);
+ throw ioe;
+ }
+ }
+
+ // If any of our merges are still running, wait:
+ if (running)
+ DoWait();
+ }
+ }
+ }
+
+ // NOTE: in the ConcurrentMergeScheduler case, when
+ // doWait is false, we can return immediately while
+ // background threads accomplish the optimization
+ }
+
+
+ /// <summary>Expunges all deletes from the index. When an index
+ /// has many document deletions (or updates to existing
+ /// documents), it's best to either call optimize or
+ /// expungeDeletes to remove all unused data in the index
+ /// associated with the deleted documents. To see how
+ /// many deletions you have pending in your index, call
+ /// <see cref="IndexReader.NumDeletedDocs" />
+ /// This saves disk space and memory usage while
+ /// searching. expungeDeletes should be somewhat faster
+ /// than optimize since it does not insist on reducing the
+ /// index to a single segment (though, this depends on the
+ /// <see cref="MergePolicy" />; see <see cref="Index.MergePolicy.FindMergesToExpungeDeletes" />.). Note that
+ /// this call does not first commit any buffered
+ /// documents, so you must do so yourself if necessary.
+ /// See also <seealso cref="ExpungeDeletes(bool)" />
+ ///
+ /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
+ /// you should immediately close the writer. See <a
+ /// href="#OOME">above</a> for details.<p/>
+ /// </summary>
+ public virtual void ExpungeDeletes()
+ {
+ ExpungeDeletes(true);
+ }
+
+ /// <summary> Expert: asks the mergePolicy whether any merges are
+ /// necessary now and, if so, runs the requested merges and
+ /// then iterates (testing again whether merges are needed) until no
+ /// more merges are returned by the mergePolicy.
+ ///
+ /// Explicit calls to maybeMerge() are usually not
+ /// necessary. The most common case is when merge policy
+ /// parameters have changed.
+ ///
+ /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
+ /// you should immediately close the writer. See <a
+ /// href="#OOME">above</a> for details.<p/>
+ /// </summary>
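+ /// <example>
+ /// Illustrative sketch (assumptions: "writer" is an open IndexWriter whose
+ /// merge policy parameters were just changed elsewhere):
+ /// <code>
+ /// // ... adjust merge policy parameters here, e.g. lower the merge factor ...
+ /// writer.MaybeMerge(); // re-evaluate and run any merges the policy now requests
+ /// </code>
+ /// </example>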
+ public void MaybeMerge()
+ {
+ MaybeMerge(false);
+ }
+
+ private void MaybeMerge(bool optimize)
+ {
+ MaybeMerge(1, optimize);
+ }
+
+ private void MaybeMerge(int maxNumSegmentsOptimize, bool optimize)
+ {
+ UpdatePendingMerges(maxNumSegmentsOptimize, optimize);
+ mergeScheduler.Merge(this);
+ }
+
+ private void UpdatePendingMerges(int maxNumSegmentsOptimize, bool optimize)
+ {
+ lock (this)
+ {
+ System.Diagnostics.Debug.Assert(!optimize || maxNumSegmentsOptimize > 0);
+
+ if (stopMerges)
+ {
+ return;
+ }
+
+ // Do not start new merges if we've hit OOME
+ if (hitOOM)
+ {
+ return ;
+ }
+
+ MergePolicy.MergeSpecification spec;
+ if (optimize)
+ {
+ spec = mergePolicy.FindMergesForOptimize(segmentInfos, maxNumSegmentsOptimize, segmentsToOptimize);
+
+ if (spec != null)
+ {
+ int numMerges = spec.merges.Count;
+ for (int i = 0; i < numMerges; i++)
+ {
+ MergePolicy.OneMerge merge = spec.merges[i];
+ merge.optimize = true;
+ merge.maxNumSegmentsOptimize = maxNumSegmentsOptimize;
+ }
+ }
+ }
+ else
+ {
+ spec = mergePolicy.FindMerges(segmentInfos);
+ }
+
+ if (spec != null)
+ {
+ int numMerges = spec.merges.Count;
+ for (int i = 0; i < numMerges; i++)
+ RegisterMerge(spec.merges[i]);
+ }
+ }
+ }
+
+ /// <summary>Expert: the <see cref="MergeScheduler" /> calls this method
+ /// to retrieve the next merge requested by the
+ /// MergePolicy
+ /// </summary>
+ internal virtual MergePolicy.OneMerge GetNextMerge()
+ {
+ lock (this)
+ {
+ if (pendingMerges.Count == 0)
+ return null;
+ else
+ {
+ // Advance the merge from pending to running
+ MergePolicy.OneMerge merge = pendingMerges.First.Value;
+ pendingMerges.RemoveFirst();
+ runningMerges.Add(merge);
+ return merge;
+ }
+ }
+ }
+
+ /// <summary>Like getNextMerge() except only returns a merge if it's
+ /// external.
+ /// </summary>
+ private MergePolicy.OneMerge GetNextExternalMerge()
+ {
+ lock (this)
+ {
+ if (pendingMerges.Count == 0)
+ return null;
+ else
+ {
+ var it = pendingMerges.GetEnumerator();
+ while (it.MoveNext())
+ {
+ MergePolicy.OneMerge merge = it.Current;
+ if (merge.isExternal)
+ {
+ // Advance the merge from pending to running
+ pendingMerges.Remove(merge); // {{Aroush-2.9}} From Mike Garski: this is an O(n) op... is that an issue?
+ runningMerges.Add(merge);
+ return merge;
+ }
+ }
+
+ // All existing merges do not involve external segments
+ return null;
+ }
+ }
+ }
+
+ /*
+ * Begin a transaction. During a transaction, any segment
+ * merges that happen (or RAM segments that are flushed) will not
+ * write a new segments file and will not remove any files
+ * that were present at the start of the transaction. You
+ * must make a matched (try/finally) call to
+ * commitTransaction() or rollbackTransaction() to finish
+ * the transaction.
+ *
+ * Note that buffered documents and delete terms are not handled
+ * within the transactions, so they must be flushed before the
+ * transaction is started.
+ */
+ private void StartTransaction(bool haveReadLock)
+ {
+ lock (this)
+ {
+
+ bool success = false;
+ try
+ {
+ if (infoStream != null)
+ Message("now start transaction");
+
+ System.Diagnostics.Debug.Assert(docWriter.GetNumBufferedDeleteTerms() == 0 ,
+ "calling startTransaction with buffered delete terms not supported: numBufferedDeleteTerms=" + docWriter.GetNumBufferedDeleteTerms());
+ System.Diagnostics.Debug.Assert(docWriter.NumDocsInRAM == 0 ,
+ "calling startTransaction with buffered documents not supported: numDocsInRAM=" + docWriter.NumDocsInRAM);
+
+ EnsureOpen();
+
+ // If a transaction is trying to roll back (because
+ // addIndexes hit an exception) then wait here until
+ // that's done:
+ lock (this)
+ {
+ while (stopMerges)
+ DoWait();
+ }
+ success = true;
+ }
+ finally
+ {
+ // Release the read lock our caller acquired if we
+ // hit an exception
+ if (!success && haveReadLock)
+ ReleaseRead();
+ }
+
+ if (haveReadLock)
+ {
+ UpgradeReadToWrite();
+ }
+ else
+ {
+ AcquireWrite();
+ }
+
+ success = false;
+ try
+ {
+ localRollbackSegmentInfos = (SegmentInfos) segmentInfos.Clone();
+
+ System.Diagnostics.Debug.Assert(!HasExternalSegments());
+
+ localFlushedDocCount = docWriter.GetFlushedDocCount();
+
+ // Protect the current files from deletion in case we
+ // need to roll back; the matching decRef happens in
+ // commitTransaction/rollbackTransaction:
+ deleter.IncRef(segmentInfos, false);
+
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ FinishAddIndexes();
+ }
+ }
+ }
+
+ /*
+ * Rolls back the transaction and restores state to where
+ * we were at the start.
+ */
+ private void RollbackTransaction()
+ {
+ lock (this)
+ {
+
+ if (infoStream != null)
+ Message("now rollback transaction");
+
+ if (docWriter != null)
+ {
+ docWriter.SetFlushedDocCount(localFlushedDocCount);
+ }
+
+ // Must finish merges before rolling back segmentInfos
+ // so merges don't hit exceptions on trying to commit
+ // themselves, don't get files deleted out from under
+ // them, etc:
+ FinishMerges(false);
+
+ // Keep the same segmentInfos instance but replace all
+ // of its SegmentInfo instances. This is so the next
+ // attempt to commit using this instance of IndexWriter
+ // will always write to a new generation ("write once").
+ segmentInfos.Clear();
+ segmentInfos.AddRange(localRollbackSegmentInfos);
+ localRollbackSegmentInfos = null;
+
+ // This must come after we rollback segmentInfos, so
+ // that if a commit() kicks off it does not see the
+ // segmentInfos with external segments
+ FinishAddIndexes();
+
+ // Ask deleter to locate unreferenced files we had
+ // created & remove them:
+ deleter.Checkpoint(segmentInfos, false);
+
+ // Remove the incRef we did in startTransaction:
+ deleter.DecRef(segmentInfos);
+
+ // Also ask deleter to remove any newly created files
+ // that were never incref'd; this "garbage" is created
+ // when a merge kicks off but aborts part way through
+ // before it had a chance to incRef the files it had
+ // partially created
+ deleter.Refresh();
+
+ System.Threading.Monitor.PulseAll(this);
+
+ System.Diagnostics.Debug.Assert(!HasExternalSegments());
+ }
+ }
+
+ /*
+ * Commits the transaction. This will write the new
+ * segments file and remove any pending deletions we have
+ * accumulated during the transaction
+ */
+ private void CommitTransaction()
+ {
+ lock (this)
+ {
+
+ if (infoStream != null)
+ Message("now commit transaction");
+
+ // Give deleter a chance to remove files now:
+ Checkpoint();
+
+ // Remove the incRef we did in startTransaction.
+ deleter.DecRef(localRollbackSegmentInfos);
+
+ localRollbackSegmentInfos = null;
+
+ System.Diagnostics.Debug.Assert(!HasExternalSegments());
+
+ FinishAddIndexes();
+ }
+ }
+
+ /// <summary> Close the <c>IndexWriter</c> without committing
+ /// any changes that have occurred since the last commit
+ /// (or since it was opened, if commit hasn't been called).
+ /// This removes any temporary files that had been created,
+ /// after which the state of the index will be the same as
+ /// it was when commit() was last called or when this
+ /// writer was first opened. This also clears a previous
+ /// call to <see cref="PrepareCommit()" />.
+ /// </summary>
+ /// <throws> IOException if there is a low-level IO error </throws>
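+ /// <example>
+ /// A hedged usage sketch (not from the original sources): discard everything
+ /// since the last commit when indexing fails. "writer" and "doc" are assumed
+ /// to be an open IndexWriter and a Document built elsewhere.
+ /// <code>
+ /// try
+ /// {
+ ///     writer.AddDocument(doc);
+ ///     writer.Commit();
+ /// }
+ /// catch (Exception)
+ /// {
+ ///     writer.Rollback(); // revert to the last commit; this also closes the writer
+ ///     throw;
+ /// }
+ /// </code>
+ /// </example>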
+ public virtual void Rollback()
+ {
+ EnsureOpen();
+
+ // Ensure that only one thread actually gets to do the closing:
+ if (ShouldClose())
+ RollbackInternal();
+ }
+
+ private void RollbackInternal()
+ {
+
+ bool success = false;
+
+ if (infoStream != null)
+ {
+ Message("rollback");
+ }
+
+ docWriter.PauseAllThreads();
+
+ try
+ {
+ FinishMerges(false);
+
+ // Must pre-close these two, in case they increment
+ // changeCount so that we can then set it to false
+ // before calling closeInternal
+ mergePolicy.Close();
+ mergeScheduler.Close();
+
+ lock (this)
+ {
+
+ if (pendingCommit != null)
+ {
+ pendingCommit.RollbackCommit(directory);
+ deleter.DecRef(pendingCommit);
+ pendingCommit = null;
+ System.Threading.Monitor.PulseAll(this);
+ }
+
+ // Keep the same segmentInfos instance but replace all
+ // of its SegmentInfo instances. This is so the next
+ // attempt to commit using this instance of IndexWriter
+ // will always write to a new generation ("write
+ // once").
+ segmentInfos.Clear();
+ segmentInfos.AddRange(rollbackSegmentInfos);
+
+ System.Diagnostics.Debug.Assert(!HasExternalSegments());
+
+ docWriter.Abort();
+
+ System.Diagnostics.Debug.Assert(TestPoint("rollback before checkpoint"));
+
+ // Ask deleter to locate unreferenced files & remove
+ // them:
+ deleter.Checkpoint(segmentInfos, false);
+ deleter.Refresh();
+ }
+
+ // Don't bother saving any changes in our segmentInfos
+ readerPool.Clear(null);
+
+ lastCommitChangeCount = changeCount;
+
+ success = true;
+ }
+ catch (System.OutOfMemoryException oom)
+ {
+ HandleOOM(oom, "rollbackInternal");
+ }
+ finally
+ {
+ lock (this)
+ {
+ if (!success)
+ {
+ docWriter.ResumeAllThreads();
+ closing = false;
+ System.Threading.Monitor.PulseAll(this);
+ if (infoStream != null)
+ Message("hit exception during rollback");
+ }
+ }
+ }
+
+ CloseInternal(false);
+ }
+
+ /// <summary> Delete all documents in the index.
+ ///
+ /// <p/>This method will drop all buffered documents and will
+ /// remove all segments from the index. This change will not be
+ /// visible until a <see cref="Commit()" /> has been called. This method
+ /// can be rolled back using <see cref="Rollback()" />.<p/>
+ ///
+ /// <p/>NOTE: this method is much faster than using deleteDocuments( new MatchAllDocsQuery() ).<p/>
+ ///
+ /// <p/>NOTE: this method will forcefully abort all merges
+ /// in progress. If other threads are running <see cref="Optimize()" />
+ /// or any of the addIndexes methods, they
+ /// will receive <see cref="Index.MergePolicy.MergeAbortedException" />s.
+ /// </summary>
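+ /// <example>
+ /// Minimal sketch (assuming "writer" is an open IndexWriter): clearing the
+ /// index and making the change visible, or undoing it instead.
+ /// <code>
+ /// writer.DeleteAll();
+ /// writer.Commit();   // make the now-empty index visible to new readers
+ /// // ...or call writer.Rollback() instead to restore the previous commit
+ /// </code>
+ /// </example>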
+ public virtual void DeleteAll()
+ {
+ lock (this)
+ {
+ docWriter.PauseAllThreads();
+ try
+ {
+
+ // Abort any running merges
+ FinishMerges(false);
+
+ // Remove any buffered docs
+ docWriter.Abort();
+ docWriter.SetFlushedDocCount(0);
+
+ // Remove all segments
+ segmentInfos.Clear();
+
+ // Ask deleter to locate unreferenced files & remove them:
+ deleter.Checkpoint(segmentInfos, false);
+ deleter.Refresh();
+
+ // Don't bother saving any changes in our segmentInfos
+ readerPool.Clear(null);
+
+ // Mark that the index has changed
+ ++changeCount;
+ }
+ catch (System.OutOfMemoryException oom)
+ {
+ HandleOOM(oom, "deleteAll");
+ }
+ finally
+ {
+ docWriter.ResumeAllThreads();
+ if (infoStream != null)
+ {
+ Message("hit exception during deleteAll");
+ }
+ }
+ }
+ }
+
+ private void FinishMerges(bool waitForMerges)
+ {
+ lock (this)
+ {
+ if (!waitForMerges)
+ {
+
+ stopMerges = true;
+
+ // Abort all pending & running merges:
+ foreach(MergePolicy.OneMerge merge in pendingMerges)
+ {
+ if (infoStream != null)
+ Message("now abort pending merge " + merge.SegString(directory));
+ merge.Abort();
+ MergeFinish(merge);
+ }
+ pendingMerges.Clear();
+
+ foreach(MergePolicy.OneMerge merge in runningMerges)
+ {
+ if (infoStream != null)
+ Message("now abort running merge " + merge.SegString(directory));
+ merge.Abort();
+ }
+
+ // Ensure any running addIndexes finishes. It's fine
+ // if a new one attempts to start, because its merges
+ // will quickly see that stopMerges == true and abort.
+ AcquireRead();
+ ReleaseRead();
+
+ // These merges periodically check whether they have
+ // been aborted, and stop if so. We wait here to make
+ // sure they all stop. It should not take very long
+ // because the merge threads periodically check if
+ // they are aborted.
+ while (runningMerges.Count > 0)
+ {
+ if (infoStream != null)
+ Message("now wait for " + runningMerges.Count + " running merge to abort");
+ DoWait();
+ }
+
+ stopMerges = false;
+ System.Threading.Monitor.PulseAll(this);
+
+ System.Diagnostics.Debug.Assert(0 == mergingSegments.Count);
+
+ if (infoStream != null)
+ Message("all running merges have aborted");
+ }
+ else
+ {
+ // waitForMerges() will ensure any running addIndexes finishes.
+ // It's fine if a new one attempts to start, because the
+ // call from our caller above will see that we are in the
+ // process of closing and will throw an
+ // AlreadyClosedException.
+ WaitForMerges();
+ }
+ }
+ }
+
+ /// <summary> Wait for any currently outstanding merges to finish.
+ ///
+ /// <p/>It is guaranteed that any merges started prior to calling this method
+ /// will have completed once this method completes.<p/>
+ /// </summary>
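+ /// <example>
+ /// Illustrative sketch (assuming "writer" is an open IndexWriter using a
+ /// background MergeScheduler): start a non-blocking optimize, do other work,
+ /// then wait for the merges it scheduled.
+ /// <code>
+ /// writer.Optimize(1, false); // kick off merging down to one segment without blocking
+ /// // ... other work ...
+ /// writer.WaitForMerges();    // block until the merges started above have completed
+ /// </code>
+ /// </example>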
+ public virtual void WaitForMerges()
+ {
+ lock (this)
+ {
+ // Ensure any running addIndexes finishes.
+ AcquireRead();
+ ReleaseRead();
+
+ while (pendingMerges.Count > 0 || runningMerges.Count > 0)
+ {
+ DoWait();
+ }
+
+ // sanity check
+ System.Diagnostics.Debug.Assert(0 == mergingSegments.Count);
+ }
+ }
+
+ /*
+ * Called whenever the SegmentInfos has been updated and
+ * the index files referenced exist (correctly) in the
+ * index directory.
+ */
+ private void Checkpoint()
+ {
+ lock (this)
+ {
+ changeCount++;
+ deleter.Checkpoint(segmentInfos, false);
+ }
+ }
+
+ private void FinishAddIndexes()
+ {
+ ReleaseWrite();
+ }
+
+ private void BlockAddIndexes(bool includePendingClose)
+ {
+
+ AcquireRead();
+
+ bool success = false;
+ try
+ {
+
+ // Make sure we are still open since we could have
+ // waited quite a while for last addIndexes to finish
+ EnsureOpen(includePendingClose);
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ ReleaseRead();
+ }
+ }
+
+ private void ResumeAddIndexes()
+ {
+ ReleaseRead();
+ }
+
+ private void ResetMergeExceptions()
+ {
+ lock (this)
+ {
+ mergeExceptions = new List<MergePolicy.OneMerge>();
+ mergeGen++;
+ }
+ }
+
+ private void NoDupDirs(Directory[] dirs)
+ {
+ HashSet<Directory> dups = new HashSet<Directory>();
+ for (int i = 0; i < dirs.Length; i++)
+ {
+ if (dups.Contains(dirs[i]))
+ {
+ throw new System.ArgumentException("Directory " + dirs[i] + " appears more than once");
+ }
+ if (dirs[i] == directory)
+ throw new System.ArgumentException("Cannot add directory to itself");
+ dups.Add(dirs[i]);
+ }
+ }
+
+ /// <summary> Merges all segments from an array of indexes into this
+ /// index.
+ ///
+ /// <p/>This may be used to parallelize batch indexing. A large document
+ /// collection can be broken into sub-collections. Each sub-collection can be
+ /// indexed in parallel, on a different thread, process or machine. The
+ /// complete index can then be created by merging sub-collection indexes
+ /// with this method.
+ ///
+ /// <p/><b>NOTE:</b> the index in each Directory must not be
+ /// changed (opened by a writer) while this method is
+ /// running. This method does not acquire a write lock in
+ /// each input Directory, so it is up to the caller to
+ /// enforce this.
+ ///
+ /// <p/><b>NOTE:</b> while this is running, any attempts to
+ /// add or delete documents (with another thread) will be
+ /// paused until this method completes.
+ ///
+ /// <p/>This method is transactional in how Exceptions are
+ /// handled: it does not commit a new segments_N file until
+ /// all indexes are added. This means if an Exception
+ /// occurs (for example disk full), then either no indexes
+ /// will have been added or they all will have been.<p/>
+ ///
+ /// <p/>Note that this requires temporary free space in the
+ /// Directory up to 2X the sum of all input indexes
+ /// (including the starting index). If readers/searchers
+ /// are open against the starting index, then temporary
+ /// free space required will be higher by the size of the
+ /// starting index (see <see cref="Optimize()" /> for details).
+ /// <p/>
+ ///
+ /// <p/>Once this completes, the final size of the index
+ /// will be less than the sum of all input index sizes
+ /// (including the starting index). It could be quite a
+ /// bit smaller (if there were many pending deletes) or
+ /// just slightly smaller.<p/>
+ ///
+ /// <p/>
+ /// This requires this index not be among those to be added.
+ ///
+ /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
+ /// you should immediately close the writer. See <a
+ /// href="#OOME">above</a> for details.<p/>
+ ///
+ /// </summary>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
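+ /// <example>
+ /// A minimal sketch of the parallel-indexing scenario described above; "dirA"
+ /// and "dirB" are assumed Directory instances holding sub-indexes built
+ /// elsewhere, and "writer" is the IndexWriter for the combined index.
+ /// <code>
+ /// writer.AddIndexesNoOptimize(dirA, dirB);
+ /// writer.Commit(); // the added segments become visible at the next commit
+ /// </code>
+ /// </example>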
+ public virtual void AddIndexesNoOptimize(params Directory[] dirs)
+ {
+
+ EnsureOpen();
+
+ NoDupDirs(dirs);
+
+ // Do not allow add docs or deletes while we are running:
+ docWriter.PauseAllThreads();
+
+ try
+ {
+ if (infoStream != null)
+ Message("flush at addIndexesNoOptimize");
+ Flush(true, false, true);
+
+ bool success = false;
+
+ StartTransaction(false);
+
+ try
+ {
+
+ int docCount = 0;
+ lock (this)
+ {
+ EnsureOpen();
+
+ for (int i = 0; i < dirs.Length; i++)
+ {
+ if (directory == dirs[i])
+ {
+ // cannot add this index: segments may be deleted in merge before added
+ throw new System.ArgumentException("Cannot add this index to itself");
+ }
+
+ SegmentInfos sis = new SegmentInfos(); // read infos from dir
+ sis.Read(dirs[i]);
+ for (int j = 0; j < sis.Count; j++)
+ {
+ SegmentInfo info = sis.Info(j);
+ System.Diagnostics.Debug.Assert(!segmentInfos.Contains(info), "dup info dir=" + info.dir + " name=" + info.name);
+ docCount += info.docCount;
+ segmentInfos.Add(info); // add each info
+ }
+ }
+ }
+
+ // Notify DocumentsWriter that the flushed count just increased
+ docWriter.UpdateFlushedDocCount(docCount);
+
+ MaybeMerge();
+
+ EnsureOpen();
+
+ // If after merging there remain segments in the index
+ // that are in a different directory, just copy these
+ // over into our index. This is necessary (before
+ // finishing the transaction) to avoid leaving the
+ // index in an unusable (inconsistent) state.
+ ResolveExternalSegments();
+
+ EnsureOpen();
+
+ success = true;
+ }
+ finally
+ {
+ if (success)
+ {
+ CommitTransaction();
+ }
+ else
+ {
+ RollbackTransaction();
+ }
+ }
+ }
+ catch (System.OutOfMemoryException oom)
+ {
+ HandleOOM(oom, "addIndexesNoOptimize");
+ }
+ finally
+ {
+ if (docWriter != null)
+ {
+ docWriter.ResumeAllThreads();
+ }
+ }
+ }
+
+ private bool HasExternalSegments()
+ {
+ return segmentInfos.HasExternalSegments(directory);
+ }
+
+ /* If any of our segments are using a directory != ours
+ * then we have to either copy them over one by one, merge
+ * them (if merge policy has chosen to) or wait until
+ * currently running merges (in the background) complete.
+ * We don't return until the SegmentInfos has no more
+ * external segments. Currently this is only used by
+ * addIndexesNoOptimize(). */
+ private void ResolveExternalSegments()
+ {
+
+ bool any = false;
+
+ bool done = false;
+
+ while (!done)
+ {
+ SegmentInfo info = null;
+ MergePolicy.OneMerge merge = null;
+ lock (this)
+ {
+
+ if (stopMerges)
+ throw new MergePolicy.MergeAbortedException("rollback() was called or addIndexes* hit an unhandled exception");
+
+ int numSegments = segmentInfos.Count;
+
+ done = true;
+ for (int i = 0; i < numSegments; i++)
+ {
+ info = segmentInfos.Info(i);
+ if (info.dir != directory)
+ {
+ done = false;
+ MergePolicy.OneMerge newMerge = new MergePolicy.OneMerge(segmentInfos.Range(i, 1 + i), mergePolicy is LogMergePolicy && UseCompoundFile);
+
+ // Returns true if no running merge conflicts
+ // with this one (and, records this merge as
+ // pending), ie, this segment is not currently
+ // being merged:
+ if (RegisterMerge(newMerge))
+ {
+ merge = newMerge;
+
+ // If this segment is not currently being
+ // merged, then advance it to running & run
+ // the merge ourself (below):
+ pendingMerges.Remove(merge); // {{Aroush-2.9}} From Mike Garski: this is an O(n) op... is that an issue?
+ runningMerges.Add(merge);
+ break;
+ }
+ }
+ }
+
+ if (!done && merge == null)
+ // We are not yet done (external segments still
+ // exist in segmentInfos), yet, all such segments
+ // are currently "covered" by a pending or running
+ // merge. We now try to grab any pending merge
+ // that involves external segments:
+ merge = GetNextExternalMerge();
+
+ if (!done && merge == null)
+ // We are not yet done, and, all external segments
+ // fall under merges that the merge scheduler is
+ // currently running. So, we now wait and check
+ // back to see if the merge has completed.
+ DoWait();
+ }
+
+ if (merge != null)
+ {
+ any = true;
+ Merge(merge);
+ }
+ }
+
+ if (any)
+ // Sometimes, on copying an external segment over,
+ // more merges may become necessary:
+ mergeScheduler.Merge(this);
+ }
+
+ /// <summary>Merges the provided indexes into this index.
+ /// <p/>After this completes, the index is optimized. <p/>
+ /// <p/>The provided IndexReaders are not closed.<p/>
+ ///
+ /// <p/><b>NOTE:</b> while this is running, any attempts to
+ /// add or delete documents (with another thread) will be
+ /// paused until this method completes.
+ ///
+ /// <p/>See <see cref="AddIndexesNoOptimize(Directory[])" /> for
+ /// details on transactional semantics, temporary free
+ /// space required in the Directory, and non-CFS segments
+ /// on an Exception.<p/>
+ ///
+ /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
+ /// you should immediately close the writer. See <a
+ /// href="#OOME">above</a> for details.<p/>
+ ///
+ /// </summary>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
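+ /// <example>
+ /// Hedged usage sketch: "readerA" and "readerB" are assumed open IndexReaders
+ /// over other indexes; they are left open by this call and must be closed by
+ /// the caller.
+ /// <code>
+ /// writer.AddIndexes(readerA, readerB);
+ /// writer.Commit();
+ /// </code>
+ /// </example>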
+ public virtual void AddIndexes(params IndexReader[] readers)
+ {
+
+ EnsureOpen();
+
+ // Do not allow add docs or deletes while we are running:
+ docWriter.PauseAllThreads();
+
+ // We must pre-acquire a read lock here (and upgrade to
+ // write lock in startTransaction below) so that no
+ // other addIndexes is allowed to start up after we have
+ // flushed & optimized but before we then start our
+ // transaction. This is because the merging below
+ // requires that only one segment is present in the
+ // index:
+ AcquireRead();
+
+ try
+ {
+
+ SegmentInfo info = null;
+ System.String mergedName = null;
+ SegmentMerger merger = null;
+
+ bool success = false;
+
+ try
+ {
+ Flush(true, false, true);
+ Optimize(); // start with zero or 1 seg
+ success = true;
+ }
+ finally
+ {
+ // Take care to release the read lock if we hit an
+ // exception before starting the transaction
+ if (!success)
+ ReleaseRead();
+ }
+
+ // true means we already have a read lock; if this
+ // call hits an exception it will release the write
+ // lock:
+ StartTransaction(true);
+
+ try
+ {
+ mergedName = NewSegmentName();
+ merger = new SegmentMerger(this, mergedName, null);
+
+ SegmentReader sReader = null;
+ lock (this)
+ {
+ if (segmentInfos.Count == 1)
+ {
+ // add existing index, if any
+ sReader = readerPool.Get(segmentInfos.Info(0), true, BufferedIndexInput.BUFFER_SIZE, - 1);
+ }
+ }
+
+ success = false;
+
+ try
+ {
+ if (sReader != null)
+ merger.Add(sReader);
+
+ for (int i = 0; i < readers.Length; i++)
+ // add new indexes
+ merger.Add(readers[i]);
+
+ int docCount = merger.Merge(); // merge 'em
+
+ lock (this)
+ {
+ segmentInfos.Clear(); // pop old infos & add new
+ info = new SegmentInfo(mergedName, docCount, directory, false, true, - 1, null, false, merger.HasProx());
+ SetDiagnostics(info, "addIndexes(params IndexReader[])");
+ segmentInfos.Add(info);
+ }
+
+ // Notify DocumentsWriter that the flushed count just increased
+ docWriter.UpdateFlushedDocCount(docCount);
+
+ success = true;
+ }
+ finally
+ {
+ if (sReader != null)
+ {
+ readerPool.Release(sReader);
+ }
+ }
+ }
+ finally
+ {
+ if (!success)
+ {
+ if (infoStream != null)
+ Message("hit exception in addIndexes during merge");
+ RollbackTransaction();
+ }
+ else
+ {
+ CommitTransaction();
+ }
+ }
+
+ if (mergePolicy is LogMergePolicy && UseCompoundFile)
+ {
+
+ IList<string> files = null;
+
+ lock (this)
+ {
+ // Must incRef our files so that if another thread
+ // is running merge/optimize, it doesn't delete our
+ // segment's files before we have a chance to
+ // finish making the compound file.
+ if (segmentInfos.Contains(info))
+ {
+ files = info.Files();
+ deleter.IncRef(files);
+ }
+ }
+
+ if (files != null)
+ {
+
+ success = false;
+
+ StartTransaction(false);
+
+ try
+ {
+ merger.CreateCompoundFile(mergedName + ".cfs");
+ lock (this)
+ {
+ info.SetUseCompoundFile(true);
+ }
+
+ success = true;
+ }
+ finally
+ {
+ lock (this)
+ {
+ deleter.DecRef(files);
+ }
+
+ if (!success)
+ {
+ if (infoStream != null)
+ Message("hit exception building compound file in addIndexes during merge");
+
+ RollbackTransaction();
+ }
+ else
+ {
+ CommitTransaction();
+ }
+ }
+ }
+ }
+ }
+ catch (System.OutOfMemoryException oom)
+ {
+ HandleOOM(oom, "addIndexes(params IndexReader[])");
+ }
+ finally
+ {
+ if (docWriter != null)
+ {
+ docWriter.ResumeAllThreads();
+ }
+ }
+ }
+
+ ///<summary>
+ /// A hook for extending classes to execute operations after pending added and
+ /// deleted documents have been flushed to the Directory but before the change
+ /// is committed (new segments_N file written).
+ ///</summary>
+ protected virtual void DoAfterFlush()
+ {
+ }
+
+ ///<summary>
+ /// A hook for extending classes to execute operations before pending added and
+ /// deleted documents are flushed to the Directory.
+ ///</summary>
+ protected virtual void DoBeforeFlush()
+ {
+ }
+
+ /// <summary>Expert: prepare for commit.
+ ///
+ /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
+ /// you should immediately close the writer. See <a
+ /// href="#OOME">above</a> for details.<p/>
+ ///
+ /// </summary>
+ /// <seealso cref="PrepareCommit(IDictionary{string,string})">
+ /// </seealso>
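+ /// <example>
+ /// A two-phase-commit sketch following the description above (assuming
+ /// "writer" is an open IndexWriter and some external resource takes part in
+ /// the same transaction):
+ /// <code>
+ /// writer.PrepareCommit();
+ /// try
+ /// {
+ ///     // prepare the other resources participating in the transaction here
+ ///     writer.Commit();   // finish the prepared commit
+ /// }
+ /// catch (Exception)
+ /// {
+ ///     writer.Rollback(); // revert the prepared commit and all changes since open
+ ///     throw;
+ /// }
+ /// </code>
+ /// </example>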
+ public void PrepareCommit()
+ {
+ EnsureOpen();
+ PrepareCommit(null);
+ }
+
+ /// <summary><p/>Expert: prepare for commit, specifying
+ /// commitUserData Map (String -> String). This does the
+ /// first phase of 2-phase commit. This method does all steps
+ /// necessary to commit changes since this writer was
+ /// opened: flushes pending added and deleted docs, syncs
+ /// the index files, and writes most of the next segments_N file.
+ /// After calling this you must call either <see cref="Commit()" />
+ /// to finish the commit, or <see cref="Rollback()" />
+ /// to revert the commit and undo all changes
+ /// done since the writer was opened.<p/>
+ ///
+ /// You can also just call <see cref="Commit(IDictionary{string,string})" /> directly
+ /// without prepareCommit first in which case that method
+ /// will internally call prepareCommit.
+ ///
+ /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
+ /// you should immediately close the writer. See <a
+ /// href="#OOME">above</a> for details.<p/>
+ ///
+ /// </summary>
+ /// <param name="commitUserData">Opaque Map (String->String)
+ /// that's recorded into the segments file in the index,
+ /// and retrievable by <see cref="IndexReader.GetCommitUserData" />.
+ /// Note that when IndexWriter commits itself, during <see cref="Close()" />, the
+ /// commitUserData is unchanged (just carried over from
+ /// the prior commit). If this is null then the previous
+ /// commitUserData is kept. Also, the commitUserData will
+ /// only "stick" if there are actually changes in the
+ /// index to commit.
+ /// </param>
+ private void PrepareCommit(IDictionary<string, string> commitUserData)
+ {
+ if (hitOOM)
+ {
+ throw new System.SystemException("this writer hit an OutOfMemoryError; cannot commit");
+ }
+
+ if (pendingCommit != null)
+ throw new System.SystemException("prepareCommit was already called with no corresponding call to commit");
+
+ if (infoStream != null)
+ Message("prepareCommit: flush");
+
+ Flush(true, true, true);
+
+ StartCommit(0, commitUserData);
+ }
+
+ // Used only by commit, below; lock order is commitLock -> IW
+ private Object commitLock = new Object();
+
+ private void Commit(long sizeInBytes)
+ {
+ lock(commitLock) {
+ StartCommit(sizeInBytes, null);
+ FinishCommit();
+ }
+ }
+
+ /// <summary> <p/>Commits all pending changes (added &amp; deleted
+ /// documents, optimizations, segment merges, added
+ /// indexes, etc.) to the index, and syncs all referenced
+ /// index files, such that a reader will see the changes
+ /// and the index updates will survive an OS or machine
+ /// crash or power loss. Note that this does not wait for
+ /// any running background merges to finish. This may be a
+ /// costly operation, so you should test the cost in your
+ /// application and do it only when really necessary.<p/>
+ ///
+ /// <p/> Note that this operation calls Directory.sync on
+ /// the index files. That call should not return until the
+ /// file contents &amp; metadata are on stable storage. For
+ /// FSDirectory, this calls the OS's fsync. But, beware:
+ /// some hardware devices may in fact cache writes even
+ /// during fsync, and return before the bits are actually
+ /// on stable storage, to give the appearance of faster
+ /// performance. If you have such a device, and it does
+ /// not have a battery backup (for example) then on power
+ /// loss it may still lose data. Lucene cannot guarantee
+ /// consistency on such devices. <p/>
+ ///
+ /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
+ /// you should immediately close the writer. See <a
+ /// href="#OOME">above</a> for details.<p/>
+ ///
+ /// </summary>
+ /// <seealso cref="PrepareCommit()">
+ /// </seealso>
+ /// <seealso cref="Commit(IDictionary{string,string})">
+ /// </seealso>
+ public void Commit()
+ {
+ Commit(null);
+ }
+
+ /// <summary>Commits all changes to the index, specifying a
+ /// commitUserData Map (String -> String). This just
+ /// calls <see cref="PrepareCommit(IDictionary{string, string})" /> (if you didn't
+ /// already call it) and then <see cref="FinishCommit" />.
+ ///
+ /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
+ /// you should immediately close the writer. See <a
+ /// href="#OOME">above</a> for details.<p/>
+ /// </summary>
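+ /// <example>
+ /// Illustrative sketch (the key/value pair is purely an assumption for the
+ /// example): attach opaque user data to the commit, retrievable later via
+ /// <see cref="IndexReader.GetCommitUserData" />.
+ /// <code>
+ /// var userData = new Dictionary&lt;string, string&gt;();
+ /// userData["buildId"] = "12345";
+ /// writer.Commit(userData);
+ /// </code>
+ /// </example>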
+ public void Commit(IDictionary<string, string> commitUserData)
+ {
+ EnsureOpen();
+
+ if (infoStream != null)
+ {
+ Message("commit: start");
+ }
+
+ lock (commitLock)
+ {
+ if (infoStream != null)
+ {
+ Message("commit: enter lock");
+ }
+ if (pendingCommit == null)
+ {
+ if (infoStream != null)
+ {
+ Message("commit: now prepare");
+ }
+ PrepareCommit(commitUserData);
+ }
+ else if (infoStream != null)
+ {
+ Message("commit: already prepared");
+ }
+
+ FinishCommit();
+ }
+ }
+
+ private void FinishCommit()
+ {
+ lock (this)
+ {
+
+ if (pendingCommit != null)
+ {
+ try
+ {
+ if (infoStream != null)
+ Message("commit: pendingCommit != null");
+ pendingCommit.FinishCommit(directory);
+ if (infoStream != null)
+ Message("commit: wrote segments file \"" + pendingCommit.GetCurrentSegmentFileName() + "\"");
+ lastCommitChangeCount = pendingCommitChangeCount;
+ segmentInfos.UpdateGeneration(pendingCommit);
+ segmentInfos.UserData = pendingCommit.UserData;
+ SetRollbackSegmentInfos(pendingCommit);
+ deleter.Checkpoint(pendingCommit, true);
+ }
+ finally
+ {
+ deleter.DecRef(pendingCommit);
+ pendingCommit = null;
+ System.Threading.Monitor.PulseAll(this);
+ }
+ }
+ else if (infoStream != null)
+ {
+ Message("commit: pendingCommit == null; skip");
+ }
+
+ if (infoStream != null)
+ {
+ Message("commit: done");
+ }
+ }
+ }
+
+ /// <summary> Flush all in-memory buffered updates (adds and deletes)
+ /// to the Directory.
+ /// </summary>
+ /// <param name="triggerMerge">if true, we may merge segments (if
+ /// deletes or docs were flushed) if necessary
+ /// </param>
+ /// <param name="flushDocStores">if false we are allowed to keep
+ /// doc stores open to share with the next segment
+ /// </param>
+ /// <param name="flushDeletes">whether pending deletes should also
+ /// be flushed
+ /// </param>
+ public /*protected internal*/ void Flush(bool triggerMerge, bool flushDocStores, bool flushDeletes)
+ {
+ // We can be called during close, when closing==true, so we must pass false to ensureOpen:
+ EnsureOpen(false);
+ if (DoFlush(flushDocStores, flushDeletes) && triggerMerge)
+ MaybeMerge();
+ }
+
+ // TODO: this method should not have to be entirely
+ // synchronized, ie, merges should be allowed to commit
+ // even while a flush is happening
+ private bool DoFlush(bool flushDocStores, bool flushDeletes)
+ {
+ lock (this)
+ {
+ try
+ {
+ try
+ {
+ return DoFlushInternal(flushDocStores, flushDeletes);
+ }
+ finally
+ {
+ if (docWriter.DoBalanceRAM())
+ {
+ docWriter.BalanceRAM();
+ }
+ }
+ }
+ finally
+ {
+ docWriter.ClearFlushPending();
+ }
+ }
+ }
+
+ // TODO: this method should not have to be entirely
+ // synchronized, ie, merges should be allowed to commit
+ // even while a flush is happening
+ private bool DoFlushInternal(bool flushDocStores, bool flushDeletes)
+ {
+ lock (this)
+ {
+ if (hitOOM)
+ {
+ throw new System.SystemException("this writer hit an OutOfMemoryError; cannot flush");
+ }
+
+ EnsureOpen(false);
+
+ System.Diagnostics.Debug.Assert(TestPoint("startDoFlush"));
+
+ DoBeforeFlush();
+
+ flushCount++;
+
+ // If we are flushing because too many deletes
+ // accumulated, then we should apply the deletes to free
+ // RAM:
+ flushDeletes |= docWriter.DoApplyDeletes();
+
+ // Make sure no threads are actively adding a document.
+ // Returns true if docWriter is currently aborting, in
+ // which case we skip flushing this segment
+ if (infoStream != null)
+ {
+ Message("flush: now pause all indexing threads");
+ }
+ if (docWriter.PauseAllThreads())
+ {
+ docWriter.ResumeAllThreads();
+ return false;
+ }
+
+ try
+ {
+
+ SegmentInfo newSegment = null;
+
+ int numDocs = docWriter.NumDocsInRAM;
+
+ // Always flush docs if there are any
+ bool flushDocs = numDocs > 0;
+
+ System.String docStoreSegment = docWriter.DocStoreSegment;
+
+ System.Diagnostics.Debug.Assert(docStoreSegment != null || numDocs == 0, "dss=" + docStoreSegment + " numDocs=" + numDocs);
+
+ if (docStoreSegment == null)
+ flushDocStores = false;
+
+ int docStoreOffset = docWriter.DocStoreOffset;
+
+ bool docStoreIsCompoundFile = false;
+
+ if (infoStream != null)
+ {
+ Message(" flush: segment=" + docWriter.Segment + " docStoreSegment=" + docWriter.DocStoreSegment + " docStoreOffset=" + docStoreOffset + " flushDocs=" + flushDocs + " flushDeletes=" + flushDeletes + " flushDocStores=" + flushDocStores + " numDocs=" + numDocs + " numBufDelTerms=" + docWriter.GetNumBufferedDeleteTerms());
+ Message(" index before flush " + SegString());
+ }
+
+ // Check if the doc stores must be separately flushed
+ // because other segments, besides the one we are about
+ // to flush, reference it
+ if (flushDocStores && (!flushDocs || !docWriter.Segment.Equals(docWriter.DocStoreSegment)))
+ {
+ // We must separately flush the doc store
+ if (infoStream != null)
+ Message(" flush shared docStore segment " + docStoreSegment);
+
+ docStoreIsCompoundFile = FlushDocStores();
+ flushDocStores = false;
+ }
+
+ System.String segment = docWriter.Segment;
+
+ // If we are flushing docs, segment must not be null:
+ System.Diagnostics.Debug.Assert(segment != null || !flushDocs);
+
+ if (flushDocs)
+ {
+
+ bool success = false;
+ int flushedDocCount;
+
+ try
+ {
+ flushedDocCount = docWriter.Flush(flushDocStores);
+ if (infoStream != null)
+ {
+ Message("flushedFiles=" + docWriter.GetFlushedFiles());
+ }
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ if (infoStream != null)
+ Message("hit exception flushing segment " + segment);
+ deleter.Refresh(segment);
+ }
+ }
+
+ if (0 == docStoreOffset && flushDocStores)
+ {
+ // This means we are flushing private doc stores
+ // with this segment, so it will not be shared
+ // with other segments
+ System.Diagnostics.Debug.Assert(docStoreSegment != null);
+ System.Diagnostics.Debug.Assert(docStoreSegment.Equals(segment));
+ docStoreOffset = - 1;
+ docStoreIsCompoundFile = false;
+ docStoreSegment = null;
+ }
+
+ // Create new SegmentInfo, but do not add to our
+ // segmentInfos until deletes are flushed
+ // successfully.
+ newSegment = new SegmentInfo(segment, flushedDocCount, directory, false, true, docStoreOffset, docStoreSegment, docStoreIsCompoundFile, docWriter.HasProx());
+ SetDiagnostics(newSegment, "flush");
+ }
+
+ docWriter.PushDeletes();
+
+ if (flushDocs)
+ {
+ segmentInfos.Add(newSegment);
+ Checkpoint();
+ }
+
+ if (flushDocs && mergePolicy.UseCompoundFile(segmentInfos, newSegment))
+ {
+ // Now build compound file
+ bool success = false;
+ try
+ {
+ docWriter.CreateCompoundFile(segment);
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ if (infoStream != null)
+ Message("hit exception creating compound file for newly flushed segment " + segment);
+ deleter.DeleteFile(segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
+ }
+ }
+
+ newSegment.SetUseCompoundFile(true);
+ Checkpoint();
+ }
+
+ if (flushDeletes)
+ {
+ ApplyDeletes();
+ }
+
+ if (flushDocs)
+ Checkpoint();
+
+ DoAfterFlush();
+
+ return flushDocs;
+ }
+ catch (System.OutOfMemoryException oom)
+ {
+ HandleOOM(oom, "doFlush");
+ // never hit
+ return false;
+ }
+ finally
+ {
+ docWriter.ResumeAllThreads();
+ }
+ }
+ }
+
+ /// <summary>Expert: Return the total size of all index files currently cached in memory.
+ /// Useful for deciding when to flush or commit based on RAM usage.
+ /// </summary>
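+ /// <example>
+ /// Hypothetical sketch: flush once buffered state grows past an illustrative
+ /// 32 MB threshold ("writer" is an assumed open IndexWriter).
+ /// <code>
+ /// if (writer.RamSizeInBytes() > 32 * 1024 * 1024)
+ ///     writer.Commit();
+ /// </code>
+ /// </example>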
+ public long RamSizeInBytes()
+ {
+ EnsureOpen();
+ return docWriter.GetRAMUsed();
+ }
+
+ /// <summary>Expert: Return the number of documents currently
+ /// buffered in RAM.
+ /// </summary>
+ public int NumRamDocs()
+ {
+ lock (this)
+ {
+ EnsureOpen();
+ return docWriter.NumDocsInRAM;
+ }
+ }
+
+ private int EnsureContiguousMerge(MergePolicy.OneMerge merge)
+ {
+
+ int first = segmentInfos.IndexOf(merge.segments.Info(0));
+ if (first == - 1)
+ throw new MergePolicy.MergeException("could not find segment " + merge.segments.Info(0).name + " in current index " + SegString(), directory);
+
+ int numSegments = segmentInfos.Count;
+
+ int numSegmentsToMerge = merge.segments.Count;
+ for (int i = 0; i < numSegmentsToMerge; i++)
+ {
+ SegmentInfo info = merge.segments.Info(i);
+
+ if (first + i >= numSegments || !segmentInfos.Info(first + i).Equals(info))
+ {
+ if (segmentInfos.IndexOf(info) == - 1)
+ throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + SegString(), directory);
+ else
+ throw new MergePolicy.MergeException("MergePolicy selected non-contiguous segments to merge (" + merge.SegString(directory) + " vs " + SegString() + "), which IndexWriter (currently) cannot handle", directory);
+ }
+ }
+
+ return first;
+ }
+
+ /// <summary>Carefully merges deletes for the segments we just
+ /// merged. This is tricky because, although merging will
+ /// clear all deletes (compacts the documents), new
+ /// deletes may have been flushed to the segments since
+ /// the merge was started. This method "carries over"
+ /// such new deletes onto the newly merged segment, and
+ /// saves the resulting deletes file (incrementing the
+ /// delete generation for merge.info). If no deletes were
+ /// flushed, no new deletes file is saved.
+ /// </summary>
+ private void CommitMergedDeletes(MergePolicy.OneMerge merge, SegmentReader mergeReader)
+ {
+ lock (this)
+ {
+
+ System.Diagnostics.Debug.Assert(TestPoint("startCommitMergeDeletes"));
+
+ SegmentInfos sourceSegments = merge.segments;
+
+ if (infoStream != null)
+ Message("commitMergeDeletes " + merge.SegString(directory));
+
+ // Carefully merge deletes that occurred after we
+ // started merging:
+ int docUpto = 0;
+ int delCount = 0;
+
+ for (int i = 0; i < sourceSegments.Count; i++)
+ {
+ SegmentInfo info = sourceSegments.Info(i);
+ int docCount = info.docCount;
+ SegmentReader previousReader = merge.readersClone[i];
+ SegmentReader currentReader = merge.readers[i];
+ if (previousReader.HasDeletions)
+ {
+
+ // There were deletes on this segment when the merge
+ // started. The merge has collapsed away those
+ // deletes, but, if new deletes were flushed since
+ // the merge started, we must now carefully carry over
+ // any newly flushed deletes, mapping them to the new
+ // docIDs.
+
+ if (currentReader.NumDeletedDocs > previousReader.NumDeletedDocs)
+ {
+ // This means this segment has had new deletes
+ // committed since we started the merge, so we
+ // must merge them:
+ for (int j = 0; j < docCount; j++)
+ {
+ if (previousReader.IsDeleted(j))
+ {
+ System.Diagnostics.Debug.Assert(currentReader.IsDeleted(j));
+ }
+ else
+ {
+ if (currentReader.IsDeleted(j))
+ {
+ mergeReader.DoDelete(docUpto);
+ delCount++;
+ }
+ docUpto++;
+ }
+ }
+ }
+ else
+ {
+ docUpto += docCount - previousReader.NumDeletedDocs;
+ }
+ }
+ else if (currentReader.HasDeletions)
+ {
+ // This segment had no deletes before but now it
+ // does:
+ for (int j = 0; j < docCount; j++)
+ {
+ if (currentReader.IsDeleted(j))
+ {
+ mergeReader.DoDelete(docUpto);
+ delCount++;
+ }
+ docUpto++;
+ }
+ }
+ // No deletes before or after
+ else
+ docUpto += info.docCount;
+ }
+
+ System.Diagnostics.Debug.Assert(mergeReader.NumDeletedDocs == delCount);
+
+ mergeReader.hasChanges = delCount > 0;
+ }
+ }
+
+ /* FIXME if we want to support non-contiguous segment merges */
+ private bool CommitMerge(MergePolicy.OneMerge merge, SegmentMerger merger, int mergedDocCount, SegmentReader mergedReader)
+ {
+ lock (this)
+ {
+
+ System.Diagnostics.Debug.Assert(TestPoint("startCommitMerge"));
+
+ if (hitOOM)
+ {
+ throw new System.SystemException("this writer hit an OutOfMemoryError; cannot complete merge");
+ }
+
+ if (infoStream != null)
+ Message("commitMerge: " + merge.SegString(directory) + " index=" + SegString());
+
+ System.Diagnostics.Debug.Assert(merge.registerDone);
+
+ // If merge was explicitly aborted, or, if rollback() or
+ // rollbackTransaction() had been called since our merge
+ // started (which results in an unqualified
+ // deleter.refresh() call that will remove any index
+ // file that current segments does not reference), we
+ // abort this merge
+ if (merge.IsAborted())
+ {
+ if (infoStream != null)
+ Message("commitMerge: skipping merge " + merge.SegString(directory) + ": it was aborted");
+
+ return false;
+ }
+
+ int start = EnsureContiguousMerge(merge);
+
+ CommitMergedDeletes(merge, mergedReader);
+ docWriter.RemapDeletes(segmentInfos, merger.GetDocMaps(), merger.GetDelCounts(), merge, mergedDocCount);
+
+ // If the doc store we are using has been closed and
+ // is now in compound format (but wasn't when we
+ // started), then we will switch to the compound
+ // format as well:
+ SetMergeDocStoreIsCompoundFile(merge);
+
+ merge.info.HasProx = merger.HasProx();
+
+ segmentInfos.RemoveRange(start, start + merge.segments.Count - start);
+ System.Diagnostics.Debug.Assert(!segmentInfos.Contains(merge.info));
+ segmentInfos.Insert(start, merge.info);
+
+ CloseMergeReaders(merge, false);
+
+ // Must note the change to segmentInfos so any commits
+ // in-flight don't lose it:
+ Checkpoint();
+
+ // If the merged segments had pending changes, clear
+ // them so that they don't bother writing them to
+ // disk, updating SegmentInfo, etc.:
+ readerPool.Clear(merge.segments);
+
+ if (merge.optimize)
+ {
+ // cascade the optimize:
+ segmentsToOptimize.Add(merge.info);
+ }
+ return true;
+ }
+ }
+
+ private void HandleMergeException(System.Exception t, MergePolicy.OneMerge merge)
+ {
+
+ if (infoStream != null)
+ {
+ Message("handleMergeException: merge=" + merge.SegString(directory) + " exc=" + t);
+ }
+
+ // Set the exception on the merge, so if
+ // optimize() is waiting on us it sees the root
+ // cause exception:
+ merge.SetException(t);
+ AddMergeException(merge);
+
+ if (t is MergePolicy.MergeAbortedException)
+ {
+ // We can ignore this exception (it happens when
+ // close(false) or rollback is called), unless the
+ // merge involves segments from external directories,
+ // in which case we must throw it so, for example, the
+ // rollbackTransaction code in addIndexes* is
+ // executed.
+ if (merge.isExternal)
+ throw t;
+ }
+ else if (t is System.IO.IOException || t is System.SystemException || t is System.ApplicationException)
+ {
+ throw t;
+ }
+ else
+ {
+ // Should not get here
+ System.Diagnostics.Debug.Fail("Exception is not expected type!");
+ throw new System.SystemException(null, t);
+ }
+ }
+
+ public void Merge_ForNUnit(MergePolicy.OneMerge merge)
+ {
+ Merge(merge);
+ }
+ /// <summary> Merges the indicated segments, replacing them in the stack with a
+ /// single segment.
+ /// </summary>
+ internal void Merge(MergePolicy.OneMerge merge)
+ {
+
+ bool success = false;
+
+ try
+ {
+ try
+ {
+ try
+ {
+ MergeInit(merge);
+
+ if (infoStream != null)
+ {
+ Message("now merge\n merge=" + merge.SegString(directory) + "\n merge=" + merge + "\n index=" + SegString());
+ }
+
+ MergeMiddle(merge);
+ MergeSuccess(merge);
+ success = true;
+ }
+ catch (System.Exception t)
+ {
+ HandleMergeException(t, merge);
+ }
+ }
+ finally
+ {
+ lock (this)
+ {
+ MergeFinish(merge);
+
+ if (!success)
+ {
+ if (infoStream != null)
+ Message("hit exception during merge");
+ if (merge.info != null && !segmentInfos.Contains(merge.info))
+ deleter.Refresh(merge.info.name);
+ }
+
+ // This merge (and, generally, any change to the
+ // segments) may now enable new merges, so we call
+ // merge policy & update pending merges.
+ if (success && !merge.IsAborted() && !closed && !closing)
+ UpdatePendingMerges(merge.maxNumSegmentsOptimize, merge.optimize);
+ }
+ }
+ }
+ catch (System.OutOfMemoryException oom)
+ {
+ HandleOOM(oom, "merge");
+ }
+ }
+
+ /// <summary>Hook that's called when the specified merge is complete. </summary>
+ internal virtual void MergeSuccess(MergePolicy.OneMerge merge)
+ {
+ }
+
+ /// <summary>Checks whether this merge involves any segments
+ /// already participating in a merge. If not, this merge
+ /// is "registered", meaning we record that its segments
+ /// are now participating in a merge, and true is
+ /// returned. Else (the merge conflicts) false is
+ /// returned.
+ /// </summary>
+ internal bool RegisterMerge(MergePolicy.OneMerge merge)
+ {
+ lock (this)
+ {
+
+ if (merge.registerDone)
+ return true;
+
+ if (stopMerges)
+ {
+ merge.Abort();
+ throw new MergePolicy.MergeAbortedException("merge is aborted: " + merge.SegString(directory));
+ }
+
+ int count = merge.segments.Count;
+ bool isExternal = false;
+ for (int i = 0; i < count; i++)
+ {
+ SegmentInfo info = merge.segments.Info(i);
+ if (mergingSegments.Contains(info))
+ {
+ return false;
+ }
+ if (segmentInfos.IndexOf(info) == -1)
+ {
+ return false;
+ }
+ if (info.dir != directory)
+ {
+ isExternal = true;
+ }
+ if (segmentsToOptimize.Contains(info))
+ {
+ merge.optimize = true;
+ merge.maxNumSegmentsOptimize = optimizeMaxNumSegments;
+ }
+ }
+
+ EnsureContiguousMerge(merge);
+
+ pendingMerges.AddLast(merge);
+
+ if (infoStream != null)
+ Message("add merge to pendingMerges: " + merge.SegString(directory) + " [total " + pendingMerges.Count + " pending]");
+
+ merge.mergeGen = mergeGen;
+ merge.isExternal = isExternal;
+
+ // OK it does not conflict; now record that this merge
+ // is running (while synchronized) to avoid the race
+ // condition where two conflicting merges from different
+ // threads start at the same time
+ for (int i = 0; i < count; i++)
+ {
+ SegmentInfo si = merge.segments.Info(i);
+ mergingSegments.Add(si);
+ }
+
+ // Merge is now registered
+ merge.registerDone = true;
+ return true;
+ }
+ }
+
+ /// <summary>Does initial setup for a merge, which is fast but holds
+ /// the synchronized lock on the IndexWriter instance.
+ /// </summary>
+ internal void MergeInit(MergePolicy.OneMerge merge)
+ {
+ lock (this)
+ {
+ bool success = false;
+ try
+ {
+ _MergeInit(merge);
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ MergeFinish(merge);
+ }
+ }
+ }
+ }
+
+ private void _MergeInit(MergePolicy.OneMerge merge)
+ {
+ lock (this)
+ {
+
+ System.Diagnostics.Debug.Assert(TestPoint("startMergeInit"));
+
+ System.Diagnostics.Debug.Assert(merge.registerDone);
+ System.Diagnostics.Debug.Assert(!merge.optimize || merge.maxNumSegmentsOptimize > 0);
+
+ if (hitOOM)
+ {
+ throw new System.SystemException("this writer hit an OutOfMemoryError; cannot merge");
+ }
+
+ if (merge.info != null)
+ // mergeInit already done
+ return ;
+
+ if (merge.IsAborted())
+ return ;
+
+ ApplyDeletes();
+
+ SegmentInfos sourceSegments = merge.segments;
+ int end = sourceSegments.Count;
+
+ // Check whether this merge will allow us to skip
+ // merging the doc stores (stored field & vectors).
+ // This is a very substantial optimization (saves tons
+ // of IO).
+
+ Directory lastDir = directory;
+ System.String lastDocStoreSegment = null;
+ int next = - 1;
+
+ bool mergeDocStores = false;
+ bool doFlushDocStore = false;
+ System.String currentDocStoreSegment = docWriter.DocStoreSegment;
+
+ // Test each segment to be merged: check if we need to
+ // flush/merge doc stores
+ for (int i = 0; i < end; i++)
+ {
+ SegmentInfo si = sourceSegments.Info(i);
+
+ // If it has deletions we must merge the doc stores
+ if (si.HasDeletions())
+ mergeDocStores = true;
+
+ // If it has its own (private) doc stores we must
+ // merge the doc stores
+ if (- 1 == si.DocStoreOffset)
+ mergeDocStores = true;
+
+ // If it has a different doc store segment than
+ // previous segments, we must merge the doc stores
+ System.String docStoreSegment = si.DocStoreSegment;
+ if (docStoreSegment == null)
+ mergeDocStores = true;
+ else if (lastDocStoreSegment == null)
+ lastDocStoreSegment = docStoreSegment;
+ else if (!lastDocStoreSegment.Equals(docStoreSegment))
+ mergeDocStores = true;
+
+ // Segments' docStoreOffsets must be in-order,
+ // contiguous. For the default merge policy now
+ // this will always be the case but for an arbitrary
+ // merge policy this may not be the case
+ if (- 1 == next)
+ next = si.DocStoreOffset + si.docCount;
+ else if (next != si.DocStoreOffset)
+ mergeDocStores = true;
+ else
+ next = si.DocStoreOffset + si.docCount;
+
+ // If the segment comes from a different directory
+ // we must merge
+ if (lastDir != si.dir)
+ mergeDocStores = true;
+
+ // If the segment is referencing the current "live"
+ // doc store outputs then we must merge
+ if (si.DocStoreOffset != - 1 && currentDocStoreSegment != null && si.DocStoreSegment.Equals(currentDocStoreSegment))
+ {
+ doFlushDocStore = true;
+ }
+ }
+
+ // if a mergedSegmentWarmer is installed, we must merge
+ // the doc stores because we will open a full
+ // SegmentReader on the merged segment:
+ if (!mergeDocStores && mergedSegmentWarmer != null && currentDocStoreSegment != null && lastDocStoreSegment != null && lastDocStoreSegment.Equals(currentDocStoreSegment))
+ {
+ mergeDocStores = true;
+ }
+
+ int docStoreOffset;
+ System.String docStoreSegment2;
+ bool docStoreIsCompoundFile;
+
+ if (mergeDocStores)
+ {
+ docStoreOffset = - 1;
+ docStoreSegment2 = null;
+ docStoreIsCompoundFile = false;
+ }
+ else
+ {
+ SegmentInfo si = sourceSegments.Info(0);
+ docStoreOffset = si.DocStoreOffset;
+ docStoreSegment2 = si.DocStoreSegment;
+ docStoreIsCompoundFile = si.DocStoreIsCompoundFile;
+ }
+
+ if (mergeDocStores && doFlushDocStore)
+ {
+ // SegmentMerger intends to merge the doc stores
+ // (stored fields, vectors), and at least one of the
+ // segments to be merged refers to the currently
+ // live doc stores.
+
+ // TODO: if we know we are about to merge away these
+ // newly flushed doc store files then we should not
+ // make compound file out of them...
+ if (infoStream != null)
+ Message("now flush at merge");
+ DoFlush(true, false);
+ }
+
+ merge.mergeDocStores = mergeDocStores;
+
+ // Bind a new segment name here so even with
+ // ConcurrentMergeScheduler we keep deterministic segment
+ // names.
+ merge.info = new SegmentInfo(NewSegmentName(), 0, directory, false, true, docStoreOffset, docStoreSegment2, docStoreIsCompoundFile, false);
+
+
+ IDictionary<string, string> details = new Dictionary<string, string>();
+ details["optimize"] = merge.optimize + "";
+ details["mergeFactor"] = end + "";
+ details["mergeDocStores"] = mergeDocStores + "";
+ SetDiagnostics(merge.info, "merge", details);
+
+ // Also enroll the merged segment into mergingSegments;
+ // this prevents it from getting selected for a merge
+ // after our merge is done but while we are building the
+ // CFS:
+ mergingSegments.Add(merge.info);
+ }
+ }
+
+ private void SetDiagnostics(SegmentInfo info, System.String source)
+ {
+ SetDiagnostics(info, source, null);
+ }
+
+ private void SetDiagnostics(SegmentInfo info, System.String source, IDictionary<string, string> details)
+ {
+ IDictionary<string, string> diagnostics = new Dictionary<string,string>();
+ diagnostics["source"] = source;
+ diagnostics["lucene.version"] = Constants.LUCENE_VERSION;
+ diagnostics["os"] = Constants.OS_NAME + "";
+ diagnostics["os.arch"] = Constants.OS_ARCH + "";
+ diagnostics["os.version"] = Constants.OS_VERSION + "";
+ diagnostics["java.version"] = Constants.JAVA_VERSION + "";
+ diagnostics["java.vendor"] = Constants.JAVA_VENDOR + "";
+ if (details != null)
+ {
+ //System.Collections.ArrayList keys = new System.Collections.ArrayList(details.Keys);
+ //System.Collections.ArrayList values = new System.Collections.ArrayList(details.Values);
+ foreach (string key in details.Keys)
+ {
+ diagnostics[key] = details[key];
+ }
+ }
+ info.Diagnostics = diagnostics;
+ }
+
+ /// <summary>Does the finishing work for a merge, which is fast but holds
+ /// the synchronized lock on the IndexWriter instance.
+ /// </summary>
+ internal void MergeFinish(MergePolicy.OneMerge merge)
+ {
+ lock (this)
+ {
+
+ // Optimize, addIndexes or finishMerges may be waiting
+ // on merges to finish.
+ System.Threading.Monitor.PulseAll(this);
+
+ // It's possible we are called twice, eg if there was an
+ // exception inside mergeInit
+ if (merge.registerDone)
+ {
+ SegmentInfos sourceSegments = merge.segments;
+ int end = sourceSegments.Count;
+ for (int i = 0; i < end; i++)
+ mergingSegments.Remove(sourceSegments.Info(i));
+ if(merge.info != null)
+ mergingSegments.Remove(merge.info);
+ merge.registerDone = false;
+ }
+
+ runningMerges.Remove(merge);
+ }
+ }
+
+ private void SetMergeDocStoreIsCompoundFile(MergePolicy.OneMerge merge)
+ {
+ lock (this)
+ {
+ string mergeDocStoreSegment = merge.info.DocStoreSegment;
+ if (mergeDocStoreSegment != null && !merge.info.DocStoreIsCompoundFile)
+ {
+ int size = segmentInfos.Count;
+ for (int i = 0; i < size; i++)
+ {
+ SegmentInfo info = segmentInfos.Info(i);
+ string docStoreSegment = info.DocStoreSegment;
+ if (docStoreSegment != null &&
+ docStoreSegment.Equals(mergeDocStoreSegment) &&
+ info.DocStoreIsCompoundFile)
+ {
+ merge.info.DocStoreIsCompoundFile = true;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ private void CloseMergeReaders(MergePolicy.OneMerge merge, bool suppressExceptions)
+ {
+ lock (this)
+ {
+ int numSegments = merge.segments.Count;
+ if (suppressExceptions)
+ {
+ // Suppress any new exceptions so we throw the
+ // original cause
+ for (int i = 0; i < numSegments; i++)
+ {
+ if (merge.readers[i] != null)
+ {
+ try
+ {
+ readerPool.Release(merge.readers[i], false);
+ }
+ catch (Exception)
+ {
+ }
+ merge.readers[i] = null;
+ }
+
+ if (merge.readersClone[i] != null)
+ {
+ try
+ {
+ merge.readersClone[i].Close();
+ }
+ catch (Exception)
+ {
+ }
+ // This was a private clone and we had the
+ // only reference
+ System.Diagnostics.Debug.Assert(merge.readersClone[i].RefCount == 0); //: "refCount should be 0 but is " + merge.readersClone[i].getRefCount();
+ merge.readersClone[i] = null;
+ }
+ }
+ }
+ else
+ {
+ for (int i = 0; i < numSegments; i++)
+ {
+ if (merge.readers[i] != null)
+ {
+ readerPool.Release(merge.readers[i], true);
+ merge.readers[i] = null;
+ }
+
+ if (merge.readersClone[i] != null)
+ {
+ merge.readersClone[i].Close();
+ // This was a private clone and we had the only reference
+ System.Diagnostics.Debug.Assert(merge.readersClone[i].RefCount == 0);
+ merge.readersClone[i] = null;
+ }
+ }
+ }
+ }
+ }
+
+
+ /// <summary>Does the actual (time-consuming) work of the merge,
+ /// but without holding synchronized lock on IndexWriter
+ /// instance
+ /// </summary>
+ private int MergeMiddle(MergePolicy.OneMerge merge)
+ {
+
+ merge.CheckAborted(directory);
+
+ System.String mergedName = merge.info.name;
+
+ SegmentMerger merger = null;
+
+ int mergedDocCount = 0;
+
+ SegmentInfos sourceSegments = merge.segments;
+ int numSegments = sourceSegments.Count;
+
+ if (infoStream != null)
+ Message("merging " + merge.SegString(directory));
+
+ merger = new SegmentMerger(this, mergedName, merge);
+
+ merge.readers = new SegmentReader[numSegments];
+ merge.readersClone = new SegmentReader[numSegments];
+
+ bool mergeDocStores = false;
+
+ String currentDocStoreSegment;
+ lock(this) {
+ currentDocStoreSegment = docWriter.DocStoreSegment;
+ }
+ bool currentDSSMerged = false;
+
+ // This is try/finally to make sure merger's readers are
+ // closed:
+ bool success = false;
+ try
+ {
+ int totDocCount = 0;
+
+ for (int i = 0; i < numSegments; i++)
+ {
+
+ SegmentInfo info = sourceSegments.Info(i);
+
+ // Hold onto the "live" reader; we will use this to
+ // commit merged deletes
+ SegmentReader reader = merge.readers[i] = readerPool.Get(info, merge.mergeDocStores, MERGE_READ_BUFFER_SIZE, -1);
+
+ // We clone the segment readers because other
+ // deletes may come in while we're merging so we
+ // need readers that will not change
+ SegmentReader clone = merge.readersClone[i] = (SegmentReader)reader.Clone(true);
+ merger.Add(clone);
+
+ if (clone.HasDeletions)
+ {
+ mergeDocStores = true;
+ }
+
+ if (info.DocStoreOffset != -1 && currentDocStoreSegment != null)
+ {
+ currentDSSMerged |= currentDocStoreSegment.Equals(info.DocStoreSegment);
+ }
+
+ totDocCount += clone.NumDocs();
+ }
+
+ if (infoStream != null)
+ {
+ Message("merge: total " + totDocCount + " docs");
+ }
+
+ merge.CheckAborted(directory);
+
+ // If deletions have arrived and it has now become
+ // necessary to merge doc stores, go and open them:
+ if (mergeDocStores && !merge.mergeDocStores)
+ {
+ merge.mergeDocStores = true;
+ lock (this)
+ {
+ if (currentDSSMerged)
+ {
+ if (infoStream != null)
+ {
+ Message("now flush at mergeMiddle");
+ }
+ DoFlush(true, false);
+ }
+ }
+
+ for (int i = 0; i < numSegments; i++)
+ {
+ merge.readersClone[i].OpenDocStores();
+ }
+
+ // Clear DSS
+ merge.info.SetDocStore(-1, null, false);
+
+ }
+
+ // This is where all the work happens:
+ mergedDocCount = merge.info.docCount = merger.Merge(merge.mergeDocStores);
+
+ System.Diagnostics.Debug.Assert(mergedDocCount == totDocCount);
+
+ if (merge.useCompoundFile)
+ {
+
+ success = false;
+ string compoundFileName = IndexFileNames.SegmentFileName(mergedName, IndexFileNames.COMPOUND_FILE_EXTENSION);
+
+ try
+ {
+ if (infoStream != null)
+ {
+ Message("create compound file " + compoundFileName);
+ }
+ merger.CreateCompoundFile(compoundFileName);
+ success = true;
+ }
+ catch (System.IO.IOException ioe)
+ {
+ lock (this)
+ {
+ if (merge.IsAborted())
+ {
+ // This can happen if rollback or close(false)
+ // is called -- fall through to logic below to
+ // remove the partially created CFS:
+ }
+ else
+ {
+ HandleMergeException(ioe, merge);
+ }
+ }
+ }
+ catch (Exception t)
+ {
+ HandleMergeException(t, merge);
+ }
+ finally
+ {
+ if (!success)
+ {
+ if (infoStream != null)
+ {
+ Message("hit exception creating compound file during merge");
+ }
+
+ lock (this)
+ {
+ deleter.DeleteFile(compoundFileName);
+ deleter.DeleteNewFiles(merger.GetMergedFiles());
+ }
+ }
+ }
+
+ success = false;
+
+ lock (this)
+ {
+
+ // delete new non cfs files directly: they were never
+ // registered with IFD
+ deleter.DeleteNewFiles(merger.GetMergedFiles());
+
+ if (merge.IsAborted())
+ {
+ if (infoStream != null)
+ {
+ Message("abort merge after building CFS");
+ }
+ deleter.DeleteFile(compoundFileName);
+ return 0;
+ }
+ }
+
+ merge.info.SetUseCompoundFile(true);
+ }
+
+ int termsIndexDivisor;
+ bool loadDocStores;
+
+ // if the merged segment warmer was not installed when
+ // this merge was started, causing us to not force
+ // the docStores to close, we can't warm it now
+ bool canWarm = merge.info.DocStoreSegment == null || currentDocStoreSegment == null || !merge.info.DocStoreSegment.Equals(currentDocStoreSegment);
+
+ if (poolReaders && mergedSegmentWarmer != null && canWarm)
+ {
+ // Load terms index & doc stores so the segment
+ // warmer can run searches, load documents/term
+ // vectors
+ termsIndexDivisor = readerTermsIndexDivisor;
+ loadDocStores = true;
+ }
+ else
+ {
+ termsIndexDivisor = -1;
+ loadDocStores = false;
+ }
+
+ // TODO: in the non-realtime case, we may want to only
+ // keep deletes (it's costly to open entire reader
+ // when we just need deletes)
+
+ SegmentReader mergedReader = readerPool.Get(merge.info, loadDocStores, BufferedIndexInput.BUFFER_SIZE, termsIndexDivisor);
+ try
+ {
+ if (poolReaders && mergedSegmentWarmer != null)
+ {
+ mergedSegmentWarmer.Warm(mergedReader);
+ }
+ if (!CommitMerge(merge, merger, mergedDocCount, mergedReader))
+ {
+ // commitMerge will return false if this merge was aborted
+ return 0;
+ }
+ }
+ finally
+ {
+ lock (this)
+ {
+ readerPool.Release(mergedReader);
+ }
+ }
+
+ success = true;
+ }
+ finally
+ {
+ // Readers are already closed in commitMerge if we didn't hit
+ // an exc:
+ if (!success)
+ {
+ CloseMergeReaders(merge, true);
+ }
+ }
+
+ return mergedDocCount;
+ }
+
+ internal virtual void AddMergeException(MergePolicy.OneMerge merge)
+ {
+ lock (this)
+ {
+ System.Diagnostics.Debug.Assert(merge.GetException() != null);
+ if (!mergeExceptions.Contains(merge) && mergeGen == merge.mergeGen)
+ mergeExceptions.Add(merge);
+ }
+ }
+
+ // Apply buffered deletes to all segments.
+ private bool ApplyDeletes()
+ {
+ lock (this)
+ {
+ System.Diagnostics.Debug.Assert(TestPoint("startApplyDeletes"));
+ flushDeletesCount++;
+
+ bool success = false;
+ bool changed;
+ try
+ {
+ changed = docWriter.ApplyDeletes(segmentInfos);
+ success = true;
+ }
+ finally
+ {
+ if (!success && infoStream != null)
+ {
+ Message("hit exception flushing deletes");
+ }
+ }
+
+ if (changed)
+ Checkpoint();
+ return changed;
+ }
+ }
+
+ // For test purposes.
+ internal int GetBufferedDeleteTermsSize()
+ {
+ lock (this)
+ {
+ return docWriter.GetBufferedDeleteTerms().Count;
+ }
+ }
+
+ // For test purposes.
+ internal int GetNumBufferedDeleteTerms()
+ {
+ lock (this)
+ {
+ return docWriter.GetNumBufferedDeleteTerms();
+ }
+ }
+
+ // utility routines for tests
+ public /*internal*/ virtual SegmentInfo NewestSegment()
+ {
+ return segmentInfos.Count > 0 ? segmentInfos.Info(segmentInfos.Count - 1) : null;
+ }
+
+ public virtual System.String SegString()
+ {
+ lock (this)
+ {
+ return SegString(segmentInfos);
+ }
+ }
+
+ private System.String SegString(SegmentInfos infos)
+ {
+ lock (this)
+ {
+ System.Text.StringBuilder buffer = new System.Text.StringBuilder();
+ int count = infos.Count;
+ for (int i = 0; i < count; i++)
+ {
+ if (i > 0)
+ {
+ buffer.Append(' ');
+ }
+ SegmentInfo info = infos.Info(i);
+ buffer.Append(info.SegString(directory));
+ if (info.dir != directory)
+ buffer.Append("**");
+ }
+ return buffer.ToString();
+ }
+ }
+
+ // Files that have been sync'd already
+ private HashSet<string> synced = new HashSet<string>();
+
+ // Files that are now being sync'd
+ private HashSet<string> syncing = new HashSet<string>();
+
+ private bool StartSync(System.String fileName, ICollection<string> pending)
+ {
+ lock (synced)
+ {
+ if (!synced.Contains(fileName))
+ {
+ if (!syncing.Contains(fileName))
+ {
+ syncing.Add(fileName);
+ return true;
+ }
+ else
+ {
+ pending.Add(fileName);
+ return false;
+ }
+ }
+ else
+ return false;
+ }
+ }
+
+ private void FinishSync(System.String fileName, bool success)
+ {
+ lock (synced)
+ {
+ System.Diagnostics.Debug.Assert(syncing.Contains(fileName));
+ syncing.Remove(fileName);
+ if (success)
+ synced.Add(fileName);
+ System.Threading.Monitor.PulseAll(synced);
+ }
+ }
+
+ /// <summary>Blocks until all files in syncing are sync'd </summary>
+ private bool WaitForAllSynced(ICollection<System.String> syncing)
+ {
+ lock (synced)
+ {
+ IEnumerator<string> it = syncing.GetEnumerator();
+ while (it.MoveNext())
+ {
+ System.String fileName = it.Current;
+ while (!synced.Contains(fileName))
+ {
+ if (!syncing.Contains(fileName))
+ // There was an error because a file that was
+ // previously syncing failed to appear in synced
+ return false;
+ else
+ System.Threading.Monitor.Wait(synced);
+
+ }
+ }
+ return true;
+ }
+ }
+
+ private void DoWait()
+ {
+ lock (this)
+ {
+ // NOTE: the callers of this method should in theory
+ // be able to simply call wait(), but, as a defense
+ // against thread timing hazards where notifyAll()
+ // fails to be called, we wait for at most 1 second
+ // and then return so the caller can check whether the wait
+ // conditions are satisfied:
+ System.Threading.Monitor.Wait(this, TimeSpan.FromMilliseconds(1000));
+
+ }
+ }
+
+ /// <summary>Walk through all files referenced by the current
+ /// segmentInfos and ask the Directory to sync each file,
+ /// if it wasn't already. If that succeeds, then we
+ /// prepare a new segments_N file but do not fully commit
+ /// it.
+ /// </summary>
+ private void StartCommit(long sizeInBytes, IDictionary<string, string> commitUserData)
+ {
+
+ System.Diagnostics.Debug.Assert(TestPoint("startStartCommit"));
+
+ // TODO: as of LUCENE-2095, we can simplify this method,
+ // since only 1 thread can be in here at once
+
+ if (hitOOM)
+ {
+ throw new System.SystemException("this writer hit an OutOfMemoryError; cannot commit");
+ }
+
+ try
+ {
+
+ if (infoStream != null)
+ Message("startCommit(): start sizeInBytes=" + sizeInBytes);
+
+ SegmentInfos toSync = null;
+ long myChangeCount;
+
+ lock (this)
+ {
+ // Wait for any running addIndexes to complete
+ // first, then block any from running until we've
+ // copied the segmentInfos we intend to sync:
+ BlockAddIndexes(false);
+
+ // On commit the segmentInfos must never
+ // reference a segment in another directory:
+ System.Diagnostics.Debug.Assert(!HasExternalSegments());
+
+ try
+ {
+
+ System.Diagnostics.Debug.Assert(lastCommitChangeCount <= changeCount);
+ myChangeCount = changeCount;
+
+ if (changeCount == lastCommitChangeCount)
+ {
+ if (infoStream != null)
+ Message(" skip startCommit(): no changes pending");
+ return ;
+ }
+
+ // First, we clone & incref the segmentInfos we intend
+ // to sync, then, without locking, we sync() each file
+ // referenced by toSync, in the background. Multiple
+ // threads can be doing this at once, if say a large
+ // merge and a small merge finish at the same time:
+
+ if (infoStream != null)
+ Message("startCommit index=" + SegString(segmentInfos) + " changeCount=" + changeCount);
+
+ readerPool.Commit();
+
+ // It's possible another flush (that did not close
+ // the open doc stores) snuck in after the flush we
+ // just did, so we remove any tail segments
+ // referencing the open doc store from the
+ // SegmentInfos we are about to sync (the main
+ // SegmentInfos will keep them):
+ toSync = (SegmentInfos) segmentInfos.Clone();
+ string dss = docWriter.DocStoreSegment;
+ if (dss != null)
+ {
+ while (true)
+ {
+ String dss2 = toSync.Info(toSync.Count - 1).DocStoreSegment;
+ if (dss2 == null || !dss2.Equals(dss))
+ {
+ break;
+ }
+ toSync.RemoveAt(toSync.Count - 1);
+ changeCount++;
+ }
+ }
+
+ if (commitUserData != null)
+ toSync.UserData = commitUserData;
+
+ deleter.IncRef(toSync, false);
+
+ ICollection<string> files = toSync.Files(directory, false);
+ foreach(string fileName in files)
+ {
+ System.Diagnostics.Debug.Assert(directory.FileExists(fileName), "file " + fileName + " does not exist");
+ // If this trips it means we are missing a call to
+ // .checkpoint somewhere, because by the time we
+ // are called, deleter should know about every
+ // file referenced by the current head
+ // segmentInfos:
+ System.Diagnostics.Debug.Assert(deleter.Exists(fileName));
+ }
+ }
+ finally
+ {
+ ResumeAddIndexes();
+ }
+ }
+
+ System.Diagnostics.Debug.Assert(TestPoint("midStartCommit"));
+
+ bool setPending = false;
+
+ try
+ {
+ // Loop until all files toSync references are sync'd:
+ while (true)
+ {
+ ICollection<string> pending = new List<string>();
+
+ IEnumerator<string> it = toSync.Files(directory, false).GetEnumerator();
+ while (it.MoveNext())
+ {
+ string fileName = it.Current;
+ if (StartSync(fileName, pending))
+ {
+ bool success = false;
+ try
+ {
+ // Because we incRef'd this commit point, above,
+ // the file had better exist:
+ System.Diagnostics.Debug.Assert(directory.FileExists(fileName), "file '" + fileName + "' does not exist dir=" + directory);
+ if (infoStream != null)
+ Message("now sync " + fileName);
+ directory.Sync(fileName);
+ success = true;
+ }
+ finally
+ {
+ FinishSync(fileName, success);
+ }
+ }
+ }
+
+ // All files that I require are either synced or being
+ // synced by other threads. If they are being synced,
+ // we must at this point block until they are done.
+ // If this returns false, that means an error in
+ // another thread resulted in failing to actually
+ // sync one of our files, so we repeat:
+ if (WaitForAllSynced(pending))
+ break;
+ }
+
+ System.Diagnostics.Debug.Assert(TestPoint("midStartCommit2"));
+
+ lock (this)
+ {
+ // If someone saved a newer version of segments file
+ // since I first started syncing my version, I can
+ // safely skip saving myself since I've been
+ // superseded:
+
+ while (true)
+ {
+ if (myChangeCount <= lastCommitChangeCount)
+ {
+ if (infoStream != null)
+ {
+ Message("sync superseded by newer infos");
+ }
+ break;
+ }
+ else if (pendingCommit == null)
+ {
+ // My turn to commit
+
+ if (segmentInfos.Generation > toSync.Generation)
+ toSync.UpdateGeneration(segmentInfos);
+
+ bool success = false;
+ try
+ {
+
+ // Exception here means nothing is prepared
+ // (this method unwinds everything it did on
+ // an exception)
+ try
+ {
+ toSync.PrepareCommit(directory);
+ }
+ finally
+ {
+ // Have our master segmentInfos record the
+ // generations we just prepared. We do this
+ // on error or success so we don't
+ // double-write a segments_N file.
+ segmentInfos.UpdateGeneration(toSync);
+ }
+
+ System.Diagnostics.Debug.Assert(pendingCommit == null);
+ setPending = true;
+ pendingCommit = toSync;
+ pendingCommitChangeCount = (uint) myChangeCount;
+ success = true;
+ }
+ finally
+ {
+ if (!success && infoStream != null)
+ Message("hit exception committing segments file");
+ }
+ break;
+ }
+ else
+ {
+ // Must wait for other commit to complete
+ DoWait();
+ }
+ }
+ }
+
+ if (infoStream != null)
+ Message("done all syncs");
+
+ System.Diagnostics.Debug.Assert(TestPoint("midStartCommitSuccess"));
+ }
+ finally
+ {
+ lock (this)
+ {
+ if (!setPending)
+ deleter.DecRef(toSync);
+ }
+ }
+ }
+ catch (System.OutOfMemoryException oom)
+ {
+ HandleOOM(oom, "startCommit");
+ }
+ System.Diagnostics.Debug.Assert(TestPoint("finishStartCommit"));
+ }
+
+ /// <summary> Returns <c>true</c> iff the index in the named directory is
+ /// currently locked.
+ /// </summary>
+ /// <param name="directory">the directory to check for a lock
+ /// </param>
+ /// <throws> IOException if there is a low-level IO error </throws>
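+ /// <example>
+ /// Illustrative sketch only (assumes an already-opened <c>Directory dir</c>, which is not
+ /// part of this patch); typically used by recovery code together with <see cref="Unlock" />:
+ /// <code>
+ /// if (IndexWriter.IsLocked(dir))
+ /// {
+ ///     // Only safe when no other process or thread is using the index:
+ ///     IndexWriter.Unlock(dir);
+ /// }
+ /// </code>
+ /// </example>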
+ public static bool IsLocked(Directory directory)
+ {
+ return directory.MakeLock(WRITE_LOCK_NAME).IsLocked();
+ }
+
+ /// <summary> Forcibly unlocks the index in the named directory.
+ /// <p/>
+ /// Caution: this should only be used by failure recovery code,
+ /// when it is known that no other process nor thread is in fact
+ /// currently accessing this index.
+ /// </summary>
+ public static void Unlock(Directory directory)
+ {
+ directory.MakeLock(IndexWriter.WRITE_LOCK_NAME).Release();
+ }
+
+ /// <summary> Specifies maximum field length (in number of tokens/terms) in <see cref="IndexWriter" /> constructors.
+ /// <see cref="SetMaxFieldLength(int)" /> overrides the value set by
+ /// the constructor.
+ /// </summary>
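+ /// <example>
+ /// A minimal sketch (assumes a <c>Directory dir</c> and an <c>Analyzer analyzer</c> created elsewhere;
+ /// the constructor overloads shown are assumptions, not part of this patch):
+ /// <code>
+ /// var unlimited = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
+ /// // or cap each field at 5,000 tokens:
+ /// var limited = new IndexWriter(dir, analyzer, new IndexWriter.MaxFieldLength(5000));
+ /// </code>
+ /// </example>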
+ public sealed class MaxFieldLength
+ {
+
+ private int limit;
+ private System.String name;
+
+ /// <summary> Private type-safe-enum-pattern constructor.
+ ///
+ /// </summary>
+ /// <param name="name">instance name
+ /// </param>
+ /// <param name="limit">maximum field length
+ /// </param>
+ internal MaxFieldLength(System.String name, int limit)
+ {
+ this.name = name;
+ this.limit = limit;
+ }
+
+ /// <summary> Public constructor to allow users to specify the maximum field size limit.
+ ///
+ /// </summary>
+ /// <param name="limit">The maximum field length
+ /// </param>
+ public MaxFieldLength(int limit):this("User-specified", limit)
+ {
+ }
+
+ public int Limit
+ {
+ get { return limit; }
+ }
+
+ public override System.String ToString()
+ {
+ return name + ":" + limit;
+ }
+
+ /// <summary>Sets the maximum field length to <see cref="int.MaxValue" />. </summary>
+ public static readonly MaxFieldLength UNLIMITED = new MaxFieldLength("UNLIMITED", System.Int32.MaxValue);
+
+ /// <summary> Sets the maximum field length to
+ /// <see cref="DEFAULT_MAX_FIELD_LENGTH" />
+ ///
+ /// </summary>
+ public static readonly MaxFieldLength LIMITED;
+ static MaxFieldLength()
+ {
+ LIMITED = new MaxFieldLength("LIMITED", Lucene.Net.Index.IndexWriter.DEFAULT_MAX_FIELD_LENGTH);
+ }
+ }
+
+ /// <summary>If <see cref="GetReader()" /> has been called (ie, this writer
+ /// is in near real-time mode), then after a merge
+ /// completes, this class can be invoked to warm the
+ /// reader on the newly merged segment, before the merge
+ /// commits. This is not required for near real-time
+ /// search, but will reduce search latency on opening a
+ /// new near real-time reader after a merge completes.
+ ///
+ /// <p/><b>NOTE:</b> This API is experimental and might
+ /// change in incompatible ways in the next release.<p/>
+ ///
+ /// <p/><b>NOTE</b>: warm is called before any deletes have
+ /// been carried over to the merged segment.
+ /// </summary>
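+ /// <example>
+ /// Hedged sketch of a hypothetical subclass, installed via <see cref="MergedSegmentWarmer" />
+ /// (the class and member names below are illustrative, not part of this patch):
+ /// <code>
+ /// class SimpleWarmer : IndexWriter.IndexReaderWarmer
+ /// {
+ ///     public override void Warm(IndexReader reader)
+ ///     {
+ ///         // e.g. touch the reader so its data structures are loaded before the merge commits
+ ///         int maxDoc = reader.MaxDoc;
+ ///     }
+ /// }
+ /// // writer.MergedSegmentWarmer = new SimpleWarmer();
+ /// </code>
+ /// </example>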
+ public abstract class IndexReaderWarmer
+ {
+ public abstract void Warm(IndexReader reader);
+ }
+
+ private IndexReaderWarmer mergedSegmentWarmer;
+
+ /// <summary>Gets or sets the merged segment warmer. See <see cref="IndexReaderWarmer" />.
+ /// </summary>
+ public virtual IndexReaderWarmer MergedSegmentWarmer
+ {
+ set { mergedSegmentWarmer = value; }
+ get { return mergedSegmentWarmer; }
+ }
+
+ private void HandleOOM(System.OutOfMemoryException oom, System.String location)
+ {
+ if (infoStream != null)
+ {
+ Message("hit OutOfMemoryError inside " + location);
+ }
+ hitOOM = true;
+ throw oom;
+ }
+
+ // Used only by assert for testing. Current points:
+ // startDoFlush
+ // startCommitMerge
+ // startStartCommit
+ // midStartCommit
+ // midStartCommit2
+ // midStartCommitSuccess
+ // finishStartCommit
+ // startCommitMergeDeletes
+ // startMergeInit
+ // startApplyDeletes
+ // DocumentsWriter.ThreadState.init start
+ public /*internal*/ virtual bool TestPoint(System.String name)
+ {
+ return true;
+ }
+
+ internal virtual bool NrtIsCurrent(SegmentInfos infos)
+ {
+ lock (this)
+ {
+ if (!infos.Equals(segmentInfos))
+ {
+ // if any structural changes (new segments), we are
+ // stale
+ return false;
+ }
+ else if (infos.Generation != segmentInfos.Generation)
+ {
+ // if any commit took place since we were opened, we
+ // are stale
+ return false;
+ }
+ else
+ {
+ return !docWriter.AnyChanges;
+ }
+ }
+ }
+
+ internal virtual bool IsClosed()
+ {
+ lock (this)
+ {
+ return closed;
+ }
+ }
+
+ static IndexWriter()
+ {
+ MAX_TERM_LENGTH = DocumentsWriter.MAX_TERM_LENGTH;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/IntBlockPool.cs b/src/core/Index/IntBlockPool.cs
new file mode 100644
index 0000000..5fbee30
--- /dev/null
+++ b/src/core/Index/IntBlockPool.cs
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ sealed class IntBlockPool
+ {
+ private void InitBlock()
+ {
+ intUpto = DocumentsWriter.INT_BLOCK_SIZE;
+ }
+
+ public int[][] buffers = new int[10][];
+
+ internal int bufferUpto = - 1; // Which buffer we are up to
+ public int intUpto; // Where we are in head buffer
+
+ public int[] buffer; // Current head buffer
+ public int intOffset = - DocumentsWriter.INT_BLOCK_SIZE; // Current head offset
+
+ private DocumentsWriter docWriter;
+ internal bool trackAllocations;
+
+ public IntBlockPool(DocumentsWriter docWriter, bool trackAllocations)
+ {
+ InitBlock();
+ this.docWriter = docWriter;
+ this.trackAllocations = trackAllocations;
+ }
+
+ public void Reset()
+ {
+ if (bufferUpto != - 1)
+ {
+ if (bufferUpto > 0)
+ // Recycle all but the first buffer
+ docWriter.RecycleIntBlocks(buffers, 1, 1 + bufferUpto);
+
+ // Reuse first buffer
+ bufferUpto = 0;
+ intUpto = 0;
+ intOffset = 0;
+ buffer = buffers[0];
+ }
+ }
+
+ public void NextBuffer()
+ {
+ if (1 + bufferUpto == buffers.Length)
+ {
+ int[][] newBuffers = new int[(int) (buffers.Length * 1.5)][];
+ Array.Copy(buffers, 0, newBuffers, 0, buffers.Length);
+ buffers = newBuffers;
+ }
+ buffer = buffers[1 + bufferUpto] = docWriter.GetIntBlock(trackAllocations);
+ bufferUpto++;
+
+ intUpto = 0;
+ intOffset += DocumentsWriter.INT_BLOCK_SIZE;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/InvertedDocConsumer.cs b/src/core/Index/InvertedDocConsumer.cs
new file mode 100644
index 0000000..bb9b2f8
--- /dev/null
+++ b/src/core/Index/InvertedDocConsumer.cs
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Index
+{
+
+ abstract class InvertedDocConsumer
+ {
+
+ /// <summary>Add a new thread </summary>
+ internal abstract InvertedDocConsumerPerThread AddThread(DocInverterPerThread docInverterPerThread);
+
+ /// <summary>Abort (called after hitting AbortException) </summary>
+ public abstract void Abort();
+
+ /// <summary>Flush a new segment </summary>
+ internal abstract void Flush(
+ IDictionary<InvertedDocConsumerPerThread, ICollection<InvertedDocConsumerPerField>> threadsAndFields,
+ SegmentWriteState state);
+
+ /// <summary>Close doc stores </summary>
+ internal abstract void CloseDocStore(SegmentWriteState state);
+
+ /// <summary>Attempt to free RAM, returning true if any RAM was
+ /// freed
+ /// </summary>
+ public abstract bool FreeRAM();
+
+ internal FieldInfos fieldInfos;
+
+ internal virtual void SetFieldInfos(FieldInfos fieldInfos)
+ {
+ this.fieldInfos = fieldInfos;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/InvertedDocConsumerPerField.cs b/src/core/Index/InvertedDocConsumerPerField.cs
new file mode 100644
index 0000000..471d9b7
--- /dev/null
+++ b/src/core/Index/InvertedDocConsumerPerField.cs
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Documents;
+
+namespace Lucene.Net.Index
+{
+
+ abstract class InvertedDocConsumerPerField
+ {
+
+ // Called once per field, and is given all Fieldable
+ // occurrences for this field in the document. Return
+ // true if you wish to see inverted tokens for these
+ // fields:
+ internal abstract bool Start(IFieldable[] fields, int count);
+
+ // Called before a field instance is being processed
+ internal abstract void Start(IFieldable field);
+
+ // Called once per inverted token
+ internal abstract void Add();
+
+ // Called once per field per document, after all Fieldable
+ // occurrences are inverted
+ internal abstract void Finish();
+
+ // Called on hitting an aborting exception
+ public abstract void Abort();
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/InvertedDocConsumerPerThread.cs b/src/core/Index/InvertedDocConsumerPerThread.cs
new file mode 100644
index 0000000..49ed8df
--- /dev/null
+++ b/src/core/Index/InvertedDocConsumerPerThread.cs
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ abstract class InvertedDocConsumerPerThread
+ {
+ public abstract void StartDocument();
+ internal abstract InvertedDocConsumerPerField AddField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo);
+ public abstract DocumentsWriter.DocWriter FinishDocument();
+ public abstract void Abort();
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/InvertedDocEndConsumer.cs b/src/core/Index/InvertedDocEndConsumer.cs
new file mode 100644
index 0000000..fb0a69e
--- /dev/null
+++ b/src/core/Index/InvertedDocEndConsumer.cs
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Index
+{
+
+ abstract class InvertedDocEndConsumer
+ {
+ public abstract InvertedDocEndConsumerPerThread AddThread(DocInverterPerThread docInverterPerThread);
+ public abstract void Flush(IDictionary<InvertedDocEndConsumerPerThread, ICollection<InvertedDocEndConsumerPerField>> threadsAndFields, SegmentWriteState state);
+ internal abstract void CloseDocStore(SegmentWriteState state);
+ public abstract void Abort();
+ internal abstract void SetFieldInfos(FieldInfos fieldInfos);
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/InvertedDocEndConsumerPerField.cs b/src/core/Index/InvertedDocEndConsumerPerField.cs
new file mode 100644
index 0000000..dfad1c9
--- /dev/null
+++ b/src/core/Index/InvertedDocEndConsumerPerField.cs
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ abstract class InvertedDocEndConsumerPerField
+ {
+ internal abstract void Finish();
+ internal abstract void Abort();
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/InvertedDocEndConsumerPerThread.cs b/src/core/Index/InvertedDocEndConsumerPerThread.cs
new file mode 100644
index 0000000..2f4fb5c
--- /dev/null
+++ b/src/core/Index/InvertedDocEndConsumerPerThread.cs
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ abstract class InvertedDocEndConsumerPerThread
+ {
+ internal abstract void StartDocument();
+ internal abstract InvertedDocEndConsumerPerField AddField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo);
+ internal abstract void FinishDocument();
+ internal abstract void Abort();
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/KeepOnlyLastCommitDeletionPolicy.cs b/src/core/Index/KeepOnlyLastCommitDeletionPolicy.cs
new file mode 100644
index 0000000..3775de1
--- /dev/null
+++ b/src/core/Index/KeepOnlyLastCommitDeletionPolicy.cs
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.Collections.Generic;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> This <see cref="IndexDeletionPolicy" /> implementation
+ /// keeps only the most recent commit and immediately removes
+ /// all prior commits after a new commit is done. This is
+ /// the default deletion policy.
+ /// </summary>
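+ /// <example>
+ /// Sketch only: this policy is the default, but it can also be passed explicitly
+ /// (assumes <c>dir</c> and <c>analyzer</c> exist; the exact constructor overload is an
+ /// assumption and may differ across versions):
+ /// <code>
+ /// var writer = new IndexWriter(dir, analyzer,
+ ///     new KeepOnlyLastCommitDeletionPolicy(),
+ ///     IndexWriter.MaxFieldLength.UNLIMITED);
+ /// </code>
+ /// </example>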
+
+ public sealed class KeepOnlyLastCommitDeletionPolicy : IndexDeletionPolicy
+ {
+
+ /// <summary> Deletes all commits except the most recent one.</summary>
+ public void OnInit<T>(IList<T> commits) where T : IndexCommit
+ {
+ // Note that commits.size() should normally be 1:
+ OnCommit(commits);
+ }
+
+ /// <summary> Deletes all commits except the most recent one.</summary>
+ public void OnCommit<T>(IList<T> commits) where T : IndexCommit
+ {
+ // Note that commits.size() should normally be 2 (if not
+ // called by onInit above):
+ int size = commits.Count;
+ for (int i = 0; i < size - 1; i++)
+ {
+ commits[i].Delete();
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/LogByteSizeMergePolicy.cs b/src/core/Index/LogByteSizeMergePolicy.cs
new file mode 100644
index 0000000..5d5c952
--- /dev/null
+++ b/src/core/Index/LogByteSizeMergePolicy.cs
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary>This is a <see cref="LogMergePolicy" /> that measures size of a
+ /// segment as the total byte size of the segment's files.
+ /// </summary>
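+ /// <example>
+ /// Illustrative configuration sketch (assumes an existing <c>IndexWriter writer</c>;
+ /// the values and the setter call are assumptions, not shown in this patch):
+ /// <code>
+ /// var policy = new LogByteSizeMergePolicy(writer);
+ /// policy.MinMergeMB = 1.6;   // segments below this size share the lowest level
+ /// policy.MaxMergeMB = 2048;  // segments above this size are never merged
+ /// writer.SetMergePolicy(policy);
+ /// </code>
+ /// </example>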
+ public class LogByteSizeMergePolicy : LogMergePolicy
+ {
+
+ /// <seealso cref="MinMergeMB">
+ /// </seealso>
+ public const double DEFAULT_MIN_MERGE_MB = 1.6;
+
+ /// <summary>Default maximum segment size. A segment of this size or larger will never be merged.</summary>
+ /// <seealso cref="MaxMergeMB">
+ /// </seealso>
+ public static readonly long DEFAULT_MAX_MERGE_MB = long.MaxValue;
+
+ public LogByteSizeMergePolicy(IndexWriter writer)
+ : base(writer)
+ {
+ minMergeSize = (long) (DEFAULT_MIN_MERGE_MB * 1024 * 1024);
+ //mgarski - the line below causes an overflow in .NET, resulting in a negative number...
+ //maxMergeSize = (long) (DEFAULT_MAX_MERGE_MB * 1024 * 1024);
+ maxMergeSize = DEFAULT_MAX_MERGE_MB;
+ }
+ protected internal override long Size(SegmentInfo info)
+ {
+ return SizeBytes(info);
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ // Do nothing.
+ }
+
+
+ /// <summary><p/>Gets or sets the largest segment (measured by total
+ /// byte size of the segment's files, in MB) that may be
+ /// merged with other segments. Small values (e.g., less
+ /// than 50 MB) are best for interactive indexing, as this
+ /// limits the length of pauses while indexing to a few
+ /// seconds. Larger values are best for batched indexing
+ /// and speedier searches.<p/>
+ ///
+ /// <p/>Note that <see cref="IndexWriter.MaxMergeDocs" /> is also
+ /// used to check whether a segment is too large for
+ /// merging (exceeding either limit disqualifies the segment).<p/>
+ /// </summary>
+ public virtual double MaxMergeMB
+ {
+ get { return maxMergeSize/1024d/1024d; }
+ set
+ {
+ //mgarski: java gracefully overflows to Int64.MaxValue, .NET to MinValue...
+ maxMergeSize = (long) (value*1024*1024);
+ if (maxMergeSize < 0)
+ {
+ maxMergeSize = DEFAULT_MAX_MERGE_MB;
+ }
+ }
+ }
+
+ /// <summary>Gets or sets the minimum size for the lowest level segments.
+ /// Any segments below this size are considered to be on
+ /// the same level (even if they vary drastically in size)
+ /// and will be merged whenever there are mergeFactor of
+ /// them. This effectively truncates the "long tail" of
+ /// small segments that would otherwise be created into a
+ /// single level. If you set this too large, it could
+ /// greatly increase the merging cost during indexing (if
+ /// you flush many small segments).
+ /// </summary>
+ public virtual double MinMergeMB
+ {
+ get { return ((double) minMergeSize)/1024/1024; }
+ set { minMergeSize = (long) (value*1024*1024); }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/LogDocMergePolicy.cs b/src/core/Index/LogDocMergePolicy.cs
new file mode 100644
index 0000000..55ee407
--- /dev/null
+++ b/src/core/Index/LogDocMergePolicy.cs
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary>This is a <see cref="LogMergePolicy" /> that measures size of a
+ /// segment as the number of documents (not taking deletions
+ /// into account).
+ /// </summary>
+
+ public class LogDocMergePolicy : LogMergePolicy
+ {
+
+ /// <seealso cref="MinMergeDocs">
+ /// </seealso>
+ public const int DEFAULT_MIN_MERGE_DOCS = 1000;
+
+ public LogDocMergePolicy(IndexWriter writer):base(writer)
+ {
+ minMergeSize = DEFAULT_MIN_MERGE_DOCS;
+
+ // maxMergeSize is never used by LogDocMergePolicy; set
+ // it to Long.MAX_VALUE to disable it
+ maxMergeSize = System.Int64.MaxValue;
+ }
+ protected internal override long Size(SegmentInfo info)
+ {
+ return SizeDocs(info);
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ // Do nothing.
+ }
+
+ /// <summary>Gets or sets the minimum size for the lowest level segments.
+ /// Any segments below this size are considered to be on
+ /// the same level (even if they vary drastically in size)
+ /// and will be merged whenever there are mergeFactor of
+ /// them. This effectively truncates the "long tail" of
+ /// small segments that would otherwise be created into a
+ /// single level. If you set this too large, it could
+ /// greatly increase the merging cost during indexing (if
+ /// you flush many small segments).
+ /// </summary>
+ public virtual int MinMergeDocs
+ {
+ get { return (int) minMergeSize; }
+ set { minMergeSize = value; }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/LogMergePolicy.cs b/src/core/Index/LogMergePolicy.cs
new file mode 100644
index 0000000..c087835
--- /dev/null
+++ b/src/core/Index/LogMergePolicy.cs
@@ -0,0 +1,580 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary><p/>This class implements a <see cref="MergePolicy" /> that tries
+ /// to merge segments into levels of exponentially
+ /// increasing size, where each level has fewer segments than
+ /// the value of the merge factor. Whenever extra segments
+ /// (beyond the merge factor upper bound) are encountered,
+ /// all segments within the level are merged. You can get or
+ /// set the merge factor using <see cref="MergeFactor" /> and
+ /// <see cref="MergeFactor" /> respectively.<p/>
+ ///
+ /// <p/>This class is abstract and requires a subclass to
+ /// define the <see cref="Size" /> method which specifies how a
+ /// segment's size is determined. <see cref="LogDocMergePolicy" />
+ /// is one subclass that measures size by document count in
+ /// the segment. <see cref="LogByteSizeMergePolicy" /> is another
+ /// subclass that measures size as the total byte size of the
+ /// file(s) for the segment.<p/>
+ /// </summary>
+
+ public abstract class LogMergePolicy : MergePolicy
+ {
+
+ /// <summary>Defines the allowed range of log(size) for each
+ /// level. A level is computed by taking the max segment
+ /// log size, minus LEVEL_LOG_SPAN, and finding all
+ /// segments falling within that range.
+ /// </summary>
+ public const double LEVEL_LOG_SPAN = 0.75;
+
+ /// <summary>Default merge factor, which is how many segments are
+ /// merged at a time
+ /// </summary>
+ public const int DEFAULT_MERGE_FACTOR = 10;
+
+ /// <summary>Default maximum segment size. A segment of this size or larger will never be merged.</summary>
+ /// <seealso cref="MaxMergeDocs">
+ /// </seealso>
+ public static readonly int DEFAULT_MAX_MERGE_DOCS = System.Int32.MaxValue;
+
+ /// <summary> Default noCFSRatio. If a merge's size is >= 10% of
+ /// the index, then we disable compound file for it.
+ /// See <see cref="NoCFSRatio"/>
+ /// </summary>
+ public static double DEFAULT_NO_CFS_RATIO = 0.1;
+
+ private int mergeFactor = DEFAULT_MERGE_FACTOR;
+
+ internal long minMergeSize;
+ internal long maxMergeSize;
+ internal int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
+
+ protected double internalNoCFSRatio = DEFAULT_NO_CFS_RATIO;
+
+ /* TODO 3.0: change this default to true */
+ protected internal bool internalCalibrateSizeByDeletes = true;
+
+ private bool useCompoundFile = true;
+ private bool useCompoundDocStore = true;
+
+ protected LogMergePolicy(IndexWriter writer):base(writer)
+ {
+ }
+
+ protected internal virtual bool Verbose()
+ {
+ return writer != null && writer.Verbose;
+ }
+
+ public double NoCFSRatio
+ {
+ get { return internalNoCFSRatio; }
+ set
+ {
+ if (value < 0.0 || value > 1.0)
+ {
+ throw new ArgumentException("noCFSRatio must be 0.0 to 1.0 inclusive; got " + value);
+ }
+ this.internalNoCFSRatio = value;
+ }
+ }
+
+ /* If a merged segment will be more than this percentage
+ * of the total size of the index, leave the segment as
+ * non-compound file even if compound file is enabled.
+ * Set to 1.0 to always use CFS regardless of merge
+ * size. */
+ private void Message(System.String message)
+ {
+ if (Verbose())
+ writer.Message("LMP: " + message);
+ }
+
+
+ /// <summary>Gets or sets how often segment indices are merged by
+ /// addDocument(). With smaller values, less RAM is used
+ /// while indexing, and searches on unoptimized indices are
+ /// faster, but indexing speed is slower. With larger
+ /// values, more RAM is used during indexing, and while
+ /// searches on unoptimized indices are slower, indexing is
+ /// faster. Thus larger values (&gt; 10) are best for batch
+ /// index creation, and smaller values (&lt; 10) for indices
+ /// that are interactively maintained.
+ /// </summary>
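+ /// <example>
+ /// Rough guideline only (assumes <c>policy</c> is a LogMergePolicy instance; the values
+ /// are illustrative, not recommendations from this patch):
+ /// <code>
+ /// policy.MergeFactor = 30;  // batch indexing: more RAM, fewer/larger merges
+ /// policy.MergeFactor = 5;   // interactive indexing: smaller, more frequent merges
+ /// </code>
+ /// </example>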
+ public virtual int MergeFactor
+ {
+ get { return mergeFactor; }
+ set
+ {
+ if (value < 2)
+ throw new System.ArgumentException("mergeFactor cannot be less than 2");
+ this.mergeFactor = value;
+ }
+ }
+
+ public override bool UseCompoundFile(SegmentInfos infos, SegmentInfo info)
+ {
+ return useCompoundFile;
+ }
+
+ /// <summary>Sets whether compound file format should be used for
+ /// newly flushed and newly merged segments.
+ /// </summary>
+ public virtual void SetUseCompoundFile(bool useCompoundFile)
+ {
+ this.useCompoundFile = useCompoundFile;
+ }
+
+ [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
+ public virtual bool GetUseCompoundFile()
+ {
+ return useCompoundFile;
+ }
+
+ // Javadoc inherited
+ public override bool UseCompoundDocStore(SegmentInfos infos)
+ {
+ return useCompoundDocStore;
+ }
+
+ /// <summary>Sets whether compound file format should be used for
+ /// newly flushed and newly merged doc store
+ /// segment files (term vectors and stored fields).
+ /// </summary>
+ public virtual void SetUseCompoundDocStore(bool useCompoundDocStore)
+ {
+ this.useCompoundDocStore = useCompoundDocStore;
+ }
+
+ /// <summary>Returns true if newly flushed and newly merged doc
+ /// store segment files (term vectors and stored fields)
+ /// are written in compound file format.
+ /// </summary>
+ /// <seealso cref="SetUseCompoundDocStore">
+ /// </seealso>
+ [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
+ public virtual bool GetUseCompoundDocStore()
+ {
+ return useCompoundDocStore;
+ }
+
+ /// <summary>Gets or sets whether the segment size should be calibrated by
+ /// the number of deletes when choosing segments for merge.
+ /// </summary>
+ public virtual bool CalibrateSizeByDeletes
+ {
+ set { this.internalCalibrateSizeByDeletes = value; }
+ get { return internalCalibrateSizeByDeletes; }
+ }
+
+ abstract protected internal long Size(SegmentInfo info);
+
+ protected internal virtual long SizeDocs(SegmentInfo info)
+ {
+ if (internalCalibrateSizeByDeletes)
+ {
+ int delCount = writer.NumDeletedDocs(info);
+ return (info.docCount - (long) delCount);
+ }
+ else
+ {
+ return info.docCount;
+ }
+ }
+
+ protected internal virtual long SizeBytes(SegmentInfo info)
+ {
+ long byteSize = info.SizeInBytes();
+ if (internalCalibrateSizeByDeletes)
+ {
+ int delCount = writer.NumDeletedDocs(info);
+ float delRatio = (info.docCount <= 0?0.0f:((float) delCount / (float) info.docCount));
+ return (info.docCount <= 0?byteSize:(long) (byteSize * (1.0f - delRatio)));
+ }
+ else
+ {
+ return byteSize;
+ }
+ }
+
+ private bool IsOptimized(SegmentInfos infos, int maxNumSegments, ISet<SegmentInfo> segmentsToOptimize)
+ {
+ int numSegments = infos.Count;
+ int numToOptimize = 0;
+ SegmentInfo optimizeInfo = null;
+ for (int i = 0; i < numSegments && numToOptimize <= maxNumSegments; i++)
+ {
+ SegmentInfo info = infos.Info(i);
+ if (segmentsToOptimize.Contains(info))
+ {
+ numToOptimize++;
+ optimizeInfo = info;
+ }
+ }
+
+ return numToOptimize <= maxNumSegments && (numToOptimize != 1 || IsOptimized(optimizeInfo));
+ }
+
+ /// <summary>Returns true if this single info is optimized (has no
+ /// pending norms or deletes, is in the same dir as the
+ /// writer, and matches the current compound file setting).
+ /// </summary>
+ private bool IsOptimized(SegmentInfo info)
+ {
+ bool hasDeletions = writer.NumDeletedDocs(info) > 0;
+ return !hasDeletions && !info.HasSeparateNorms() && info.dir == writer.Directory &&
+ (info.GetUseCompoundFile() == useCompoundFile || internalNoCFSRatio < 1.0);
+ }
+
+ /// <summary>Returns the merges necessary to optimize the index.
+ /// This merge policy defines "optimized" to mean only one
+ /// segment in the index, where that segment has no
+ /// deletions pending nor separate norms, and it is in
+ /// compound file format if the current useCompoundFile
+ /// setting is true. This method returns multiple merges
+ /// (mergeFactor at a time) so the <see cref="MergeScheduler" />
+ /// in use may make use of concurrency.
+ /// </summary>
+ public override MergeSpecification FindMergesForOptimize(SegmentInfos infos, int maxNumSegments, ISet<SegmentInfo> segmentsToOptimize)
+ {
+ MergeSpecification spec;
+
+ System.Diagnostics.Debug.Assert(maxNumSegments > 0);
+
+ if (!IsOptimized(infos, maxNumSegments, segmentsToOptimize))
+ {
+
+ // Find the newest (rightmost) segment that needs to
+ // be optimized (other segments may have been flushed
+ // since optimize started):
+ int last = infos.Count;
+ while (last > 0)
+ {
+ SegmentInfo info = infos.Info(--last);
+ if (segmentsToOptimize.Contains(info))
+ {
+ last++;
+ break;
+ }
+ }
+
+ if (last > 0)
+ {
+
+ spec = new MergeSpecification();
+
+ // First, enroll all "full" merges (size
+ // mergeFactor) to potentially be run concurrently:
+ while (last - maxNumSegments + 1 >= mergeFactor)
+ {
+ spec.Add(MakeOneMerge(infos, infos.Range(last - mergeFactor, last)));
+ last -= mergeFactor;
+ }
+
+ // Only if there are no full merges pending do we
+ // add a final partial (< mergeFactor segments) merge:
+ if (0 == spec.merges.Count)
+ {
+ if (maxNumSegments == 1)
+ {
+
+ // Since we must optimize down to 1 segment, the
+ // choice is simple:
+ if (last > 1 || !IsOptimized(infos.Info(0)))
+ spec.Add(MakeOneMerge(infos, infos.Range(0, last)));
+ }
+ else if (last > maxNumSegments)
+ {
+
+ // Take care to pick a partial merge that is
+ // least cost, but does not make the index too
+ // lopsided. If we always just picked the
+ // partial tail then we could produce a highly
+ // lopsided index over time:
+
+ // We must merge this many segments to leave
+ // maxNumSegments in the index (from when
+ // optimize was first kicked off):
+ int finalMergeSize = last - maxNumSegments + 1;
+
+ // Consider all possible starting points:
+ long bestSize = 0;
+ int bestStart = 0;
+
+ for (int i = 0; i < last - finalMergeSize + 1; i++)
+ {
+ long sumSize = 0;
+ for (int j = 0; j < finalMergeSize; j++)
+ sumSize += Size(infos.Info(j + i));
+ if (i == 0 || (sumSize < 2 * Size(infos.Info(i - 1)) && sumSize < bestSize))
+ {
+ bestStart = i;
+ bestSize = sumSize;
+ }
+ }
+
+ spec.Add(MakeOneMerge(infos, infos.Range(bestStart, bestStart + finalMergeSize)));
+ }
+ }
+ }
+ else
+ spec = null;
+ }
+ else
+ spec = null;
+
+ return spec;
+ }
+
+ /// <summary> Finds merges necessary to expunge all deletes from the
+ /// index. We simply merge adjacent segments that have
+ /// deletes, up to mergeFactor at a time.
+ /// </summary>
+ public override MergeSpecification FindMergesToExpungeDeletes(SegmentInfos segmentInfos)
+ {
+ int numSegments = segmentInfos.Count;
+
+ if (Verbose())
+ Message("findMergesToExpungeDeletes: " + numSegments + " segments");
+
+ MergeSpecification spec = new MergeSpecification();
+ int firstSegmentWithDeletions = - 1;
+ for (int i = 0; i < numSegments; i++)
+ {
+ SegmentInfo info = segmentInfos.Info(i);
+ int delCount = writer.NumDeletedDocs(info);
+ if (delCount > 0)
+ {
+ if (Verbose())
+ Message(" segment " + info.name + " has deletions");
+ if (firstSegmentWithDeletions == - 1)
+ firstSegmentWithDeletions = i;
+ else if (i - firstSegmentWithDeletions == mergeFactor)
+ {
+ // We've seen mergeFactor segments in a row with
+ // deletions, so force a merge now:
+ if (Verbose())
+ Message(" add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive");
+ spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, i)));
+ firstSegmentWithDeletions = i;
+ }
+ }
+ else if (firstSegmentWithDeletions != - 1)
+ {
+ // End of a sequence of segments with deletions, so,
+ // merge those past segments even if it's fewer than
+ // mergeFactor segments
+ if (Verbose())
+ Message(" add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive");
+ spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, i)));
+ firstSegmentWithDeletions = - 1;
+ }
+ }
+
+ if (firstSegmentWithDeletions != - 1)
+ {
+ if (Verbose())
+ Message(" add merge " + firstSegmentWithDeletions + " to " + (numSegments - 1) + " inclusive");
+ spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, numSegments)));
+ }
+
+ return spec;
+ }
+
+ /// <summary>Checks if any merges are now necessary and returns a
+ /// <see cref="MergePolicy.MergeSpecification" /> if so. A merge
+ /// is necessary when there are more than <see cref="MergeFactor" />
+ /// segments at a given level. When
+ /// multiple levels have too many segments, this method
+ /// will return multiple merges, allowing the <see cref="MergeScheduler" />
+ /// to use concurrency.
+ /// </summary>
+ public override MergeSpecification FindMerges(SegmentInfos infos)
+ {
+
+ int numSegments = infos.Count;
+ if (Verbose())
+ Message("findMerges: " + numSegments + " segments");
+
+ // Compute levels, which is just log (base mergeFactor)
+ // of the size of each segment
+ float[] levels = new float[numSegments];
+ float norm = (float) System.Math.Log(mergeFactor);
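+
+ // Illustrative arithmetic: with mergeFactor = 10, a segment of size 1,000 gets
+ // level log(1000)/log(10) = 3.0 and a segment of size 1,000,000 gets level 6.0,
+ // so they fall into different levels.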
+
+ for (int i = 0; i < numSegments; i++)
+ {
+ SegmentInfo info = infos.Info(i);
+ long size = Size(info);
+
+ // Floor tiny segments
+ if (size < 1)
+ size = 1;
+ levels[i] = (float) System.Math.Log(size) / norm;
+ }
+
+ float levelFloor;
+ if (minMergeSize <= 0)
+ levelFloor = (float) 0.0;
+ else
+ {
+ levelFloor = (float) (System.Math.Log(minMergeSize) / norm);
+ }
+
+ // Now, we quantize the log values into levels. The
+ // first level is any segment whose log size is within
+ // LEVEL_LOG_SPAN of the max size, or that has such a
+ // segment "to the right". Then, we find the max of all
+ // other segments and use that to define the next level
+ // segment, etc.
+
+ MergeSpecification spec = null;
+
+ int start = 0;
+ while (start < numSegments)
+ {
+
+ // Find max level of all segments not already
+ // quantized.
+ float maxLevel = levels[start];
+ for (int i = 1 + start; i < numSegments; i++)
+ {
+ float level = levels[i];
+ if (level > maxLevel)
+ maxLevel = level;
+ }
+
+ // Now search backwards for the rightmost segment that
+ // falls into this level:
+ float levelBottom;
+ if (maxLevel < levelFloor)
+ // All remaining segments fall into the min level
+ levelBottom = - 1.0F;
+ else
+ {
+ levelBottom = (float) (maxLevel - LEVEL_LOG_SPAN);
+
+ // Force a boundary at the level floor
+ if (levelBottom < levelFloor && maxLevel >= levelFloor)
+ levelBottom = levelFloor;
+ }
+
+ int upto = numSegments - 1;
+ while (upto >= start)
+ {
+ if (levels[upto] >= levelBottom)
+ {
+ break;
+ }
+ upto--;
+ }
+ if (Verbose())
+ Message(" level " + levelBottom + " to " + maxLevel + ": " + (1 + upto - start) + " segments");
+
+ // Finally, record all merges that are viable at this level:
+ int end = start + mergeFactor;
+ while (end <= 1 + upto)
+ {
+ bool anyTooLarge = false;
+ for (int i = start; i < end; i++)
+ {
+ SegmentInfo info = infos.Info(i);
+ anyTooLarge |= (Size(info) >= maxMergeSize || SizeDocs(info) >= maxMergeDocs);
+ }
+
+ if (!anyTooLarge)
+ {
+ if (spec == null)
+ spec = new MergeSpecification();
+ if (Verbose())
+ Message(" " + start + " to " + end + ": add this merge");
+ spec.Add(MakeOneMerge(infos, infos.Range(start, end)));
+ }
+ else if (Verbose())
+ Message(" " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");
+
+ start = end;
+ end = start + mergeFactor;
+ }
+
+ start = 1 + upto;
+ }
+
+ return spec;
+ }
+
+ protected OneMerge MakeOneMerge(SegmentInfos infos, SegmentInfos infosToMerge)
+ {
+ bool doCFS;
+ if (!useCompoundFile)
+ {
+ doCFS = false;
+ }
+ else if (internalNoCFSRatio == 1.0)
+ {
+ doCFS = true;
+ }
+ else
+ {
+ long totSize = 0;
+ foreach(SegmentInfo info in infos)
+ {
+ totSize += Size(info);
+ }
+ long mergeSize = 0;
+ foreach(SegmentInfo info in infosToMerge)
+ {
+ mergeSize += Size(info);
+ }
+
+ doCFS = mergeSize <= internalNoCFSRatio * totSize;
+ }
+
+ return new OneMerge(infosToMerge, doCFS);
+ }
+
+ /// <summary>
+ /// Gets or sets the largest segment (measured by document
+ /// count) that may be merged with other segments.
+ /// <p/>Small values (e.g., less than 10,000) are best for
+ /// interactive indexing, as this limits the length of
+ /// pauses while indexing to a few seconds. Larger values
+ /// are best for batched indexing and speedier
+ /// searches.<p/>
+ ///
+ /// <p/>The default value is <see cref="int.MaxValue" />.<p/>
+ ///
+ /// <p/>The default merge policy (<see cref="LogByteSizeMergePolicy" />)
+ /// also allows you to set this
+ /// limit by net size (in MB) of the segment, using
+ /// <see cref="LogByteSizeMergePolicy.MaxMergeMB" />.<p/>
+ /// </summary>
+ public virtual int MaxMergeDocs
+ {
+ set { this.maxMergeDocs = value; }
+ get { return maxMergeDocs; }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/MergeDocIDRemapper.cs b/src/core/Index/MergeDocIDRemapper.cs
new file mode 100644
index 0000000..2771b53
--- /dev/null
+++ b/src/core/Index/MergeDocIDRemapper.cs
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary>Remaps docIDs after a merge has completed, where the
+ /// merged segments had at least one deletion. This is used
+ /// to renumber the buffered deletes in IndexWriter when a
+ /// merge of segments with deletions commits.
+ /// </summary>
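+ /// <remarks>
+ /// A hypothetical illustration (numbers chosen only for exposition): if segments 0-2,
+ /// holding 10 documents each, are merged and 3 deleted documents are compacted away,
+ /// then minDocID = 0, maxDocID = 30 and docShift = 3, so a buffered delete against
+ /// old docID 35 (which lies after the merged range) is remapped to 32.
+ /// </remarks>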
+
+ sealed class MergeDocIDRemapper
+ {
+ internal int[] starts; // used for binary search of mapped docID
+ internal int[] newStarts; // starts, minus the deletes
+ internal int[][] docMaps; // maps docIDs in the merged set
+ internal int minDocID; // minimum docID that needs renumbering
+ internal int maxDocID; // 1+ the max docID that needs renumbering
+ internal int docShift; // total # deleted docs that were compacted by this merge
+
+ public MergeDocIDRemapper(SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergedDocCount)
+ {
+ this.docMaps = docMaps;
+ SegmentInfo firstSegment = merge.segments.Info(0);
+ int i = 0;
+ while (true)
+ {
+ SegmentInfo info = infos.Info(i);
+ if (info.Equals(firstSegment))
+ break;
+ minDocID += info.docCount;
+ i++;
+ }
+
+ int numDocs = 0;
+ for (int j = 0; j < docMaps.Length; i++, j++)
+ {
+ numDocs += infos.Info(i).docCount;
+ System.Diagnostics.Debug.Assert(infos.Info(i).Equals(merge.segments.Info(j)));
+ }
+ maxDocID = minDocID + numDocs;
+
+ starts = new int[docMaps.Length];
+ newStarts = new int[docMaps.Length];
+
+ starts[0] = minDocID;
+ newStarts[0] = minDocID;
+ for (i = 1; i < docMaps.Length; i++)
+ {
+ int lastDocCount = merge.segments.Info(i - 1).docCount;
+ starts[i] = starts[i - 1] + lastDocCount;
+ newStarts[i] = newStarts[i - 1] + lastDocCount - delCounts[i - 1];
+ }
+ docShift = numDocs - mergedDocCount;
+
+ // There are rare cases when docShift is 0. It happens
+ // if you try to delete a docID that's out of bounds,
+ // because the SegmentReader still allocates deletedDocs
+ // and pretends it has deletions ... so we can't make
+ // this assert here
+ // assert docShift > 0;
+
+ // Make sure it all adds up:
+ System.Diagnostics.Debug.Assert(docShift == maxDocID -(newStarts [docMaps.Length - 1] + merge.segments.Info(docMaps.Length - 1).docCount - delCounts [docMaps.Length - 1]));
+ }
+
+ public int Remap(int oldDocID)
+ {
+ if (oldDocID < minDocID)
+ // Unaffected by merge
+ return oldDocID;
+ else if (oldDocID >= maxDocID)
+ // This doc was "after" the merge, so simple shift
+ return oldDocID - docShift;
+ else
+ {
+ // Binary search to locate this document & find its new docID
+ int lo = 0; // binary search the starts array
+ int hi = docMaps.Length - 1; // for the rightmost start <= oldDocID
+
+ while (hi >= lo)
+ {
+ int mid = Number.URShift((lo + hi), 1);
+ int midValue = starts[mid];
+ if (oldDocID < midValue)
+ hi = mid - 1;
+ else if (oldDocID > midValue)
+ lo = mid + 1;
+ else
+ {
+ // found a match
+ while (mid + 1 < docMaps.Length && starts[mid + 1] == midValue)
+ {
+ mid++; // scan to last match
+ }
+ if (docMaps[mid] != null)
+ return newStarts[mid] + docMaps[mid][oldDocID - starts[mid]];
+ else
+ return newStarts[mid] + oldDocID - starts[mid];
+ }
+ }
+ if (docMaps[hi] != null)
+ return newStarts[hi] + docMaps[hi][oldDocID - starts[hi]];
+ else
+ return newStarts[hi] + oldDocID - starts[hi];
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/MergePolicy.cs b/src/core/Index/MergePolicy.cs
new file mode 100644
index 0000000..59b4817
--- /dev/null
+++ b/src/core/Index/MergePolicy.cs
@@ -0,0 +1,309 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using Directory = Lucene.Net.Store.Directory;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> <p/>Expert: a MergePolicy determines the sequence of
+ /// primitive merge operations to be used for overall merge
+ /// and optimize operations.<p/>
+ ///
+ /// <p/>Whenever the segments in an index have been altered by
+ /// <see cref="IndexWriter" />, either the addition of a newly
+ /// flushed segment, addition of many segments from
+ /// addIndexes* calls, or a previous merge that may now need
+ /// to cascade, <see cref="IndexWriter" /> invokes <see cref="FindMerges" />
+ /// to give the MergePolicy a chance to pick
+ /// merges that are now required. This method returns a
+ /// <see cref="MergeSpecification" /> instance describing the set of
+ /// merges that should be done, or null if no merges are
+ /// necessary. When IndexWriter.optimize is called, it calls
+ /// <see cref="FindMergesForOptimize" /> and the MergePolicy should
+ /// then return the necessary merges.<p/>
+ ///
+ /// <p/>Note that the policy can return more than one merge at
+ /// a time. In this case, if the writer is using <see cref="SerialMergeScheduler" />,
+ /// the merges will be run sequentially but if it is using
+ /// <see cref="ConcurrentMergeScheduler" /> they will be run concurrently.<p/>
+ ///
+ /// <p/>The default MergePolicy is <see cref="LogByteSizeMergePolicy" />.<p/>
+ ///
+ /// <p/><b>NOTE:</b> This API is new and still experimental
+ /// (subject to change suddenly in the next release)<p/>
+ ///
+ /// <p/><b>NOTE</b>: This class typically requires access to
+ /// package-private APIs (e.g. <c>SegmentInfos</c>) to do its job;
+ /// if you implement your own MergePolicy, you'll need to put
+ /// it in package Lucene.Net.Index in order to use
+ /// these APIs.
+ /// </summary>
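+ /// <remarks>
+ /// A rough, hypothetical sketch of how the result of <see cref="FindMerges" /> is
+ /// consumed (the RunMerge helper below is illustrative only; in practice the
+ /// configured <see cref="MergeScheduler" /> executes the merges):
+ /// <code>
+ /// MergePolicy.MergeSpecification spec = mergePolicy.FindMerges(segmentInfos);
+ /// if (spec != null)
+ /// {
+ ///     foreach (MergePolicy.OneMerge merge in spec.merges)
+ ///     {
+ ///         RunMerge(merge); // hypothetical helper standing in for the scheduler
+ ///     }
+ /// }
+ /// </code>
+ /// </remarks>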
+
+ public abstract class MergePolicy : IDisposable
+ {
+
+ /// <summary>OneMerge provides the information necessary to perform
+ /// an individual primitive merge operation, resulting in
+ /// a single new segment. The merge spec includes the
+ /// subset of segments to be merged as well as whether the
+ /// new segment should use the compound file format.
+ /// </summary>
+
+ public class OneMerge
+ {
+
+ internal SegmentInfo info; // used by IndexWriter
+ internal bool mergeDocStores; // used by IndexWriter
+ internal bool optimize; // used by IndexWriter
+ internal bool registerDone; // used by IndexWriter
+ internal long mergeGen; // used by IndexWriter
+ internal bool isExternal; // used by IndexWriter
+ internal int maxNumSegmentsOptimize; // used by IndexWriter
+ internal SegmentReader[] readers; // used by IndexWriter
+ internal SegmentReader[] readersClone; // used by IndexWriter
+ internal SegmentInfos segments;
+ internal bool useCompoundFile;
+ internal bool aborted;
+ internal System.Exception error;
+
+ public OneMerge(SegmentInfos segments, bool useCompoundFile)
+ {
+ if (0 == segments.Count)
+ throw new ArgumentException("segments must include at least one segment", "segments");
+ this.segments = segments;
+ this.useCompoundFile = useCompoundFile;
+ }
+
+ /// <summary>Record that an exception occurred while executing
+ /// this merge
+ /// </summary>
+ internal virtual void SetException(System.Exception error)
+ {
+ lock (this)
+ {
+ this.error = error;
+ }
+ }
+
+ /// <summary>Retrieve previous exception set by <see cref="SetException" />.</summary>
+ internal virtual System.Exception GetException()
+ {
+ lock (this)
+ {
+ return error;
+ }
+ }
+
+ /// <summary>Mark this merge as aborted. If this is called
+ /// before the merge is committed then the merge will
+ /// not be committed.
+ /// </summary>
+ internal virtual void Abort()
+ {
+ lock (this)
+ {
+ aborted = true;
+ }
+ }
+
+ /// <summary>Returns true if this merge was aborted. </summary>
+ internal virtual bool IsAborted()
+ {
+ lock (this)
+ {
+ return aborted;
+ }
+ }
+
+ internal virtual void CheckAborted(Directory dir)
+ {
+ lock (this)
+ {
+ if (aborted)
+ throw new MergeAbortedException("merge is aborted: " + SegString(dir));
+ }
+ }
+
+ internal virtual String SegString(Directory dir)
+ {
+ var b = new System.Text.StringBuilder();
+ int numSegments = segments.Count;
+ for (int i = 0; i < numSegments; i++)
+ {
+ if (i > 0)
+ b.Append(' ');
+ b.Append(segments.Info(i).SegString(dir));
+ }
+ if (info != null)
+ b.Append(" into ").Append(info.name);
+ if (optimize)
+ b.Append(" [optimize]");
+ if (mergeDocStores)
+ {
+ b.Append(" [mergeDocStores]");
+ }
+ return b.ToString();
+ }
+
+ public SegmentInfos segments_ForNUnit
+ {
+ get { return segments; }
+ }
+ }
+
+ /// <summary> A MergeSpecification instance provides the information
+ /// necessary to perform multiple merges. It simply
+ /// contains a list of <see cref="OneMerge" /> instances.
+ /// </summary>
+
+ public class MergeSpecification
+ {
+
+ /// <summary> The subset of segments to be included in the primitive merge.</summary>
+
+ public IList<OneMerge> merges = new List<OneMerge>();
+
+ public virtual void Add(OneMerge merge)
+ {
+ merges.Add(merge);
+ }
+
+ public virtual String SegString(Directory dir)
+ {
+ var b = new System.Text.StringBuilder();
+ b.Append("MergeSpec:\n");
+ int count = merges.Count;
+ for (int i = 0; i < count; i++)
+ b.Append(" ").Append(1 + i).Append(": ").Append(merges[i].SegString(dir));
+ return b.ToString();
+ }
+ }
+
+ /// <summary>Exception thrown if there are any problems while
+ /// executing a merge.
+ /// </summary>
+ [Serializable]
+ public class MergeException:System.SystemException
+ {
+ private readonly Directory dir;
+
+ public MergeException(System.String message, Directory dir):base(message)
+ {
+ this.dir = dir;
+ }
+
+ public MergeException(System.Exception exc, Directory dir):base(null, exc)
+ {
+ this.dir = dir;
+ }
+
+ /// <summary>Returns the <see cref="Directory" /> of the index that hit
+ /// the exception.
+ /// </summary>
+ public virtual Directory Directory
+ {
+ get { return dir; }
+ }
+ }
+
+ [Serializable]
+ public class MergeAbortedException:System.IO.IOException
+ {
+ public MergeAbortedException():base("merge is aborted")
+ {
+ }
+ public MergeAbortedException(System.String message):base(message)
+ {
+ }
+ }
+
+ protected internal IndexWriter writer;
+
+ protected MergePolicy(IndexWriter writer)
+ {
+ this.writer = writer;
+ }
+
+ /// <summary> Determine what set of merge operations are now necessary on the index.
+ /// <see cref="IndexWriter" /> calls this whenever there is a change to the segments.
+ /// This call is always synchronized on the <see cref="IndexWriter" /> instance so
+ /// only one thread at a time will call this method.
+ ///
+ /// </summary>
+ /// <param name="segmentInfos">the total set of segments in the index
+ /// </param>
+ public abstract MergeSpecification FindMerges(SegmentInfos segmentInfos);
+
+ /// <summary> Determine what set of merge operations is necessary in order to optimize
+ /// the index. <see cref="IndexWriter" /> calls this when its
+ /// <see cref="IndexWriter.Optimize()" /> method is called. This call is always
+ /// synchronized on the <see cref="IndexWriter" /> instance so only one thread at a
+ /// time will call this method.
+ ///
+ /// </summary>
+ /// <param name="segmentInfos">the total set of segments in the index
+ /// </param>
+ /// <param name="maxSegmentCount">requested maximum number of segments in the index (currently this
+ /// is always 1)
+ /// </param>
+ /// <param name="segmentsToOptimize">contains the specific SegmentInfo instances that must be merged
+ /// away. This may be a subset of all SegmentInfos.
+ /// </param>
+ public abstract MergeSpecification FindMergesForOptimize(SegmentInfos segmentInfos, int maxSegmentCount,
+ ISet<SegmentInfo> segmentsToOptimize);
+
+ /// <summary> Determine what set of merge operations is necessary in order to expunge all
+ /// deletes from the index.
+ ///
+ /// </summary>
+ /// <param name="segmentInfos">the total set of segments in the index
+ /// </param>
+ public abstract MergeSpecification FindMergesToExpungeDeletes(SegmentInfos segmentInfos);
+
+ /// <summary> Release all resources for the policy.</summary>
+ [Obsolete("Use Dispose() instead")]
+ public void Close()
+ {
+ Dispose();
+ }
+
+ /// <summary> Release all resources for the policy.</summary>
+ public void Dispose()
+ {
+ Dispose(true);
+ }
+
+ protected abstract void Dispose(bool disposing);
+
+ /// <summary> Returns true if a newly flushed (not from merge)
+ /// segment should use the compound file format.
+ /// </summary>
+ public abstract bool UseCompoundFile(SegmentInfos segments, SegmentInfo newSegment);
+
+ /// <summary> Returns true if the doc store files should use the
+ /// compound file format.
+ /// </summary>
+ public abstract bool UseCompoundDocStore(SegmentInfos segments);
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/MergeScheduler.cs b/src/core/Index/MergeScheduler.cs
new file mode 100644
index 0000000..7fbf83d
--- /dev/null
+++ b/src/core/Index/MergeScheduler.cs
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary><p/>Expert: <see cref="IndexWriter" /> uses an instance
+ /// implementing this interface to execute the merges
+ /// selected by a <see cref="MergePolicy" />. The default
+ /// MergeScheduler is <see cref="ConcurrentMergeScheduler" />.<p/>
+ ///
+ /// <p/><b>NOTE:</b> This API is new and still experimental
+ /// (subject to change suddenly in the next release)<p/>
+ ///
+ /// <p/><b>NOTE</b>: This class typically requires access to
+ /// package-private APIs (e.g., SegmentInfos) to do its job;
+ /// if you implement your own MergeScheduler, you'll need to put
+ /// it in package Lucene.Net.Index in order to use
+ /// these APIs.
+ /// </summary>
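+ /// <remarks>
+ /// As a rough sketch, a serial scheduler (essentially what
+ /// <see cref="SerialMergeScheduler" /> does) could be written as:
+ /// <code>
+ /// public override void Merge(IndexWriter writer)
+ /// {
+ ///     while (true)
+ ///     {
+ ///         MergePolicy.OneMerge merge = writer.GetNextMerge();
+ ///         if (merge == null)
+ ///             break;
+ ///         writer.Merge(merge); // hand one primitive merge back to the writer
+ ///     }
+ /// }
+ /// </code>
+ /// </remarks>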
+
+ public abstract class MergeScheduler : IDisposable
+ {
+
+ /// <summary>Run the merges provided by <see cref="IndexWriter.GetNextMerge()" />. </summary>
+ public abstract void Merge(IndexWriter writer);
+
+ [Obsolete("Use Dispose() instead")]
+ public void Close()
+ {
+ Dispose();
+ }
+
+ public void Dispose()
+ {
+ Dispose(true);
+ }
+
+ /// <summary>Close this MergeScheduler. </summary>
+ protected abstract void Dispose(bool disposing);
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/MultiLevelSkipListReader.cs b/src/core/Index/MultiLevelSkipListReader.cs
new file mode 100644
index 0000000..28b4fd5
--- /dev/null
+++ b/src/core/Index/MultiLevelSkipListReader.cs
@@ -0,0 +1,341 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using BufferedIndexInput = Lucene.Net.Store.BufferedIndexInput;
+using IndexInput = Lucene.Net.Store.IndexInput;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> This abstract class reads skip lists with multiple levels.
+ ///
+ /// See <see cref="MultiLevelSkipListWriter" /> for the information about the encoding
+ /// of the multi level skip lists.
+ ///
+ /// Subclasses must implement the abstract method <see cref="ReadSkipData(int, IndexInput)" />
+ /// which defines the actual format of the skip data.
+ /// </summary>
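+ /// <remarks>
+ /// For orientation, assuming a base <c>skipInterval</c> of 16 and <c>maxSkipLevels</c>
+ /// of 4 (values chosen only for illustration), the per-level intervals cached by the
+ /// constructor are 16, 256, 4096 and 65536, i.e. one entry on level <c>i</c> spans
+ /// skipInterval^(i+1) documents.
+ /// </remarks>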
+ abstract class MultiLevelSkipListReader : IDisposable
+ {
+ // the maximum number of skip levels possible for this index
+ private readonly int maxNumberOfSkipLevels;
+
+ // number of levels in this skip list
+ private int numberOfSkipLevels;
+
+ // Expert: defines the number of top skip levels to buffer in memory.
+ // Reducing this number results in less memory usage, but possibly
+ // slower performance due to more random I/Os.
+ // Please notice that the space each level occupies is limited by
+ // the skipInterval. The top level cannot contain more than
+ // skipInterval entries, the second top level cannot contain more
+ // than skipInterval^2 entries, and so forth.
+ private const int numberOfLevelsToBuffer = 1;
+
+ private int docCount;
+ private bool haveSkipped;
+
+ private bool isDisposed;
+
+ private readonly IndexInput[] skipStream; // skipStream for each level
+ private readonly long[] skipPointer; // the start pointer of each skip level
+ private readonly int[] skipInterval; // skipInterval of each level
+ private readonly int[] numSkipped; // number of docs skipped per level
+
+ private readonly int[] skipDoc; // doc id of current skip entry per level
+ private int lastDoc; // doc id of last read skip entry with docId <= target
+ private readonly long[] childPointer; // child pointer of current skip entry per level
+ private long lastChildPointer; // childPointer of last read skip entry with docId <= target
+
+ private readonly bool inputIsBuffered;
+
+ protected MultiLevelSkipListReader(IndexInput skipStream, int maxSkipLevels, int skipInterval)
+ {
+ this.skipStream = new IndexInput[maxSkipLevels];
+ this.skipPointer = new long[maxSkipLevels];
+ this.childPointer = new long[maxSkipLevels];
+ this.numSkipped = new int[maxSkipLevels];
+ this.maxNumberOfSkipLevels = maxSkipLevels;
+ this.skipInterval = new int[maxSkipLevels];
+ this.skipStream[0] = skipStream;
+ this.inputIsBuffered = (skipStream is BufferedIndexInput);
+ this.skipInterval[0] = skipInterval;
+ for (int i = 1; i < maxSkipLevels; i++)
+ {
+ // cache skip intervals
+ this.skipInterval[i] = this.skipInterval[i - 1] * skipInterval;
+ }
+ skipDoc = new int[maxSkipLevels];
+ }
+
+
+ /// <summary>Returns the id of the doc to which the last call of <see cref="SkipTo(int)" />
+ /// has skipped.
+ /// </summary>
+ internal virtual int GetDoc()
+ {
+ return lastDoc;
+ }
+
+
+ /// <summary>Skips entries to the first beyond the current whose document number is
+ /// greater than or equal to <i>target</i>. Returns the current doc count.
+ /// </summary>
+ internal virtual int SkipTo(int target)
+ {
+ if (!haveSkipped)
+ {
+ // first time, load skip levels
+ LoadSkipLevels();
+ haveSkipped = true;
+ }
+
+ // walk up the levels until highest level is found that has a skip
+ // for this target
+ int level = 0;
+ while (level < numberOfSkipLevels - 1 && target > skipDoc[level + 1])
+ {
+ level++;
+ }
+
+ while (level >= 0)
+ {
+ if (target > skipDoc[level])
+ {
+ if (!LoadNextSkip(level))
+ {
+ continue;
+ }
+ }
+ else
+ {
+ // no more skips on this level, go down one level
+ if (level > 0 && lastChildPointer > skipStream[level - 1].FilePointer)
+ {
+ SeekChild(level - 1);
+ }
+ level--;
+ }
+ }
+
+ return numSkipped[0] - skipInterval[0] - 1;
+ }
+
+ private bool LoadNextSkip(int level)
+ {
+ // we have to skip, the target document is greater than the current
+ // skip list entry
+ SetLastSkipData(level);
+
+ numSkipped[level] += skipInterval[level];
+
+ if (numSkipped[level] > docCount)
+ {
+ // this skip list is exhausted
+ skipDoc[level] = System.Int32.MaxValue;
+ if (numberOfSkipLevels > level)
+ numberOfSkipLevels = level;
+ return false;
+ }
+
+ // read next skip entry
+ skipDoc[level] += ReadSkipData(level, skipStream[level]);
+
+ if (level != 0)
+ {
+ // read the child pointer if we are not on the leaf level
+ childPointer[level] = skipStream[level].ReadVLong() + skipPointer[level - 1];
+ }
+
+ return true;
+ }
+
+ /// <summary>Seeks the skip entry on the given level </summary>
+ protected internal virtual void SeekChild(int level)
+ {
+ skipStream[level].Seek(lastChildPointer);
+ numSkipped[level] = numSkipped[level + 1] - skipInterval[level + 1];
+ skipDoc[level] = lastDoc;
+ if (level > 0)
+ {
+ childPointer[level] = skipStream[level].ReadVLong() + skipPointer[level - 1];
+ }
+ }
+
+ public void Dispose()
+ {
+ Dispose(true);
+ }
+
+ protected virtual void Dispose(bool disposing)
+ {
+ if (isDisposed) return;
+
+ if (disposing)
+ {
+ for (int i = 1; i < skipStream.Length; i++)
+ {
+ if (skipStream[i] != null)
+ {
+ skipStream[i].Close();
+ }
+ }
+ }
+
+ isDisposed = true;
+ }
+
+ /// <summary>Initializes the reader </summary>
+ internal virtual void Init(long skipPointer, int df)
+ {
+ this.skipPointer[0] = skipPointer;
+ this.docCount = df;
+ System.Array.Clear(skipDoc, 0, skipDoc.Length);
+ System.Array.Clear(numSkipped, 0, numSkipped.Length);
+ System.Array.Clear(childPointer, 0, childPointer.Length);
+
+ haveSkipped = false;
+ for (int i = 1; i < numberOfSkipLevels; i++)
+ {
+ skipStream[i] = null;
+ }
+ }
+
+ /// <summary>Loads the skip levels </summary>
+ private void LoadSkipLevels()
+ {
+ numberOfSkipLevels = docCount == 0 ? 0 : (int) System.Math.Floor(System.Math.Log(docCount) / System.Math.Log(skipInterval[0]));
+ if (numberOfSkipLevels > maxNumberOfSkipLevels)
+ {
+ numberOfSkipLevels = maxNumberOfSkipLevels;
+ }
+
+ skipStream[0].Seek(skipPointer[0]);
+
+ int toBuffer = numberOfLevelsToBuffer;
+
+ for (int i = numberOfSkipLevels - 1; i > 0; i--)
+ {
+ // the length of the current level
+ long length = skipStream[0].ReadVLong();
+
+ // the start pointer of the current level
+ skipPointer[i] = skipStream[0].FilePointer;
+ if (toBuffer > 0)
+ {
+ // buffer this level
+ skipStream[i] = new SkipBuffer(skipStream[0], (int) length);
+ toBuffer--;
+ }
+ else
+ {
+ // clone this stream, it is already at the start of the current level
+ skipStream[i] = (IndexInput) skipStream[0].Clone();
+ if (inputIsBuffered && length < BufferedIndexInput.BUFFER_SIZE)
+ {
+ ((BufferedIndexInput) skipStream[i]).SetBufferSize((int) length);
+ }
+
+ // move base stream beyond the current level
+ skipStream[0].Seek(skipStream[0].FilePointer + length);
+ }
+ }
+
+ // use base stream for the lowest level
+ skipPointer[0] = skipStream[0].FilePointer;
+ }
+
+ /// <summary> Subclasses must implement the actual skip data encoding in this method.
+ ///
+ /// </summary>
+ /// <param name="level">the level skip data shall be read from
+ /// </param>
+ /// <param name="skipStream">the skip stream to read from
+ /// </param>
+ protected internal abstract int ReadSkipData(int level, IndexInput skipStream);
+
+ /// <summary>Copies the values of the last read skip entry on this level </summary>
+ protected internal virtual void SetLastSkipData(int level)
+ {
+ lastDoc = skipDoc[level];
+ lastChildPointer = childPointer[level];
+ }
+
+
+ /// <summary>used to buffer the top skip levels </summary>
+ private sealed class SkipBuffer : IndexInput
+ {
+ private byte[] data;
+ private readonly long pointer;
+ private int pos;
+
+ private bool isDisposed;
+
+ internal SkipBuffer(IndexInput input, int length)
+ {
+ data = new byte[length];
+ pointer = input.FilePointer;
+ input.ReadBytes(data, 0, length);
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ if (isDisposed) return;
+ if (disposing)
+ {
+ data = null;
+ }
+
+ isDisposed = true;
+ }
+
+ public override long FilePointer
+ {
+ get { return pointer + pos; }
+ }
+
+ public override long Length()
+ {
+ return data.Length;
+ }
+
+ public override byte ReadByte()
+ {
+ return data[pos++];
+ }
+
+ public override void ReadBytes(byte[] b, int offset, int len)
+ {
+ Array.Copy(data, pos, b, offset, len);
+ pos += len;
+ }
+
+ public override void Seek(long pos)
+ {
+ this.pos = (int) (pos - pointer);
+ }
+
+ override public System.Object Clone()
+ {
+ System.Diagnostics.Debug.Fail("Port issue:", "Lets see if we need this FilterIndexReader.Clone()"); // {{Aroush-2.9}}
+ return null;
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/MultiLevelSkipListWriter.cs b/src/core/Index/MultiLevelSkipListWriter.cs
new file mode 100644
index 0000000..00543f2
--- /dev/null
+++ b/src/core/Index/MultiLevelSkipListWriter.cs
@@ -0,0 +1,171 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using IndexOutput = Lucene.Net.Store.IndexOutput;
+using RAMOutputStream = Lucene.Net.Store.RAMOutputStream;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> This abstract class writes skip lists with multiple levels.
+ ///
+ /// Example for skipInterval = 3:
+ /// c (skip level 2)
+ /// c c c (skip level 1)
+ /// x x x x x x x x x x (skip level 0)
+ /// d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d (posting list)
+ /// 3 6 9 12 15 18 21 24 27 30 (df)
+ ///
+ /// d - document
+ /// x - skip data
+ /// c - skip data with child pointer
+ ///
+ /// Skip level i contains every skipInterval-th entry from skip level i-1.
+ /// Therefore the number of entries on level i is: floor(df / (skipInterval ^ (i + 1))).
+ /// 
+ /// Each skip entry on a level i>0 contains a pointer to the corresponding skip entry in list i-1.
+ /// This guarantees a logarithmic amount of skips to find the target document.
+ ///
+ /// While this class takes care of writing the different skip levels,
+ /// subclasses must define the actual format of the skip data.
+ ///
+ /// </summary>
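+ /// <remarks>
+ /// Worked through for the example above (df = 30, skipInterval = 3): the writer
+ /// allocates floor(log(30)/log(3)) = 3 levels, holding floor(30/3) = 10,
+ /// floor(30/9) = 3 and floor(30/27) = 1 entries respectively, matching the diagram.
+ /// </remarks>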
+ abstract class MultiLevelSkipListWriter
+ {
+ // number of levels in this skip list
+ private int numberOfSkipLevels;
+
+ // the skip interval in the list with level = 0
+ private int skipInterval;
+
+ // for every skip level a different buffer is used
+ private RAMOutputStream[] skipBuffer;
+
+ protected internal MultiLevelSkipListWriter(int skipInterval, int maxSkipLevels, int df)
+ {
+ this.skipInterval = skipInterval;
+
+ // calculate the maximum number of skip levels for this document frequency
+ numberOfSkipLevels = df == 0 ? 0 : (int) System.Math.Floor(System.Math.Log(df) / System.Math.Log(skipInterval));
+
+ // make sure it does not exceed maxSkipLevels
+ if (numberOfSkipLevels > maxSkipLevels)
+ {
+ numberOfSkipLevels = maxSkipLevels;
+ }
+ }
+
+ protected internal virtual void Init()
+ {
+ skipBuffer = new RAMOutputStream[numberOfSkipLevels];
+ for (int i = 0; i < numberOfSkipLevels; i++)
+ {
+ skipBuffer[i] = new RAMOutputStream();
+ }
+ }
+
+ protected internal virtual void ResetSkip()
+ {
+ // creates new buffers or empties the existing ones
+ if (skipBuffer == null)
+ {
+ Init();
+ }
+ else
+ {
+ for (int i = 0; i < skipBuffer.Length; i++)
+ {
+ skipBuffer[i].Reset();
+ }
+ }
+ }
+
+ /// <summary> Subclasses must implement the actual skip data encoding in this method.
+ ///
+ /// </summary>
+ /// <param name="level">the level skip data shall be writting for
+ /// </param>
+ /// <param name="skipBuffer">the skip buffer to write to
+ /// </param>
+ protected internal abstract void WriteSkipData(int level, IndexOutput skipBuffer);
+
+ /// <summary> Writes the current skip data to the buffers. The current document frequency determines
+ /// the max level the skip data is to be written to.
+ ///
+ /// </summary>
+ /// <param name="df">the current document frequency
+ /// </param>
+ /// <throws> IOException </throws>
+ internal virtual void BufferSkip(int df)
+ {
+ int numLevels;
+
+ // determine max level
+ for (numLevels = 0; (df % skipInterval) == 0 && numLevels < numberOfSkipLevels; df /= skipInterval)
+ {
+ numLevels++;
+ }
+
+ long childPointer = 0;
+
+ for (int level = 0; level < numLevels; level++)
+ {
+ WriteSkipData(level, skipBuffer[level]);
+
+ long newChildPointer = skipBuffer[level].FilePointer;
+
+ if (level != 0)
+ {
+ // store child pointers for all levels except the lowest
+ skipBuffer[level].WriteVLong(childPointer);
+ }
+
+ //remember the childPointer for the next level
+ childPointer = newChildPointer;
+ }
+ }
+
+ /// <summary> Writes the buffered skip lists to the given output.
+ ///
+ /// </summary>
+ /// <param name="output">the IndexOutput the skip lists shall be written to
+ /// </param>
+ /// <returns> the pointer at which the skip list starts
+ /// </returns>
+ internal virtual long WriteSkip(IndexOutput output)
+ {
+ long skipPointer = output.FilePointer;
+ if (skipBuffer == null || skipBuffer.Length == 0)
+ return skipPointer;
+
+ for (int level = numberOfSkipLevels - 1; level > 0; level--)
+ {
+ long length = skipBuffer[level].FilePointer;
+ if (length > 0)
+ {
+ output.WriteVLong(length);
+ skipBuffer[level].WriteTo(output);
+ }
+ }
+ skipBuffer[0].WriteTo(output);
+
+ return skipPointer;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/MultiReader.cs b/src/core/Index/MultiReader.cs
new file mode 100644
index 0000000..a441cb7
--- /dev/null
+++ b/src/core/Index/MultiReader.cs
@@ -0,0 +1,494 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Linq;
+using Lucene.Net.Support;
+using Document = Lucene.Net.Documents.Document;
+using FieldSelector = Lucene.Net.Documents.FieldSelector;
+using MultiTermDocs = Lucene.Net.Index.DirectoryReader.MultiTermDocs;
+using MultiTermEnum = Lucene.Net.Index.DirectoryReader.MultiTermEnum;
+using MultiTermPositions = Lucene.Net.Index.DirectoryReader.MultiTermPositions;
+using DefaultSimilarity = Lucene.Net.Search.DefaultSimilarity;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary>An IndexReader which reads multiple indexes, appending
+ /// their content.
+ /// </summary>
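+ /// <remarks>
+ /// A hedged usage sketch (dir1 and dir2 are assumed to be existing
+ /// <see cref="Lucene.Net.Store.Directory" /> instances):
+ /// <code>
+ /// IndexReader r1 = IndexReader.Open(dir1, true);  // read-only
+ /// IndexReader r2 = IndexReader.Open(dir2, true);
+ /// IndexReader multi = new MultiReader(r1, r2);    // closes the subreaders when closed
+ /// int total = multi.NumDocs();                    // sum of the subreaders' doc counts
+ /// multi.Close();
+ /// </code>
+ /// </remarks>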
+ public class MultiReader:IndexReader, System.ICloneable
+ {
+ protected internal IndexReader[] subReaders;
+ private int[] starts; // 1st docno for each segment
+ private bool[] decrefOnClose; // remember which subreaders to decRef on close
+ private System.Collections.Generic.IDictionary<string, byte[]> normsCache = new HashMap<string,byte[]>();
+ private int maxDoc = 0;
+ private int numDocs = - 1;
+ private bool hasDeletions = false;
+
+ /// <summary> <p/>Construct a MultiReader aggregating the named set of (sub)readers.
+ /// Directory locking for delete, undeleteAll, and setNorm operations is
+ /// left to the subreaders. <p/>
+ /// <p/>Note that all subreaders are closed if this MultiReader is closed.<p/>
+ /// </summary>
+ /// <param name="subReaders">set of (sub)readers
+ /// </param>
+ /// <throws> IOException </throws>
+ public MultiReader(params IndexReader[] subReaders)
+ {
+ Initialize(subReaders, true);
+ }
+
+ /// <summary> <p/>Construct a MultiReader aggregating the named set of (sub)readers.
+ /// Directory locking for delete, undeleteAll, and setNorm operations is
+ /// left to the subreaders. <p/>
+ /// </summary>
+ /// <param name="closeSubReaders">indicates whether the subreaders should be closed
+ /// when this MultiReader is closed
+ /// </param>
+ /// <param name="subReaders">set of (sub)readers
+ /// </param>
+ /// <throws> IOException </throws>
+ public MultiReader(IndexReader[] subReaders, bool closeSubReaders)
+ {
+ Initialize(subReaders, closeSubReaders);
+ }
+
+ private void Initialize(IndexReader[] subReaders, bool closeSubReaders)
+ {
+ // Deep copy
+ this.subReaders = subReaders.ToArray();
+ starts = new int[subReaders.Length + 1]; // build starts array
+ decrefOnClose = new bool[subReaders.Length];
+ for (int i = 0; i < subReaders.Length; i++)
+ {
+ starts[i] = maxDoc;
+ maxDoc += subReaders[i].MaxDoc; // compute maxDocs
+
+ if (!closeSubReaders)
+ {
+ subReaders[i].IncRef();
+ decrefOnClose[i] = true;
+ }
+ else
+ {
+ decrefOnClose[i] = false;
+ }
+
+ if (subReaders[i].HasDeletions)
+ hasDeletions = true;
+ }
+ starts[subReaders.Length] = maxDoc;
+ }
+
+ /// <summary> Tries to reopen the subreaders.
+ /// <br/>
+ /// If one or more subreaders could be re-opened (i.e. subReader.Reopen()
+ /// returned a new instance != subReader), then a new MultiReader instance
+ /// is returned, otherwise this instance is returned.
+ /// <p/>
+ /// A re-opened instance might share one or more subreaders with the old
+ /// instance. Index modification operations result in undefined behavior
+ /// when performed before the old instance is closed.
+ /// (see <see cref="IndexReader.Reopen()" />).
+ /// <p/>
+ /// If subreaders are shared, then the reference count of those
+ /// readers is increased to ensure that the subreaders remain open
+ /// until the last referring reader is closed.
+ ///
+ /// </summary>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
+ public override IndexReader Reopen()
+ {
+ lock (this)
+ {
+ return DoReopen(false);
+ }
+ }
+
+ /// <summary> Clones the subreaders.
+ /// (see <see cref="IndexReader.Clone()" />).
+ /// <br/>
+ /// <p/>
+ /// If subreaders are shared, then the reference count of those
+ /// readers is increased to ensure that the subreaders remain open
+ /// until the last referring reader is closed.
+ /// </summary>
+ public override System.Object Clone()
+ {
+ try
+ {
+ return DoReopen(true);
+ }
+ catch (System.Exception ex)
+ {
+ throw new System.SystemException(ex.Message, ex);
+ }
+ }
+
+ /// <summary> If doClone is true then we clone each of the subreaders; otherwise we reopen them.</summary>
+ /// <param name="doClone">whether to clone (true) or reopen (false) each subreader
+ /// </param>
+ /// <returns> New IndexReader, or same one (this) if
+ /// reopen/clone is not necessary
+ /// </returns>
+ /// <throws> CorruptIndexException </throws>
+ /// <throws> IOException </throws>
+ protected internal virtual IndexReader DoReopen(bool doClone)
+ {
+ EnsureOpen();
+
+ bool reopened = false;
+ IndexReader[] newSubReaders = new IndexReader[subReaders.Length];
+
+ bool success = false;
+ try
+ {
+ for (int i = 0; i < subReaders.Length; i++)
+ {
+ if (doClone)
+ newSubReaders[i] = (IndexReader) subReaders[i].Clone();
+ else
+ newSubReaders[i] = subReaders[i].Reopen();
+ // if at least one of the subreaders was updated we remember that
+ // and return a new MultiReader
+ if (newSubReaders[i] != subReaders[i])
+ {
+ reopened = true;
+ }
+ }
+ success = true;
+ }
+ finally
+ {
+ if (!success && reopened)
+ {
+ for (int i = 0; i < newSubReaders.Length; i++)
+ {
+ if (newSubReaders[i] != subReaders[i])
+ {
+ try
+ {
+ newSubReaders[i].Close();
+ }
+ catch (System.IO.IOException)
+ {
+ // keep going - we want to clean up as much as possible
+ }
+ }
+ }
+ }
+ }
+
+ if (reopened)
+ {
+ bool[] newDecrefOnClose = new bool[subReaders.Length];
+ for (int i = 0; i < subReaders.Length; i++)
+ {
+ if (newSubReaders[i] == subReaders[i])
+ {
+ newSubReaders[i].IncRef();
+ newDecrefOnClose[i] = true;
+ }
+ }
+ MultiReader mr = new MultiReader(newSubReaders);
+ mr.decrefOnClose = newDecrefOnClose;
+ return mr;
+ }
+ else
+ {
+ return this;
+ }
+ }
+
+ public override ITermFreqVector[] GetTermFreqVectors(int n)
+ {
+ EnsureOpen();
+ int i = ReaderIndex(n); // find segment num
+ return subReaders[i].GetTermFreqVectors(n - starts[i]); // dispatch to segment
+ }
+
+ public override ITermFreqVector GetTermFreqVector(int n, System.String field)
+ {
+ EnsureOpen();
+ int i = ReaderIndex(n); // find segment num
+ return subReaders[i].GetTermFreqVector(n - starts[i], field);
+ }
+
+
+ public override void GetTermFreqVector(int docNumber, System.String field, TermVectorMapper mapper)
+ {
+ EnsureOpen();
+ int i = ReaderIndex(docNumber); // find segment num
+ subReaders[i].GetTermFreqVector(docNumber - starts[i], field, mapper);
+ }
+
+ public override void GetTermFreqVector(int docNumber, TermVectorMapper mapper)
+ {
+ EnsureOpen();
+ int i = ReaderIndex(docNumber); // find segment num
+ subReaders[i].GetTermFreqVector(docNumber - starts[i], mapper);
+ }
+
+ public override bool IsOptimized()
+ {
+ return false;
+ }
+
+ public override int NumDocs()
+ {
+ // Don't call ensureOpen() here (it could affect performance)
+ // NOTE: multiple threads may wind up init'ing
+ // numDocs... but that's harmless
+ if (numDocs == - 1)
+ {
+ // check cache
+ int n = 0; // cache miss--recompute
+ for (int i = 0; i < subReaders.Length; i++)
+ n += subReaders[i].NumDocs(); // sum from readers
+ numDocs = n;
+ }
+ return numDocs;
+ }
+
+ public override int MaxDoc
+ {
+ get
+ {
+ // Don't call ensureOpen() here (it could affect performance)
+ return maxDoc;
+ }
+ }
+
+ // inherit javadoc
+ public override Document Document(int n, FieldSelector fieldSelector)
+ {
+ EnsureOpen();
+ int i = ReaderIndex(n); // find segment num
+ return subReaders[i].Document(n - starts[i], fieldSelector); // dispatch to segment reader
+ }
+
+ public override bool IsDeleted(int n)
+ {
+ // Don't call ensureOpen() here (it could affect performance)
+ int i = ReaderIndex(n); // find segment num
+ return subReaders[i].IsDeleted(n - starts[i]); // dispatch to segment reader
+ }
+
+ public override bool HasDeletions
+ {
+ get
+ {
+ // Don't call ensureOpen() here (it could affect performance)
+ return hasDeletions;
+ }
+ }
+
+ protected internal override void DoDelete(int n)
+ {
+ numDocs = - 1; // invalidate cache
+ int i = ReaderIndex(n); // find segment num
+ subReaders[i].DeleteDocument(n - starts[i]); // dispatch to segment reader
+ hasDeletions = true;
+ }
+
+ protected internal override void DoUndeleteAll()
+ {
+ for (int i = 0; i < subReaders.Length; i++)
+ subReaders[i].UndeleteAll();
+
+ hasDeletions = false;
+ numDocs = - 1; // invalidate cache
+ }
+
+ private int ReaderIndex(int n)
+ {
+ // find reader for doc n:
+ return DirectoryReader.ReaderIndex(n, this.starts, this.subReaders.Length);
+ }
+
+ public override bool HasNorms(System.String field)
+ {
+ EnsureOpen();
+ for (int i = 0; i < subReaders.Length; i++)
+ {
+ if (subReaders[i].HasNorms(field))
+ return true;
+ }
+ return false;
+ }
+
+ public override byte[] Norms(System.String field)
+ {
+ lock (this)
+ {
+ EnsureOpen();
+ byte[] bytes = normsCache[field];
+ if (bytes != null)
+ return bytes; // cache hit
+ if (!HasNorms(field))
+ return null;
+
+ bytes = new byte[MaxDoc];
+ for (int i = 0; i < subReaders.Length; i++)
+ subReaders[i].Norms(field, bytes, starts[i]);
+ normsCache[field] = bytes; // update cache
+ return bytes;
+ }
+ }
+
+ public override void Norms(System.String field, byte[] result, int offset)
+ {
+ lock (this)
+ {
+ EnsureOpen();
+ byte[] bytes = normsCache[field];
+ for (int i = 0; i < subReaders.Length; i++)
+ // read from segments
+ subReaders[i].Norms(field, result, offset + starts[i]);
+
+ if (bytes == null && !HasNorms(field))
+ {
+ for (int i = offset; i < result.Length; i++)
+ {
+ result[i] = (byte) DefaultSimilarity.EncodeNorm(1.0f);
+ }
+ }
+ else if (bytes != null)
+ {
+ // cache hit
+ Array.Copy(bytes, 0, result, offset, MaxDoc);
+ }
+ else
+ {
+ for (int i = 0; i < subReaders.Length; i++)
+ {
+ // read from segments
+ subReaders[i].Norms(field, result, offset + starts[i]);
+ }
+ }
+ }
+ }
+
+ protected internal override void DoSetNorm(int n, System.String field, byte value_Renamed)
+ {
+ lock (normsCache)
+ {
+ normsCache.Remove(field); // clear cache
+ }
+ int i = ReaderIndex(n); // find segment num
+ subReaders[i].SetNorm(n - starts[i], field, value_Renamed); // dispatch
+ }
+
+ public override TermEnum Terms()
+ {
+ EnsureOpen();
+ return new MultiTermEnum(this, subReaders, starts, null);
+ }
+
+ public override TermEnum Terms(Term term)
+ {
+ EnsureOpen();
+ return new MultiTermEnum(this, subReaders, starts, term);
+ }
+
+ public override int DocFreq(Term t)
+ {
+ EnsureOpen();
+ int total = 0; // sum freqs in segments
+ for (int i = 0; i < subReaders.Length; i++)
+ total += subReaders[i].DocFreq(t);
+ return total;
+ }
+
+ public override TermDocs TermDocs()
+ {
+ EnsureOpen();
+ return new MultiTermDocs(this, subReaders, starts);
+ }
+
+ public override TermPositions TermPositions()
+ {
+ EnsureOpen();
+ return new MultiTermPositions(this, subReaders, starts);
+ }
+
+ protected internal override void DoCommit(System.Collections.Generic.IDictionary<string, string> commitUserData)
+ {
+ for (int i = 0; i < subReaders.Length; i++)
+ subReaders[i].Commit(commitUserData);
+ }
+
+ protected internal override void DoClose()
+ {
+ lock (this)
+ {
+ for (int i = 0; i < subReaders.Length; i++)
+ {
+ if (decrefOnClose[i])
+ {
+ subReaders[i].DecRef();
+ }
+ else
+ {
+ subReaders[i].Close();
+ }
+ }
+ }
+
+ // NOTE: only needed in case someone had asked for
+ // FieldCache for top-level reader (which is generally
+ // not a good idea):
+ Lucene.Net.Search.FieldCache_Fields.DEFAULT.Purge(this);
+ }
+
+ public override System.Collections.Generic.ICollection<string> GetFieldNames(IndexReader.FieldOption fieldNames)
+ {
+ EnsureOpen();
+ return DirectoryReader.GetFieldNames(fieldNames, this.subReaders);
+ }
+
+ /// <summary> Checks recursively if all subreaders are up to date. </summary>
+ public override bool IsCurrent()
+ {
+ for (int i = 0; i < subReaders.Length; i++)
+ {
+ if (!subReaders[i].IsCurrent())
+ {
+ return false;
+ }
+ }
+
+ // all subreaders are up to date
+ return true;
+ }
+
+ /// <summary>Not implemented.</summary>
+ /// <throws> UnsupportedOperationException </throws>
+ public override long Version
+ {
+ get { throw new System.NotSupportedException("MultiReader does not support this method."); }
+ }
+
+ public override IndexReader[] GetSequentialSubReaders()
+ {
+ return subReaders;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/MultipleTermPositions.cs b/src/core/Index/MultipleTermPositions.cs
new file mode 100644
index 0000000..eab3dd5
--- /dev/null
+++ b/src/core/Index/MultipleTermPositions.cs
@@ -0,0 +1,256 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> Allows you to iterate over the <see cref="TermPositions" /> for multiple <see cref="Term" />s as
+ /// a single <see cref="TermPositions" />.
+ ///
+ /// </summary>
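+ /// <remarks>
+ /// A hedged sketch (the field and term text are made up, and <c>reader</c> is assumed
+ /// to be an open <see cref="IndexReader" />):
+ /// <code>
+ /// Term[] terms = { new Term("body", "apache"), new Term("body", "lucene") };
+ /// TermPositions tp = new MultipleTermPositions(reader, terms);
+ /// while (tp.Next())
+ /// {
+ ///     int remaining = tp.Freq;
+ ///     while (remaining-- > 0)
+ ///     {
+ ///         int pos = tp.NextPosition(); // merged, sorted positions within doc tp.Doc
+ ///     }
+ /// }
+ /// tp.Close();
+ /// </code>
+ /// </remarks>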
+ public class MultipleTermPositions : TermPositions
+ {
+ private sealed class TermPositionsQueue : PriorityQueue<TermPositions>
+ {
+ internal TermPositionsQueue(LinkedList<TermPositions> termPositions)
+ {
+ Initialize(termPositions.Count);
+
+ foreach(TermPositions tp in termPositions)
+ if (tp.Next())
+ Add(tp);
+ }
+
+ internal TermPositions Peek()
+ {
+ return Top();
+ }
+
+ public override bool LessThan(TermPositions a, TermPositions b)
+ {
+ return a.Doc < b.Doc;
+ }
+ }
+
+ private sealed class IntQueue
+ {
+ public IntQueue()
+ {
+ InitBlock();
+ }
+ private void InitBlock()
+ {
+ _array = new int[_arraySize];
+ }
+ private int _arraySize = 16;
+ private int _index = 0;
+ private int _lastIndex = 0;
+ private int[] _array;
+
+ internal void add(int i)
+ {
+ if (_lastIndex == _arraySize)
+ growArray();
+
+ _array[_lastIndex++] = i;
+ }
+
+ internal int next()
+ {
+ return _array[_index++];
+ }
+
+ internal void sort()
+ {
+ System.Array.Sort(_array, _index, _lastIndex - _index);
+ }
+
+ internal void clear()
+ {
+ _index = 0;
+ _lastIndex = 0;
+ }
+
+ internal int size()
+ {
+ return (_lastIndex - _index);
+ }
+
+ private void growArray()
+ {
+ int[] newArray = new int[_arraySize * 2];
+ Array.Copy(_array, 0, newArray, 0, _arraySize);
+ _array = newArray;
+ _arraySize *= 2;
+ }
+ }
+
+ private int _doc;
+ private int _freq;
+ private TermPositionsQueue _termPositionsQueue;
+ private IntQueue _posList;
+
+ private bool isDisposed;
+ /// <summary> Creates a new <c>MultipleTermPositions</c> instance.
+ ///
+ /// </summary>
+ /// <exception cref="System.IO.IOException">
+ /// </exception>
+ public MultipleTermPositions(IndexReader indexReader, Term[] terms)
+ {
+ var termPositions = new System.Collections.Generic.LinkedList<TermPositions>();
+
+ for (int i = 0; i < terms.Length; i++)
+ termPositions.AddLast(indexReader.TermPositions(terms[i]));
+
+ _termPositionsQueue = new TermPositionsQueue(termPositions);
+ _posList = new IntQueue();
+ }
+
+ public bool Next()
+ {
+ if (_termPositionsQueue.Size() == 0)
+ return false;
+
+ _posList.clear();
+ _doc = _termPositionsQueue.Peek().Doc;
+
+ TermPositions tp;
+ do
+ {
+ tp = _termPositionsQueue.Peek();
+
+ for (int i = 0; i < tp.Freq; i++)
+ _posList.add(tp.NextPosition());
+
+ if (tp.Next())
+ _termPositionsQueue.UpdateTop();
+ else
+ {
+ _termPositionsQueue.Pop();
+ tp.Close();
+ }
+ }
+ while (_termPositionsQueue.Size() > 0 && _termPositionsQueue.Peek().Doc == _doc);
+
+ _posList.sort();
+ _freq = _posList.size();
+
+ return true;
+ }
+
+ public int NextPosition()
+ {
+ return _posList.next();
+ }
+
+ public bool SkipTo(int target)
+ {
+ while (_termPositionsQueue.Peek() != null && target > _termPositionsQueue.Peek().Doc)
+ {
+ TermPositions tp = _termPositionsQueue.Pop();
+ if (tp.SkipTo(target))
+ _termPositionsQueue.Add(tp);
+ else
+ tp.Close();
+ }
+ return Next();
+ }
+
+ public int Doc
+ {
+ get { return _doc; }
+ }
+
+ public int Freq
+ {
+ get { return _freq; }
+ }
+
+ [Obsolete("Use Dispose() instead")]
+ public void Close()
+ {
+ Dispose();
+ }
+
+ public void Dispose()
+ {
+ Dispose(true);
+ }
+
+ protected virtual void Dispose(bool disposing)
+ {
+ if (isDisposed) return;
+
+ if (disposing)
+ {
+ while (_termPositionsQueue.Size() > 0)
+ _termPositionsQueue.Pop().Close();
+ }
+
+ isDisposed = true;
+ }
+
+ /// <summary> Not implemented.</summary>
+ /// <throws> UnsupportedOperationException </throws>
+ public virtual void Seek(Term arg0)
+ {
+ throw new System.NotSupportedException();
+ }
+
+ /// <summary> Not implemented.</summary>
+ /// <throws> UnsupportedOperationException </throws>
+ public virtual void Seek(TermEnum termEnum)
+ {
+ throw new System.NotSupportedException();
+ }
+
+ /// <summary> Not implemented.</summary>
+ /// <throws> UnsupportedOperationException </throws>
+ public virtual int Read(int[] arg0, int[] arg1)
+ {
+ throw new System.NotSupportedException();
+ }
+
+
+ /// <summary> Not implemented.</summary>
+ /// <throws> UnsupportedOperationException </throws>
+ public virtual int PayloadLength
+ {
+ get { throw new System.NotSupportedException(); }
+ }
+
+ /// <summary> Not implemented.</summary>
+ /// <throws> UnsupportedOperationException </throws>
+ public virtual byte[] GetPayload(byte[] data, int offset)
+ {
+ throw new System.NotSupportedException();
+ }
+
+ /// <summary> </summary>
+ /// <value> false </value>
+// TODO: Remove warning after API has been finalized
+ public virtual bool IsPayloadAvailable
+ {
+ get { return false; }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/NormsWriter.cs b/src/core/Index/NormsWriter.cs
new file mode 100644
index 0000000..507d69c
--- /dev/null
+++ b/src/core/Index/NormsWriter.cs
@@ -0,0 +1,206 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Support;
+using IndexOutput = Lucene.Net.Store.IndexOutput;
+using Similarity = Lucene.Net.Search.Similarity;
+
+namespace Lucene.Net.Index
+{
+
+ // TODO FI: norms could actually be stored as doc store
+
+ /// <summary>Writes norms. Each thread/field pair accumulates the norms
+ /// for the doc/fields it saw, then the flush method below
+ /// merges all of these together into a single _X.nrm file.
+ /// </summary>
+
+ sealed class NormsWriter : InvertedDocEndConsumer
+ {
+
+ private static readonly byte defaultNorm;
+ private FieldInfos fieldInfos;
+ public override InvertedDocEndConsumerPerThread AddThread(DocInverterPerThread docInverterPerThread)
+ {
+ return new NormsWriterPerThread(docInverterPerThread, this);
+ }
+
+ public override void Abort()
+ {
+ }
+
+ // We only write the _X.nrm file at flush
+ internal void Files(ICollection<string> files)
+ {
+ }
+
+ internal override void SetFieldInfos(FieldInfos fieldInfos)
+ {
+ this.fieldInfos = fieldInfos;
+ }
+
+ /// <summary>Produce _X.nrm if any document had a field with norms
+ /// not disabled
+ /// </summary>
+ public override void Flush(IDictionary<InvertedDocEndConsumerPerThread,ICollection<InvertedDocEndConsumerPerField>> threadsAndFields, SegmentWriteState state)
+ {
+
+ IDictionary<FieldInfo, IList<NormsWriterPerField>> byField = new HashMap<FieldInfo, IList<NormsWriterPerField>>();
+
+ // Typically, each thread will have encountered the same
+ // field. So first we collate by field, i.e., all
+ // per-thread field instances that correspond to the
+ // same FieldInfo
+ foreach(var entry in threadsAndFields)
+ {
+ ICollection<InvertedDocEndConsumerPerField> fields = entry.Value;
+ IEnumerator<InvertedDocEndConsumerPerField> fieldsIt = fields.GetEnumerator();
+ var fieldsToRemove = new HashSet<NormsWriterPerField>();
+ while (fieldsIt.MoveNext())
+ {
+ NormsWriterPerField perField = (NormsWriterPerField) fieldsIt.Current;
+
+ if (perField.upto > 0)
+ {
+ // It has some norms
+ IList<NormsWriterPerField> l = byField[perField.fieldInfo];
+ if (l == null)
+ {
+ l = new List<NormsWriterPerField>();
+ byField[perField.fieldInfo] = l;
+ }
+ l.Add(perField);
+ }
+ // Remove this field since we haven't seen it
+ // since the previous flush
+ else
+ {
+ fieldsToRemove.Add(perField);
+ }
+ }
+ foreach (var field in fieldsToRemove)
+ {
+ fields.Remove(field);
+ }
+ }
+
+ System.String normsFileName = state.segmentName + "." + IndexFileNames.NORMS_EXTENSION;
+ state.flushedFiles.Add(normsFileName);
+ IndexOutput normsOut = state.directory.CreateOutput(normsFileName);
+
+ try
+ {
+ normsOut.WriteBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.Length);
+
+ int numField = fieldInfos.Size();
+
+ int normCount = 0;
+
+ for (int fieldNumber = 0; fieldNumber < numField; fieldNumber++)
+ {
+
+ FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber);
+
+ IList<NormsWriterPerField> toMerge = byField[fieldInfo];
+ int upto = 0;
+ if (toMerge != null)
+ {
+
+ int numFields = toMerge.Count;
+
+ normCount++;
+
+ NormsWriterPerField[] fields = new NormsWriterPerField[numFields];
+ int[] uptos = new int[numFields];
+
+ for (int j = 0; j < numFields; j++)
+ fields[j] = toMerge[j];
+
+ int numLeft = numFields;
+
+ while (numLeft > 0)
+ {
+
+ System.Diagnostics.Debug.Assert(uptos [0] < fields [0].docIDs.Length, " uptos[0]=" + uptos [0] + " len=" +(fields [0].docIDs.Length));
+
+ int minLoc = 0;
+ int minDocID = fields[0].docIDs[uptos[0]];
+
+ for (int j = 1; j < numLeft; j++)
+ {
+ int docID = fields[j].docIDs[uptos[j]];
+ if (docID < minDocID)
+ {
+ minDocID = docID;
+ minLoc = j;
+ }
+ }
+
+ System.Diagnostics.Debug.Assert(minDocID < state.numDocs);
+
+ // Fill hole
+ for (; upto < minDocID; upto++)
+ normsOut.WriteByte(defaultNorm);
+
+ normsOut.WriteByte(fields[minLoc].norms[uptos[minLoc]]);
+ (uptos[minLoc])++;
+ upto++;
+
+ if (uptos[minLoc] == fields[minLoc].upto)
+ {
+ fields[minLoc].Reset();
+ if (minLoc != numLeft - 1)
+ {
+ fields[minLoc] = fields[numLeft - 1];
+ uptos[minLoc] = uptos[numLeft - 1];
+ }
+ numLeft--;
+ }
+ }
+
+ // Fill final hole with defaultNorm
+ for (; upto < state.numDocs; upto++)
+ normsOut.WriteByte(defaultNorm);
+ }
+ else if (fieldInfo.isIndexed && !fieldInfo.omitNorms)
+ {
+ normCount++;
+ // Fill entire field with default norm:
+ for (; upto < state.numDocs; upto++)
+ normsOut.WriteByte(defaultNorm);
+ }
+
+ System.Diagnostics.Debug.Assert(4 + normCount * state.numDocs == normsOut.FilePointer, ".nrm file size mismatch: expected=" +(4 + normCount * state.numDocs) + " actual=" + normsOut.FilePointer);
+ }
+ }
+ finally
+ {
+ normsOut.Close();
+ }
+ }
+
+ internal override void CloseDocStore(SegmentWriteState state)
+ {
+ }
+ static NormsWriter()
+ {
+ defaultNorm = Similarity.EncodeNorm(1.0f);
+ }
+ }
+} \ No newline at end of file
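The static constructor above derives defaultNorm from Similarity.EncodeNorm(1.0f), the single byte NormsWriter writes for every document that has no recorded norm for a field. The following sketch is not part of this commit; it assumes the Lucene.Net 3.x Similarity API (static EncodeNorm/DecodeNorm) and only illustrates why the decoded value is approximate:

    // Hedged sketch only -- illustrative, not part of the committed sources.
    using System;
    using Similarity = Lucene.Net.Search.Similarity;

    static class NormEncodingDemo
    {
        static void Main()
        {
            // The byte NormsWriter writes for "hole" documents (no norm recorded).
            byte defaultNorm = Similarity.EncodeNorm(1.0f);

            // Decoding is lossy: the norm is stored as a compact 8-bit float,
            // so the result is only close to 1.0f.
            float decoded = Similarity.DecodeNorm(defaultNorm);

            Console.WriteLine("encoded=0x{0:X2} decoded={1}", defaultNorm, decoded);
        }
    }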
diff --git a/src/core/Index/NormsWriterPerField.cs b/src/core/Index/NormsWriterPerField.cs
new file mode 100644
index 0000000..81d45df
--- /dev/null
+++ b/src/core/Index/NormsWriterPerField.cs
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using ArrayUtil = Lucene.Net.Util.ArrayUtil;
+using Similarity = Lucene.Net.Search.Similarity;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary>Taps into DocInverter, as an InvertedDocEndConsumer,
+ /// which is called at the end of inverting each field. We
+ /// just look at the length for the field (docState.length)
+ /// and record the norm.
+ /// </summary>
+
+ sealed class NormsWriterPerField:InvertedDocEndConsumerPerField, System.IComparable<NormsWriterPerField>
+ {
+
+ internal NormsWriterPerThread perThread;
+ internal FieldInfo fieldInfo;
+ internal DocumentsWriter.DocState docState;
+
+ // Holds all docID/norm pairs we've seen
+ internal int[] docIDs = new int[1];
+ internal byte[] norms = new byte[1];
+ internal int upto;
+
+ internal FieldInvertState fieldState;
+
+ public void Reset()
+ {
+ // Shrink back if we are overallocated now:
+ docIDs = ArrayUtil.Shrink(docIDs, upto);
+ norms = ArrayUtil.Shrink(norms, upto);
+ upto = 0;
+ }
+
+ public NormsWriterPerField(DocInverterPerField docInverterPerField, NormsWriterPerThread perThread, FieldInfo fieldInfo)
+ {
+ this.perThread = perThread;
+ this.fieldInfo = fieldInfo;
+ docState = perThread.docState;
+ fieldState = docInverterPerField.fieldState;
+ }
+
+ internal override void Abort()
+ {
+ upto = 0;
+ }
+
+ public int CompareTo(NormsWriterPerField other)
+ {
+ return String.CompareOrdinal(fieldInfo.name, other.fieldInfo.name);
+ }
+
+ internal override void Finish()
+ {
+ System.Diagnostics.Debug.Assert(docIDs.Length == norms.Length);
+ if (fieldInfo.isIndexed && !fieldInfo.omitNorms)
+ {
+ if (docIDs.Length <= upto)
+ {
+ System.Diagnostics.Debug.Assert(docIDs.Length == upto);
+ docIDs = ArrayUtil.Grow(docIDs, 1 + upto);
+ norms = ArrayUtil.Grow(norms, 1 + upto);
+ }
+ float norm = docState.similarity.ComputeNorm(fieldInfo.name, fieldState);
+ norms[upto] = Similarity.EncodeNorm(norm);
+ docIDs[upto] = docState.docID;
+ upto++;
+ }
+ }
+ }
+} \ No newline at end of file
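Finish above asks docState.similarity for ComputeNorm(field, fieldState) and stores the encoded byte per docID. As a hedged illustration (not part of this commit, and assuming the Lucene.Net 3.x DefaultSimilarity and IndexWriter.SetSimilarity APIs), a custom Similarity can flatten the length component so the recorded norm reflects the field boost alone:

    // Hedged sketch only.
    using Lucene.Net.Search;

    public sealed class NoLengthNormSimilarity : DefaultSimilarity
    {
        // The default ComputeNorm(field, state), which Finish above invokes, multiplies
        // the field boost by this length norm before EncodeNorm packs it into one byte.
        public override float LengthNorm(string fieldName, int numTerms)
        {
            return 1.0f;
        }
    }

    // Installed on a writer before indexing, e.g. writer.SetSimilarity(new NoLengthNormSimilarity());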
diff --git a/src/core/Index/NormsWriterPerThread.cs b/src/core/Index/NormsWriterPerThread.cs
new file mode 100644
index 0000000..d5cd5ed
--- /dev/null
+++ b/src/core/Index/NormsWriterPerThread.cs
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ sealed class NormsWriterPerThread:InvertedDocEndConsumerPerThread
+ {
+ internal NormsWriter normsWriter;
+ internal DocumentsWriter.DocState docState;
+
+ public NormsWriterPerThread(DocInverterPerThread docInverterPerThread, NormsWriter normsWriter)
+ {
+ this.normsWriter = normsWriter;
+ docState = docInverterPerThread.docState;
+ }
+
+ internal override InvertedDocEndConsumerPerField AddField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo)
+ {
+ return new NormsWriterPerField(docInverterPerField, this, fieldInfo);
+ }
+
+ internal override void Abort()
+ {
+ }
+
+ internal override void StartDocument()
+ {
+ }
+ internal override void FinishDocument()
+ {
+ }
+
+ internal bool FreeRAM()
+ {
+ return false;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/ParallelReader.cs b/src/core/Index/ParallelReader.cs
new file mode 100644
index 0000000..e0b4b04
--- /dev/null
+++ b/src/core/Index/ParallelReader.cs
@@ -0,0 +1,822 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Lucene.Net.Support;
+using Document = Lucene.Net.Documents.Document;
+using FieldSelector = Lucene.Net.Documents.FieldSelector;
+using FieldSelectorResult = Lucene.Net.Documents.FieldSelectorResult;
+
+namespace Lucene.Net.Index
+{
+ /// <summary>An IndexReader which reads multiple, parallel indexes. Each index added
+ /// must have the same number of documents, but typically each contains
+ /// different fields. Each document contains the union of the fields of all
+ /// documents with the same document number. When searching, matches for a
+ /// query term are from the first index added that has the field.
+ ///
+ /// <p/>This is useful, e.g., with collections that have large fields which
+ /// change rarely and small fields that change more frequently. The smaller
+ /// fields may be re-indexed in a new index and both indexes may be searched
+ /// together.
+ ///
+ /// <p/><strong>Warning:</strong> It is up to you to make sure all indexes
+ /// are created and modified the same way. For example, if you add
+ /// documents to one index, you need to add the same documents in the
+ /// same order to the other indexes. <em>Failure to do so will result in
+ /// undefined behavior</em>.
+ /// </summary>
+ public class ParallelReader:IndexReader, System.ICloneable
+ {
+ private List<IndexReader> readers = new List<IndexReader>();
+ private List<bool> decrefOnClose = new List<bool>(); // remember which subreaders to decRef on close
+ internal bool incRefReaders = false;
+ private SortedDictionary<string, IndexReader> fieldToReader = new SortedDictionary<string, IndexReader>();
+ private IDictionary<IndexReader, ICollection<string>> readerToFields = new HashMap<IndexReader, ICollection<string>>();
+ private List<IndexReader> storedFieldReaders = new List<IndexReader>();
+
+ private int maxDoc;
+ private int numDocs;
+ private bool hasDeletions;
+
+ /// <summary>Construct a ParallelReader.
+ /// <p/>Note that all subreaders are closed if this ParallelReader is closed.<p/>
+ /// </summary>
+ public ParallelReader():this(true)
+ {
+ }
+
+ /// <summary>Construct a ParallelReader. </summary>
+ /// <param name="closeSubReaders">indicates whether the subreaders should be closed
+ /// when this ParallelReader is closed
+ /// </param>
+ public ParallelReader(bool closeSubReaders):base()
+ {
+ this.incRefReaders = !closeSubReaders;
+ }
+
+ /// <summary>Add an IndexReader.</summary>
+ /// <throws> IOException if there is a low-level IO error </throws>
+ public virtual void Add(IndexReader reader)
+ {
+ EnsureOpen();
+ Add(reader, false);
+ }
+
+ /// <summary>Add an IndexReader whose stored fields will not be returned. This can
+ /// accelerate search when stored fields are only needed from a subset of
+ /// the IndexReaders.
+ ///
+ /// </summary>
+ /// <throws> IllegalArgumentException if not all indexes contain the same number of documents </throws>
+ /// <throws> IllegalArgumentException if not all indexes have the same value of <see cref="IndexReader.MaxDoc" /> </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
+ public virtual void Add(IndexReader reader, bool ignoreStoredFields)
+ {
+
+ EnsureOpen();
+ if (readers.Count == 0)
+ {
+ this.maxDoc = reader.MaxDoc;
+ this.numDocs = reader.NumDocs();
+ this.hasDeletions = reader.HasDeletions;
+ }
+
+ if (reader.MaxDoc != maxDoc)
+ // check compatibility
+ throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc);
+ if (reader.NumDocs() != numDocs)
+ throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs());
+
+ ICollection<string> fields = reader.GetFieldNames(IndexReader.FieldOption.ALL);
+ readerToFields[reader] = fields;
+ foreach(var field in fields)
+ {
+ // update fieldToReader map
+ // Do a ContainsKey check first to mimic the Java behavior
+ if (!fieldToReader.ContainsKey(field) || fieldToReader[field] == null)
+ fieldToReader[field] = reader;
+ }
+
+ if (!ignoreStoredFields)
+ storedFieldReaders.Add(reader); // add to storedFieldReaders
+ readers.Add(reader);
+
+ if (incRefReaders)
+ {
+ reader.IncRef();
+ }
+ decrefOnClose.Add(incRefReaders);
+ }
+
+ public override System.Object Clone()
+ {
+ try
+ {
+ return DoReopen(true);
+ }
+ catch (System.Exception ex)
+ {
+ throw new System.SystemException(ex.Message, ex);
+ }
+ }
+
+ /// <summary> Tries to reopen the subreaders.
+ /// <br/>
+ /// If one or more subreaders could be re-opened (i. e. subReader.reopen()
+ /// returned a new instance != subReader), then a new ParallelReader instance
+ /// is returned, otherwise this instance is returned.
+ /// <p/>
+ /// A re-opened instance might share one or more subreaders with the old
+ /// instance. Index modification operations result in undefined behavior
+ /// when performed before the old instance is closed.
+ /// (see <see cref="IndexReader.Reopen()" />).
+ /// <p/>
+ /// If subreaders are shared, then the reference count of those
+ /// readers is increased to ensure that the subreaders remain open
+ /// until the last referring reader is closed.
+ ///
+ /// </summary>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
+ public override IndexReader Reopen()
+ {
+ lock (this)
+ {
+ return DoReopen(false);
+ }
+ }
+
+ protected internal virtual IndexReader DoReopen(bool doClone)
+ {
+ EnsureOpen();
+
+ bool reopened = false;
+ IList<IndexReader> newReaders = new List<IndexReader>();
+
+ bool success = false;
+
+ try
+ {
+ foreach(var oldReader in readers)
+ {
+ IndexReader newReader = null;
+ if (doClone)
+ {
+ newReader = (IndexReader) oldReader.Clone();
+ }
+ else
+ {
+ newReader = oldReader.Reopen();
+ }
+ newReaders.Add(newReader);
+ // if at least one of the subreaders was updated we remember that
+ // and return a new ParallelReader
+ if (newReader != oldReader)
+ {
+ reopened = true;
+ }
+ }
+ success = true;
+ }
+ finally
+ {
+ if (!success && reopened)
+ {
+ for (int i = 0; i < newReaders.Count; i++)
+ {
+ IndexReader r = newReaders[i];
+ if (r != readers[i])
+ {
+ try
+ {
+ r.Close();
+ }
+ catch (System.IO.IOException)
+ {
+ // keep going - we want to clean up as much as possible
+ }
+ }
+ }
+ }
+ }
+
+ if (reopened)
+ {
+ List<bool> newDecrefOnClose = new List<bool>();
+ ParallelReader pr = new ParallelReader();
+ for (int i = 0; i < readers.Count; i++)
+ {
+ IndexReader oldReader = readers[i];
+ IndexReader newReader = newReaders[i];
+ if (newReader == oldReader)
+ {
+ newDecrefOnClose.Add(true);
+ newReader.IncRef();
+ }
+ else
+ {
+ // this is a new subreader instance, so on close() we don't
+ // decRef but close it
+ newDecrefOnClose.Add(false);
+ }
+ pr.Add(newReader, !storedFieldReaders.Contains(oldReader));
+ }
+ pr.decrefOnClose = newDecrefOnClose;
+ pr.incRefReaders = incRefReaders;
+ return pr;
+ }
+ else
+ {
+ // No subreader was refreshed
+ return this;
+ }
+ }
+
+
+ public override int NumDocs()
+ {
+ // Don't call ensureOpen() here (it could affect performance)
+ return numDocs;
+ }
+
+ public override int MaxDoc
+ {
+ get
+ {
+ // Don't call ensureOpen() here (it could affect performance)
+ return maxDoc;
+ }
+ }
+
+ public override bool HasDeletions
+ {
+ get
+ {
+ // Don't call ensureOpen() here (it could affect performance)
+ return hasDeletions;
+ }
+ }
+
+ // check first reader
+ public override bool IsDeleted(int n)
+ {
+ // Don't call ensureOpen() here (it could affect performance)
+ if (readers.Count > 0)
+ return readers[0].IsDeleted(n);
+ return false;
+ }
+
+ // delete in all readers
+ protected internal override void DoDelete(int n)
+ {
+ foreach(var reader in readers)
+ {
+ reader.DeleteDocument(n);
+ }
+ hasDeletions = true;
+ }
+
+ // undeleteAll in all readers
+ protected internal override void DoUndeleteAll()
+ {
+ foreach(var reader in readers)
+ {
+ reader.UndeleteAll();
+ }
+ hasDeletions = false;
+ }
+
+ // append fields from storedFieldReaders
+ public override Document Document(int n, FieldSelector fieldSelector)
+ {
+ EnsureOpen();
+ Document result = new Document();
+ foreach(IndexReader reader in storedFieldReaders)
+ {
+ bool include = (fieldSelector == null);
+ if (!include)
+ {
+ var fields = readerToFields[reader];
+ foreach(var field in fields)
+ {
+ if (fieldSelector.Accept(field) != FieldSelectorResult.NO_LOAD)
+ {
+ include = true;
+ break;
+ }
+ }
+ }
+ if (include)
+ {
+ var fields = reader.Document(n, fieldSelector).GetFields();
+ foreach(var field in fields)
+ {
+ result.Add(field);
+ }
+ }
+ }
+ return result;
+ }
+
+ // get all vectors
+ public override ITermFreqVector[] GetTermFreqVectors(int n)
+ {
+ EnsureOpen();
+ IList<ITermFreqVector> results = new List<ITermFreqVector>();
+ foreach(var e in fieldToReader)
+ {
+ System.String field = e.Key;
+ IndexReader reader = e.Value;
+
+ ITermFreqVector vector = reader.GetTermFreqVector(n, field);
+ if (vector != null)
+ results.Add(vector);
+ }
+ return results.ToArray();
+ }
+
+ public override ITermFreqVector GetTermFreqVector(int n, System.String field)
+ {
+ EnsureOpen();
+ IndexReader reader = (fieldToReader[field]);
+ return reader == null?null:reader.GetTermFreqVector(n, field);
+ }
+
+
+ public override void GetTermFreqVector(int docNumber, System.String field, TermVectorMapper mapper)
+ {
+ EnsureOpen();
+ IndexReader reader = (fieldToReader[field]);
+ if (reader != null)
+ {
+ reader.GetTermFreqVector(docNumber, field, mapper);
+ }
+ }
+
+ public override void GetTermFreqVector(int docNumber, TermVectorMapper mapper)
+ {
+ EnsureOpen();
+
+ foreach(var e in fieldToReader)
+ {
+ System.String field = e.Key;
+ IndexReader reader = e.Value;
+ reader.GetTermFreqVector(docNumber, field, mapper);
+ }
+ }
+
+ public override bool HasNorms(System.String field)
+ {
+ EnsureOpen();
+ IndexReader reader = fieldToReader[field];
+ return reader != null && reader.HasNorms(field);
+ }
+
+ public override byte[] Norms(System.String field)
+ {
+ EnsureOpen();
+ IndexReader reader = fieldToReader[field];
+ return reader == null?null:reader.Norms(field);
+ }
+
+ public override void Norms(System.String field, byte[] result, int offset)
+ {
+ EnsureOpen();
+ IndexReader reader = fieldToReader[field];
+ if (reader != null)
+ reader.Norms(field, result, offset);
+ }
+
+ protected internal override void DoSetNorm(int n, System.String field, byte value_Renamed)
+ {
+ IndexReader reader = fieldToReader[field];
+ if (reader != null)
+ reader.DoSetNorm(n, field, value_Renamed);
+ }
+
+ public override TermEnum Terms()
+ {
+ EnsureOpen();
+ return new ParallelTermEnum(this);
+ }
+
+ public override TermEnum Terms(Term term)
+ {
+ EnsureOpen();
+ return new ParallelTermEnum(this, term);
+ }
+
+ public override int DocFreq(Term term)
+ {
+ EnsureOpen();
+ IndexReader reader = fieldToReader[term.Field];
+ return reader == null?0:reader.DocFreq(term);
+ }
+
+ public override TermDocs TermDocs(Term term)
+ {
+ EnsureOpen();
+ return new ParallelTermDocs(this, term);
+ }
+
+ public override TermDocs TermDocs()
+ {
+ EnsureOpen();
+ return new ParallelTermDocs(this);
+ }
+
+ public override TermPositions TermPositions(Term term)
+ {
+ EnsureOpen();
+ return new ParallelTermPositions(this, term);
+ }
+
+ public override TermPositions TermPositions()
+ {
+ EnsureOpen();
+ return new ParallelTermPositions(this);
+ }
+
+ /// <summary> Checks recursively if all subreaders are up to date. </summary>
+ public override bool IsCurrent()
+ {
+ foreach (var reader in readers)
+ {
+ if (!reader.IsCurrent())
+ {
+ return false;
+ }
+ }
+
+ // all subreaders are up to date
+ return true;
+ }
+
+ /// <summary> Checks recursively if all subindexes are optimized </summary>
+ public override bool IsOptimized()
+ {
+ foreach (var reader in readers)
+ {
+ if (!reader.IsOptimized())
+ {
+ return false;
+ }
+ }
+
+ // all subindexes are optimized
+ return true;
+ }
+
+
+ /// <summary>Not implemented.</summary>
+ /// <throws> UnsupportedOperationException </throws>
+ public override long Version
+ {
+ get { throw new System.NotSupportedException("ParallelReader does not support this method."); }
+ }
+
+ // for testing
+ public /*internal*/ virtual IndexReader[] GetSubReaders()
+ {
+ return readers.ToArray();
+ }
+
+ protected internal override void DoCommit(IDictionary<string, string> commitUserData)
+ {
+ foreach(var reader in readers)
+ reader.Commit(commitUserData);
+ }
+
+ protected internal override void DoClose()
+ {
+ lock (this)
+ {
+ for (int i = 0; i < readers.Count; i++)
+ {
+ if (decrefOnClose[i])
+ {
+ readers[i].DecRef();
+ }
+ else
+ {
+ readers[i].Close();
+ }
+ }
+ }
+
+ Lucene.Net.Search.FieldCache_Fields.DEFAULT.Purge(this);
+ }
+
+ public override System.Collections.Generic.ICollection<string> GetFieldNames(IndexReader.FieldOption fieldNames)
+ {
+ EnsureOpen();
+ ISet<string> fieldSet = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet<string>();
+ foreach(var reader in readers)
+ {
+ ICollection<string> names = reader.GetFieldNames(fieldNames);
+ fieldSet.UnionWith(names);
+ }
+ return fieldSet;
+ }
+
+ private class ParallelTermEnum : TermEnum
+ {
+ private void InitBlock(ParallelReader enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private ParallelReader enclosingInstance;
+ public ParallelReader Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ private System.String field;
+ private IEnumerator<string> fieldIterator;
+ private TermEnum termEnum;
+
+ private bool isDisposed;
+
+ public ParallelTermEnum(ParallelReader enclosingInstance)
+ {
+ InitBlock(enclosingInstance);
+ try
+ {
+ field = Enclosing_Instance.fieldToReader.Keys.First();
+ }
+ catch (InvalidOperationException)
+ {
+ // Keys.First() throws when there are no fields; keep field == null, termEnum == null
+ return;
+ }
+ if (field != null)
+ termEnum = Enclosing_Instance.fieldToReader[field].Terms();
+ }
+
+ public ParallelTermEnum(ParallelReader enclosingInstance, Term term)
+ {
+ InitBlock(enclosingInstance);
+ field = term.Field;
+ IndexReader reader = Enclosing_Instance.fieldToReader[field];
+ if (reader != null)
+ termEnum = reader.Terms(term);
+ }
+
+ public override bool Next()
+ {
+ if (termEnum == null)
+ return false;
+
+ // another term in this field?
+ if (termEnum.Next() && (System.Object) termEnum.Term.Field == (System.Object) field)
+ return true; // yes, keep going
+
+ termEnum.Close(); // close old termEnum
+
+ // find the next field with terms, if any
+ if (fieldIterator == null)
+ {
+ var newList = new List<string>();
+ if (Enclosing_Instance.fieldToReader != null && Enclosing_Instance.fieldToReader.Count > 0)
+ {
+ var comparer = Enclosing_Instance.fieldToReader.Comparer;
+ foreach(var entry in Enclosing_Instance.fieldToReader.Keys.Where(x => comparer.Compare(x, field) >= 0))
+ newList.Add(entry);
+ }
+
+ fieldIterator = newList.Skip(1).GetEnumerator(); // Skip field to get next one
+ }
+ while (fieldIterator.MoveNext())
+ {
+ field = fieldIterator.Current;
+ termEnum = Enclosing_Instance.fieldToReader[field].Terms(new Term(field));
+ Term term = termEnum.Term;
+ if (term != null && (System.Object) term.Field == (System.Object) field)
+ return true;
+ else
+ termEnum.Close();
+ }
+
+ return false; // no more fields
+ }
+
+ public override Term Term
+ {
+ get
+ {
+ if (termEnum == null)
+ return null;
+
+ return termEnum.Term;
+ }
+ }
+
+ public override int DocFreq()
+ {
+ if (termEnum == null)
+ return 0;
+
+ return termEnum.DocFreq();
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ if (isDisposed) return;
+
+ if (disposing)
+ {
+ if (termEnum != null)
+ termEnum.Close();
+ }
+
+ isDisposed = true;
+ }
+ }
+
+ // wrap a TermDocs in order to support seek(Term)
+ private class ParallelTermDocs : TermDocs
+ {
+ private void InitBlock(ParallelReader enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private ParallelReader enclosingInstance;
+ public ParallelReader Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ protected internal TermDocs termDocs;
+
+ private bool isDisposed;
+
+ public ParallelTermDocs(ParallelReader enclosingInstance)
+ {
+ InitBlock(enclosingInstance);
+ }
+ public ParallelTermDocs(ParallelReader enclosingInstance, Term term)
+ {
+ InitBlock(enclosingInstance);
+ if(term == null)
+ termDocs = (Enclosing_Instance.readers.Count == 0)
+ ? null
+ : Enclosing_Instance.readers[0].TermDocs(null);
+ else
+ Seek(term);
+ }
+
+ public virtual int Doc
+ {
+ get { return termDocs.Doc; }
+ }
+
+ public virtual int Freq
+ {
+ get { return termDocs.Freq; }
+ }
+
+ public virtual void Seek(Term term)
+ {
+ IndexReader reader = Enclosing_Instance.fieldToReader[term.Field];
+ termDocs = reader != null?reader.TermDocs(term):null;
+ }
+
+ public virtual void Seek(TermEnum termEnum)
+ {
+ Seek(termEnum.Term);
+ }
+
+ public virtual bool Next()
+ {
+ if (termDocs == null)
+ return false;
+
+ return termDocs.Next();
+ }
+
+ public virtual int Read(int[] docs, int[] freqs)
+ {
+ if (termDocs == null)
+ return 0;
+
+ return termDocs.Read(docs, freqs);
+ }
+
+ public virtual bool SkipTo(int target)
+ {
+ if (termDocs == null)
+ return false;
+
+ return termDocs.SkipTo(target);
+ }
+
+ [Obsolete("Use Dispose() instead")]
+ public virtual void Close()
+ {
+ Dispose();
+ }
+
+ public void Dispose()
+ {
+ Dispose(true);
+ }
+
+ protected virtual void Dispose(bool disposing)
+ {
+ if (isDisposed) return;
+
+ if (disposing)
+ {
+ if (termDocs != null)
+ termDocs.Close();
+ }
+
+ isDisposed = true;
+ }
+ }
+
+ private class ParallelTermPositions:ParallelTermDocs, TermPositions
+ {
+ private void InitBlock(ParallelReader enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private ParallelReader enclosingInstance;
+ public new ParallelReader Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+
+ public ParallelTermPositions(ParallelReader enclosingInstance):base(enclosingInstance)
+ {
+ InitBlock(enclosingInstance);
+ }
+ public ParallelTermPositions(ParallelReader enclosingInstance, Term term):base(enclosingInstance)
+ {
+ InitBlock(enclosingInstance);
+ Seek(term);
+ }
+
+ public override void Seek(Term term)
+ {
+ IndexReader reader = Enclosing_Instance.fieldToReader[term.Field];
+ termDocs = reader != null?reader.TermPositions(term):null;
+ }
+
+ public virtual int NextPosition()
+ {
+ // It is an error to call this if there is no next position, e.g. if termDocs==null
+ return ((TermPositions) termDocs).NextPosition();
+ }
+
+ public virtual int PayloadLength
+ {
+ get { return ((TermPositions) termDocs).PayloadLength; }
+ }
+
+ public virtual byte[] GetPayload(byte[] data, int offset)
+ {
+ return ((TermPositions) termDocs).GetPayload(data, offset);
+ }
+
+
+ // TODO: Remove warning after API has been finalized
+
+ public virtual bool IsPayloadAvailable
+ {
+ get { return ((TermPositions) termDocs).IsPayloadAvailable; }
+ }
+ }
+ }
+} \ No newline at end of file
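As a usage note (not part of this commit), a ParallelReader is typically fed two or more indexes that were built with identical document numbering, as the warning above requires. A minimal sketch, assuming the Lucene.Net 3.x FSDirectory/IndexReader APIs, with the directory paths purely illustrative:

    // Hedged sketch only; the index paths are hypothetical.
    using System.IO;
    using Lucene.Net.Documents;
    using Lucene.Net.Index;
    using Lucene.Net.Store;

    static class ParallelReaderDemo
    {
        static void Main()
        {
            var stableDir = FSDirectory.Open(new DirectoryInfo("idx-stable-fields"));
            var volatileDir = FSDirectory.Open(new DirectoryInfo("idx-volatile-fields"));

            // The parameterless constructor uses closeSubReaders = true, so disposing
            // the ParallelReader also closes the two subreaders added below.
            var parallel = new ParallelReader();
            parallel.Add(IndexReader.Open(stableDir, true));
            parallel.Add(IndexReader.Open(volatileDir, true));

            // Document 0 now exposes the union of the stored fields of both subindexes.
            Document doc = parallel.Document(0);

            parallel.Dispose();
        }
    }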
diff --git a/src/core/Index/Payload.cs b/src/core/Index/Payload.cs
new file mode 100644
index 0000000..a6f391a
--- /dev/null
+++ b/src/core/Index/Payload.cs
@@ -0,0 +1,217 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using TokenStream = Lucene.Net.Analysis.TokenStream;
+using ArrayUtil = Lucene.Net.Util.ArrayUtil;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> A Payload is metadata that can be stored together with each occurrence
+ /// of a term. This metadata is stored inline in the posting list of the
+ /// specific term.
+ /// <p/>
+ /// To store payloads in the index a <see cref="TokenStream"/> has to be used that
+ /// produces payload data.
+ /// <p/>
+ /// Use <see cref="TermPositions.PayloadLength"/> and <see cref="TermPositions.GetPayload(byte[], int)"/>
+ /// to retrieve the payloads from the index.<br/>
+ ///
+ /// </summary>
+ [Serializable]
+ public class Payload : System.ICloneable
+ {
+ /// <summary>the byte array containing the payload data </summary>
+ protected internal byte[] data;
+
+ /// <summary>the offset within the byte array </summary>
+ protected internal int internalOffset;
+
+ /// <summary>the length of the payload data </summary>
+ protected internal int internalLength;
+
+ /// <summary>Creates an empty payload and does not allocate a byte array. </summary>
+ public Payload()
+ {
+ // nothing to do
+ }
+
+ /// <summary> Creates a new payload with the given array as data.
+ /// A reference to the passed-in array is held, i.e. no
+ /// copy is made.
+ ///
+ /// </summary>
+ /// <param name="data">the data of this payload
+ /// </param>
+ public Payload(byte[] data):this(data, 0, data.Length)
+ {
+ }
+
+ /// <summary> Creates a new payload with the given array as data.
+ /// A reference to the passed-in array is held, i.e. no
+ /// copy is made.
+ ///
+ /// </summary>
+ /// <param name="data">the data of this payload
+ /// </param>
+ /// <param name="offset">the offset in the data byte array
+ /// </param>
+ /// <param name="length">the length of the data
+ /// </param>
+ public Payload(byte[] data, int offset, int length)
+ {
+ if (offset < 0 || offset + length > data.Length)
+ {
+ throw new System.ArgumentException();
+ }
+ this.data = data;
+ this.internalOffset = offset;
+ this.internalLength = length;
+ }
+
+ /// <summary> Sets this payload's data.
+ /// A reference to the passed-in array is held, i.e. no
+ /// copy is made.
+ /// </summary>
+ public virtual void SetData(byte[] value, int offset, int length)
+ {
+ this.data = value;
+ this.internalOffset = offset;
+ this.internalLength = length;
+ }
+
+ /// <summary> Sets a reference to the underlying byte array
+ /// that holds this payload's data. Data is not copied.
+ /// </summary>
+ public virtual void SetData(byte[] value)
+ {
+ SetData(value, 0, value.Length);
+ }
+
+ /// <summary> Returns a reference to the underlying byte array
+ /// that holds this payload's data. Data is not copied.
+ /// </summary>
+ public virtual byte[] GetData()
+ {
+ return this.data;
+ }
+
+ /// <summary> Returns the offset in the underlying byte array </summary>
+ public virtual int Offset
+ {
+ get { return this.internalOffset; }
+ }
+
+ /// <summary> Returns the length of the payload data. </summary>
+ public virtual int Length
+ {
+ get { return this.internalLength; }
+ }
+
+ /// <summary> Returns the byte at the given index.</summary>
+ public virtual byte ByteAt(int index)
+ {
+ if (0 <= index && index < this.internalLength)
+ {
+ return this.data[this.internalOffset + index];
+ }
+ throw new System.IndexOutOfRangeException("Index out of bounds: " + index);
+ }
+
+ /// <summary> Allocates a new byte array, copies the payload data into it and returns it. </summary>
+ public virtual byte[] ToByteArray()
+ {
+ byte[] retArray = new byte[this.internalLength];
+ Array.Copy(this.data, this.internalOffset, retArray, 0, this.internalLength);
+ return retArray;
+ }
+
+ /// <summary> Copies the payload data to a byte array.
+ ///
+ /// </summary>
+ /// <param name="target">the target byte array
+ /// </param>
+ /// <param name="targetOffset">the offset in the target byte array
+ /// </param>
+ public virtual void CopyTo(byte[] target, int targetOffset)
+ {
+ if (this.internalLength > target.Length - targetOffset)
+ {
+ throw new System.IndexOutOfRangeException();
+ }
+ Array.Copy(this.data, this.internalOffset, target, targetOffset, this.internalLength);
+ }
+
+ /// <summary> Clones this payload by creating a copy of the underlying
+ /// byte array.
+ /// </summary>
+ public virtual System.Object Clone()
+ {
+ try
+ {
+ // Start with a shallow copy of data
+ Payload clone = (Payload) base.MemberwiseClone();
+ // Only copy the part of data that belongs to this Payload
+ if (internalOffset == 0 && internalLength == data.Length)
+ {
+ // It is the whole thing, so just clone it.
+ clone.data = new byte[data.Length];
+ data.CopyTo(clone.data, 0);
+ }
+ else
+ {
+ // Just get the part
+ clone.data = this.ToByteArray();
+ clone.internalOffset = 0;
+ }
+ return clone;
+ }
+ catch (System.Exception e)
+ {
+ throw new System.SystemException(e.Message, e); // shouldn't happen
+ }
+ }
+
+ public override bool Equals(System.Object obj)
+ {
+ if (obj == this)
+ return true;
+ if (obj is Payload)
+ {
+ Payload other = (Payload) obj;
+ if (internalLength == other.internalLength)
+ {
+ for (int i = 0; i < internalLength; i++)
+ if (data[internalOffset + i] != other.data[other.internalOffset + i])
+ return false;
+ return true;
+ }
+ else
+ return false;
+ }
+ else
+ return false;
+ }
+
+ public override int GetHashCode()
+ {
+ return ArrayUtil.HashCode(data, internalOffset, internalOffset + internalLength);
+ }
+ }
+} \ No newline at end of file
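To make the summary above concrete, payload bytes reach the posting lists through the analysis chain: a TokenStream has to attach a Payload to each token it emits. The sketch below is not part of this commit; it assumes the Lucene.Net 3.x IPayloadAttribute/TokenFilter APIs, and the filter name and payload value are illustrative only:

    // Hedged sketch only: stamps the same one-byte payload onto every token.
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Tokenattributes;
    using Lucene.Net.Index;

    public sealed class ConstantPayloadFilter : TokenFilter
    {
        private readonly IPayloadAttribute payloadAtt;
        private readonly Payload payload = new Payload(new byte[] { 42 });

        public ConstantPayloadFilter(TokenStream input) : base(input)
        {
            payloadAtt = AddAttribute<IPayloadAttribute>();
        }

        public override bool IncrementToken()
        {
            if (!input.IncrementToken())
                return false;

            // The same Payload instance is referenced by every posting; no copy is made,
            // matching the "reference is held" contract documented above.
            payloadAtt.Payload = payload;
            return true;
        }
    }

    // At search time the bytes come back via TermPositions.GetPayload(byte[], int).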
diff --git a/src/core/Index/PositionBasedTermVectorMapper.cs b/src/core/Index/PositionBasedTermVectorMapper.cs
new file mode 100644
index 0000000..af548a7
--- /dev/null
+++ b/src/core/Index/PositionBasedTermVectorMapper.cs
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> For each Field, stores position-by-position information. Frequency information is ignored.
+ /// <p/>
+ /// This is not thread-safe.
+ /// </summary>
+ public class PositionBasedTermVectorMapper:TermVectorMapper
+ {
+ private IDictionary<string, IDictionary<int, TVPositionInfo>> fieldToTerms;
+
+ private System.String currentField;
+ /// <summary> A Map of Integer and TVPositionInfo</summary>
+ private IDictionary<int, TVPositionInfo> currentPositions;
+ private bool storeOffsets;
+
+ public PositionBasedTermVectorMapper():base(false, false)
+ {
+ }
+
+ public PositionBasedTermVectorMapper(bool ignoringOffsets):base(false, ignoringOffsets)
+ {
+ }
+
+ /// <summary> Never ignores positions. This mapper doesn't make much sense unless there are positions</summary>
+ /// <value> false </value>
+ public override bool IsIgnoringPositions
+ {
+ get { return false; }
+ }
+
+ /// <summary> Callback for the TermVectorReader. </summary>
+ /// <param name="term">
+ /// </param>
+ /// <param name="frequency">
+ /// </param>
+ /// <param name="offsets">
+ /// </param>
+ /// <param name="positions">
+ /// </param>
+ public override void Map(System.String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions)
+ {
+ for (int i = 0; i < positions.Length; i++)
+ {
+ System.Int32 posVal = positions[i];
+ TVPositionInfo pos = currentPositions[posVal];
+ if (pos == null)
+ {
+ pos = new TVPositionInfo(positions[i], storeOffsets);
+ currentPositions[posVal] = pos;
+ }
+ pos.addTerm(term, offsets != null ? offsets[i] : TermVectorOffsetInfo.Null);
+ }
+ }
+
+ /// <summary> Callback mechanism used by the TermVectorReader</summary>
+ /// <param name="field"> The field being read
+ /// </param>
+ /// <param name="numTerms">The number of terms in the vector
+ /// </param>
+ /// <param name="storeOffsets">Whether offsets are available
+ /// </param>
+ /// <param name="storePositions">Whether positions are available
+ /// </param>
+ public override void SetExpectations(System.String field, int numTerms, bool storeOffsets, bool storePositions)
+ {
+ if (storePositions == false)
+ {
+ throw new System.SystemException("You must store positions in order to use this Mapper");
+ }
+ if (storeOffsets == true)
+ {
+ //ignoring offsets
+ }
+ fieldToTerms = new HashMap<string, IDictionary<int, TVPositionInfo>>(numTerms);
+ this.storeOffsets = storeOffsets;
+ currentField = field;
+ currentPositions = new HashMap<int, TVPositionInfo>();
+ fieldToTerms[currentField] = currentPositions;
+ }
+
+ /// <summary> Get the mapping between fields and terms, sorted by the comparator
+ ///
+ /// </summary>
+ /// <value> A map between field names and a Map. The sub-Map key is the position as the integer, the value is <see cref="Lucene.Net.Index.PositionBasedTermVectorMapper.TVPositionInfo" />. </value>
+ public virtual IDictionary<string, IDictionary<int, TVPositionInfo>> FieldToTerms
+ {
+ get { return fieldToTerms; }
+ }
+
+ /// <summary> Container for a term at a position</summary>
+ public class TVPositionInfo
+ {
+ /// <summary> </summary>
+ /// <returns> The position of the term
+ /// </returns>
+ virtual public int Position
+ {
+ get
+ {
+ return position;
+ }
+
+ }
+ /// <summary> Note, there may be multiple terms at the same position</summary>
+ /// <returns> A List of Strings
+ /// </returns>
+ virtual public IList<String> Terms
+ {
+ get
+ {
+ return terms;
+ }
+
+ }
+ /// <summary> Parallel list (to <see cref="Terms" />) of TermVectorOffsetInfo objects.
+ /// There may be multiple entries since there may be multiple terms at a position</summary>
+ /// <returns> A List of TermVectorOffsetInfo objects, if offsets are stored.
+ /// </returns>
+ virtual public IList<TermVectorOffsetInfo> Offsets
+ {
+ get
+ {
+ return offsets;
+ }
+
+ }
+ private int position;
+ //a list of Strings
+ private IList<string> terms;
+ //A list of TermVectorOffsetInfo
+ private IList<TermVectorOffsetInfo> offsets;
+
+
+ public TVPositionInfo(int position, bool storeOffsets)
+ {
+ this.position = position;
+ terms = new List<string>();
+ if (storeOffsets)
+ {
+ offsets = new List<TermVectorOffsetInfo>();
+ }
+ }
+
+ internal virtual void addTerm(System.String term, TermVectorOffsetInfo info)
+ {
+ terms.Add(term);
+ if (offsets != null)
+ {
+ offsets.Add(info);
+ }
+ }
+ }
+ }
+} \ No newline at end of file
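A usage sketch (not part of this commit): the mapper is handed to IndexReader.GetTermFreqVector, after which FieldToTerms can be walked position by position. It assumes the field was indexed with term vectors that include positions; the reader, document id, and field name below are placeholders:

    // Hedged sketch only.
    using System;
    using Lucene.Net.Index;

    static class PositionMapperDemo
    {
        static void PrintTermsByPosition(IndexReader reader, int docId, string field)
        {
            var mapper = new PositionBasedTermVectorMapper();
            reader.GetTermFreqVector(docId, field, mapper);

            foreach (var fieldEntry in mapper.FieldToTerms)
            {
                foreach (var posEntry in fieldEntry.Value)
                {
                    // Several terms may share a position (e.g. synonyms injected at the
                    // same position increment).
                    Console.WriteLine("field={0} pos={1} terms={2}",
                        fieldEntry.Key, posEntry.Key, string.Join(", ", posEntry.Value.Terms));
                }
            }
        }
    }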
diff --git a/src/core/Index/RawPostingList.cs b/src/core/Index/RawPostingList.cs
new file mode 100644
index 0000000..bffc2de
--- /dev/null
+++ b/src/core/Index/RawPostingList.cs
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+
+ /// <summary>This is the base class for an in-memory posting list,
+ /// keyed by a Token. <see cref="TermsHash" /> maintains a hash
+ /// table holding one instance of this per unique Token.
+ /// Consumers of TermsHash (<see cref="TermsHashConsumer" />) must
+ /// subclass this class with its own concrete class.
+ /// FreqProxTermsWriter.PostingList is a private inner class used
+ /// for the freq/prox postings, and
+ /// TermVectorsTermsWriter.PostingList is a private inner class
+ /// used to hold TermVectors postings.
+ /// </summary>
+
+ abstract class RawPostingList
+ {
+ internal static readonly int BYTES_SIZE;
+ internal int textStart;
+ internal int intStart;
+ internal int byteStart;
+ static RawPostingList()
+ {
+ BYTES_SIZE = DocumentsWriter.OBJECT_HEADER_BYTES + 3 * DocumentsWriter.INT_NUM_BYTE;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/ReadOnlyDirectoryReader.cs b/src/core/Index/ReadOnlyDirectoryReader.cs
new file mode 100644
index 0000000..8f0f3b7
--- /dev/null
+++ b/src/core/Index/ReadOnlyDirectoryReader.cs
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using Directory = Lucene.Net.Store.Directory;
+
+namespace Lucene.Net.Index
+{
+
+ public class ReadOnlyDirectoryReader:DirectoryReader
+ {
+ internal ReadOnlyDirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy, int termInfosIndexDivisor):base(directory, sis, deletionPolicy, true, termInfosIndexDivisor)
+ {
+ }
+
+ internal ReadOnlyDirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts, System.Collections.Generic.IDictionary<string, byte[]> oldNormsCache, bool doClone, int termInfosIndexDivisor)
+ : base(directory, infos, oldReaders, oldStarts, oldNormsCache, true, doClone, termInfosIndexDivisor)
+ {
+ }
+
+ internal ReadOnlyDirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor):base(writer, infos, termInfosIndexDivisor)
+ {
+ }
+
+ protected internal override void AcquireWriteLock()
+ {
+ ReadOnlySegmentReader.NoWrite();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/ReadOnlySegmentReader.cs b/src/core/Index/ReadOnlySegmentReader.cs
new file mode 100644
index 0000000..3c7c916
--- /dev/null
+++ b/src/core/Index/ReadOnlySegmentReader.cs
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ public class ReadOnlySegmentReader:SegmentReader
+ {
+
+ internal static void NoWrite()
+ {
+ throw new System.NotSupportedException("This IndexReader cannot make any changes to the index (it was opened with readOnly = true)");
+ }
+
+ protected internal override void AcquireWriteLock()
+ {
+ NoWrite();
+ }
+
+ // Not synchronized
+ public override bool IsDeleted(int n)
+ {
+ return deletedDocs != null && deletedDocs.Get(n);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/ReusableStringReader.cs b/src/core/Index/ReusableStringReader.cs
new file mode 100644
index 0000000..54c1b7d
--- /dev/null
+++ b/src/core/Index/ReusableStringReader.cs
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary>Used by DocumentsWriter to implement a StringReader
+ /// that can be reset to a new string; we use this when
+ /// tokenizing the string value from a Field.
+ /// </summary>
+ sealed class ReusableStringReader : System.IO.TextReader
+ {
+ internal int upto;
+ internal int left;
+ internal System.String s;
+ internal void Init(System.String s)
+ {
+ this.s = s;
+ left = s.Length;
+ this.upto = 0;
+ }
+
+ public int Read(char[] c)
+ {
+ return Read(c, 0, c.Length);
+ }
+
+ public override int Read(System.Char[] c, int off, int len)
+ {
+ if (left > len)
+ {
+ TextSupport.GetCharsFromString(s, upto, upto + len, c, off);
+ upto += len;
+ left -= len;
+ return len;
+ }
+ else if (0 == left)
+ {
+ // don't keep a reference (s could have been very large)
+ s = null;
+ return 0;
+ }
+ else
+ {
+ TextSupport.GetCharsFromString(s, upto, upto + left, c, off);
+ int r = left;
+ left = 0;
+ upto = s.Length;
+ return r;
+ }
+ }
+
+ //[Obsolete("Use Dispose() instead")]
+ public override void Close()
+ {
+ Dispose();
+ }
+
+ public override int Read()
+ {
+ if (left > 0)
+ {
+ char ch = s[upto];
+ upto += 1;
+ left -= 1;
+ return (int)ch;
+ }
+ return -1;
+ }
+
+ public override int ReadBlock(char[] buffer, int index, int count)
+ {
+ return Read(buffer, index, count);
+ }
+
+ public override string ReadLine()
+ {
+ int i;
+ for (i = upto; i < s.Length; i++)
+ {
+ char c = s[i];
+ if (c == '\r' || c == '\n')
+ {
+ string result = s.Substring(upto, i - upto);
+ upto = i + 1;
+ left = s.Length - upto;
+ if (c == '\r' && upto < s.Length && s[upto] == '\n')
+ {
+ upto++;
+ left--;
+ }
+ return result;
+ }
+ }
+ if (i > upto)
+ {
+ return ReadToEnd();
+ }
+ return null;
+ }
+
+ public override int Peek()
+ {
+ if (left > 0)
+ {
+ return (int)s[upto];
+ }
+ return -1;
+ }
+
+ public override string ReadToEnd()
+ {
+ string result = s.Substring(upto, left);
+ left = 0;
+ upto = s.Length - 1;
+ return result;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/SegmentInfo.cs b/src/core/Index/SegmentInfo.cs
new file mode 100644
index 0000000..697dda6
--- /dev/null
+++ b/src/core/Index/SegmentInfo.cs
@@ -0,0 +1,875 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Support;
+using Directory = Lucene.Net.Store.Directory;
+using IndexInput = Lucene.Net.Store.IndexInput;
+using IndexOutput = Lucene.Net.Store.IndexOutput;
+using BitVector = Lucene.Net.Util.BitVector;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> Information about a segment such as its name, directory, and files related
+ /// to the segment.
+ ///
+ /// <p/><b>NOTE:</b> This API is new and still experimental
+ /// (subject to change suddenly in the next release)<p/>
+ /// </summary>
+ public sealed class SegmentInfo : System.ICloneable
+ {
+
+ internal const int NO = - 1; // e.g. no norms; no deletes;
+ internal const int YES = 1; // e.g. have norms; have deletes;
+ internal const int CHECK_DIR = 0; // e.g. must check dir to see if there are norms/deletions
+ internal const int WITHOUT_GEN = 0; // a file name that has no GEN in it.
+
+ public System.String name; // unique name in dir
+ public int docCount; // number of docs in seg
+ public Directory dir; // where segment resides
+
+ private bool preLockless; // true if this is a segments file written before
+ // lock-less commits (2.1)
+
+ private long delGen; // current generation of del file; NO if there
+ // are no deletes; CHECK_DIR if it's a pre-2.1 segment
+ // (and we must check filesystem); YES or higher if
+ // there are deletes at generation N
+
+ private long[] normGen; // current generation of each field's norm file.
+ // If this array is null, for lockLess this means no
+ // separate norms. For preLockLess this means we must
+ // check filesystem. If this array is not null, its
+ // values mean: NO says this field has no separate
+ // norms; CHECK_DIR says it is a preLockLess segment and
+ // filesystem must be checked; >= YES says this field
+ // has separate norms with the specified generation
+
+ private sbyte isCompoundFile; // NO if it is not; YES if it is; CHECK_DIR if it's
+ // pre-2.1 (ie, must check file system to see
+ // if <name>.cfs and <name>.nrm exist)
+
+ private bool hasSingleNormFile; // true if this segment maintains norms in a single file;
+ // false otherwise
+ // this is currently false for segments populated by DocumentWriter
+ // and true for newly created merged segments (both
+ // compound and non compound).
+
+ private IList<string> files; // cached list of files that this segment uses
+ // in the Directory
+
+ internal long sizeInBytes = - 1; // total byte size of all of our files (computed on demand)
+
+ private int docStoreOffset; // if this segment shares stored fields & vectors, this
+ // offset is where in that file this segment's docs begin
+ private System.String docStoreSegment; // name used to derive fields/vectors file we share with
+ // other segments
+ private bool docStoreIsCompoundFile; // whether doc store files are stored in compound file (*.cfx)
+
+ private int delCount; // How many deleted docs in this segment, or -1 if not yet known
+ // (if it's an older index)
+
+ private bool hasProx; // True if this segment has any fields with omitTermFreqAndPositions==false
+
+ private IDictionary<string, string> diagnostics;
+
+ public override System.String ToString()
+ {
+ return "si: " + dir.ToString() + " " + name + " docCount: " + docCount + " delCount: " + delCount + " delFileName: " + GetDelFileName();
+ }
+
+ public SegmentInfo(System.String name, int docCount, Directory dir)
+ {
+ this.name = name;
+ this.docCount = docCount;
+ this.dir = dir;
+ delGen = NO;
+ isCompoundFile = (sbyte) (CHECK_DIR);
+ preLockless = true;
+ hasSingleNormFile = false;
+ docStoreOffset = - 1;
+ docStoreSegment = name;
+ docStoreIsCompoundFile = false;
+ delCount = 0;
+ hasProx = true;
+ }
+
+ public SegmentInfo(System.String name, int docCount, Directory dir, bool isCompoundFile, bool hasSingleNormFile):this(name, docCount, dir, isCompoundFile, hasSingleNormFile, - 1, null, false, true)
+ {
+ }
+
+ public SegmentInfo(System.String name, int docCount, Directory dir, bool isCompoundFile, bool hasSingleNormFile, int docStoreOffset, System.String docStoreSegment, bool docStoreIsCompoundFile, bool hasProx):this(name, docCount, dir)
+ {
+ this.isCompoundFile = (sbyte) (isCompoundFile?YES:NO);
+ this.hasSingleNormFile = hasSingleNormFile;
+ preLockless = false;
+ this.docStoreOffset = docStoreOffset;
+ this.docStoreSegment = docStoreSegment;
+ this.docStoreIsCompoundFile = docStoreIsCompoundFile;
+ this.hasProx = hasProx;
+ delCount = 0;
+ System.Diagnostics.Debug.Assert(docStoreOffset == - 1 || docStoreSegment != null, "dso=" + docStoreOffset + " dss=" + docStoreSegment + " docCount=" + docCount);
+ }
+
+ /// <summary> Copy everything from src SegmentInfo into our instance.</summary>
+ internal void Reset(SegmentInfo src)
+ {
+ ClearFiles();
+ name = src.name;
+ docCount = src.docCount;
+ dir = src.dir;
+ preLockless = src.preLockless;
+ delGen = src.delGen;
+ docStoreOffset = src.docStoreOffset;
+ docStoreIsCompoundFile = src.docStoreIsCompoundFile;
+ if (src.normGen == null)
+ {
+ normGen = null;
+ }
+ else
+ {
+ normGen = new long[src.normGen.Length];
+ Array.Copy(src.normGen, 0, normGen, 0, src.normGen.Length);
+ }
+ isCompoundFile = src.isCompoundFile;
+ hasSingleNormFile = src.hasSingleNormFile;
+ delCount = src.delCount;
+ }
+
+ public IDictionary<string, string> Diagnostics
+ {
+ get { return diagnostics; }
+ internal set { this.diagnostics = value; }
+ }
+
+ /// <summary> Construct a new SegmentInfo instance by reading a
+ /// previously saved SegmentInfo from input.
+ ///
+ /// </summary>
+ /// <param name="dir">directory to load from
+ /// </param>
+ /// <param name="format">format of the segments info file
+ /// </param>
+ /// <param name="input">input handle to read segment info from
+ /// </param>
+ internal SegmentInfo(Directory dir, int format, IndexInput input)
+ {
+ this.dir = dir;
+ name = input.ReadString();
+ docCount = input.ReadInt();
+ if (format <= SegmentInfos.FORMAT_LOCKLESS)
+ {
+ delGen = input.ReadLong();
+ if (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE)
+ {
+ docStoreOffset = input.ReadInt();
+ if (docStoreOffset != - 1)
+ {
+ docStoreSegment = input.ReadString();
+ docStoreIsCompoundFile = (1 == input.ReadByte());
+ }
+ else
+ {
+ docStoreSegment = name;
+ docStoreIsCompoundFile = false;
+ }
+ }
+ else
+ {
+ docStoreOffset = - 1;
+ docStoreSegment = name;
+ docStoreIsCompoundFile = false;
+ }
+ if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE)
+ {
+ hasSingleNormFile = (1 == input.ReadByte());
+ }
+ else
+ {
+ hasSingleNormFile = false;
+ }
+ int numNormGen = input.ReadInt();
+ if (numNormGen == NO)
+ {
+ normGen = null;
+ }
+ else
+ {
+ normGen = new long[numNormGen];
+ for (int j = 0; j < numNormGen; j++)
+ {
+ normGen[j] = input.ReadLong();
+ }
+ }
+ isCompoundFile = (sbyte) input.ReadByte();
+ preLockless = (isCompoundFile == CHECK_DIR);
+ if (format <= SegmentInfos.FORMAT_DEL_COUNT)
+ {
+ delCount = input.ReadInt();
+ System.Diagnostics.Debug.Assert(delCount <= docCount);
+ }
+ else
+ delCount = - 1;
+ if (format <= SegmentInfos.FORMAT_HAS_PROX)
+ hasProx = input.ReadByte() == 1;
+ else
+ hasProx = true;
+
+ if (format <= SegmentInfos.FORMAT_DIAGNOSTICS)
+ {
+ diagnostics = input.ReadStringStringMap();
+ }
+ else
+ {
+ diagnostics = new Dictionary<string,string>();
+ }
+ }
+ else
+ {
+ delGen = CHECK_DIR;
+ normGen = null;
+ isCompoundFile = (sbyte) (CHECK_DIR);
+ preLockless = true;
+ hasSingleNormFile = false;
+ docStoreOffset = - 1;
+ docStoreIsCompoundFile = false;
+ docStoreSegment = null;
+ delCount = - 1;
+ hasProx = true;
+ diagnostics = new Dictionary<string,string>();
+ }
+ }
+
+ internal void SetNumFields(int numFields)
+ {
+ if (normGen == null)
+ {
+ // normGen is null if we loaded a pre-2.1 segment
+ // file, or, if this segments file hasn't had any
+ // norms set against it yet:
+ normGen = new long[numFields];
+
+ if (preLockless)
+ {
+ // Do nothing: thus leaving normGen[k]==CHECK_DIR (==0), so that later we know
+ // we have to check filesystem for norm files, because this is prelockless.
+ }
+ else
+ {
+ // This is a FORMAT_LOCKLESS segment, which means
+ // there are no separate norms:
+ for (int i = 0; i < numFields; i++)
+ {
+ normGen[i] = NO;
+ }
+ }
+ }
+ }
+
+ /// <summary>Returns total size in bytes of all of files used by
+ /// this segment.
+ /// </summary>
+ public long SizeInBytes()
+ {
+ if (sizeInBytes == - 1)
+ {
+ IList<string> files = Files();
+ int size = files.Count;
+ sizeInBytes = 0;
+ for (int i = 0; i < size; i++)
+ {
+ System.String fileName = files[i];
+ // We don't count bytes used by a shared doc store
+ // against this segment:
+ if (docStoreOffset == - 1 || !IndexFileNames.IsDocStoreFile(fileName))
+ sizeInBytes += dir.FileLength(fileName);
+ }
+ }
+ return sizeInBytes;
+ }
+
+ public bool HasDeletions()
+ {
+ // Cases:
+ //
+ // delGen == NO: this means this segment was written
+ // by the LOCKLESS code and for certain does not have
+ // deletions yet
+ //
+ // delGen == CHECK_DIR: this means this segment was written by
+ // pre-LOCKLESS code which means we must check
+ // directory to see if .del file exists
+ //
+ // delGen >= YES: this means this segment was written by
+ // the LOCKLESS code and for certain has
+ // deletions
+ //
+ if (delGen == NO)
+ {
+ return false;
+ }
+ else if (delGen >= YES)
+ {
+ return true;
+ }
+ else
+ {
+ return dir.FileExists(GetDelFileName());
+ }
+ }
+
+ internal void AdvanceDelGen()
+ {
+ // delGen 0 is reserved for pre-LOCKLESS format
+ if (delGen == NO)
+ {
+ delGen = YES;
+ }
+ else
+ {
+ delGen++;
+ }
+ ClearFiles();
+ }
+
+ internal void ClearDelGen()
+ {
+ delGen = NO;
+ ClearFiles();
+ }
+
+ public System.Object Clone()
+ {
+ SegmentInfo si = new SegmentInfo(name, docCount, dir);
+ si.isCompoundFile = isCompoundFile;
+ si.delGen = delGen;
+ si.delCount = delCount;
+ si.hasProx = hasProx;
+ si.preLockless = preLockless;
+ si.hasSingleNormFile = hasSingleNormFile;
+ if (this.diagnostics != null)
+ {
+ // Copy the diagnostics map rather than sharing the reference:
+ si.diagnostics = new HashMap<string, string>(this.diagnostics);
+ }
+ if (normGen != null)
+ {
+ si.normGen = new long[normGen.Length];
+ normGen.CopyTo(si.normGen, 0);
+ }
+ si.docStoreOffset = docStoreOffset;
+ si.docStoreSegment = docStoreSegment;
+ si.docStoreIsCompoundFile = docStoreIsCompoundFile;
+ if (this.files != null)
+ {
+ si.files = new System.Collections.Generic.List<string>();
+ foreach (string file in files)
+ {
+ si.files.Add(file);
+ }
+ }
+
+ return si;
+ }
+
+ [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
+ public System.String GetDelFileName()
+ {
+ if (delGen == NO)
+ {
+ // In this case we know there is no deletion filename
+ // against this segment
+ return null;
+ }
+ else
+ {
+ // If delGen is CHECK_DIR, it's the pre-lockless-commit file format
+ return IndexFileNames.FileNameFromGeneration(name, "." + IndexFileNames.DELETES_EXTENSION, delGen);
+ }
+ }
+
+ /// <summary> Returns true if this field for this segment has saved a separate norms file (_&lt;segment&gt;_N.sX).
+ ///
+ /// </summary>
+ /// <param name="fieldNumber">the field index to check
+ /// </param>
+ public bool HasSeparateNorms(int fieldNumber)
+ {
+ if ((normGen == null && preLockless) || (normGen != null && normGen[fieldNumber] == CHECK_DIR))
+ {
+ // Must fall back to a directory file-exists check:
+ System.String fileName = name + ".s" + fieldNumber;
+ return dir.FileExists(fileName);
+ }
+ else if (normGen == null || normGen[fieldNumber] == NO)
+ {
+ return false;
+ }
+ else
+ {
+ return true;
+ }
+ }
+
+ /// <summary> Returns true if any fields in this segment have separate norms.</summary>
+ public bool HasSeparateNorms()
+ {
+ if (normGen == null)
+ {
+ if (!preLockless)
+ {
+ // This means we were created w/ LOCKLESS code and no
+ // norms are written yet:
+ return false;
+ }
+ else
+ {
+ // This means this segment was saved with pre-LOCKLESS
+ // code, so we must fall back to the original
+ // directory listing check:
+ System.String[] result = dir.ListAll();
+ if (result == null)
+ {
+ throw new System.IO.IOException("cannot read directory " + dir + ": ListAll() returned null");
+ }
+
+ IndexFileNameFilter filter = IndexFileNameFilter.Filter;
+ System.String pattern;
+ pattern = name + ".s";
+ int patternLength = pattern.Length;
+ for (int i = 0; i < result.Length; i++)
+ {
+ string fileName = result[i];
+ if (filter.Accept(null, fileName) && fileName.StartsWith(pattern) && char.IsDigit(fileName[patternLength]))
+ return true;
+ }
+ return false;
+ }
+ }
+ else
+ {
+ // This means this segment was saved with LOCKLESS
+ // code so we first check whether any normGen's are >= 1
+ // (meaning they definitely have separate norms):
+ for (int i = 0; i < normGen.Length; i++)
+ {
+ if (normGen[i] >= YES)
+ {
+ return true;
+ }
+ }
+ // Next we look for any == 0. These cases were
+ // pre-LOCKLESS and must be checked in directory:
+ for (int i = 0; i < normGen.Length; i++)
+ {
+ if (normGen[i] == CHECK_DIR)
+ {
+ if (HasSeparateNorms(i))
+ {
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+ }
+
+ /// <summary> Increment the generation count for the norms file for
+ /// this field.
+ ///
+ /// </summary>
+ /// <param name="fieldIndex">field whose norm file will be rewritten
+ /// </param>
+ internal void AdvanceNormGen(int fieldIndex)
+ {
+ if (normGen[fieldIndex] == NO)
+ {
+ normGen[fieldIndex] = YES;
+ }
+ else
+ {
+ normGen[fieldIndex]++;
+ }
+ ClearFiles();
+ }
+
+ /// <summary> Get the file name for the norms file for this field.
+ ///
+ /// </summary>
+ /// <param name="number">field index
+ /// </param>
+ public System.String GetNormFileName(int number)
+ {
+ System.String prefix;
+
+ long gen;
+ if (normGen == null)
+ {
+ gen = CHECK_DIR;
+ }
+ else
+ {
+ gen = normGen[number];
+ }
+
+ if (HasSeparateNorms(number))
+ {
+ // case 1: separate norm
+ prefix = ".s";
+ return IndexFileNames.FileNameFromGeneration(name, prefix + number, gen);
+ }
+
+ if (hasSingleNormFile)
+ {
+ // case 2: lockless (or nrm file exists) - single file for all norms
+ prefix = "." + IndexFileNames.NORMS_EXTENSION;
+ return IndexFileNames.FileNameFromGeneration(name, prefix, WITHOUT_GEN);
+ }
+
+ // case 3: norm file for each field
+ prefix = ".f";
+ return IndexFileNames.FileNameFromGeneration(name, prefix + number, WITHOUT_GEN);
+ }
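+ // Illustrative sketch of the three cases above (hypothetical name layouts; the
+ // exact file names come from IndexFileNames.FileNameFromGeneration):
+ //   case 1: separate norms for field N   -> "<segment>_<gen>.sN"
+ //   case 2: single shared norms file     -> "<segment>." + NORMS_EXTENSION
+ //   case 3: per-field plain norms file   -> "<segment>.fN"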
+
+ /// <summary> Sets whether this segment is stored as a compound
+ /// file.
+ /// </summary>
+ internal void SetUseCompoundFile(bool value)
+ {
+ if (value)
+ {
+ this.isCompoundFile = (sbyte) (YES);
+ }
+ else
+ {
+ this.isCompoundFile = (sbyte) (NO);
+ }
+ ClearFiles();
+ }
+
+ /// <summary> Returns true if this segment is stored as a compound
+ /// file; else, false.
+ /// </summary>
+ [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
+ public bool GetUseCompoundFile()
+ {
+ if (isCompoundFile == NO)
+ {
+ return false;
+ }
+ if (isCompoundFile == YES)
+ {
+ return true;
+ }
+ return dir.FileExists(name + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
+ }
+
+ [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
+ public int GetDelCount()
+ {
+ if (delCount == - 1)
+ {
+ if (HasDeletions())
+ {
+ System.String delFileName = GetDelFileName();
+ delCount = new BitVector(dir, delFileName).Count();
+ }
+ else
+ delCount = 0;
+ }
+ System.Diagnostics.Debug.Assert(delCount <= docCount);
+ return delCount;
+ }
+
+ internal void SetDelCount(int delCount)
+ {
+ this.delCount = delCount;
+ System.Diagnostics.Debug.Assert(delCount <= docCount);
+ }
+
+ public int DocStoreOffset
+ {
+ get { return docStoreOffset; }
+ internal set
+ {
+ docStoreOffset = value;
+ ClearFiles();
+ }
+ }
+
+ public bool DocStoreIsCompoundFile
+ {
+ get { return docStoreIsCompoundFile; }
+ internal set
+ {
+ docStoreIsCompoundFile = value;
+ ClearFiles();
+ }
+ }
+
+ public string DocStoreSegment
+ {
+ get { return docStoreSegment; }
+ }
+
+ internal void SetDocStore(int offset, System.String segment, bool isCompoundFile)
+ {
+ docStoreOffset = offset;
+ docStoreSegment = segment;
+ docStoreIsCompoundFile = isCompoundFile;
+ }
+
+ /// <summary> Save this segment's info.</summary>
+ internal void Write(IndexOutput output)
+ {
+ output.WriteString(name);
+ output.WriteInt(docCount);
+ output.WriteLong(delGen);
+ output.WriteInt(docStoreOffset);
+ if (docStoreOffset != - 1)
+ {
+ output.WriteString(docStoreSegment);
+ output.WriteByte((byte) (docStoreIsCompoundFile?1:0));
+ }
+
+ output.WriteByte((byte) (hasSingleNormFile?1:0));
+ if (normGen == null)
+ {
+ output.WriteInt(NO);
+ }
+ else
+ {
+ output.WriteInt(normGen.Length);
+ for (int j = 0; j < normGen.Length; j++)
+ {
+ output.WriteLong(normGen[j]);
+ }
+ }
+ output.WriteByte((byte) isCompoundFile);
+ output.WriteInt(delCount);
+ output.WriteByte((byte) (hasProx?1:0));
+ output.WriteStringStringMap(diagnostics);
+ }
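+ // Note: the order written above (name, docCount, delGen, docStoreOffset,
+ // optional doc-store fields, hasSingleNormFile, normGen, isCompoundFile,
+ // delCount, hasProx, diagnostics) mirrors what the reading constructor
+ // SegmentInfo(Directory, int, IndexInput) expects for the newest format,
+ // so the two must be kept in sync.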
+
+ public bool HasProx
+ {
+ get { return hasProx; }
+ internal set
+ {
+ this.hasProx = value;
+ ClearFiles();
+ }
+ }
+
+ private void AddIfExists(IList<string> files, System.String fileName)
+ {
+ if (dir.FileExists(fileName))
+ files.Add(fileName);
+ }
+
+ /*
+ * Return all files referenced by this SegmentInfo. The
+ * returned List is cached locally, so you should not
+ * modify it.
+ */
+
+ public IList<string> Files()
+ {
+
+ if (files != null)
+ {
+ // Already cached:
+ return files;
+ }
+
+ var fileList = new System.Collections.Generic.List<string>();
+
+ bool useCompoundFile = GetUseCompoundFile();
+
+ if (useCompoundFile)
+ {
+ fileList.Add(name + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
+ }
+ else
+ {
+ System.String[] exts = IndexFileNames.NON_STORE_INDEX_EXTENSIONS;
+ for (int i = 0; i < exts.Length; i++)
+ AddIfExists(fileList, name + "." + exts[i]);
+ }
+
+ if (docStoreOffset != - 1)
+ {
+ // We are sharing doc stores (stored fields, term
+ // vectors) with other segments
+ System.Diagnostics.Debug.Assert(docStoreSegment != null);
+ if (docStoreIsCompoundFile)
+ {
+ fileList.Add(docStoreSegment + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION);
+ }
+ else
+ {
+ System.String[] exts = IndexFileNames.STORE_INDEX_EXTENSIONS;
+ for (int i = 0; i < exts.Length; i++)
+ AddIfExists(fileList, docStoreSegment + "." + exts[i]);
+ }
+ }
+ else if (!useCompoundFile)
+ {
+ // We are not sharing, and, these files were not
+ // included in the compound file
+ System.String[] exts = IndexFileNames.STORE_INDEX_EXTENSIONS;
+ for (int i = 0; i < exts.Length; i++)
+ AddIfExists(fileList, name + "." + exts[i]);
+ }
+
+ System.String delFileName = IndexFileNames.FileNameFromGeneration(name, "." + IndexFileNames.DELETES_EXTENSION, delGen);
+ if (delFileName != null && (delGen >= YES || dir.FileExists(delFileName)))
+ {
+ fileList.Add(delFileName);
+ }
+
+ // Careful logic for norms files
+ if (normGen != null)
+ {
+ for (int i = 0; i < normGen.Length; i++)
+ {
+ long gen = normGen[i];
+ if (gen >= YES)
+ {
+ // Definitely a separate norm file, with generation:
+ fileList.Add(IndexFileNames.FileNameFromGeneration(name, "." + IndexFileNames.SEPARATE_NORMS_EXTENSION + i, gen));
+ }
+ else if (NO == gen)
+ {
+ // No separate norms but maybe plain norms
+ // in the non compound file case:
+ if (!hasSingleNormFile && !useCompoundFile)
+ {
+ System.String fileName = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION + i;
+ if (dir.FileExists(fileName))
+ {
+ fileList.Add(fileName);
+ }
+ }
+ }
+ else if (CHECK_DIR == gen)
+ {
+ // Pre-2.1: we have to check file existence
+ System.String fileName = null;
+ if (useCompoundFile)
+ {
+ fileName = name + "." + IndexFileNames.SEPARATE_NORMS_EXTENSION + i;
+ }
+ else if (!hasSingleNormFile)
+ {
+ fileName = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION + i;
+ }
+ if (fileName != null && dir.FileExists(fileName))
+ {
+ fileList.Add(fileName);
+ }
+ }
+ }
+ }
+ else if (preLockless || (!hasSingleNormFile && !useCompoundFile))
+ {
+ // Pre-2.1: we have to scan the dir to find all
+ // matching _X.sN/_X.fN files for our segment:
+ System.String prefix;
+ if (useCompoundFile)
+ prefix = name + "." + IndexFileNames.SEPARATE_NORMS_EXTENSION;
+ else
+ prefix = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION;
+ int prefixLength = prefix.Length;
+ System.String[] allFiles = dir.ListAll();
+ IndexFileNameFilter filter = IndexFileNameFilter.Filter;
+ for (int i = 0; i < allFiles.Length; i++)
+ {
+ System.String fileName = allFiles[i];
+ if (filter.Accept(null, fileName) && fileName.Length > prefixLength && System.Char.IsDigit(fileName[prefixLength]) && fileName.StartsWith(prefix))
+ {
+ fileList.Add(fileName);
+ }
+ }
+ }
+ //System.Diagnostics.Debug.Assert();
+ files = fileList;
+ return files;
+ }
+
+ /* Called whenever any change is made that affects which
+ * files this segment has. */
+ private void ClearFiles()
+ {
+ files = null;
+ sizeInBytes = - 1;
+ }
+
+ /// <summary>Used for debugging </summary>
+ public System.String SegString(Directory dir)
+ {
+ System.String cfs;
+ try
+ {
+ if (GetUseCompoundFile())
+ cfs = "c";
+ else
+ cfs = "C";
+ }
+ catch (System.IO.IOException)
+ {
+ cfs = "?";
+ }
+
+ System.String docStore;
+
+ if (docStoreOffset != - 1)
+ docStore = "->" + docStoreSegment;
+ else
+ docStore = "";
+
+ return name + ":" + cfs + (this.dir == dir?"":"x") + docCount + docStore;
+ }
+
+ /// <summary>We consider another SegmentInfo instance equal if it
+ /// has the same dir and same name.
+ /// </summary>
+ public override bool Equals(System.Object obj)
+ {
+ if (this == obj) return true;
+
+ if (obj is SegmentInfo)
+ {
+ SegmentInfo other = (SegmentInfo) obj;
+ return other.dir == dir && other.name.Equals(name);
+ }
+ return false;
+ }
+
+ public override int GetHashCode()
+ {
+ return dir.GetHashCode() + name.GetHashCode();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/SegmentInfos.cs b/src/core/Index/SegmentInfos.cs
new file mode 100644
index 0000000..ca5297e
--- /dev/null
+++ b/src/core/Index/SegmentInfos.cs
@@ -0,0 +1,1074 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+using Lucene.Net.Support;
+using ChecksumIndexInput = Lucene.Net.Store.ChecksumIndexInput;
+using ChecksumIndexOutput = Lucene.Net.Store.ChecksumIndexOutput;
+using Directory = Lucene.Net.Store.Directory;
+using IndexInput = Lucene.Net.Store.IndexInput;
+using IndexOutput = Lucene.Net.Store.IndexOutput;
+using NoSuchDirectoryException = Lucene.Net.Store.NoSuchDirectoryException;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> A collection of segmentInfo objects with methods for operating on
+ /// those segments in relation to the file system.
+ ///
+ /// <p/><b>NOTE:</b> This API is new and still experimental
+ /// (subject to change suddenly in the next release)<p/>
+ /// </summary>
+ [Serializable]
+ public sealed class SegmentInfos : List<SegmentInfo>, ICloneable
+ {
+ private class AnonymousClassFindSegmentsFile:FindSegmentsFile
+ {
+ private void InitBlock(SegmentInfos enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private SegmentInfos enclosingInstance;
+ public SegmentInfos Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ internal AnonymousClassFindSegmentsFile(SegmentInfos enclosingInstance, Lucene.Net.Store.Directory Param1):base(Param1)
+ {
+ InitBlock(enclosingInstance);
+ }
+
+ public /*protected internal*/ override System.Object DoBody(System.String segmentFileName)
+ {
+ Enclosing_Instance.Read(directory, segmentFileName);
+ return null;
+ }
+ }
+ /// <summary>The file format version, a negative number. </summary>
+ /* Works since counter, the old 1st entry, is always >= 0 */
+ public const int FORMAT = - 1;
+
+ /// <summary>This format adds details used for lockless commits. It differs
+ /// slightly from the previous format in that file names
+ /// are never re-used (write once). Instead, each file is
+ /// written to the next generation. For example,
+ /// segments_1, segments_2, etc. This allows us to not use
+ /// a commit lock. See <a
+ /// href="http://lucene.apache.org/java/docs/fileformats.html">file
+ /// formats</a> for details.
+ /// </summary>
+ public const int FORMAT_LOCKLESS = - 2;
+
+ /// <summary>This format adds a "hasSingleNormFile" flag into each segment info.
+ /// See <a href="http://issues.apache.org/jira/browse/LUCENE-756">LUCENE-756</a>
+ /// for details.
+ /// </summary>
+ public const int FORMAT_SINGLE_NORM_FILE = - 3;
+
+ /// <summary>This format allows multiple segments to share a single
+ /// vectors and stored fields file.
+ /// </summary>
+ public const int FORMAT_SHARED_DOC_STORE = - 4;
+
+ /// <summary>This format adds a checksum at the end of the file to
+ /// ensure all bytes were successfully written.
+ /// </summary>
+ public const int FORMAT_CHECKSUM = - 5;
+
+ /// <summary>This format adds the deletion count for each segment.
+ /// This way IndexWriter can efficiently report numDocs().
+ /// </summary>
+ public const int FORMAT_DEL_COUNT = - 6;
+
+ /// <summary>This format adds the boolean hasProx to record if any
+ /// fields in the segment store prox information (ie, have
+ /// omitTermFreqAndPositions==false)
+ /// </summary>
+ public const int FORMAT_HAS_PROX = - 7;
+
+ /// <summary>This format adds optional commit userData (String) storage. </summary>
+ public const int FORMAT_USER_DATA = - 8;
+
+ /// <summary>This format adds optional per-segment String
+ /// diagnostics storage, and switches userData to Map
+ /// </summary>
+ public const int FORMAT_DIAGNOSTICS = - 9;
+
+ /* This must always point to the most recent file format. */
+ internal static readonly int CURRENT_FORMAT = FORMAT_DIAGNOSTICS;
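+ // Illustrative note: format numbers are negative and decrease as features are
+ // added, so "format <= FORMAT_X" reads as "written by format X or newer".
+ // For example, a file written with CURRENT_FORMAT (-9) also satisfies
+ // format <= FORMAT_DEL_COUNT (-6), which is why SegmentInfo's reading
+ // constructor can gate each feature with a simple <= comparison.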
+
+ public int counter = 0; // used to name new segments
+ /// <summary> Counts how often the index has been changed by adding or deleting docs.
+ /// Starting with the current time in milliseconds forces the creation of unique version numbers.
+ /// </summary>
+ private long version = (DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond);
+
+ private long generation = 0; // generation of the "segments_N" for the next commit
+ private long lastGeneration = 0; // generation of the "segments_N" file we last successfully read
+ // or wrote; this is normally the same as generation except if
+ // there was an IOException that had interrupted a commit
+
+ private IDictionary<string, string> userData = new HashMap<string, string>(); // Opaque Map<String, String> that user can specify during IndexWriter.commit
+
+ /// <summary> If non-null, information about loading segments_N files</summary>
+ /// <seealso cref="SetInfoStream">
+ /// </seealso>
+ private static System.IO.StreamWriter infoStream;
+
+ public SegmentInfo Info(int i)
+ {
+ return (SegmentInfo) this[i];
+ }
+
+ /// <summary> Get the generation (N) of the current segments_N file
+ /// from a list of files.
+ ///
+ /// </summary>
+ /// <param name="files">-- array of file names to check
+ /// </param>
+ public static long GetCurrentSegmentGeneration(System.String[] files)
+ {
+ if (files == null)
+ {
+ return - 1;
+ }
+ long max = - 1;
+ for (int i = 0; i < files.Length; i++)
+ {
+ System.String file = files[i];
+ if (file.StartsWith(IndexFileNames.SEGMENTS) && !file.Equals(IndexFileNames.SEGMENTS_GEN))
+ {
+ long gen = GenerationFromSegmentsFileName(file);
+ if (gen > max)
+ {
+ max = gen;
+ }
+ }
+ }
+ return max;
+ }
+
+ /// <summary> Get the generation (N) of the current segments_N file
+ /// in the directory.
+ ///
+ /// </summary>
+ /// <param name="directory">-- directory to search for the latest segments_N file
+ /// </param>
+ public static long GetCurrentSegmentGeneration(Directory directory)
+ {
+ try
+ {
+ return GetCurrentSegmentGeneration(directory.ListAll());
+ }
+ catch (NoSuchDirectoryException)
+ {
+ return - 1;
+ }
+ }
+
+ /// <summary> Get the filename of the current segments_N file
+ /// from a list of files.
+ ///
+ /// </summary>
+ /// <param name="files">-- array of file names to check
+ /// </param>
+
+ public static System.String GetCurrentSegmentFileName(System.String[] files)
+ {
+ return IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", GetCurrentSegmentGeneration(files));
+ }
+
+ /// <summary> Get the filename of the current segments_N file
+ /// in the directory.
+ ///
+ /// </summary>
+ /// <param name="directory">-- directory to search for the latest segments_N file
+ /// </param>
+ public static System.String GetCurrentSegmentFileName(Directory directory)
+ {
+ return IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", GetCurrentSegmentGeneration(directory));
+ }
+
+ /// <summary> Get the segments_N filename in use by this segment infos.</summary>
+ public System.String GetCurrentSegmentFileName()
+ {
+ return IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", lastGeneration);
+ }
+
+ /// <summary> Parse the generation off the segments file name and
+ /// return it.
+ /// </summary>
+ public static long GenerationFromSegmentsFileName(System.String fileName)
+ {
+ if (fileName.Equals(IndexFileNames.SEGMENTS))
+ {
+ return 0;
+ }
+ else if (fileName.StartsWith(IndexFileNames.SEGMENTS))
+ {
+ return Number.ToInt64(fileName.Substring(1 + IndexFileNames.SEGMENTS.Length));
+ }
+ else
+ {
+ throw new System.ArgumentException("fileName \"" + fileName + "\" is not a segments file");
+ }
+ }
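+ // Example: "segments" maps to generation 0 and "segments_4" to generation 4
+ // (the characters after the '_' are parsed by Number.ToInt64); a name that
+ // does not start with the segments prefix throws an ArgumentException.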
+
+
+ /// <summary> Get the next segments_N filename that will be written.</summary>
+ [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
+ public System.String GetNextSegmentFileName()
+ {
+ long nextGeneration;
+
+ if (generation == - 1)
+ {
+ nextGeneration = 1;
+ }
+ else
+ {
+ nextGeneration = generation + 1;
+ }
+ return IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", nextGeneration);
+ }
+
+ /// <summary> Read a particular segmentFileName. Note that this may
+ /// throw an IOException if a commit is in process.
+ ///
+ /// </summary>
+ /// <param name="directory">-- directory containing the segments file
+ /// </param>
+ /// <param name="segmentFileName">-- segment file to load
+ /// </param>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
+ public void Read(Directory directory, System.String segmentFileName)
+ {
+ bool success = false;
+
+ // Clear any previous segments:
+ Clear();
+
+ var input = new ChecksumIndexInput(directory.OpenInput(segmentFileName));
+
+ generation = GenerationFromSegmentsFileName(segmentFileName);
+
+ lastGeneration = generation;
+
+ try
+ {
+ int format = input.ReadInt();
+ if (format < 0)
+ {
+ // file contains explicit format info
+ // check that it is a format we can understand
+ if (format < CURRENT_FORMAT)
+ throw new CorruptIndexException("Unknown format version: " + format);
+ version = input.ReadLong(); // read version
+ counter = input.ReadInt(); // read counter
+ }
+ else
+ {
+ // file is in old format without explicit format info
+ counter = format;
+ }
+
+ for (int i = input.ReadInt(); i > 0; i--)
+ {
+ // read segmentInfos
+ Add(new SegmentInfo(directory, format, input));
+ }
+
+ if (format >= 0)
+ {
+ // in old format the version number may be at the end of the file
+ if (input.FilePointer >= input.Length())
+ version = (DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond);
+ // old file format without version number
+ else
+ version = input.ReadLong(); // read version
+ }
+
+ if (format <= FORMAT_USER_DATA)
+ {
+ if (format <= FORMAT_DIAGNOSTICS)
+ {
+ userData = input.ReadStringStringMap();
+ }
+ else if (0 != input.ReadByte())
+ {
+ // TODO: Should be read-only map
+ userData = new HashMap<string,string> {{"userData", input.ReadString()}};
+ }
+ else
+ {
+ // TODO: Should be empty read-only map
+ userData = new HashMap<string, string>();
+ }
+ }
+ else
+ {
+ // TODO: Should be empty read-only map
+ userData = new HashMap<string, string>();
+ }
+
+ if (format <= FORMAT_CHECKSUM)
+ {
+ long checksumNow = input.Checksum;
+ long checksumThen = input.ReadLong();
+ if (checksumNow != checksumThen)
+ throw new CorruptIndexException("checksum mismatch in segments file");
+ }
+ success = true;
+ }
+ finally
+ {
+ input.Close();
+ if (!success)
+ {
+ // Clear any segment infos we had loaded so we
+ // have a clean slate on retry:
+ Clear();
+ }
+ }
+ }
+
+ /// <summary> This version of read uses the retry logic (for lock-less
+ /// commits) to find the right segments file to load.
+ /// </summary>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
+ public void Read(Directory directory)
+ {
+
+ generation = lastGeneration = - 1;
+
+ new AnonymousClassFindSegmentsFile(this, directory).Run();
+ }
+
+ // Only non-null after prepareCommit has been called and
+ // before finishCommit is called
+ internal ChecksumIndexOutput pendingSegnOutput;
+
+ private void Write(Directory directory)
+ {
+
+ System.String segmentFileName = GetNextSegmentFileName();
+
+ // Always advance the generation on write:
+ if (generation == - 1)
+ {
+ generation = 1;
+ }
+ else
+ {
+ generation++;
+ }
+
+ var segnOutput = new ChecksumIndexOutput(directory.CreateOutput(segmentFileName));
+
+ bool success = false;
+
+ try
+ {
+ segnOutput.WriteInt(CURRENT_FORMAT); // write FORMAT
+ segnOutput.WriteLong(++version); // every write changes
+ // the index
+ segnOutput.WriteInt(counter); // write counter
+ segnOutput.WriteInt(Count); // write infos
+ for (int i = 0; i < Count; i++)
+ {
+ Info(i).Write(segnOutput);
+ }
+ segnOutput.WriteStringStringMap(userData);
+ segnOutput.PrepareCommit();
+ success = true;
+ pendingSegnOutput = segnOutput;
+ }
+ finally
+ {
+ if (!success)
+ {
+ // We hit an exception above; try to close the file
+ // but suppress any exception:
+ try
+ {
+ segnOutput.Close();
+ }
+ catch (System.Exception)
+ {
+ // Suppress so we keep throwing the original exception
+ }
+ try
+ {
+ // Try not to leave a truncated segments_N file in
+ // the index:
+ directory.DeleteFile(segmentFileName);
+ }
+ catch (System.Exception)
+ {
+ // Suppress so we keep throwing the original exception
+ }
+ }
+ }
+ }
+
+ /// <summary> Returns a copy of this instance, also copying each
+ /// SegmentInfo.
+ /// </summary>
+
+ public System.Object Clone()
+ {
+ SegmentInfos sis = new SegmentInfos();
+ for (int i = 0; i < this.Count; i++)
+ {
+ sis.Add((SegmentInfo)this[i].Clone());
+ }
+ sis.counter = this.counter;
+ sis.generation = this.generation;
+ sis.lastGeneration = this.lastGeneration;
+ // sis.pendingSegnOutput = this.pendingSegnOutput; // {{Aroush-2.9}} needed?
+ sis.userData = new HashMap<string, string>(userData);
+ sis.version = this.version;
+ return sis;
+ }
+
+ /// <summary> version number when this SegmentInfos was generated.</summary>
+ public long Version
+ {
+ get { return version; }
+ }
+
+ public long Generation
+ {
+ get { return generation; }
+ }
+
+ public long LastGeneration
+ {
+ get { return lastGeneration; }
+ }
+
+ /// <summary> Current version number from segments file.</summary>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
+ public static long ReadCurrentVersion(Directory directory)
+ {
+ // Fully read the segments file: this ensures that it's
+ // completely written so that if
+ // IndexWriter.prepareCommit has been called (but not
+ // yet commit), then the reader will still see itself as
+ // current:
+ var sis = new SegmentInfos();
+ sis.Read(directory);
+ return sis.version;
+ //return (long) ((System.Int64) new AnonymousClassFindSegmentsFile1(directory).Run());
+ //DIGY: AnonymousClassFindSegmentsFile1 can safely be deleted
+ }
+
+ /// <summary> Returns userData from latest segments file</summary>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
+ public static System.Collections.Generic.IDictionary<string, string> ReadCurrentUserData(Directory directory)
+ {
+ var sis = new SegmentInfos();
+ sis.Read(directory);
+ return sis.UserData;
+ }
+
+ /// <summary>If non-null, information about retries when loading
+ /// the segments file will be printed to this.
+ /// </summary>
+ public static void SetInfoStream(System.IO.StreamWriter infoStream)
+ {
+ SegmentInfos.infoStream = infoStream;
+ }
+
+ /* Advanced configuration of retry logic in loading
+ segments_N file */
+ private static int defaultGenFileRetryCount = 10;
+ private static int defaultGenFileRetryPauseMsec = 50;
+ private static int defaultGenLookaheadCount = 10;
+
+ /// <summary> Advanced: Gets or sets how many times to try loading the
+ /// segments.gen file contents to determine current segment
+ /// generation. This file is only referenced when the
+ /// primary method (listing the directory) fails.
+ /// </summary>
+ public static int DefaultGenFileRetryCount
+ {
+ get { return defaultGenFileRetryCount; }
+ set { defaultGenFileRetryCount = value; }
+ }
+
+ public static int DefaultGenFileRetryPauseMsec
+ {
+ set { defaultGenFileRetryPauseMsec = value; }
+ get { return defaultGenFileRetryPauseMsec; }
+ }
+
+ /// <summary> Advanced: Gets or sets how many times to try incrementing the
+ /// gen when loading the segments file. This only runs if
+ /// the primary (listing directory) and secondary (opening
+ /// segments.gen file) methods fail to find the segments
+ /// file.
+ /// </summary>
+ public static int DefaultGenLookaheadCount
+ {
+ set { defaultGenLookaheadCount = value; }
+ get { return defaultGenLookaheadCount; }
+ }
+
+ /// <seealso cref="SetInfoStream">
+ /// </seealso>
+ public static StreamWriter InfoStream
+ {
+ get { return infoStream; }
+ }
+
+ private static void Message(System.String message)
+ {
+ if (infoStream != null)
+ {
+ infoStream.WriteLine("SIS [" + ThreadClass.Current().Name + "]: " + message);
+ }
+ }
+
+ /// <summary> Utility class for executing code that needs to do
+ /// something with the current segments file. This is
+ /// necessary with lock-less commits because from the time
+ /// you locate the current segments file name, until you
+ /// actually open it, read its contents, or check modified
+ /// time, etc., it could have been deleted due to a writer
+ /// commit finishing.
+ /// </summary>
+ public abstract class FindSegmentsFile
+ {
+
+ internal Directory directory;
+
+ protected FindSegmentsFile(Directory directory)
+ {
+ this.directory = directory;
+ }
+
+ public System.Object Run()
+ {
+ return Run(null);
+ }
+
+ public System.Object Run(IndexCommit commit)
+ {
+ if (commit != null)
+ {
+ if (directory != commit.Directory)
+ throw new System.IO.IOException("the specified commit does not match the specified Directory");
+ return DoBody(commit.SegmentsFileName);
+ }
+
+ System.String segmentFileName = null;
+ long lastGen = - 1;
+ long gen = 0;
+ int genLookaheadCount = 0;
+ System.IO.IOException exc = null;
+ bool retry = false;
+
+ int method = 0;
+
+ // Loop until we succeed in calling doBody() without
+ // hitting an IOException. An IOException most likely
+ // means a commit was in process and has finished, in
+ // the time it took us to load the now-old infos files
+ // (and segments files). It's also possible it's a
+ // true error (corrupt index). To distinguish these,
+ // on each retry we must see "forward progress" on
+ // which generation we are trying to load. If we
+ // don't, then the original error is real and we throw
+ // it.
+
+ // We have three methods for determining the current
+ // generation. We try the first two in parallel, and
+ // fall back to the third when necessary.
+
+ while (true)
+ {
+
+ if (0 == method)
+ {
+
+ // Method 1: list the directory and use the highest
+ // segments_N file. This method works well as long
+ // as there is no stale caching on the directory
+ // contents (NOTE: NFS clients often have such stale
+ // caching):
+ System.String[] files = null;
+
+ long genA = - 1;
+
+ files = directory.ListAll();
+
+ if (files != null)
+ genA = Lucene.Net.Index.SegmentInfos.GetCurrentSegmentGeneration(files);
+
+ Lucene.Net.Index.SegmentInfos.Message("directory listing genA=" + genA);
+
+ // Method 2: open segments.gen and read its
+ // contents. Then we take the larger of the two
+ // gens. This way, if either approach is hitting
+ // a stale cache (NFS) we have a better chance of
+ // getting the right generation.
+ long genB = - 1;
+ for (int i = 0; i < Lucene.Net.Index.SegmentInfos.defaultGenFileRetryCount; i++)
+ {
+ IndexInput genInput = null;
+ try
+ {
+ genInput = directory.OpenInput(IndexFileNames.SEGMENTS_GEN);
+ }
+ catch (System.IO.FileNotFoundException e)
+ {
+ Lucene.Net.Index.SegmentInfos.Message("segments.gen open: FileNotFoundException " + e);
+ break;
+ }
+ catch (System.IO.IOException e)
+ {
+ Lucene.Net.Index.SegmentInfos.Message("segments.gen open: IOException " + e);
+ }
+
+ if (genInput != null)
+ {
+ try
+ {
+ int version = genInput.ReadInt();
+ if (version == Lucene.Net.Index.SegmentInfos.FORMAT_LOCKLESS)
+ {
+ long gen0 = genInput.ReadLong();
+ long gen1 = genInput.ReadLong();
+ Lucene.Net.Index.SegmentInfos.Message("fallback check: " + gen0 + "; " + gen1);
+ if (gen0 == gen1)
+ {
+ // The file is consistent.
+ genB = gen0;
+ break;
+ }
+ }
+ }
+ catch (System.IO.IOException)
+ {
+ // will retry
+ }
+ finally
+ {
+ genInput.Close();
+ }
+ }
+
+ System.Threading.Thread.Sleep(new TimeSpan((System.Int64) 10000 * Lucene.Net.Index.SegmentInfos.defaultGenFileRetryPauseMsec));
+
+
+ }
+
+ Lucene.Net.Index.SegmentInfos.Message(IndexFileNames.SEGMENTS_GEN + " check: genB=" + genB);
+
+ // Pick the larger of the two gen's:
+ if (genA > genB)
+ gen = genA;
+ else
+ gen = genB;
+
+ if (gen == - 1)
+ {
+ throw new System.IO.FileNotFoundException("no segments* file found in " + directory + ": files:" + string.Join(" ", files));
+ }
+ }
+
+ // Third method (fallback if first & second methods
+ // are not reliable): since both directory cache and
+ // file contents cache seem to be stale, just
+ // advance the generation.
+ if (1 == method || (0 == method && lastGen == gen && retry))
+ {
+
+ method = 1;
+
+ if (genLookaheadCount < Lucene.Net.Index.SegmentInfos.defaultGenLookaheadCount)
+ {
+ gen++;
+ genLookaheadCount++;
+ Lucene.Net.Index.SegmentInfos.Message("look ahead increment gen to " + gen);
+ }
+ }
+
+ if (lastGen == gen)
+ {
+
+ // This means we're about to try the same
+ // segments_N we tried last time. This is allowed
+ // exactly once, because the writer could have been in
+ // the process of writing segments_N last time.
+
+ if (retry)
+ {
+ // OK, we've tried the same segments_N file
+ // twice in a row, so this must be a real
+ // error. We throw the original exception we
+ // got.
+ throw exc;
+ }
+
+ retry = true;
+ }
+ else if (0 == method)
+ {
+ // Segment file has advanced since our last loop, so
+ // reset retry:
+ retry = false;
+ }
+
+ lastGen = gen;
+
+ segmentFileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen);
+
+ try
+ {
+ System.Object v = DoBody(segmentFileName);
+ Lucene.Net.Index.SegmentInfos.Message("success on " + segmentFileName);
+
+ return v;
+ }
+ catch (System.IO.IOException err)
+ {
+
+ // Save the original root cause:
+ if (exc == null)
+ {
+ exc = err;
+ }
+
+ Lucene.Net.Index.SegmentInfos.Message("primary Exception on '" + segmentFileName + "': " + err + "'; will retry: retry=" + retry + "; gen = " + gen);
+
+ if (!retry && gen > 1)
+ {
+
+ // This is our first time trying this segments
+ // file (because retry is false), and, there is
+ // possibly a segments_(N-1) (because gen > 1).
+ // So, check if the segments_(N-1) exists and
+ // try it if so:
+ System.String prevSegmentFileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen - 1);
+
+ bool prevExists;
+ prevExists = directory.FileExists(prevSegmentFileName);
+
+ if (prevExists)
+ {
+ Lucene.Net.Index.SegmentInfos.Message("fallback to prior segment file '" + prevSegmentFileName + "'");
+ try
+ {
+ System.Object v = DoBody(prevSegmentFileName);
+ if (exc != null)
+ {
+ Lucene.Net.Index.SegmentInfos.Message("success on fallback " + prevSegmentFileName);
+ }
+ return v;
+ }
+ catch (System.IO.IOException err2)
+ {
+ Lucene.Net.Index.SegmentInfos.Message("secondary Exception on '" + prevSegmentFileName + "': " + err2 + "'; will retry");
+ }
+ }
+ }
+ }
+ }
+ }
+
+ /// <summary> Subclass must implement this. The assumption is an
+ /// IOException will be thrown if something goes wrong
+ /// during the processing that could have been caused by
+ /// a writer committing.
+ /// </summary>
+ public /*internal*/ abstract System.Object DoBody(System.String segmentFileName);
+ }
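+ // Illustrative usage sketch (hypothetical subclass; it only assumes the
+ // members declared above):
+ //
+ //   private sealed class ReadCurrentInfos : SegmentInfos.FindSegmentsFile
+ //   {
+ //       private readonly Directory dir;
+ //       internal ReadCurrentInfos(Directory dir) : base(dir) { this.dir = dir; }
+ //       public override object DoBody(string segmentFileName)
+ //       {
+ //           var infos = new SegmentInfos();
+ //           infos.Read(dir, segmentFileName);   // may throw IOException; Run() retries
+ //           return infos;
+ //       }
+ //   }
+ //
+ //   // var current = (SegmentInfos) new ReadCurrentInfos(directory).Run();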
+
+ /// <summary> Returns a new SegmentInfos containing the SegmentInfo
+ /// instances in the specified range first (inclusive) to
+ /// last (exclusive), so total number of segments returned
+ /// is last-first.
+ /// </summary>
+ public SegmentInfos Range(int first, int last)
+ {
+ SegmentInfos infos = new SegmentInfos();
+ infos.AddRange(this.GetRange(first, last - first));
+ return infos;
+ }
+
+ // Carry over generation numbers from another SegmentInfos
+ internal void UpdateGeneration(SegmentInfos other)
+ {
+ lastGeneration = other.lastGeneration;
+ generation = other.generation;
+ version = other.version;
+ }
+
+ internal void RollbackCommit(Directory dir)
+ {
+ if (pendingSegnOutput != null)
+ {
+ try
+ {
+ pendingSegnOutput.Close();
+ }
+ catch (System.Exception)
+ {
+ // Suppress so we keep throwing the original exception
+ // in our caller
+ }
+
+ // Must carefully compute fileName from "generation"
+ // since lastGeneration isn't incremented:
+ try
+ {
+ System.String segmentFileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", generation);
+ dir.DeleteFile(segmentFileName);
+ }
+ catch (System.Exception)
+ {
+ // Suppress so we keep throwing the original exception
+ // in our caller
+ }
+ pendingSegnOutput = null;
+ }
+ }
+
+ /// <summary>Call this to start a commit. This writes the new
+ /// segments file, but writes an invalid checksum at the
+ /// end, so that it is not visible to readers. Once this
+ /// is called you must call <see cref="FinishCommit" /> to complete
+ /// the commit or <see cref="RollbackCommit" /> to abort it.
+ /// </summary>
+ internal void PrepareCommit(Directory dir)
+ {
+ if (pendingSegnOutput != null)
+ throw new System.SystemException("prepareCommit was already called");
+ Write(dir);
+ }
+
+ /// <summary>Returns all file names referenced by SegmentInfo
+ /// instances matching the provided Directory (ie files
+ /// associated with any "external" segments are skipped).
+ /// The returned collection is recomputed on each
+ /// invocation.
+ /// </summary>
+ public System.Collections.Generic.ICollection<string> Files(Directory dir, bool includeSegmentsFile)
+ {
+ System.Collections.Generic.HashSet<string> files = new System.Collections.Generic.HashSet<string>();
+ if (includeSegmentsFile)
+ {
+ files.Add(GetCurrentSegmentFileName());
+ }
+ int size = Count;
+ for (int i = 0; i < size; i++)
+ {
+ SegmentInfo info = Info(i);
+ if (info.dir == dir)
+ {
+ files.UnionWith(Info(i).Files());
+ }
+ }
+ return files;
+ }
+
+ internal void FinishCommit(Directory dir)
+ {
+ if (pendingSegnOutput == null)
+ throw new System.SystemException("prepareCommit was not called");
+ bool success = false;
+ try
+ {
+ pendingSegnOutput.FinishCommit();
+ pendingSegnOutput.Close();
+ pendingSegnOutput = null;
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ RollbackCommit(dir);
+ }
+
+ // NOTE: if we crash here, we have left a segments_N
+ // file in the directory in a possibly corrupt state (if
+ // some bytes made it to stable storage and others
+ // didn't). But, the segments_N file includes checksum
+ // at the end, which should catch this case. So when a
+ // reader tries to read it, it will throw a
+ // CorruptIndexException, which should cause the retry
+ // logic in SegmentInfos to kick in and load the last
+ // good (previous) segments_N-1 file.
+
+ System.String fileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", generation);
+ success = false;
+ try
+ {
+ dir.Sync(fileName);
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ try
+ {
+ dir.DeleteFile(fileName);
+ }
+ catch (System.Exception)
+ {
+ // Suppress so we keep throwing the original exception
+ }
+ }
+ }
+
+ lastGeneration = generation;
+
+ try
+ {
+ IndexOutput genOutput = dir.CreateOutput(IndexFileNames.SEGMENTS_GEN);
+ try
+ {
+ genOutput.WriteInt(FORMAT_LOCKLESS);
+ genOutput.WriteLong(generation);
+ genOutput.WriteLong(generation);
+ }
+ finally
+ {
+ genOutput.Close();
+ }
+ }
+ catch (System.Exception)
+ {
+ // It's OK if we fail to write this file since it's
+ // used only as one of the retry fallbacks.
+ }
+ }
+
+ /// <summary>Writes &amp; syncs to the Directory dir, taking care to
+ /// remove the segments file on exception
+ /// </summary>
+ public /*internal*/ void Commit(Directory dir)
+ {
+ PrepareCommit(dir);
+ FinishCommit(dir);
+ }
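+ // Illustrative two-phase sketch (hypothetical caller code, same assembly since
+ // PrepareCommit/FinishCommit/RollbackCommit are internal):
+ //
+ //   infos.PrepareCommit(dir);        // segments_N written, checksum still invalid
+ //   try
+ //   {
+ //       // ... work that must succeed before the commit becomes visible ...
+ //       infos.FinishCommit(dir);     // completes and syncs segments_N
+ //   }
+ //   catch (System.IO.IOException)
+ //   {
+ //       infos.RollbackCommit(dir);   // removes the partially written file
+ //       throw;
+ //   }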
+
+ public System.String SegString(Directory directory)
+ {
+ lock (this)
+ {
+ var buffer = new System.Text.StringBuilder();
+ int count = Count;
+ for (int i = 0; i < count; i++)
+ {
+ if (i > 0)
+ {
+ buffer.Append(' ');
+ }
+ SegmentInfo info = Info(i);
+ buffer.Append(info.SegString(directory));
+ if (info.dir != directory)
+ buffer.Append("**");
+ }
+ return buffer.ToString();
+ }
+ }
+
+ public IDictionary<string, string> UserData
+ {
+ get { return userData; }
+ internal set {
+ userData = value ?? new HashMap<string, string>();
+ }
+ }
+
+ /// <summary>Replaces all segments in this instance, but keeps
+ /// generation, version, counter so that future commits
+ /// remain write once.
+ /// </summary>
+ internal void Replace(SegmentInfos other)
+ {
+ Clear();
+ AddRange(other);
+ lastGeneration = other.lastGeneration;
+ }
+
+ // Used only for testing
+ public bool HasExternalSegments(Directory dir)
+ {
+ int numSegments = Count;
+ for (int i = 0; i < numSegments; i++)
+ if (Info(i).dir != dir)
+ return true;
+ return false;
+ }
+
+ #region Lucene.NET (Equals & GetHashCode )
+ /// <summary>
+ /// Simple brute force implementation.
+ /// If size is equal, compare items one by one.
+ /// </summary>
+ /// <param name="obj">SegmentInfos object to check equality for</param>
+ /// <returns>true if lists are equal, false otherwise</returns>
+ public override bool Equals(object obj)
+ {
+ if (obj == null) return false;
+
+ var objToCompare = obj as SegmentInfos;
+ if (objToCompare == null) return false;
+
+ if (this.Count != objToCompare.Count) return false;
+
+ for (int idx = 0; idx < this.Count; idx++)
+ {
+ if (!this[idx].Equals(objToCompare[idx])) return false;
+ }
+
+ return true;
+ }
+
+ /// <summary>
+ /// Calculate hash code of SegmentInfos
+ /// </summary>
+ /// <returns>hash code as in java version of ArrayList</returns>
+ public override int GetHashCode()
+ {
+ int h = 1;
+ for (int i = 0; i < this.Count; i++)
+ {
+ SegmentInfo si = (this[i] as SegmentInfo);
+ h = 31 * h + (si == null ? 0 : si.GetHashCode());
+ }
+
+ return h;
+ }
+ #endregion
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/SegmentMergeInfo.cs b/src/core/Index/SegmentMergeInfo.cs
new file mode 100644
index 0000000..bad0aad
--- /dev/null
+++ b/src/core/Index/SegmentMergeInfo.cs
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ sealed class SegmentMergeInfo : IDisposable
+ {
+ internal Term term;
+ internal int base_Renamed;
+ internal int ord; // the position of the segment in a MultiReader
+ internal TermEnum termEnum;
+ internal IndexReader reader;
+ internal int delCount;
+ private TermPositions postings; // use getPositions()
+ private int[] docMap; // use getDocMap()
+
+ private bool isDisposed;
+
+ internal SegmentMergeInfo(int b, TermEnum te, IndexReader r)
+ {
+ base_Renamed = b;
+ reader = r;
+ termEnum = te;
+ term = te.Term;
+ }
+
+ // maps around deleted docs
+ internal int[] GetDocMap()
+ {
+ if (docMap == null)
+ {
+ delCount = 0;
+ // build array which maps document numbers around deletions
+ if (reader.HasDeletions)
+ {
+ int maxDoc = reader.MaxDoc;
+ docMap = new int[maxDoc];
+ int j = 0;
+ for (int i = 0; i < maxDoc; i++)
+ {
+ if (reader.IsDeleted(i))
+ {
+ delCount++;
+ docMap[i] = - 1;
+ }
+ else
+ docMap[i] = j++;
+ }
+ }
+ }
+ return docMap;
+ }
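+ // Worked example: with maxDoc == 5 and only document 2 deleted, the loop
+ // above yields docMap == {0, 1, -1, 2, 3} and delCount == 1, i.e. surviving
+ // documents are renumbered densely and deleted slots map to -1.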
+
+ internal TermPositions GetPositions()
+ {
+ if (postings == null)
+ {
+ postings = reader.TermPositions();
+ }
+ return postings;
+ }
+
+ internal bool Next()
+ {
+ if (termEnum.Next())
+ {
+ term = termEnum.Term;
+ return true;
+ }
+ else
+ {
+ term = null;
+ return false;
+ }
+ }
+
+ public void Dispose()
+ {
+ if (isDisposed) return;
+
+ // Move to protected method if class becomes unsealed
+ termEnum.Close();
+ if (postings != null)
+ {
+ postings.Close();
+ }
+
+ isDisposed = true;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/SegmentMergeQueue.cs b/src/core/Index/SegmentMergeQueue.cs
new file mode 100644
index 0000000..1b48584
--- /dev/null
+++ b/src/core/Index/SegmentMergeQueue.cs
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Index
+{
+
+ sealed class SegmentMergeQueue : PriorityQueue<SegmentMergeInfo>, IDisposable
+ {
+ internal SegmentMergeQueue(int size)
+ {
+ Initialize(size);
+ }
+
+ public override bool LessThan(SegmentMergeInfo stiA, SegmentMergeInfo stiB)
+ {
+ int comparison = stiA.term.CompareTo(stiB.term);
+ if (comparison == 0)
+ return stiA.base_Renamed < stiB.base_Renamed;
+ else
+ return comparison < 0;
+ }
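+ // Example: entries are ordered by term first, so the queue always pops the
+ // smallest term across all segments; for equal terms the reader with the
+ // lower document base (base_Renamed) comes out first, keeping doc order stable.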
+
+ public void Dispose()
+ {
+ // Move to protected method if class becomes unsealed
+ while (Top() != null)
+ Pop().Dispose();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/SegmentMerger.cs b/src/core/Index/SegmentMerger.cs
new file mode 100644
index 0000000..0ab159d
--- /dev/null
+++ b/src/core/Index/SegmentMerger.cs
@@ -0,0 +1,934 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using Document = Lucene.Net.Documents.Document;
+using FieldSelector = Lucene.Net.Documents.FieldSelector;
+using FieldSelectorResult = Lucene.Net.Documents.FieldSelectorResult;
+using FieldOption = Lucene.Net.Index.IndexReader.FieldOption;
+using MergeAbortedException = Lucene.Net.Index.MergePolicy.MergeAbortedException;
+using Directory = Lucene.Net.Store.Directory;
+using IndexInput = Lucene.Net.Store.IndexInput;
+using IndexOutput = Lucene.Net.Store.IndexOutput;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> The SegmentMerger class combines two or more Segments, represented by an IndexReader (<see cref="Add" />),
+ /// into a single Segment. After adding the appropriate readers, call the merge method to combine the
+ /// segments.
+ /// <p/>
+ /// If the compoundFile flag is set, then the segments will be merged into a compound file.
+ ///
+ ///
+ /// </summary>
+ /// <seealso cref="Merge()">
+ /// </seealso>
+ /// <seealso cref="Add">
+ /// </seealso>
+ public sealed class SegmentMerger
+ {
+ private class AnonymousClassCheckAbort:CheckAbort
+ {
+ private void InitBlock(SegmentMerger enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private SegmentMerger enclosingInstance;
+ public SegmentMerger Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ internal AnonymousClassCheckAbort(SegmentMerger enclosingInstance, Lucene.Net.Index.MergePolicy.OneMerge Param1, Lucene.Net.Store.Directory Param2):base(Param1, Param2)
+ {
+ InitBlock(enclosingInstance);
+ }
+ public override void Work(double units)
+ {
+ // do nothing
+ }
+ }
+ private class AnonymousClassCheckAbort1:CheckAbort
+ {
+ private void InitBlock(SegmentMerger enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private SegmentMerger enclosingInstance;
+ public SegmentMerger Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ internal AnonymousClassCheckAbort1(SegmentMerger enclosingInstance, Lucene.Net.Index.MergePolicy.OneMerge Param1, Lucene.Net.Store.Directory Param2):base(Param1, Param2)
+ {
+ InitBlock(enclosingInstance);
+ }
+ public override void Work(double units)
+ {
+ // do nothing
+ }
+ }
+
+ private void InitBlock()
+ {
+ termIndexInterval = IndexWriter.DEFAULT_TERM_INDEX_INTERVAL;
+ }
+
+ /// <summary>norms header placeholder </summary>
+ internal static readonly byte[] NORMS_HEADER = new byte[]{(byte) 'N', (byte) 'R', (byte) 'M', unchecked((byte) - 1)};
+
+ private Directory directory;
+ private System.String segment;
+ private int termIndexInterval;
+
+ private IList<IndexReader> readers = new List<IndexReader>();
+ private FieldInfos fieldInfos;
+
+ private int mergedDocs;
+
+ private CheckAbort checkAbort;
+
+ // Whether we should merge doc stores (stored fields and
+ // vectors files). When all segments we are merging
+ // already share the same doc store files, we don't need
+ // to merge the doc stores.
+ private bool mergeDocStores;
+
+ /// <summary>Maximum number of contiguous documents to bulk-copy
+ /// when merging stored fields
+ /// </summary>
+ private const int MAX_RAW_MERGE_DOCS = 4192;
+
+ /// <summary>This ctor is used only by test code.
+ ///
+ /// </summary>
+ /// <param name="dir">The Directory to merge the other segments into
+ /// </param>
+ /// <param name="name">The name of the new segment
+ /// </param>
+ public /*internal*/ SegmentMerger(Directory dir, System.String name)
+ {
+ InitBlock();
+ directory = dir;
+ segment = name;
+ checkAbort = new AnonymousClassCheckAbort(this, null, null);
+ }
+
+ internal SegmentMerger(IndexWriter writer, System.String name, MergePolicy.OneMerge merge)
+ {
+ InitBlock();
+ directory = writer.Directory;
+ segment = name;
+ if (merge != null)
+ {
+ checkAbort = new CheckAbort(merge, directory);
+ }
+ else
+ {
+ checkAbort = new AnonymousClassCheckAbort1(this, null, null);
+ }
+ termIndexInterval = writer.TermIndexInterval;
+ }
+
+ internal bool HasProx()
+ {
+ return fieldInfos.HasProx();
+ }
+
+ /// <summary> Add an IndexReader to the collection of readers that are to be merged</summary>
+ /// <param name="reader">
+ /// </param>
+ public /*internal*/ void Add(IndexReader reader)
+ {
+ readers.Add(reader);
+ }
+
+ /// <summary> </summary>
+ /// <param name="i">The index of the reader to return
+ /// </param>
+ /// <returns> The ith reader to be merged
+ /// </returns>
+ internal IndexReader SegmentReader(int i)
+ {
+ return readers[i];
+ }
+
+ /// <summary> Merges the readers specified by the <see cref="Add" /> method into the directory passed to the constructor</summary>
+ /// <returns> The number of documents that were merged
+ /// </returns>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
+ public /*internal*/ int Merge()
+ {
+ return Merge(true);
+ }
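+ // Illustrative usage sketch (hypothetical names, same-assembly caller since
+ // some members are internal); mirrors the class summary above:
+ //
+ //   var merger = new SegmentMerger(destDir, "mergedSegment");
+ //   merger.Add(readerA);
+ //   merger.Add(readerB);
+ //   int mergedDocCount = merger.Merge();   // writes the merged segment files
+ //   merger.CloseReaders();                 // close the source readers
+ //   // optionally pack into a compound file:
+ //   // merger.CreateCompoundFile("mergedSegment.cfs");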
+
+ /// <summary> Merges the readers specified by the <see cref="Add" /> method
+ /// into the directory passed to the constructor.
+ /// </summary>
+ /// <param name="mergeDocStores">if false, we will not merge the
+ /// stored fields nor vectors files
+ /// </param>
+ /// <returns> The number of documents that were merged
+ /// </returns>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
+ internal int Merge(bool mergeDocStores)
+ {
+
+ this.mergeDocStores = mergeDocStores;
+
+ // NOTE: it's important to add calls to
+ // checkAbort.work(...) if you make any changes to this
+ // method that will spend a lot of time. The frequency
+ // of this check impacts how long
+ // IndexWriter.close(false) takes to actually stop the
+ // threads.
+
+ mergedDocs = MergeFields();
+ MergeTerms();
+ MergeNorms();
+
+ if (mergeDocStores && fieldInfos.HasVectors())
+ MergeVectors();
+
+ return mergedDocs;
+ }
+
+ /// <summary> Close all IndexReaders that have been added.
+ /// Should not be called before merge().
+ /// </summary>
+ /// <throws> IOException </throws>
+ internal void CloseReaders()
+ {
+ foreach(IndexReader reader in readers)
+ {
+ reader.Dispose();
+ }
+ }
+
+ internal ICollection<string> GetMergedFiles()
+ {
+ ISet<string> fileSet = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet<string>();
+
+ // Basic files
+ for (int i = 0; i < IndexFileNames.COMPOUND_EXTENSIONS.Length; i++)
+ {
+ System.String ext = IndexFileNames.COMPOUND_EXTENSIONS[i];
+
+ if (ext.Equals(IndexFileNames.PROX_EXTENSION) && !HasProx())
+ continue;
+
+ if (mergeDocStores || (!ext.Equals(IndexFileNames.FIELDS_EXTENSION) && !ext.Equals(IndexFileNames.FIELDS_INDEX_EXTENSION)))
+ fileSet.Add(segment + "." + ext);
+ }
+
+ // Fieldable norm files
+ for (int i = 0; i < fieldInfos.Size(); i++)
+ {
+ FieldInfo fi = fieldInfos.FieldInfo(i);
+ if (fi.isIndexed && !fi.omitNorms)
+ {
+ fileSet.Add(segment + "." + IndexFileNames.NORMS_EXTENSION);
+ break;
+ }
+ }
+
+ // Vector files
+ if (fieldInfos.HasVectors() && mergeDocStores)
+ {
+ for (int i = 0; i < IndexFileNames.VECTOR_EXTENSIONS.Length; i++)
+ {
+ fileSet.Add(segment + "." + IndexFileNames.VECTOR_EXTENSIONS[i]);
+ }
+ }
+
+ return fileSet;
+ }
+
+ public /*internal*/ ICollection<string> CreateCompoundFile(System.String fileName)
+ {
+ ICollection<string> files = GetMergedFiles();
+ CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, fileName, checkAbort);
+
+ // Now merge all added files
+ foreach(var file in files)
+ {
+ cfsWriter.AddFile(file);
+ }
+
+ // Perform the merge
+ cfsWriter.Close();
+
+ return files;
+ }
+
+ private void AddIndexed(IndexReader reader, FieldInfos fInfos, ICollection<string> names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool storePayloads, bool omitTFAndPositions)
+ {
+ foreach (var field in names)
+ {
+ fInfos.Add(field, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector,
+ !reader.HasNorms(field), storePayloads, omitTFAndPositions);
+ }
+ }
+
+ private SegmentReader[] matchingSegmentReaders;
+ private int[] rawDocLengths;
+ private int[] rawDocLengths2;
+
+ private void SetMatchingSegmentReaders()
+ {
+ // If the i'th reader is a SegmentReader and has
+ // identical fieldName -> number mapping, then this
+ // array will be non-null at position i:
+ int numReaders = readers.Count;
+ matchingSegmentReaders = new SegmentReader[numReaders];
+
+ // If this reader is a SegmentReader, and all of its
+ // field name -> number mappings match the "merged"
+ // FieldInfos, then we can do a bulk copy of the
+ // stored fields:
+ for (int i = 0; i < numReaders; i++)
+ {
+ IndexReader reader = readers[i];
+ if (reader is SegmentReader)
+ {
+ SegmentReader segmentReader = (SegmentReader) reader;
+ bool same = true;
+ FieldInfos segmentFieldInfos = segmentReader.FieldInfos();
+ int numFieldInfos = segmentFieldInfos.Size();
+ for (int j = 0; same && j < numFieldInfos; j++)
+ {
+ same = fieldInfos.FieldName(j).Equals(segmentFieldInfos.FieldName(j));
+ }
+ if (same)
+ {
+ matchingSegmentReaders[i] = segmentReader;
+ }
+ }
+ }
+
+ // Used for bulk-reading raw bytes for stored fields
+ rawDocLengths = new int[MAX_RAW_MERGE_DOCS];
+ rawDocLengths2 = new int[MAX_RAW_MERGE_DOCS];
+ }
+
+        /// <summary> Merge the FieldInfos of all readers and, when merging doc stores, their stored field values.</summary>
+ /// <returns> The number of documents in all of the readers
+ /// </returns>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
+ private int MergeFields()
+ {
+
+ if (!mergeDocStores)
+ {
+                // When we are not merging the doc stores, the field
+                // name -> number mappings are the same.  So, we start
+ // with the fieldInfos of the last segment in this
+ // case, to keep that numbering.
+ SegmentReader sr = (SegmentReader) readers[readers.Count - 1];
+ fieldInfos = (FieldInfos) sr.core.fieldInfos.Clone();
+ }
+ else
+ {
+ fieldInfos = new FieldInfos(); // merge field names
+ }
+
+ foreach(IndexReader reader in readers)
+ {
+ if (reader is SegmentReader)
+ {
+ SegmentReader segmentReader = (SegmentReader) reader;
+ FieldInfos readerFieldInfos = segmentReader.FieldInfos();
+ int numReaderFieldInfos = readerFieldInfos.Size();
+ for (int j = 0; j < numReaderFieldInfos; j++)
+ {
+ FieldInfo fi = readerFieldInfos.FieldInfo(j);
+ fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads, fi.omitTermFreqAndPositions);
+ }
+ }
+ else
+ {
+ AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false);
+ AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false);
+ AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false);
+ AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR), true, false, false, false, false);
+ AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, true);
+ AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, false);
+ AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.INDEXED), false, false, false, false, false);
+ fieldInfos.Add(reader.GetFieldNames(FieldOption.UNINDEXED), false);
+ }
+ }
+ fieldInfos.Write(directory, segment + ".fnm");
+
+ int docCount = 0;
+
+ SetMatchingSegmentReaders();
+
+ if (mergeDocStores)
+ {
+ // merge field values
+ FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
+
+ try
+ {
+ int idx = 0;
+ foreach(IndexReader reader in readers)
+ {
+ SegmentReader matchingSegmentReader = matchingSegmentReaders[idx++];
+ FieldsReader matchingFieldsReader = null;
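+                        // Raw (bulk) copying of stored fields is only possible when the
+                        // matching reader's FieldsReader format supports reading raw docs.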
+ if (matchingSegmentReader != null)
+ {
+ FieldsReader fieldsReader = matchingSegmentReader.GetFieldsReader();
+ if (fieldsReader != null && fieldsReader.CanReadRawDocs())
+ {
+ matchingFieldsReader = fieldsReader;
+ }
+ }
+ if (reader.HasDeletions)
+ {
+ docCount += CopyFieldsWithDeletions(fieldsWriter, reader, matchingFieldsReader);
+ }
+ else
+ {
+ docCount += CopyFieldsNoDeletions(fieldsWriter, reader, matchingFieldsReader);
+ }
+ }
+ }
+ finally
+ {
+ fieldsWriter.Dispose();
+ }
+
+ System.String fileName = segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;
+ long fdxFileLength = directory.FileLength(fileName);
+
+ if (4 + ((long) docCount) * 8 != fdxFileLength)
+ // This is most likely a bug in Sun JRE 1.6.0_04/_05;
+ // we detect that the bug has struck, here, and
+ // throw an exception to prevent the corruption from
+ // entering the index. See LUCENE-1282 for
+ // details.
+ throw new System.SystemException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + " file=" + fileName + " file exists?=" + directory.FileExists(fileName) + "; now aborting this merge to prevent index corruption");
+ }
+ // If we are skipping the doc stores, that means there
+ // are no deletions in any of these segments, so we
+ // just sum numDocs() of each segment to get total docCount
+ else
+ {
+ foreach(IndexReader reader in readers)
+ {
+ docCount += reader.NumDocs();
+ }
+ }
+
+ return docCount;
+ }
+
+ private int CopyFieldsWithDeletions(FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader)
+ {
+ int docCount = 0;
+ int maxDoc = reader.MaxDoc;
+ if (matchingFieldsReader != null)
+ {
+ // We can bulk-copy because the fieldInfos are "congruent"
+ for (int j = 0; j < maxDoc; )
+ {
+ if (reader.IsDeleted(j))
+ {
+ // skip deleted docs
+ ++j;
+ continue;
+ }
+ // We can optimize this case (doing a bulk byte copy) since the field
+ // numbers are identical
+ int start = j, numDocs = 0;
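+                    // Walk j forward over a contiguous run of live (non-deleted) docs,
+                    // up to MAX_RAW_MERGE_DOCS, counting them in numDocs.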
+ do
+ {
+ j++;
+ numDocs++;
+ if (j >= maxDoc)
+ break;
+ if (reader.IsDeleted(j))
+ {
+ j++;
+ break;
+ }
+ }
+ while (numDocs < MAX_RAW_MERGE_DOCS);
+
+ IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
+ fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs);
+ docCount += numDocs;
+ checkAbort.Work(300 * numDocs);
+ }
+ }
+ else
+ {
+ for (int j = 0; j < maxDoc; j++)
+ {
+ if (reader.IsDeleted(j))
+ {
+ // skip deleted docs
+ continue;
+ }
+                    // NOTE: it's very important to first assign to doc and then pass it to
+                    // fieldsWriter.AddDocument; see LUCENE-1282
+ Document doc = reader.Document(j);
+ fieldsWriter.AddDocument(doc);
+ docCount++;
+ checkAbort.Work(300);
+ }
+ }
+ return docCount;
+ }
+
+ private int CopyFieldsNoDeletions(FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader)
+ {
+ int maxDoc = reader.MaxDoc;
+ int docCount = 0;
+ if (matchingFieldsReader != null)
+ {
+ // We can bulk-copy because the fieldInfos are "congruent"
+ while (docCount < maxDoc)
+ {
+ int len = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
+ IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, docCount, len);
+ fieldsWriter.AddRawDocuments(stream, rawDocLengths, len);
+ docCount += len;
+ checkAbort.Work(300 * len);
+ }
+ }
+ else
+ {
+ for (; docCount < maxDoc; docCount++)
+ {
+                    // NOTE: it's very important to first assign to doc and then pass it to
+                    // fieldsWriter.AddDocument; see LUCENE-1282
+ Document doc = reader.Document(docCount);
+ fieldsWriter.AddDocument(doc);
+ checkAbort.Work(300);
+ }
+ }
+ return docCount;
+ }
+
+ /// <summary> Merge the TermVectors from each of the segments into the new one.</summary>
+ /// <throws> IOException </throws>
+ private void MergeVectors()
+ {
+ TermVectorsWriter termVectorsWriter = new TermVectorsWriter(directory, segment, fieldInfos);
+
+ try
+ {
+ int idx = 0;
+ foreach(IndexReader reader in readers)
+ {
+ SegmentReader matchingSegmentReader = matchingSegmentReaders[idx++];
+ TermVectorsReader matchingVectorsReader = null;
+ if (matchingSegmentReader != null)
+ {
+ TermVectorsReader vectorsReader = matchingSegmentReader.GetTermVectorsReaderOrig();
+
+ // If the TV* files are an older format then they cannot read raw docs:
+ if (vectorsReader != null && vectorsReader.CanReadRawDocs())
+ {
+ matchingVectorsReader = vectorsReader;
+ }
+ }
+ if (reader.HasDeletions)
+ {
+ CopyVectorsWithDeletions(termVectorsWriter, matchingVectorsReader, reader);
+ }
+ else
+ {
+ CopyVectorsNoDeletions(termVectorsWriter, matchingVectorsReader, reader);
+ }
+ }
+ }
+ finally
+ {
+ termVectorsWriter.Dispose();
+ }
+
+ System.String fileName = segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION;
+ long tvxSize = directory.FileLength(fileName);
+
+ if (4 + ((long) mergedDocs) * 16 != tvxSize)
+ // This is most likely a bug in Sun JRE 1.6.0_04/_05;
+ // we detect that the bug has struck, here, and
+ // throw an exception to prevent the corruption from
+ // entering the index. See LUCENE-1282 for
+ // details.
+ throw new System.SystemException("mergeVectors produced an invalid result: mergedDocs is " + mergedDocs + " but tvx size is " + tvxSize + " file=" + fileName + " file exists?=" + directory.FileExists(fileName) + "; now aborting this merge to prevent index corruption");
+ }
+
+ private void CopyVectorsWithDeletions(TermVectorsWriter termVectorsWriter, TermVectorsReader matchingVectorsReader, IndexReader reader)
+ {
+ int maxDoc = reader.MaxDoc;
+ if (matchingVectorsReader != null)
+ {
+ // We can bulk-copy because the fieldInfos are "congruent"
+ for (int docNum = 0; docNum < maxDoc; )
+ {
+ if (reader.IsDeleted(docNum))
+ {
+ // skip deleted docs
+ ++docNum;
+ continue;
+ }
+ // We can optimize this case (doing a bulk byte copy) since the field
+ // numbers are identical
+ int start = docNum, numDocs = 0;
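+                    // Walk docNum forward over a contiguous run of live (non-deleted) docs,
+                    // up to MAX_RAW_MERGE_DOCS, counting them in numDocs.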
+ do
+ {
+ docNum++;
+ numDocs++;
+ if (docNum >= maxDoc)
+ break;
+ if (reader.IsDeleted(docNum))
+ {
+ docNum++;
+ break;
+ }
+ }
+ while (numDocs < MAX_RAW_MERGE_DOCS);
+
+ matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, start, numDocs);
+ termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs);
+ checkAbort.Work(300 * numDocs);
+ }
+ }
+ else
+ {
+ for (int docNum = 0; docNum < maxDoc; docNum++)
+ {
+ if (reader.IsDeleted(docNum))
+ {
+ // skip deleted docs
+ continue;
+ }
+
+ // NOTE: it's very important to first assign to vectors then pass it to
+ // termVectorsWriter.addAllDocVectors; see LUCENE-1282
+ ITermFreqVector[] vectors = reader.GetTermFreqVectors(docNum);
+ termVectorsWriter.AddAllDocVectors(vectors);
+ checkAbort.Work(300);
+ }
+ }
+ }
+
+ private void CopyVectorsNoDeletions(TermVectorsWriter termVectorsWriter, TermVectorsReader matchingVectorsReader, IndexReader reader)
+ {
+ int maxDoc = reader.MaxDoc;
+ if (matchingVectorsReader != null)
+ {
+ // We can bulk-copy because the fieldInfos are "congruent"
+ int docCount = 0;
+ while (docCount < maxDoc)
+ {
+ int len = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
+ matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, docCount, len);
+ termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, len);
+ docCount += len;
+ checkAbort.Work(300 * len);
+ }
+ }
+ else
+ {
+ for (int docNum = 0; docNum < maxDoc; docNum++)
+ {
+ // NOTE: it's very important to first assign to vectors then pass it to
+ // termVectorsWriter.addAllDocVectors; see LUCENE-1282
+ ITermFreqVector[] vectors = reader.GetTermFreqVectors(docNum);
+ termVectorsWriter.AddAllDocVectors(vectors);
+ checkAbort.Work(300);
+ }
+ }
+ }
+
+ private SegmentMergeQueue queue = null;
+
+ private void MergeTerms()
+ {
+
+ SegmentWriteState state = new SegmentWriteState(null, directory, segment, null, mergedDocs, 0, termIndexInterval);
+
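+            // The FormatPostings* consumer chain (fields -> terms -> docs -> positions)
+            // writes the merged postings for the new segment.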
+ FormatPostingsFieldsConsumer consumer = new FormatPostingsFieldsWriter(state, fieldInfos);
+
+ try
+ {
+ queue = new SegmentMergeQueue(readers.Count);
+
+ MergeTermInfos(consumer);
+ }
+ finally
+ {
+ consumer.Finish();
+ if (queue != null)
+ queue.Dispose();
+ }
+ }
+
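+        // Set per field in MergeTermInfos and consulted in AppendPostings to decide
+        // whether positions and payloads are written for the current field.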
+ internal bool omitTermFreqAndPositions;
+
+ private void MergeTermInfos(FormatPostingsFieldsConsumer consumer)
+ {
+ int base_Renamed = 0;
+ int readerCount = readers.Count;
+ for (int i = 0; i < readerCount; i++)
+ {
+ IndexReader reader = readers[i];
+ TermEnum termEnum = reader.Terms();
+ SegmentMergeInfo smi = new SegmentMergeInfo(base_Renamed, termEnum, reader);
+ int[] docMap = smi.GetDocMap();
+ if (docMap != null)
+ {
+ if (docMaps == null)
+ {
+ docMaps = new int[readerCount][];
+ delCounts = new int[readerCount];
+ }
+ docMaps[i] = docMap;
+ delCounts[i] = smi.reader.MaxDoc - smi.reader.NumDocs();
+ }
+
+ base_Renamed += reader.NumDocs();
+
+ System.Diagnostics.Debug.Assert(reader.NumDocs() == reader.MaxDoc - smi.delCount);
+
+ if (smi.Next())
+ queue.Add(smi);
+ // initialize queue
+ else
+ smi.Dispose();
+ }
+
+ SegmentMergeInfo[] match = new SegmentMergeInfo[readers.Count];
+
+ System.String currentField = null;
+ FormatPostingsTermsConsumer termsConsumer = null;
+
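+            // Repeatedly pop every SegmentMergeInfo positioned on the smallest term,
+            // append their merged postings, then advance each and put it back on the queue.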
+ while (queue.Size() > 0)
+ {
+ int matchSize = 0; // pop matching terms
+ match[matchSize++] = queue.Pop();
+ Term term = match[0].term;
+ SegmentMergeInfo top = queue.Top();
+
+ while (top != null && term.CompareTo(top.term) == 0)
+ {
+ match[matchSize++] = queue.Pop();
+ top = queue.Top();
+ }
+
+ if ((System.Object) currentField != (System.Object) term.Field)
+ {
+ currentField = term.Field;
+ if (termsConsumer != null)
+ termsConsumer.Finish();
+ FieldInfo fieldInfo = fieldInfos.FieldInfo(currentField);
+ termsConsumer = consumer.AddField(fieldInfo);
+ omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
+ }
+
+ int df = AppendPostings(termsConsumer, match, matchSize); // add new TermInfo
+
+ checkAbort.Work(df / 3.0);
+
+ while (matchSize > 0)
+ {
+ SegmentMergeInfo smi = match[--matchSize];
+ if (smi.Next())
+ queue.Add(smi);
+ // restore queue
+ else
+ smi.Dispose(); // done with a segment
+ }
+ }
+ }
+
+ private byte[] payloadBuffer;
+ private int[][] docMaps;
+ internal int[][] GetDocMaps()
+ {
+ return docMaps;
+ }
+ private int[] delCounts;
+ internal int[] GetDelCounts()
+ {
+ return delCounts;
+ }
+
+ /// <summary>Process postings from multiple segments all positioned on the
+        /// same term.  Writes the merged entries out to the freq and
+        /// prox output streams.
+ ///
+ /// </summary>
+ /// <param name="smis">array of segments
+ /// </param>
+ /// <param name="n">number of cells in the array actually occupied
+ /// </param>
+ /// <returns> number of documents across all segments where this term was found
+ /// </returns>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
+ private int AppendPostings(FormatPostingsTermsConsumer termsConsumer, SegmentMergeInfo[] smis, int n)
+ {
+
+ FormatPostingsDocsConsumer docConsumer = termsConsumer.AddTerm(smis[0].term.Text);
+ int df = 0;
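+            // Append the postings for this term from each segment, remapping doc ids
+            // around deletions and shifting them into the merged doc-id space.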
+ for (int i = 0; i < n; i++)
+ {
+ SegmentMergeInfo smi = smis[i];
+ TermPositions postings = smi.GetPositions();
+ System.Diagnostics.Debug.Assert(postings != null);
+ int base_Renamed = smi.base_Renamed;
+ int[] docMap = smi.GetDocMap();
+ postings.Seek(smi.termEnum);
+
+ while (postings.Next())
+ {
+ df++;
+ int doc = postings.Doc;
+ if (docMap != null)
+ doc = docMap[doc]; // map around deletions
+ doc += base_Renamed; // convert to merged space
+
+ int freq = postings.Freq;
+ FormatPostingsPositionsConsumer posConsumer = docConsumer.AddDoc(doc, freq);
+
+ if (!omitTermFreqAndPositions)
+ {
+ for (int j = 0; j < freq; j++)
+ {
+ int position = postings.NextPosition();
+ int payloadLength = postings.PayloadLength;
+ if (payloadLength > 0)
+ {
+ if (payloadBuffer == null || payloadBuffer.Length < payloadLength)
+ payloadBuffer = new byte[payloadLength];
+ postings.GetPayload(payloadBuffer, 0);
+ }
+ posConsumer.AddPosition(position, payloadBuffer, 0, payloadLength);
+ }
+ posConsumer.Finish();
+ }
+ }
+ }
+ docConsumer.Finish();
+
+ return df;
+ }
+
+ private void MergeNorms()
+ {
+ byte[] normBuffer = null;
+ IndexOutput output = null;
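+            // All norms go into a single .nrm file: for each indexed field with norms,
+            // one byte per (non-deleted) document, written field by field.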
+ try
+ {
+ int numFieldInfos = fieldInfos.Size();
+ for (int i = 0; i < numFieldInfos; i++)
+ {
+ FieldInfo fi = fieldInfos.FieldInfo(i);
+ if (fi.isIndexed && !fi.omitNorms)
+ {
+ if (output == null)
+ {
+ output = directory.CreateOutput(segment + "." + IndexFileNames.NORMS_EXTENSION);
+ output.WriteBytes(NORMS_HEADER, NORMS_HEADER.Length);
+ }
+ foreach(IndexReader reader in readers)
+ {
+ int maxDoc = reader.MaxDoc;
+ if (normBuffer == null || normBuffer.Length < maxDoc)
+ {
+ // the buffer is too small for the current segment
+ normBuffer = new byte[maxDoc];
+ }
+ reader.Norms(fi.name, normBuffer, 0);
+ if (!reader.HasDeletions)
+ {
+ //optimized case for segments without deleted docs
+ output.WriteBytes(normBuffer, maxDoc);
+ }
+ else
+ {
+ // this segment has deleted docs, so we have to
+ // check for every doc if it is deleted or not
+ for (int k = 0; k < maxDoc; k++)
+ {
+ if (!reader.IsDeleted(k))
+ {
+ output.WriteByte(normBuffer[k]);
+ }
+ }
+ }
+ checkAbort.Work(maxDoc);
+ }
+ }
+ }
+ }
+ finally
+ {
+ if (output != null)
+ {
+ output.Close();
+ }
+ }
+ }
+
+ internal class CheckAbort
+ {
+ private double workCount;
+ private MergePolicy.OneMerge merge;
+ private Directory dir;
+ public CheckAbort(MergePolicy.OneMerge merge, Directory dir)
+ {
+ this.merge = merge;
+ this.dir = dir;
+ }
+
+            /// <summary> Records that roughly units amount of work
+            /// has been done since this method was last called.
+ /// When adding time-consuming code into SegmentMerger,
+ /// you should test different values for units to ensure
+ /// that the time in between calls to merge.checkAborted
+ /// is up to ~ 1 second.
+ /// </summary>
+ public virtual void Work(double units)
+ {
+ workCount += units;
+ if (workCount >= 10000.0)
+ {
+ merge.CheckAborted(dir);
+ workCount = 0;
+ }
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/SegmentReader.cs b/src/core/Index/SegmentReader.cs
new file mode 100644
index 0000000..8cc5d3b
--- /dev/null
+++ b/src/core/Index/SegmentReader.cs
@@ -0,0 +1,1692 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Linq;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using Document = Lucene.Net.Documents.Document;
+using FieldSelector = Lucene.Net.Documents.FieldSelector;
+using BufferedIndexInput = Lucene.Net.Store.BufferedIndexInput;
+using Directory = Lucene.Net.Store.Directory;
+using IndexInput = Lucene.Net.Store.IndexInput;
+using IndexOutput = Lucene.Net.Store.IndexOutput;
+using BitVector = Lucene.Net.Util.BitVector;
+using DefaultSimilarity = Lucene.Net.Search.DefaultSimilarity;
+
+namespace Lucene.Net.Index
+{
+
+ /// <version> $Id
+ /// </version>
+ /// <summary> <p/><b>NOTE:</b> This API is new and still experimental
+ /// (subject to change suddenly in the next release)<p/>
+ /// </summary>
+ public class SegmentReader : IndexReader
+ {
+ public SegmentReader()
+ {
+ InitBlock();
+ }
+ private void InitBlock()
+ {
+ fieldsReaderLocal = new FieldsReaderLocal(this);
+ }
+ protected internal bool readOnly;
+
+ private SegmentInfo si;
+ private int readBufferSize;
+
+ internal CloseableThreadLocal<FieldsReader> fieldsReaderLocal;
+ internal CloseableThreadLocal<TermVectorsReader> termVectorsLocal = new CloseableThreadLocal<TermVectorsReader>();
+
+ internal BitVector deletedDocs = null;
+ internal Ref deletedDocsRef = null;
+ private bool deletedDocsDirty = false;
+ private bool normsDirty = false;
+ private int pendingDeleteCount;
+
+ private bool rollbackHasChanges = false;
+ private bool rollbackDeletedDocsDirty = false;
+ private bool rollbackNormsDirty = false;
+ private SegmentInfo rollbackSegmentInfo;
+ private int rollbackPendingDeleteCount;
+
+ // optionally used for the .nrm file shared by multiple norms
+ private IndexInput singleNormStream;
+ private Ref singleNormRef;
+
+ internal CoreReaders core;
+
+ // Holds core readers that are shared (unchanged) when
+ // SegmentReader is cloned or reopened
+ public /*internal*/ sealed class CoreReaders
+ {
+
+            // Counts how many other readers share the core objects
+            // (freqStream, proxStream, tis, etc.) of this reader;
+            // when coreRef drops to 0, these core objects may be
+            // closed.  A given instance of SegmentReader may be
+            // closed, even though it shares core objects with other
+            // SegmentReaders:
+ private readonly Ref ref_Renamed = new Ref();
+
+ internal System.String segment;
+ internal FieldInfos fieldInfos;
+ internal IndexInput freqStream;
+ internal IndexInput proxStream;
+ internal TermInfosReader tisNoIndex;
+
+ internal Directory dir;
+ internal Directory cfsDir;
+ internal int readBufferSize;
+ internal int termsIndexDivisor;
+
+ internal SegmentReader origInstance;
+
+ internal TermInfosReader tis;
+ internal FieldsReader fieldsReaderOrig;
+ internal TermVectorsReader termVectorsReaderOrig;
+ internal CompoundFileReader cfsReader;
+ internal CompoundFileReader storeCFSReader;
+
+ internal CoreReaders(SegmentReader origInstance, Directory dir, SegmentInfo si, int readBufferSize, int termsIndexDivisor)
+ {
+ segment = si.name;
+ this.readBufferSize = readBufferSize;
+ this.dir = dir;
+
+ bool success = false;
+
+ try
+ {
+ Directory dir0 = dir;
+ if (si.GetUseCompoundFile())
+ {
+ cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
+ dir0 = cfsReader;
+ }
+ cfsDir = dir0;
+
+ fieldInfos = new FieldInfos(cfsDir, segment + "." + IndexFileNames.FIELD_INFOS_EXTENSION);
+
+ this.termsIndexDivisor = termsIndexDivisor;
+ var reader = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize, termsIndexDivisor);
+ if (termsIndexDivisor == - 1)
+ {
+ tisNoIndex = reader;
+ }
+ else
+ {
+ tis = reader;
+ tisNoIndex = null;
+ }
+
+ // make sure that all index files have been read or are kept open
+ // so that if an index update removes them we'll still have them
+ freqStream = cfsDir.OpenInput(segment + "." + IndexFileNames.FREQ_EXTENSION, readBufferSize);
+
+ proxStream = fieldInfos.HasProx() ? cfsDir.OpenInput(segment + "." + IndexFileNames.PROX_EXTENSION, readBufferSize) : null;
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ DecRef();
+ }
+ }
+
+
+ // Must assign this at the end -- if we hit an
+ // exception above core, we don't want to attempt to
+ // purge the FieldCache (will hit NPE because core is
+ // not assigned yet).
+ this.origInstance = origInstance;
+ }
+
+ internal TermVectorsReader GetTermVectorsReaderOrig()
+ {
+ lock (this)
+ {
+ return termVectorsReaderOrig;
+ }
+ }
+
+ internal FieldsReader GetFieldsReaderOrig()
+ {
+ lock (this)
+ {
+ return fieldsReaderOrig;
+ }
+ }
+
+ internal void IncRef()
+ {
+ lock (this)
+ {
+ ref_Renamed.IncRef();
+ }
+ }
+
+ internal Directory GetCFSReader()
+ {
+ lock (this)
+ {
+ return cfsReader;
+ }
+ }
+
+ internal TermInfosReader GetTermsReader()
+ {
+ lock (this)
+ {
+ if (tis != null)
+ {
+ return tis;
+ }
+ else
+ {
+ return tisNoIndex;
+ }
+ }
+ }
+
+ internal bool TermsIndexIsLoaded()
+ {
+ lock (this)
+ {
+ return tis != null;
+ }
+ }
+
+ // NOTE: only called from IndexWriter when a near
+ // real-time reader is opened, or applyDeletes is run,
+ // sharing a segment that's still being merged. This
+ // method is not fully thread safe, and relies on the
+ // synchronization in IndexWriter
+ internal void LoadTermsIndex(SegmentInfo si, int termsIndexDivisor)
+ {
+ lock (this)
+ {
+ if (tis == null)
+ {
+ Directory dir0;
+ if (si.GetUseCompoundFile())
+ {
+                        // In some cases we were originally opened when CFS
+                        // was not used, but are then asked to open the terms
+                        // index after the segment has switched to CFS
+ if (cfsReader == null)
+ {
+ cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
+ }
+ dir0 = cfsReader;
+ }
+ else
+ {
+ dir0 = dir;
+ }
+
+ tis = new TermInfosReader(dir0, segment, fieldInfos, readBufferSize, termsIndexDivisor);
+ }
+ }
+ }
+
+ internal void DecRef()
+ {
+ lock (this)
+ {
+
+ if (ref_Renamed.DecRef() == 0)
+ {
+
+ // close everything, nothing is shared anymore with other readers
+ if (tis != null)
+ {
+ tis.Dispose();
+ // null so if an app hangs on to us we still free most ram
+ tis = null;
+ }
+
+ if (tisNoIndex != null)
+ {
+ tisNoIndex.Dispose();
+ }
+
+ if (freqStream != null)
+ {
+ freqStream.Close();
+ }
+
+ if (proxStream != null)
+ {
+ proxStream.Close();
+ }
+
+ if (termVectorsReaderOrig != null)
+ {
+ termVectorsReaderOrig.Dispose();
+ }
+
+ if (fieldsReaderOrig != null)
+ {
+ fieldsReaderOrig.Dispose();
+ }
+
+ if (cfsReader != null)
+ {
+ cfsReader.Close();
+ }
+
+ if (storeCFSReader != null)
+ {
+ storeCFSReader.Close();
+ }
+
+ // Force FieldCache to evict our entries at this point
+ if (origInstance != null)
+ {
+ Lucene.Net.Search.FieldCache_Fields.DEFAULT.Purge(origInstance);
+ }
+ }
+ }
+ }
+
+ internal void OpenDocStores(SegmentInfo si)
+ {
+ lock (this)
+ {
+
+ System.Diagnostics.Debug.Assert(si.name.Equals(segment));
+
+ if (fieldsReaderOrig == null)
+ {
+ Directory storeDir;
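+                    // Stored fields and vectors may live in a shared doc-store segment
+                    // (possibly a compound doc-store file) or in this segment's own files.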
+ if (si.DocStoreOffset != - 1)
+ {
+ if (si.DocStoreIsCompoundFile)
+ {
+ System.Diagnostics.Debug.Assert(storeCFSReader == null);
+ storeCFSReader = new CompoundFileReader(dir, si.DocStoreSegment + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize);
+ storeDir = storeCFSReader;
+ System.Diagnostics.Debug.Assert(storeDir != null);
+ }
+ else
+ {
+ storeDir = dir;
+ System.Diagnostics.Debug.Assert(storeDir != null);
+ }
+ }
+ else if (si.GetUseCompoundFile())
+ {
+ // In some cases, we were originally opened when CFS
+ // was not used, but then we are asked to open doc
+ // stores after the segment has switched to CFS
+ if (cfsReader == null)
+ {
+ cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
+ }
+ storeDir = cfsReader;
+ System.Diagnostics.Debug.Assert(storeDir != null);
+ }
+ else
+ {
+ storeDir = dir;
+ System.Diagnostics.Debug.Assert(storeDir != null);
+ }
+
+ string storesSegment = si.DocStoreOffset != - 1 ? si.DocStoreSegment : segment;
+
+ fieldsReaderOrig = new FieldsReader(storeDir, storesSegment, fieldInfos, readBufferSize, si.DocStoreOffset, si.docCount);
+
+ // Verify two sources of "maxDoc" agree:
+ if (si.DocStoreOffset == - 1 && fieldsReaderOrig.Size() != si.docCount)
+ {
+ throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + fieldsReaderOrig.Size() + " but segmentInfo shows " + si.docCount);
+ }
+
+ if (fieldInfos.HasVectors())
+ {
+ // open term vector files only as needed
+ termVectorsReaderOrig = new TermVectorsReader(storeDir, storesSegment, fieldInfos, readBufferSize, si.DocStoreOffset, si.docCount);
+ }
+ }
+ }
+ }
+
+ public FieldInfos fieldInfos_ForNUnit
+ {
+ get { return fieldInfos; }
+ }
+ }
+
+ /// <summary> Sets the initial value </summary>
+ private class FieldsReaderLocal : CloseableThreadLocal<FieldsReader>
+ {
+ public FieldsReaderLocal(SegmentReader enclosingInstance)
+ {
+ InitBlock(enclosingInstance);
+ }
+ private void InitBlock(SegmentReader enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private SegmentReader enclosingInstance;
+ public SegmentReader Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ public /*protected internal*/ override FieldsReader InitialValue()
+ {
+ return (FieldsReader) Enclosing_Instance.core.GetFieldsReaderOrig().Clone();
+ }
+ }
+
+ public /*internal*/ class Ref
+ {
+ private int refCount = 1;
+
+ public override System.String ToString()
+ {
+ return "refcount: " + refCount;
+ }
+
+ public virtual int RefCount()
+ {
+ lock (this)
+ {
+ return refCount;
+ }
+ }
+
+ public virtual int IncRef()
+ {
+ lock (this)
+ {
+ System.Diagnostics.Debug.Assert(refCount > 0);
+ refCount++;
+ return refCount;
+ }
+ }
+
+ public virtual int DecRef()
+ {
+ lock (this)
+ {
+ System.Diagnostics.Debug.Assert(refCount > 0);
+ refCount--;
+ return refCount;
+ }
+ }
+ }
+
+ /// <summary> Byte[] referencing is used because a new norm object needs
+ /// to be created for each clone, and the byte array is all
+ /// that is needed for sharing between cloned readers. The
+ /// current norm referencing is for sharing between readers
+ /// whereas the byte[] referencing is for copy on write which
+ /// is independent of reader references (i.e. incRef, decRef).
+ /// </summary>
+
+ public /*internal*/ sealed class Norm : System.ICloneable
+ {
+ private void InitBlock(SegmentReader enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private SegmentReader enclosingInstance;
+ public SegmentReader Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ internal /*private*/ int refCount = 1;
+
+ // If this instance is a clone, the originalNorm
+ // references the Norm that has a real open IndexInput:
+ private Norm origNorm;
+
+ private IndexInput in_Renamed;
+ private readonly long normSeek;
+
+ // null until bytes is set
+ private Ref bytesRef;
+ internal /*private*/ byte[] bytes;
+ internal /*private*/ bool dirty;
+ internal /*private*/ int number;
+ internal /*private*/ bool rollbackDirty;
+
+ public Norm(SegmentReader enclosingInstance, IndexInput in_Renamed, int number, long normSeek)
+ {
+ InitBlock(enclosingInstance);
+ this.in_Renamed = in_Renamed;
+ this.number = number;
+ this.normSeek = normSeek;
+ }
+
+ public void IncRef()
+ {
+ lock (this)
+ {
+ System.Diagnostics.Debug.Assert(refCount > 0 &&(origNorm == null || origNorm.refCount > 0));
+ refCount++;
+ }
+ }
+
+ private void CloseInput()
+ {
+ if (in_Renamed != null)
+ {
+ if (in_Renamed != Enclosing_Instance.singleNormStream)
+ {
+ // It's private to us -- just close it
+ in_Renamed.Dispose();
+ }
+ else
+ {
+ // We are sharing this with others -- decRef and
+ // maybe close the shared norm stream
+ if (Enclosing_Instance.singleNormRef.DecRef() == 0)
+ {
+ Enclosing_Instance.singleNormStream.Dispose();
+ Enclosing_Instance.singleNormStream = null;
+ }
+ }
+
+ in_Renamed = null;
+ }
+ }
+
+ public void DecRef()
+ {
+ lock (this)
+ {
+ System.Diagnostics.Debug.Assert(refCount > 0 &&(origNorm == null || origNorm.refCount > 0));
+
+ if (--refCount == 0)
+ {
+ if (origNorm != null)
+ {
+ origNorm.DecRef();
+ origNorm = null;
+ }
+ else
+ {
+ CloseInput();
+ }
+
+ if (bytes != null)
+ {
+ System.Diagnostics.Debug.Assert(bytesRef != null);
+ bytesRef.DecRef();
+ bytes = null;
+ bytesRef = null;
+ }
+ else
+ {
+ System.Diagnostics.Debug.Assert(bytesRef == null);
+ }
+ }
+ }
+ }
+
+ // Load bytes but do not cache them if they were not
+ // already cached
+ public void Bytes(byte[] bytesOut, int offset, int len)
+ {
+ lock (this)
+ {
+ System.Diagnostics.Debug.Assert(refCount > 0 &&(origNorm == null || origNorm.refCount > 0));
+ if (bytes != null)
+ {
+ // Already cached -- copy from cache:
+ System.Diagnostics.Debug.Assert(len <= Enclosing_Instance.MaxDoc);
+ Array.Copy(bytes, 0, bytesOut, offset, len);
+ }
+ else
+ {
+ // Not cached
+ if (origNorm != null)
+ {
+ // Ask origNorm to load
+ origNorm.Bytes(bytesOut, offset, len);
+ }
+ else
+ {
+ // We are orig -- read ourselves from disk:
+ lock (in_Renamed)
+ {
+ in_Renamed.Seek(normSeek);
+ in_Renamed.ReadBytes(bytesOut, offset, len, false);
+ }
+ }
+ }
+ }
+ }
+
+ // Load & cache full bytes array. Returns bytes.
+ public byte[] Bytes()
+ {
+ lock (this)
+ {
+ System.Diagnostics.Debug.Assert(refCount > 0 &&(origNorm == null || origNorm.refCount > 0));
+ if (bytes == null)
+ {
+ // value not yet read
+ System.Diagnostics.Debug.Assert(bytesRef == null);
+ if (origNorm != null)
+ {
+ // Ask origNorm to load so that for a series of
+ // reopened readers we share a single read-only
+ // byte[]
+ bytes = origNorm.Bytes();
+ bytesRef = origNorm.bytesRef;
+ bytesRef.IncRef();
+
+ // Once we've loaded the bytes we no longer need
+ // origNorm:
+ origNorm.DecRef();
+ origNorm = null;
+ }
+ else
+ {
+ // We are the origNorm, so load the bytes for real
+ // ourself:
+ int count = Enclosing_Instance.MaxDoc;
+ bytes = new byte[count];
+
+ // Since we are orig, in must not be null
+ System.Diagnostics.Debug.Assert(in_Renamed != null);
+
+ // Read from disk.
+ lock (in_Renamed)
+ {
+ in_Renamed.Seek(normSeek);
+ in_Renamed.ReadBytes(bytes, 0, count, false);
+ }
+
+ bytesRef = new Ref();
+ CloseInput();
+ }
+ }
+
+ return bytes;
+ }
+ }
+
+ // Only for testing
+ public /*internal*/ Ref BytesRef()
+ {
+ return bytesRef;
+ }
+
+ // Called if we intend to change a norm value. We make a
+ // private copy of bytes if it's shared with others:
+ public byte[] CopyOnWrite()
+ {
+ lock (this)
+ {
+ System.Diagnostics.Debug.Assert(refCount > 0 &&(origNorm == null || origNorm.refCount > 0));
+ Bytes();
+ System.Diagnostics.Debug.Assert(bytes != null);
+ System.Diagnostics.Debug.Assert(bytesRef != null);
+ if (bytesRef.RefCount() > 1)
+ {
+ // I cannot be the origNorm for another norm
+ // instance if I'm being changed. Ie, only the
+ // "head Norm" can be changed:
+ System.Diagnostics.Debug.Assert(refCount == 1);
+ Ref oldRef = bytesRef;
+ bytes = Enclosing_Instance.CloneNormBytes(bytes);
+ bytesRef = new Ref();
+ oldRef.DecRef();
+ }
+ dirty = true;
+ return bytes;
+ }
+ }
+
+ // Returns a copy of this Norm instance that shares
+ // IndexInput & bytes with the original one
+ public System.Object Clone()
+ {
+ lock (this) //LUCENENET-375
+ {
+ System.Diagnostics.Debug.Assert(refCount > 0 && (origNorm == null || origNorm.refCount > 0));
+
+ Norm clone;
+ try
+ {
+ clone = (Norm)base.MemberwiseClone();
+ }
+ catch (System.Exception cnse)
+ {
+ // Cannot happen
+ throw new System.SystemException("unexpected CloneNotSupportedException", cnse);
+ }
+ clone.refCount = 1;
+
+ if (bytes != null)
+ {
+ System.Diagnostics.Debug.Assert(bytesRef != null);
+ System.Diagnostics.Debug.Assert(origNorm == null);
+
+ // Clone holds a reference to my bytes:
+ clone.bytesRef.IncRef();
+ }
+ else
+ {
+ System.Diagnostics.Debug.Assert(bytesRef == null);
+ if (origNorm == null)
+ {
+ // I become the origNorm for the clone:
+ clone.origNorm = this;
+ }
+ clone.origNorm.IncRef();
+ }
+
+ // Only the origNorm will actually readBytes from in:
+ clone.in_Renamed = null;
+
+ return clone;
+ }
+ }
+
+ // Flush all pending changes to the next generation
+ // separate norms file.
+ public void ReWrite(SegmentInfo si)
+ {
+ System.Diagnostics.Debug.Assert(refCount > 0 && (origNorm == null || origNorm.refCount > 0), "refCount=" + refCount + " origNorm=" + origNorm);
+
+ // NOTE: norms are re-written in regular directory, not cfs
+ si.AdvanceNormGen(this.number);
+ string normFileName = si.GetNormFileName(this.number);
+ IndexOutput @out = enclosingInstance.Directory().CreateOutput(normFileName);
+ bool success = false;
+ try
+ {
+ try {
+ @out.WriteBytes(bytes, enclosingInstance.MaxDoc);
+ } finally {
+ @out.Close();
+ }
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ try
+ {
+ enclosingInstance.Directory().DeleteFile(normFileName);
+ }
+ catch (Exception)
+ {
+ // suppress this so we keep throwing the
+ // original exception
+ }
+ }
+ }
+ this.dirty = false;
+ }
+ }
+
+ internal System.Collections.Generic.IDictionary<string, Norm> norms = new HashMap<string, Norm>();
+
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
+ public static SegmentReader Get(bool readOnly, SegmentInfo si, int termInfosIndexDivisor)
+ {
+ return Get(readOnly, si.dir, si, BufferedIndexInput.BUFFER_SIZE, true, termInfosIndexDivisor);
+ }
+
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
+ public static SegmentReader Get(bool readOnly, Directory dir, SegmentInfo si, int readBufferSize, bool doOpenStores, int termInfosIndexDivisor)
+ {
+ SegmentReader instance = readOnly ? new ReadOnlySegmentReader() : new SegmentReader();
+ instance.readOnly = readOnly;
+ instance.si = si;
+ instance.readBufferSize = readBufferSize;
+
+ bool success = false;
+
+ try
+ {
+ instance.core = new CoreReaders(instance, dir, si, readBufferSize, termInfosIndexDivisor);
+ if (doOpenStores)
+ {
+ instance.core.OpenDocStores(si);
+ }
+ instance.LoadDeletedDocs();
+ instance.OpenNorms(instance.core.cfsDir, readBufferSize);
+ success = true;
+ }
+ finally
+ {
+
+ // With lock-less commits, it's entirely possible (and
+ // fine) to hit a FileNotFound exception above. In
+ // this case, we want to explicitly close any subset
+ // of things that were opened so that we don't have to
+ // wait for a GC to do so.
+ if (!success)
+ {
+ instance.DoClose();
+ }
+ }
+ return instance;
+ }
+
+ internal virtual void OpenDocStores()
+ {
+ core.OpenDocStores(si);
+ }
+
+ private bool CheckDeletedCounts()
+ {
+ int recomputedCount = deletedDocs.GetRecomputedCount();
+
+ System.Diagnostics.Debug.Assert(deletedDocs.Count() == recomputedCount, "deleted count=" + deletedDocs.Count() + " vs recomputed count=" + recomputedCount);
+
+ System.Diagnostics.Debug.Assert(si.GetDelCount() == recomputedCount, "delete count mismatch: info=" + si.GetDelCount() + " vs BitVector=" + recomputedCount);
+
+ // Verify # deletes does not exceed maxDoc for this
+ // segment:
+            System.Diagnostics.Debug.Assert(si.GetDelCount() <= MaxDoc, "delete count (" + recomputedCount + ") exceeds max doc (" + MaxDoc + ") for segment " + si.name);
+
+ return true;
+ }
+
+ private void LoadDeletedDocs()
+ {
+ // NOTE: the bitvector is stored using the regular directory, not cfs
+ //if(HasDeletions(si))
+ if (si.HasDeletions())
+ {
+ deletedDocs = new BitVector(Directory(), si.GetDelFileName());
+ deletedDocsRef = new Ref();
+
+ System.Diagnostics.Debug.Assert(CheckDeletedCounts());
+ }
+ else
+ System.Diagnostics.Debug.Assert(si.GetDelCount() == 0);
+ }
+
+ /// <summary> Clones the norm bytes. May be overridden by subclasses. New and experimental.</summary>
+ /// <param name="bytes">Byte array to clone
+ /// </param>
+        /// <returns> A copy of the norm bytes
+        /// </returns>
+ protected internal virtual byte[] CloneNormBytes(byte[] bytes)
+ {
+ var cloneBytes = new byte[bytes.Length];
+ Array.Copy(bytes, 0, cloneBytes, 0, bytes.Length);
+ return cloneBytes;
+ }
+
+ /// <summary> Clones the deleteDocs BitVector. May be overridden by subclasses. New and experimental.</summary>
+ /// <param name="bv">BitVector to clone
+ /// </param>
+ /// <returns> New BitVector
+ /// </returns>
+ protected internal virtual BitVector CloneDeletedDocs(BitVector bv)
+ {
+ return (BitVector) bv.Clone();
+ }
+
+ public override System.Object Clone()
+ {
+ lock (this)
+ {
+ try
+ {
+ return Clone(readOnly); // Preserve current readOnly
+ }
+ catch (System.Exception ex)
+ {
+ throw new System.SystemException(ex.Message, ex);
+ }
+ }
+ }
+
+ public override IndexReader Clone(bool openReadOnly)
+ {
+ lock (this)
+ {
+ return ReopenSegment(si, true, openReadOnly);
+ }
+ }
+
+ internal virtual SegmentReader ReopenSegment(SegmentInfo si, bool doClone, bool openReadOnly)
+ {
+ lock (this)
+ {
+ bool deletionsUpToDate = (this.si.HasDeletions() == si.HasDeletions()) && (!si.HasDeletions() || this.si.GetDelFileName().Equals(si.GetDelFileName()));
+ bool normsUpToDate = true;
+
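+                // A field's norms are treated as changed when the incoming SegmentInfo
+                // names a different norm file for it.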
+ bool[] fieldNormsChanged = new bool[core.fieldInfos.Size()];
+ int fieldCount = core.fieldInfos.Size();
+ for (int i = 0; i < fieldCount; i++)
+ {
+ if (!this.si.GetNormFileName(i).Equals(si.GetNormFileName(i)))
+ {
+ normsUpToDate = false;
+ fieldNormsChanged[i] = true;
+ }
+ }
+
+ // if we're cloning we need to run through the reopenSegment logic
+ // also if both old and new readers aren't readonly, we clone to avoid sharing modifications
+ if (normsUpToDate && deletionsUpToDate && !doClone && openReadOnly && readOnly)
+ {
+ return this;
+ }
+
+ // When cloning, the incoming SegmentInfos should not
+ // have any changes in it:
+ System.Diagnostics.Debug.Assert(!doClone ||(normsUpToDate && deletionsUpToDate));
+
+ // clone reader
+ SegmentReader clone = openReadOnly ? new ReadOnlySegmentReader() : new SegmentReader();
+
+ bool success = false;
+ try
+ {
+ core.IncRef();
+ clone.core = core;
+ clone.readOnly = openReadOnly;
+ clone.si = si;
+ clone.readBufferSize = readBufferSize;
+
+ if (!openReadOnly && hasChanges)
+ {
+ // My pending changes transfer to the new reader
+ clone.pendingDeleteCount = pendingDeleteCount;
+ clone.deletedDocsDirty = deletedDocsDirty;
+ clone.normsDirty = normsDirty;
+ clone.hasChanges = hasChanges;
+ hasChanges = false;
+ }
+
+ if (doClone)
+ {
+ if (deletedDocs != null)
+ {
+ deletedDocsRef.IncRef();
+ clone.deletedDocs = deletedDocs;
+ clone.deletedDocsRef = deletedDocsRef;
+ }
+ }
+ else
+ {
+ if (!deletionsUpToDate)
+ {
+ // load deleted docs
+ System.Diagnostics.Debug.Assert(clone.deletedDocs == null);
+ clone.LoadDeletedDocs();
+ }
+ else if (deletedDocs != null)
+ {
+ deletedDocsRef.IncRef();
+ clone.deletedDocs = deletedDocs;
+ clone.deletedDocsRef = deletedDocsRef;
+ }
+ }
+
+ clone.norms = new HashMap<string, Norm>();
+
+ // Clone norms
+ for (int i = 0; i < fieldNormsChanged.Length; i++)
+ {
+
+ // Clone unchanged norms to the cloned reader
+ if (doClone || !fieldNormsChanged[i])
+ {
+ System.String curField = core.fieldInfos.FieldInfo(i).name;
+ Norm norm = this.norms[curField];
+ if (norm != null)
+ clone.norms[curField] = (Norm)norm.Clone();
+ }
+ }
+
+ // If we are not cloning, then this will open anew
+ // any norms that have changed:
+ clone.OpenNorms(si.GetUseCompoundFile()?core.GetCFSReader():Directory(), readBufferSize);
+
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+                        // An exception occurred during reopen; we have to decRef the norms
+                        // that we already incRef'ed and close singleNormStream and the FieldsReader
+ clone.DecRef();
+ }
+ }
+
+ return clone;
+ }
+ }
+
+ protected internal override void DoCommit(System.Collections.Generic.IDictionary<string, string> commitUserData)
+ {
+ if (hasChanges)
+ {
+ StartCommit();
+ bool success = false;
+ try
+ {
+ CommitChanges(commitUserData);
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ RollbackCommit();
+ }
+ }
+ }
+ }
+
+ private void CommitChanges(System.Collections.Generic.IDictionary<string, string> commitUserData)
+ {
+ if (deletedDocsDirty)
+ { // re-write deleted
+ si.AdvanceDelGen();
+
+ // We can write directly to the actual name (vs to a
+ // .tmp & renaming it) because the file is not live
+ // until segments file is written:
+ string delFileName = si.GetDelFileName();
+ bool success = false;
+ try
+ {
+ deletedDocs.Write(Directory(), delFileName);
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ try
+ {
+ Directory().DeleteFile(delFileName);
+ }
+ catch (Exception)
+ {
+ // suppress this so we keep throwing the
+ // original exception
+ }
+ }
+ }
+
+ si.SetDelCount(si.GetDelCount() + pendingDeleteCount);
+ pendingDeleteCount = 0;
+ System.Diagnostics.Debug.Assert(deletedDocs.Count() == si.GetDelCount(), "delete count mismatch during commit: info=" + si.GetDelCount() + " vs BitVector=" + deletedDocs.Count());
+ }
+ else
+ {
+ System.Diagnostics.Debug.Assert(pendingDeleteCount == 0);
+ }
+
+ if (normsDirty)
+ { // re-write norms
+ si.SetNumFields(core.fieldInfos.Size());
+ foreach (Norm norm in norms.Values)
+ {
+ if (norm.dirty)
+ {
+ norm.ReWrite(si);
+ }
+ }
+ }
+ deletedDocsDirty = false;
+ normsDirty = false;
+ hasChanges = false;
+ }
+
+ internal virtual FieldsReader GetFieldsReader()
+ {
+ return fieldsReaderLocal.Get();
+ }
+
+ protected internal override void DoClose()
+ {
+ termVectorsLocal.Close();
+ fieldsReaderLocal.Close();
+
+ if (deletedDocs != null)
+ {
+ deletedDocsRef.DecRef();
+ // null so if an app hangs on to us we still free most ram
+ deletedDocs = null;
+ }
+
+ foreach(Norm norm in norms.Values)
+ {
+ norm.DecRef();
+ }
+ if (core != null)
+ {
+ core.DecRef();
+ }
+ }
+
+ //internal static bool HasDeletions(SegmentInfo si)
+ //{
+ // // Don't call ensureOpen() here (it could affect performance)
+ // return si.HasDeletions();
+ //}
+
+ public override bool HasDeletions
+ {
+ get
+ {
+ // Don't call ensureOpen() here (it could affect performance)
+ return deletedDocs != null;
+ }
+ }
+
+ internal static bool UsesCompoundFile(SegmentInfo si)
+ {
+ return si.GetUseCompoundFile();
+ }
+
+ internal static bool HasSeparateNorms(SegmentInfo si)
+ {
+ return si.HasSeparateNorms();
+ }
+
+ protected internal override void DoDelete(int docNum)
+ {
+ if (deletedDocs == null)
+ {
+ deletedDocs = new BitVector(MaxDoc);
+ deletedDocsRef = new Ref();
+ }
+ // there is more than 1 SegmentReader with a reference to this
+ // deletedDocs BitVector so decRef the current deletedDocsRef,
+ // clone the BitVector, create a new deletedDocsRef
+ if (deletedDocsRef.RefCount() > 1)
+ {
+ Ref oldRef = deletedDocsRef;
+ deletedDocs = CloneDeletedDocs(deletedDocs);
+ deletedDocsRef = new Ref();
+ oldRef.DecRef();
+ }
+ deletedDocsDirty = true;
+ if (!deletedDocs.GetAndSet(docNum))
+ pendingDeleteCount++;
+ }
+
+ protected internal override void DoUndeleteAll()
+ {
+ deletedDocsDirty = false;
+ if (deletedDocs != null)
+ {
+ System.Diagnostics.Debug.Assert(deletedDocsRef != null);
+ deletedDocsRef.DecRef();
+ deletedDocs = null;
+ deletedDocsRef = null;
+ pendingDeleteCount = 0;
+ si.ClearDelGen();
+ si.SetDelCount(0);
+ }
+ else
+ {
+ System.Diagnostics.Debug.Assert(deletedDocsRef == null);
+ System.Diagnostics.Debug.Assert(pendingDeleteCount == 0);
+ }
+ }
+
+ internal virtual System.Collections.Generic.IList<string> Files()
+ {
+ return si.Files();
+ }
+
+ public override TermEnum Terms()
+ {
+ EnsureOpen();
+ return core.GetTermsReader().Terms();
+ }
+
+ public override TermEnum Terms(Term t)
+ {
+ EnsureOpen();
+ return core.GetTermsReader().Terms(t);
+ }
+
+ public /*internal*/ virtual FieldInfos FieldInfos()
+ {
+ return core.fieldInfos;
+ }
+
+ public override Document Document(int n, FieldSelector fieldSelector)
+ {
+ EnsureOpen();
+ return GetFieldsReader().Doc(n, fieldSelector);
+ }
+
+ public override bool IsDeleted(int n)
+ {
+ lock (this)
+ {
+ return (deletedDocs != null && deletedDocs.Get(n));
+ }
+ }
+
+ public override TermDocs TermDocs(Term term)
+ {
+ if (term == null)
+ {
+ return new AllTermDocs(this);
+ }
+ else
+ {
+ return base.TermDocs(term);
+ }
+ }
+
+ public override TermDocs TermDocs()
+ {
+ EnsureOpen();
+ return new SegmentTermDocs(this);
+ }
+
+ public override TermPositions TermPositions()
+ {
+ EnsureOpen();
+ return new SegmentTermPositions(this);
+ }
+
+ public override int DocFreq(Term t)
+ {
+ EnsureOpen();
+ TermInfo ti = core.GetTermsReader().Get(t);
+ if (ti != null)
+ return ti.docFreq;
+ else
+ return 0;
+ }
+
+ public override int NumDocs()
+ {
+ // Don't call ensureOpen() here (it could affect performance)
+ int n = MaxDoc;
+ if (deletedDocs != null)
+ n -= deletedDocs.Count();
+ return n;
+ }
+
+ public override int MaxDoc
+ {
+ get
+ {
+ // Don't call ensureOpen() here (it could affect performance)
+ return si.docCount;
+ }
+ }
+
+ /// <seealso cref="IndexReader.GetFieldNames(IndexReader.FieldOption)">
+ /// </seealso>
+ public override System.Collections.Generic.ICollection<string> GetFieldNames(IndexReader.FieldOption fieldOption)
+ {
+ EnsureOpen();
+
+ System.Collections.Generic.ISet<string> fieldSet = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet<string>();
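+            // Check every FieldInfo against the requested FieldOption and collect the matching field names.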
+ for (int i = 0; i < core.fieldInfos.Size(); i++)
+ {
+ FieldInfo fi = core.fieldInfos.FieldInfo(i);
+ if (fieldOption == IndexReader.FieldOption.ALL)
+ {
+ fieldSet.Add(fi.name);
+ }
+ else if (!fi.isIndexed && fieldOption == IndexReader.FieldOption.UNINDEXED)
+ {
+ fieldSet.Add(fi.name);
+ }
+ else if (fi.omitTermFreqAndPositions && fieldOption == IndexReader.FieldOption.OMIT_TERM_FREQ_AND_POSITIONS)
+ {
+ fieldSet.Add(fi.name);
+ }
+ else if (fi.storePayloads && fieldOption == IndexReader.FieldOption.STORES_PAYLOADS)
+ {
+ fieldSet.Add(fi.name);
+ }
+ else if (fi.isIndexed && fieldOption == IndexReader.FieldOption.INDEXED)
+ {
+ fieldSet.Add(fi.name);
+ }
+ else if (fi.isIndexed && fi.storeTermVector == false && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR)
+ {
+ fieldSet.Add(fi.name);
+ }
+ else if (fi.storeTermVector == true && fi.storePositionWithTermVector == false && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR)
+ {
+ fieldSet.Add(fi.name);
+ }
+ else if (fi.isIndexed && fi.storeTermVector && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR)
+ {
+ fieldSet.Add(fi.name);
+ }
+ else if (fi.storePositionWithTermVector && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION)
+ {
+ fieldSet.Add(fi.name);
+ }
+ else if (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET)
+ {
+ fieldSet.Add(fi.name);
+ }
+ else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector) && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET)
+ {
+ fieldSet.Add(fi.name);
+ }
+ }
+ return fieldSet;
+ }
+
+
+ public override bool HasNorms(System.String field)
+ {
+ lock (this)
+ {
+ EnsureOpen();
+ return norms.ContainsKey(field);
+ }
+ }
+
+ // can return null if norms aren't stored
+ protected internal virtual byte[] GetNorms(System.String field)
+ {
+ lock (this)
+ {
+ Norm norm = norms[field];
+ if (norm == null)
+ return null; // not indexed, or norms not stored
+ return norm.Bytes();
+ }
+ }
+
+ // returns fake norms if norms aren't available
+ public override byte[] Norms(System.String field)
+ {
+ lock (this)
+ {
+ EnsureOpen();
+ byte[] bytes = GetNorms(field);
+ return bytes;
+ }
+ }
+
+ protected internal override void DoSetNorm(int doc, System.String field, byte value_Renamed)
+ {
+ Norm norm = norms[field];
+ if (norm == null)
+ // not an indexed field
+ return ;
+
+ normsDirty = true;
+ norm.CopyOnWrite()[doc] = value_Renamed; // set the value
+ }
+
+ /// <summary>Read norms into a pre-allocated array. </summary>
+ public override void Norms(System.String field, byte[] bytes, int offset)
+ {
+ lock (this)
+ {
+
+ EnsureOpen();
+ Norm norm = norms[field];
+ if (norm == null)
+ {
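+                // No norms stored for this field: fill the requested range with the
+                // default norm (an encoded boost of 1.0f).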
+ for (int i = offset; i < bytes.Length; i++)
+ {
+ bytes[i] = (byte) DefaultSimilarity.EncodeNorm(1.0f);
+ }
+ return ;
+ }
+
+ norm.Bytes(bytes, offset, MaxDoc);
+ }
+ }
+
+
+ private void OpenNorms(Directory cfsDir, int readBufferSize)
+ {
+ long nextNormSeek = SegmentMerger.NORMS_HEADER.Length; //skip header (header unused for now)
+ int maxDoc = MaxDoc;
+ for (int i = 0; i < core.fieldInfos.Size(); i++)
+ {
+ FieldInfo fi = core.fieldInfos.FieldInfo(i);
+ if (norms.ContainsKey(fi.name))
+ {
+ // in case this SegmentReader is being re-opened, we might be able to
+ // reuse some norm instances and skip loading them here
+ continue;
+ }
+ if (fi.isIndexed && !fi.omitNorms)
+ {
+ Directory d = Directory();
+ System.String fileName = si.GetNormFileName(fi.number);
+ if (!si.HasSeparateNorms(fi.number))
+ {
+ d = cfsDir;
+ }
+
+ // singleNormFile means multiple norms share this file
+ bool singleNormFile = fileName.EndsWith("." + IndexFileNames.NORMS_EXTENSION);
+ IndexInput normInput = null;
+ long normSeek;
+
+ if (singleNormFile)
+ {
+ normSeek = nextNormSeek;
+ if (singleNormStream == null)
+ {
+ singleNormStream = d.OpenInput(fileName, readBufferSize);
+ singleNormRef = new Ref();
+ }
+ else
+ {
+ singleNormRef.IncRef();
+ }
+ // All norms in the .nrm file can share a single IndexInput since
+ // they are only used in a synchronized context.
+ // If this were to change in the future, a clone could be done here.
+ normInput = singleNormStream;
+ }
+ else
+ {
+ normSeek = 0;
+ normInput = d.OpenInput(fileName);
+ }
+
+ norms[fi.name] = new Norm(this, normInput, fi.number, normSeek);
+ nextNormSeek += maxDoc; // increment also if some norms are separate
+ }
+ }
+ }
+
+ public /*internal*/ virtual bool TermsIndexLoaded()
+ {
+ return core.TermsIndexIsLoaded();
+ }
+
+ // NOTE: only called from IndexWriter when a near
+ // real-time reader is opened, or applyDeletes is run,
+ // sharing a segment that's still being merged. This
+ // method is not thread safe, and relies on the
+ // synchronization in IndexWriter
+ internal virtual void LoadTermsIndex(int termsIndexDivisor)
+ {
+ core.LoadTermsIndex(si, termsIndexDivisor);
+ }
+
+ // for testing only
+ public /*internal*/ virtual bool NormsClosed()
+ {
+ if (singleNormStream != null)
+ {
+ return false;
+ }
+ return norms.Values.All(norm => norm.refCount <= 0);
+ }
+
+ // for testing only
+ public /*internal*/ virtual bool NormsClosed(System.String field)
+ {
+ return norms[field].refCount == 0;
+ }
+
+ /// <summary> Create a clone from the initial TermVectorsReader and store it in the ThreadLocal.</summary>
+ /// <returns> TermVectorsReader
+ /// </returns>
+ internal virtual TermVectorsReader GetTermVectorsReader()
+ {
+ TermVectorsReader tvReader = termVectorsLocal.Get();
+ if (tvReader == null)
+ {
+ TermVectorsReader orig = core.GetTermVectorsReaderOrig();
+ if (orig == null)
+ {
+ return null;
+ }
+ else
+ {
+ try
+ {
+ tvReader = (TermVectorsReader) orig.Clone();
+ }
+ catch (System.Exception)
+ {
+ return null;
+ }
+ }
+ termVectorsLocal.Set(tvReader);
+ }
+ return tvReader;
+ }
+
+ internal virtual TermVectorsReader GetTermVectorsReaderOrig()
+ {
+ return core.GetTermVectorsReaderOrig();
+ }
+
+ /// <summary>Return a term frequency vector for the specified document and field. The
+ /// vector returned contains term numbers and frequencies for all terms in
+ /// the specified field of this document, if the field had storeTermVector
+ /// flag set. If the flag was not set, the method returns null.
+ /// </summary>
+ /// <throws> IOException </throws>
+ public override ITermFreqVector GetTermFreqVector(int docNumber, System.String field)
+ {
+ // Check if this field is invalid or has no stored term vector
+ EnsureOpen();
+ FieldInfo fi = core.fieldInfos.FieldInfo(field);
+ if (fi == null || !fi.storeTermVector)
+ return null;
+
+ TermVectorsReader termVectorsReader = GetTermVectorsReader();
+ if (termVectorsReader == null)
+ return null;
+
+ return termVectorsReader.Get(docNumber, field);
+ }
+
+
+ public override void GetTermFreqVector(int docNumber, System.String field, TermVectorMapper mapper)
+ {
+ EnsureOpen();
+ FieldInfo fi = core.fieldInfos.FieldInfo(field);
+ if (fi == null || !fi.storeTermVector)
+ return;
+
+ TermVectorsReader termVectorsReader = GetTermVectorsReader();
+ if (termVectorsReader == null)
+ {
+ return;
+ }
+ termVectorsReader.Get(docNumber, field, mapper);
+ }
+
+
+ public override void GetTermFreqVector(int docNumber, TermVectorMapper mapper)
+ {
+ EnsureOpen();
+
+ TermVectorsReader termVectorsReader = GetTermVectorsReader();
+ if (termVectorsReader == null)
+ return ;
+
+ termVectorsReader.Get(docNumber, mapper);
+ }
+
+ /// <summary>Return an array of term frequency vectors for the specified document.
+ /// The array contains a vector for each vectorized field in the document.
+        /// Each vector contains term numbers and frequencies for all terms
+ /// in a given vectorized field.
+ /// If no such fields existed, the method returns null.
+ /// </summary>
+ /// <throws> IOException </throws>
+ public override ITermFreqVector[] GetTermFreqVectors(int docNumber)
+ {
+ EnsureOpen();
+
+ TermVectorsReader termVectorsReader = GetTermVectorsReader();
+ if (termVectorsReader == null)
+ return null;
+
+ return termVectorsReader.Get(docNumber);
+ }
+
+ /// <summary> Return the name of the segment this reader is reading.</summary>
+ public virtual string SegmentName
+ {
+ get { return core.segment; }
+ }
+
+ /// <summary> Return the SegmentInfo of the segment this reader is reading.</summary>
+ internal virtual SegmentInfo SegmentInfo
+ {
+ get { return si; }
+ set { si = value; }
+ }
+
+ internal virtual void StartCommit()
+ {
+ rollbackSegmentInfo = (SegmentInfo)si.Clone();
+ rollbackHasChanges = hasChanges;
+ rollbackDeletedDocsDirty = deletedDocsDirty;
+ rollbackNormsDirty = normsDirty;
+ rollbackPendingDeleteCount = pendingDeleteCount;
+ foreach(Norm norm in norms.Values)
+ {
+ norm.rollbackDirty = norm.dirty;
+ }
+ }
+
+ internal virtual void RollbackCommit()
+ {
+ si.Reset(rollbackSegmentInfo);
+ hasChanges = rollbackHasChanges;
+ deletedDocsDirty = rollbackDeletedDocsDirty;
+ normsDirty = rollbackNormsDirty;
+ pendingDeleteCount = rollbackPendingDeleteCount;
+ foreach(Norm norm in norms.Values)
+ {
+ norm.dirty = norm.rollbackDirty;
+ }
+ }
+
+ /// <summary>Returns the directory this index resides in. </summary>
+ public override Directory Directory()
+ {
+ // Don't ensureOpen here -- in certain cases, when a
+ // cloned/reopened reader needs to commit, it may call
+ // this method on the closed original reader
+ return core.dir;
+ }
+
+ // This is necessary so that cloned SegmentReaders (which
+ // share the underlying postings data) will map to the
+ // same entry in the FieldCache. See LUCENE-1579.
+
+ public override object FieldCacheKey
+ {
+ get { return core.freqStream; }
+ }
+
+ public override object DeletesCacheKey
+ {
+ get { return deletedDocs; }
+ }
+
+
+ public override long UniqueTermCount
+ {
+ get { return core.GetTermsReader().Size(); }
+ }
+
+ /// <summary> Lotsa tests did hacks like:<br/>
+ /// SegmentReader reader = (SegmentReader) IndexReader.open(dir);<br/>
+ /// They broke. This method serves as a hack to keep those hacks working.
+ /// We do it with R/W access for the tests (backwards compatibility).
+ /// </summary>
+ [Obsolete("Remove this when tests are fixed!")]
+ public /*internal*/ static SegmentReader GetOnlySegmentReader(Directory dir)
+ {
+ return GetOnlySegmentReader(IndexReader.Open(dir,false));
+ }
+
+ public /*internal*/ static SegmentReader GetOnlySegmentReader(IndexReader reader)
+ {
+ var onlySegmentReader = reader as SegmentReader;
+ if (onlySegmentReader != null)
+ return onlySegmentReader;
+
+ if (reader is DirectoryReader)
+ {
+ IndexReader[] subReaders = reader.GetSequentialSubReaders();
+ if (subReaders.Length != 1)
+ {
+ throw new System.ArgumentException(reader + " has " + subReaders.Length + " segments instead of exactly one");
+ }
+
+ return (SegmentReader) subReaders[0];
+ }
+
+ throw new System.ArgumentException(reader + " is not a SegmentReader or a single-segment DirectoryReader");
+ }
+
+ public override int TermInfosIndexDivisor
+ {
+ get { return core.termsIndexDivisor; }
+ }
+
+ public System.Collections.Generic.IDictionary<string, Norm> norms_ForNUnit
+ {
+ get { return norms; }
+ }
+
+ public BitVector deletedDocs_ForNUnit
+ {
+ get { return deletedDocs; }
+ }
+
+ public CoreReaders core_ForNUnit
+ {
+ get { return core; }
+ }
+
+ public Ref deletedDocsRef_ForNUnit
+ {
+ get { return deletedDocsRef; }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/SegmentTermDocs.cs b/src/core/Index/SegmentTermDocs.cs
new file mode 100644
index 0000000..f7efde6
--- /dev/null
+++ b/src/core/Index/SegmentTermDocs.cs
@@ -0,0 +1,282 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Support;
+using IndexInput = Lucene.Net.Store.IndexInput;
+using BitVector = Lucene.Net.Util.BitVector;
+
+namespace Lucene.Net.Index
+{
+
+ internal class SegmentTermDocs : TermDocs
+ {
+ protected internal SegmentReader parent;
+ protected internal IndexInput freqStream;
+ protected internal int count;
+ protected internal int df;
+ protected internal BitVector deletedDocs;
+ internal int doc = 0;
+ internal int freq;
+
+ private readonly int skipInterval;
+ private readonly int maxSkipLevels;
+ private DefaultSkipListReader skipListReader;
+
+ private long freqBasePointer;
+ private long proxBasePointer;
+
+ private long skipPointer;
+ private bool haveSkipped;
+
+ protected internal bool currentFieldStoresPayloads;
+ protected internal bool currentFieldOmitTermFreqAndPositions;
+
+ private bool isDisposed;
+
+ public /*protected internal*/ SegmentTermDocs(SegmentReader parent)
+ {
+ this.parent = parent;
+ this.freqStream = (IndexInput) parent.core.freqStream.Clone();
+ lock (parent)
+ {
+ this.deletedDocs = parent.deletedDocs;
+ }
+ this.skipInterval = parent.core.GetTermsReader().SkipInterval;
+ this.maxSkipLevels = parent.core.GetTermsReader().MaxSkipLevels;
+ }
+
+ public virtual void Seek(Term term)
+ {
+ TermInfo ti = parent.core.GetTermsReader().Get(term);
+ Seek(ti, term);
+ }
+
+ public virtual void Seek(TermEnum termEnum)
+ {
+ TermInfo ti;
+ Term term;
+
+ // use comparison of fieldinfos to verify that termEnum belongs to the same segment as this SegmentTermDocs
+ if (termEnum is SegmentTermEnum && ((SegmentTermEnum) termEnum).fieldInfos == parent.core.fieldInfos)
+ {
+ // optimized case
+ var segmentTermEnum = ((SegmentTermEnum) termEnum);
+ term = segmentTermEnum.Term;
+ ti = segmentTermEnum.TermInfo();
+ }
+ else
+ {
+ // punt case
+ term = termEnum.Term;
+ ti = parent.core.GetTermsReader().Get(term);
+ }
+
+ Seek(ti, term);
+ }
+
+ internal virtual void Seek(TermInfo ti, Term term)
+ {
+ count = 0;
+ FieldInfo fi = parent.core.fieldInfos.FieldInfo(term.Field);
+ currentFieldOmitTermFreqAndPositions = (fi != null) && fi.omitTermFreqAndPositions;
+ currentFieldStoresPayloads = (fi != null) && fi.storePayloads;
+ if (ti == null)
+ {
+ df = 0;
+ }
+ else
+ {
+ df = ti.docFreq;
+ doc = 0;
+ freqBasePointer = ti.freqPointer;
+ proxBasePointer = ti.proxPointer;
+ skipPointer = freqBasePointer + ti.skipOffset;
+ freqStream.Seek(freqBasePointer);
+ haveSkipped = false;
+ }
+ }
+
+ public void Dispose()
+ {
+ Dispose(true);
+ }
+
+ [Obsolete("Use Dispose() instead")]
+ public void Close()
+ {
+ Dispose();
+ }
+
+ protected virtual void Dispose(bool disposing)
+ {
+ if (isDisposed) return;
+
+ freqStream.Dispose();
+ if (skipListReader != null)
+ skipListReader.Dispose();
+
+ isDisposed = true;
+ }
+
+ public int Doc
+ {
+ get { return doc; }
+ }
+
+ public int Freq
+ {
+ get { return freq; }
+ }
+
+ protected internal virtual void SkippingDoc()
+ {
+ }
+
+ public virtual bool Next()
+ {
+ while (true)
+ {
+ if (count == df)
+ return false;
+ int docCode = freqStream.ReadVInt();
+
+ if (currentFieldOmitTermFreqAndPositions)
+ {
+ doc += docCode;
+ freq = 1;
+ }
+ else
+ {
+ doc += Number.URShift(docCode, 1); // shift off low bit
+ if ((docCode & 1) != 0)
+ freq = 1; // if low bit is set, freq is one
+ else
+ freq = freqStream.ReadVInt(); // else read freq
+ }
+
+ count++;
+
+ if (deletedDocs == null || !deletedDocs.Get(doc))
+ break;
+ SkippingDoc();
+ }
+ return true;
+ }
+
+ /// <summary>Optimized implementation. </summary>
+ public virtual int Read(int[] docs, int[] freqs)
+ {
+ int length = docs.Length;
+ if (currentFieldOmitTermFreqAndPositions)
+ {
+ return ReadNoTf(docs, freqs, length);
+ }
+ else
+ {
+ int i = 0;
+ while (i < length && count < df)
+ {
+ // manually inlined call to next() for speed
+ int docCode = freqStream.ReadVInt();
+ doc += Number.URShift(docCode, 1); // shift off low bit
+ if ((docCode & 1) != 0)
+ freq = 1; // if low bit is set, freq is one
+ else
+ freq = freqStream.ReadVInt(); // else read freq
+ count++;
+
+ if (deletedDocs == null || !deletedDocs.Get(doc))
+ {
+ docs[i] = doc;
+ freqs[i] = freq;
+ ++i;
+ }
+ }
+ return i;
+ }
+ }
+
+ private int ReadNoTf(int[] docs, int[] freqs, int length)
+ {
+ int i = 0;
+ while (i < length && count < df)
+ {
+ // manually inlined call to next() for speed
+ doc += freqStream.ReadVInt();
+ count++;
+
+ if (deletedDocs == null || !deletedDocs.Get(doc))
+ {
+ docs[i] = doc;
+ // Hardwire freq to 1 when term freqs were not
+ // stored in the index
+ freqs[i] = 1;
+ ++i;
+ }
+ }
+ return i;
+ }
+
+
+ /// <summary>Overridden by SegmentTermPositions to skip in prox stream. </summary>
+ protected internal virtual void SkipProx(long proxPointer, int payloadLength)
+ {
+ }
+
+ /// <summary>Optimized implementation. </summary>
+ public virtual bool SkipTo(int target)
+ {
+ if (df >= skipInterval)
+ {
+ // optimized case
+ if (skipListReader == null)
+ skipListReader = new DefaultSkipListReader((IndexInput) freqStream.Clone(), maxSkipLevels, skipInterval); // lazily clone
+
+ if (!haveSkipped)
+ {
+ // lazily initialize skip stream
+ skipListReader.Init(skipPointer, freqBasePointer, proxBasePointer, df, currentFieldStoresPayloads);
+ haveSkipped = true;
+ }
+
+ int newCount = skipListReader.SkipTo(target);
+ if (newCount > count)
+ {
+ freqStream.Seek(skipListReader.GetFreqPointer());
+ SkipProx(skipListReader.GetProxPointer(), skipListReader.GetPayloadLength());
+
+ doc = skipListReader.GetDoc();
+ count = newCount;
+ }
+ }
+
+ // done skipping, now just scan
+ do
+ {
+ if (!Next())
+ return false;
+ }
+ while (target > doc);
+ return true;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/SegmentTermEnum.cs b/src/core/Index/SegmentTermEnum.cs
new file mode 100644
index 0000000..77224df
--- /dev/null
+++ b/src/core/Index/SegmentTermEnum.cs
@@ -0,0 +1,247 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using IndexInput = Lucene.Net.Store.IndexInput;
+
+namespace Lucene.Net.Index
+{
+ internal sealed class SegmentTermEnum : TermEnum, System.ICloneable
+ {
+ private IndexInput input;
+ internal FieldInfos fieldInfos;
+ internal long size;
+ internal long position = - 1;
+
+ private TermBuffer termBuffer = new TermBuffer();
+ private TermBuffer prevBuffer = new TermBuffer();
+ private TermBuffer scanBuffer = new TermBuffer(); // used for scanning
+
+ private TermInfo termInfo = new TermInfo();
+
+ private readonly int format;
+ private readonly bool isIndex = false;
+ internal long indexPointer = 0;
+ internal int indexInterval;
+ internal int skipInterval;
+ internal int maxSkipLevels;
+ private readonly int formatM1SkipInterval;
+
+ internal SegmentTermEnum(IndexInput i, FieldInfos fis, bool isi)
+ {
+ input = i;
+ fieldInfos = fis;
+ isIndex = isi;
+ maxSkipLevels = 1; // use single-level skip lists for formats > -3
+
+ int firstInt = input.ReadInt();
+ if (firstInt >= 0)
+ {
+ // original-format file, without explicit format version number
+ format = 0;
+ size = firstInt;
+
+ // back-compatible settings
+ indexInterval = 128;
+ skipInterval = System.Int32.MaxValue; // switch off skipTo optimization
+ }
+ else
+ {
+ // we have a format version number
+ format = firstInt;
+
+ // check that it is a format we can understand
+ if (format < TermInfosWriter.FORMAT_CURRENT)
+ throw new CorruptIndexException("Unknown format version:" + format + " expected " + TermInfosWriter.FORMAT_CURRENT + " or higher");
+
+ size = input.ReadLong(); // read the size
+
+ if (format == - 1)
+ {
+ if (!isIndex)
+ {
+ indexInterval = input.ReadInt();
+ formatM1SkipInterval = input.ReadInt();
+ }
+ // switch off skipTo optimization for file format prior to 1.4rc2 in order to avoid a bug in
+ // skipTo implementation of these versions
+ skipInterval = System.Int32.MaxValue;
+ }
+ else
+ {
+ indexInterval = input.ReadInt();
+ skipInterval = input.ReadInt();
+ if (format <= TermInfosWriter.FORMAT)
+ {
+ // this new format introduces multi-level skipping
+ maxSkipLevels = input.ReadInt();
+ }
+ }
+ System.Diagnostics.Debug.Assert(indexInterval > 0, "indexInterval=" + indexInterval + " is negative; must be > 0");
+ System.Diagnostics.Debug.Assert(skipInterval > 0, "skipInterval=" + skipInterval + " is negative; must be > 0");
+ }
+ if (format > TermInfosWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
+ {
+ termBuffer.SetPreUTF8Strings();
+ scanBuffer.SetPreUTF8Strings();
+ prevBuffer.SetPreUTF8Strings();
+ }
+ }
+
+ public System.Object Clone()
+ {
+ SegmentTermEnum clone = null;
+ try
+ {
+ clone = (SegmentTermEnum) base.MemberwiseClone();
+ }
+ catch (System.Exception)
+ {
+ }
+
+ clone.input = (IndexInput) input.Clone();
+ clone.termInfo = new TermInfo(termInfo);
+
+ clone.termBuffer = (TermBuffer) termBuffer.Clone();
+ clone.prevBuffer = (TermBuffer) prevBuffer.Clone();
+ clone.scanBuffer = new TermBuffer();
+
+ return clone;
+ }
+
+ internal void Seek(long pointer, long p, Term t, TermInfo ti)
+ {
+ input.Seek(pointer);
+ position = p;
+ termBuffer.Set(t);
+ prevBuffer.Reset();
+ termInfo.Set(ti);
+ }
+
+ /// <summary>Increments the enumeration to the next element. True if one exists.</summary>
+ public override bool Next()
+ {
+ if (position++ >= size - 1)
+ {
+ prevBuffer.Set(termBuffer);
+ termBuffer.Reset();
+ return false;
+ }
+
+ prevBuffer.Set(termBuffer);
+ termBuffer.Read(input, fieldInfos);
+
+ termInfo.docFreq = input.ReadVInt(); // read doc freq
+ termInfo.freqPointer += input.ReadVLong(); // read freq pointer
+ termInfo.proxPointer += input.ReadVLong(); // read prox pointer
+
+ if (format == - 1)
+ {
+ // just read skipOffset in order to increment file pointer;
+ // value is never used since skipTo is switched off
+ if (!isIndex)
+ {
+ if (termInfo.docFreq > formatM1SkipInterval)
+ {
+ termInfo.skipOffset = input.ReadVInt();
+ }
+ }
+ }
+ else
+ {
+ if (termInfo.docFreq >= skipInterval)
+ termInfo.skipOffset = input.ReadVInt();
+ }
+
+ if (isIndex)
+ indexPointer += input.ReadVLong(); // read index pointer
+
+ return true;
+ }
+
+ /// <summary>Optimized scan, without allocating new terms.
+ /// Returns the number of invocations of next().
+ /// </summary>
+ internal int ScanTo(Term term)
+ {
+ scanBuffer.Set(term);
+ int count = 0;
+ while (scanBuffer.CompareTo(termBuffer) > 0 && Next())
+ {
+ count++;
+ }
+ return count;
+ }
+
+ /// <summary>Returns the current Term in the enumeration.
+ /// Initially invalid, valid after next() called for the first time.
+ /// </summary>
+ public override Term Term
+ {
+ get { return termBuffer.ToTerm(); }
+ }
+
+ /// <summary>Returns the previous Term enumerated. Initially null.</summary>
+ public /*internal*/ Term Prev()
+ {
+ return prevBuffer.ToTerm();
+ }
+
+ /// <summary>Returns the current TermInfo in the enumeration.
+ /// Initially invalid, valid after next() called for the first time.
+ /// </summary>
+ internal TermInfo TermInfo()
+ {
+ return new TermInfo(termInfo);
+ }
+
+ /// <summary>Sets the argument to the current TermInfo in the enumeration.
+ /// Initially invalid, valid after next() called for the first time.
+ /// </summary>
+ internal void TermInfo(TermInfo ti)
+ {
+ ti.Set(termInfo);
+ }
+
+ /// <summary>Returns the docFreq from the current TermInfo in the enumeration.
+ /// Initially invalid, valid after next() called for the first time.
+ /// </summary>
+ public override int DocFreq()
+ {
+ return termInfo.docFreq;
+ }
+
+ /* Returns the freqPointer from the current TermInfo in the enumeration.
+ Initially invalid, valid after next() called for the first time.*/
+ internal long FreqPointer()
+ {
+ return termInfo.freqPointer;
+ }
+
+ /* Returns the proxPointer from the current TermInfo in the enumeration.
+ Initially invalid, valid after next() called for the first time.*/
+ internal long ProxPointer()
+ {
+ return termInfo.proxPointer;
+ }
+
+ /// <summary>Closes the enumeration to further activity, freeing resources. </summary>
+ protected override void Dispose(bool disposing)
+ {
+ input.Dispose();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/SegmentTermPositionVector.cs b/src/core/Index/SegmentTermPositionVector.cs
new file mode 100644
index 0000000..b430419
--- /dev/null
+++ b/src/core/Index/SegmentTermPositionVector.cs
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ class SegmentTermPositionVector:SegmentTermVector, TermPositionVector
+ {
+ protected internal int[][] positions;
+ protected internal TermVectorOffsetInfo[][] offsets;
+ public static readonly int[] EMPTY_TERM_POS = new int[0];
+
+ public SegmentTermPositionVector(System.String field, System.String[] terms, int[] termFreqs, int[][] positions, TermVectorOffsetInfo[][] offsets):base(field, terms, termFreqs)
+ {
+ this.offsets = offsets;
+ this.positions = positions;
+ }
+
+ /// <summary> Returns an array of TermVectorOffsetInfo in which the term is found.
+ ///
+ /// </summary>
+ /// <param name="index">The position in the array to get the offsets from
+ /// </param>
+ /// <returns> An array of TermVectorOffsetInfo objects or the empty list
+ /// </returns>
+ /// <seealso cref="Lucene.Net.Analysis.Token">
+ /// </seealso>
+ public virtual TermVectorOffsetInfo[] GetOffsets(int index)
+ {
+ TermVectorOffsetInfo[] result = TermVectorOffsetInfo.EMPTY_OFFSET_INFO;
+ if (offsets == null)
+ return null;
+ if (index >= 0 && index < offsets.Length)
+ {
+ result = offsets[index];
+ }
+ return result;
+ }
+
+ /// <summary> Returns an array of positions in which the term is found.
+ /// Terms are identified by the index at which their number appears in the
+ /// term String array obtained from the <c>indexOf</c> method.
+ /// </summary>
+ public virtual int[] GetTermPositions(int index)
+ {
+ int[] result = EMPTY_TERM_POS;
+ if (positions == null)
+ return null;
+ if (index >= 0 && index < positions.Length)
+ {
+ result = positions[index];
+ }
+
+ return result;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/SegmentTermPositions.cs b/src/core/Index/SegmentTermPositions.cs
new file mode 100644
index 0000000..7c5856c
--- /dev/null
+++ b/src/core/Index/SegmentTermPositions.cs
@@ -0,0 +1,226 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Support;
+using IndexInput = Lucene.Net.Store.IndexInput;
+
+namespace Lucene.Net.Index
+{
+ internal sealed class SegmentTermPositions : SegmentTermDocs, TermPositions
+ {
+ private IndexInput proxStream;
+ private int proxCount;
+ private int position;
+
+ // the current payload length
+ private int payloadLength;
+ // indicates whether the payload of the current position has
+ // been read from the proxStream yet
+ private bool needToLoadPayload;
+
+ // these variables are being used to remember information
+ // for a lazy skip
+ private long lazySkipPointer = - 1;
+ private int lazySkipProxCount = 0;
+
+ internal SegmentTermPositions(SegmentReader p):base(p)
+ {
+ this.proxStream = null; // the proxStream will be cloned lazily when nextPosition() is called for the first time
+ }
+
+ internal override void Seek(TermInfo ti, Term term)
+ {
+ base.Seek(ti, term);
+ if (ti != null)
+ lazySkipPointer = ti.proxPointer;
+
+ lazySkipProxCount = 0;
+ proxCount = 0;
+ payloadLength = 0;
+ needToLoadPayload = false;
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ base.Dispose(disposing);
+ if (proxStream != null)
+ proxStream.Dispose();
+ }
+
+ public int NextPosition()
+ {
+ if (currentFieldOmitTermFreqAndPositions)
+ return 0; // this field does not store term freqs, positions, or payloads
+ // perform lazy skips if necessary
+ LazySkip();
+ proxCount--;
+ return position += ReadDeltaPosition();
+ }
+
+ private int ReadDeltaPosition()
+ {
+ int delta = proxStream.ReadVInt();
+ if (currentFieldStoresPayloads)
+ {
+ // if the current field stores payloads then
+ // the position delta is shifted one bit to the left.
+ // if the LSB is set, then we have to read the current
+ // payload length
+ if ((delta & 1) != 0)
+ {
+ payloadLength = proxStream.ReadVInt();
+ }
+ delta = Number.URShift(delta, 1);
+ needToLoadPayload = true;
+ }
+ return delta;
+ }
+
+ protected internal override void SkippingDoc()
+ {
+ // we remember to skip a document lazily
+ lazySkipProxCount += freq;
+ }
+
+ public override bool Next()
+ {
+ // we remember to skip the remaining positions of the current
+ // document lazily
+ lazySkipProxCount += proxCount;
+
+ if (base.Next())
+ {
+ // run super
+ proxCount = freq; // note frequency
+ position = 0; // reset position
+ return true;
+ }
+ return false;
+ }
+
+ public override int Read(int[] docs, int[] freqs)
+ {
+ throw new System.NotSupportedException("TermPositions does not support processing multiple documents in one call. Use TermDocs instead.");
+ }
+
+
+ /// <summary>Called by super.skipTo(). </summary>
+ protected internal override void SkipProx(long proxPointer, int payloadLength)
+ {
+ // we save the pointer, we might have to skip there lazily
+ lazySkipPointer = proxPointer;
+ lazySkipProxCount = 0;
+ proxCount = 0;
+ this.payloadLength = payloadLength;
+ needToLoadPayload = false;
+ }
+
+ private void SkipPositions(int n)
+ {
+ System.Diagnostics.Debug.Assert(!currentFieldOmitTermFreqAndPositions);
+ for (int f = n; f > 0; f--)
+ {
+ // skip unread positions
+ ReadDeltaPosition();
+ SkipPayload();
+ }
+ }
+
+ private void SkipPayload()
+ {
+ if (needToLoadPayload && payloadLength > 0)
+ {
+ proxStream.Seek(proxStream.FilePointer + payloadLength);
+ }
+ needToLoadPayload = false;
+ }
+
+ // It is not always necessary to move the prox pointer
+ // to a new document after the freq pointer has been moved.
+ // Consider for example a phrase query with two terms:
+ // the freq pointer for term 1 has to move to document x
+ // to answer whether the term occurs in that document. But
+ // the positions have to be read only if term 2 also matches
+ // document x, to figure out whether term 1 and term 2 appear
+ // next to each other in document x and thus satisfy the query.
+ // So we move the prox pointer lazily to the document,
+ // i.e. only as soon as positions are actually requested.
+ private void LazySkip()
+ {
+ if (proxStream == null)
+ {
+ // clone lazily
+ proxStream = (IndexInput) parent.core.proxStream.Clone();
+ }
+
+ // we might have to skip the current payload
+ // if it was not read yet
+ SkipPayload();
+
+ if (lazySkipPointer != - 1)
+ {
+ proxStream.Seek(lazySkipPointer);
+ lazySkipPointer = - 1;
+ }
+
+ if (lazySkipProxCount != 0)
+ {
+ SkipPositions(lazySkipProxCount);
+ lazySkipProxCount = 0;
+ }
+ }
+
+ public int PayloadLength
+ {
+ get { return payloadLength; }
+ }
+
+ public byte[] GetPayload(byte[] data, int offset)
+ {
+ if (!needToLoadPayload)
+ {
+ throw new System.IO.IOException("Either no payload exists at this term position or an attempt was made to load it more than once.");
+ }
+
+ // read payloads lazily
+ byte[] retArray;
+ int retOffset;
+ if (data == null || data.Length - offset < payloadLength)
+ {
+ // the array is too small to store the payload data,
+ // so we allocate a new one
+ retArray = new byte[payloadLength];
+ retOffset = 0;
+ }
+ else
+ {
+ retArray = data;
+ retOffset = offset;
+ }
+ proxStream.ReadBytes(retArray, retOffset, payloadLength);
+ needToLoadPayload = false;
+ return retArray;
+ }
+
+ public bool IsPayloadAvailable
+ {
+ get { return needToLoadPayload && payloadLength > 0; }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/SegmentTermVector.cs b/src/core/Index/SegmentTermVector.cs
new file mode 100644
index 0000000..35bcc95
--- /dev/null
+++ b/src/core/Index/SegmentTermVector.cs
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+
+ class SegmentTermVector : ITermFreqVector
+ {
+ private System.String field;
+ private System.String[] terms;
+ private int[] termFreqs;
+
+ internal SegmentTermVector(System.String field, System.String[] terms, int[] termFreqs)
+ {
+ this.field = field;
+ this.terms = terms;
+ this.termFreqs = termFreqs;
+ }
+
+ /// <summary> </summary>
+ /// <value> The name of the field this vector is associated with </value>
+ public virtual string Field
+ {
+ get { return field; }
+ }
+
+ public override System.String ToString()
+ {
+ System.Text.StringBuilder sb = new System.Text.StringBuilder();
+ sb.Append('{');
+ sb.Append(field).Append(": ");
+ if (terms != null)
+ {
+ for (int i = 0; i < terms.Length; i++)
+ {
+ if (i > 0)
+ sb.Append(", ");
+ sb.Append(terms[i]).Append('/').Append(termFreqs[i]);
+ }
+ }
+ sb.Append('}');
+
+ return sb.ToString();
+ }
+
+ public virtual int Size
+ {
+ get { return terms == null ? 0 : terms.Length; }
+ }
+
+ public virtual System.String[] GetTerms()
+ {
+ return terms;
+ }
+
+ public virtual int[] GetTermFrequencies()
+ {
+ return termFreqs;
+ }
+
+ public virtual int IndexOf(System.String termText)
+ {
+ if (terms == null)
+ return - 1;
+ int res = System.Array.BinarySearch(terms, termText, System.StringComparer.Ordinal);
+ return res >= 0?res:- 1;
+ }
+
+ public virtual int[] IndexesOf(System.String[] termNumbers, int start, int len)
+ {
+ // TODO: there must be a more efficient way of doing this.
+ // At least, we could advance the lower bound of the terms array
+ // as we find valid indexes. Also, it might be possible to leverage
+ // this even more by starting in the middle of the termNumbers array
+ // and thus dividing the terms array maybe in half with each found index.
+ int[] res = new int[len];
+
+ for (int i = 0; i < len; i++)
+ {
+ res[i] = IndexOf(termNumbers[start + i]);
+ }
+ return res;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/SegmentWriteState.cs b/src/core/Index/SegmentWriteState.cs
new file mode 100644
index 0000000..09db9e1
--- /dev/null
+++ b/src/core/Index/SegmentWriteState.cs
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using Directory = Lucene.Net.Store.Directory;
+
+namespace Lucene.Net.Index
+{
+
+ class SegmentWriteState
+ {
+ internal DocumentsWriter docWriter;
+ internal Directory directory;
+ internal System.String segmentName;
+ internal System.String docStoreSegmentName;
+ internal int numDocs;
+ internal int termIndexInterval;
+ internal int numDocsInStore;
+ internal System.Collections.Generic.ICollection<string> flushedFiles;
+
+ public SegmentWriteState(DocumentsWriter docWriter, Directory directory, System.String segmentName, System.String docStoreSegmentName, int numDocs, int numDocsInStore, int termIndexInterval)
+ {
+ this.docWriter = docWriter;
+ this.directory = directory;
+ this.segmentName = segmentName;
+ this.docStoreSegmentName = docStoreSegmentName;
+ this.numDocs = numDocs;
+ this.numDocsInStore = numDocsInStore;
+ this.termIndexInterval = termIndexInterval;
+ flushedFiles = new System.Collections.Generic.HashSet<string>();
+ }
+
+ public virtual System.String SegmentFileName(System.String ext)
+ {
+ return segmentName + "." + ext;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/SerialMergeScheduler.cs b/src/core/Index/SerialMergeScheduler.cs
new file mode 100644
index 0000000..867ee8f
--- /dev/null
+++ b/src/core/Index/SerialMergeScheduler.cs
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary>A <see cref="MergeScheduler" /> that simply does each merge
+ /// sequentially, using the current thread.
+ /// </summary>
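+ /// <example>
+ /// A minimal usage sketch (illustrative only, not part of the original sources; assumes an
+ /// already configured <c>IndexWriter writer</c> and a <c>Document doc</c>):
+ /// <code>
+ /// // Run all merges on the calling thread instead of a concurrent scheduler.
+ /// writer.SetMergeScheduler(new SerialMergeScheduler());
+ /// writer.AddDocument(doc);
+ /// writer.Optimize(); // merges now happen inline, one at a time
+ /// </code>
+ /// </example>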
+ public class SerialMergeScheduler:MergeScheduler
+ {
+
+ /// <summary>Just do the merges in sequence. We do this
+ /// "synchronized" so that even if the application is using
+ /// multiple threads, only one merge may run at a time.
+ /// </summary>
+ public override void Merge(IndexWriter writer)
+ {
+ lock (this)
+ {
+ while (true)
+ {
+ MergePolicy.OneMerge merge = writer.GetNextMerge();
+ if (merge == null)
+ break;
+ writer.Merge(merge);
+ }
+ }
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/SnapshotDeletionPolicy.cs b/src/core/Index/SnapshotDeletionPolicy.cs
new file mode 100644
index 0000000..030b6b0
--- /dev/null
+++ b/src/core/Index/SnapshotDeletionPolicy.cs
@@ -0,0 +1,203 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using Directory = Lucene.Net.Store.Directory;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary>A <see cref="IndexDeletionPolicy" /> that wraps around any other
+ /// <see cref="IndexDeletionPolicy" /> and adds the ability to hold and
+ /// later release a single "snapshot" of an index. While
+ /// the snapshot is held, the <see cref="IndexWriter" /> will not
+ /// remove any files associated with it even if the index is
+ /// otherwise being actively, arbitrarily changed. Because
+ /// we wrap another arbitrary <see cref="IndexDeletionPolicy" />, this
+ /// gives you the freedom to continue using whatever <see cref="IndexDeletionPolicy" />
+ /// you would normally want to use with your
+ /// index. Note that you can re-use a single instance of
+ /// SnapshotDeletionPolicy across multiple writers as long
+ /// as they are against the same index Directory. Any
+ /// snapshot held when a writer is closed will "survive"
+ /// when the next writer is opened.
+ ///
+ /// <p/><b>WARNING</b>: This API is new and experimental and
+ /// may suddenly change.<p/>
+ /// </summary>
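+ /// <example>
+ /// A minimal backup sketch (illustrative only, not part of the original sources; assumes an
+ /// existing <c>Directory dir</c> and <c>Analyzer analyzer</c>, plus a hypothetical
+ /// <c>CopyFiles</c> helper of your own):
+ /// <code>
+ /// var policy = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
+ /// var writer = new IndexWriter(dir, analyzer, policy, IndexWriter.MaxFieldLength.UNLIMITED);
+ /// try
+ /// {
+ ///     IndexCommit commit = policy.Snapshot(); // files of this commit are now protected
+ ///     CopyFiles(dir, commit.FileNames);       // hypothetical helper copying the listed files
+ /// }
+ /// finally
+ /// {
+ ///     policy.Release();                       // let the deletion policy reclaim files again
+ /// }
+ /// </code>
+ /// </example>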
+
+ public class SnapshotDeletionPolicy : IndexDeletionPolicy
+ {
+
+ private IndexCommit lastCommit;
+ private IndexDeletionPolicy primary;
+ private System.String snapshot;
+
+ public SnapshotDeletionPolicy(IndexDeletionPolicy primary)
+ {
+ this.primary = primary;
+ }
+
+ public virtual void OnInit<T>(IList<T> commits) where T : IndexCommit
+ {
+ lock (this)
+ {
+ primary.OnInit(WrapCommits(commits));
+ lastCommit = commits[commits.Count - 1];
+ }
+ }
+
+ public virtual void OnCommit<T>(IList<T> commits) where T : IndexCommit
+ {
+ lock (this)
+ {
+ primary.OnCommit(WrapCommits(commits));
+ lastCommit = commits[commits.Count - 1];
+ }
+ }
+
+ /// <summary>Take a snapshot of the most recent commit to the
+ /// index. You must call release() to free this snapshot.
+ /// Note that while the snapshot is held, the files it
+ /// references will not be deleted, which will consume
+ /// additional disk space in your index. If you take a
+ /// snapshot at a particularly bad time (say just before
+ /// you call optimize()) then in the worst case this could
+ /// consume an extra 1X of your total index size, until
+ /// you release the snapshot.
+ /// </summary>
+ public virtual IndexCommit Snapshot()
+ {
+ lock (this)
+ {
+ if (lastCommit == null)
+ {
+ throw new System.SystemException("no index commits to snapshot !");
+ }
+
+ if (snapshot == null)
+ snapshot = lastCommit.SegmentsFileName;
+ else
+ throw new System.SystemException("snapshot is already set; please call release() first");
+ return lastCommit;
+ }
+ }
+
+ /// <summary>Release the currently held snapshot. </summary>
+ public virtual void Release()
+ {
+ lock (this)
+ {
+ if (snapshot != null)
+ snapshot = null;
+ else
+ throw new System.SystemException("snapshot was not set; please call snapshot() first");
+ }
+ }
+
+ private class MyCommitPoint : IndexCommit
+ {
+ private void InitBlock(SnapshotDeletionPolicy enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private SnapshotDeletionPolicy enclosingInstance;
+ public SnapshotDeletionPolicy Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ internal IndexCommit cp;
+ internal MyCommitPoint(SnapshotDeletionPolicy enclosingInstance, IndexCommit cp)
+ {
+ InitBlock(enclosingInstance);
+ this.cp = cp;
+ }
+
+ public override string ToString()
+ {
+ return "SnapshotDeletionPolicy.SnapshotCommitPoint(" + cp + ")";
+ }
+
+ public override string SegmentsFileName
+ {
+ get { return cp.SegmentsFileName; }
+ }
+
+ public override ICollection<string> FileNames
+ {
+ get { return cp.FileNames; }
+ }
+
+ public override Directory Directory
+ {
+ get { return cp.Directory; }
+ }
+
+ public override void Delete()
+ {
+ lock (Enclosing_Instance)
+ {
+ // Suppress the delete request if this commit point is
+ // our current snapshot.
+ if (Enclosing_Instance.snapshot == null || !Enclosing_Instance.snapshot.Equals(SegmentsFileName))
+ cp.Delete();
+ }
+ }
+
+ public override bool IsDeleted
+ {
+ get { return cp.IsDeleted; }
+ }
+
+ public override long Version
+ {
+ get { return cp.Version; }
+ }
+
+ public override long Generation
+ {
+ get { return cp.Generation; }
+ }
+
+ public override IDictionary<string, string> UserData
+ {
+ get { return cp.UserData; }
+ }
+
+ public override bool IsOptimized
+ {
+ get { return cp.IsOptimized; }
+ }
+ }
+
+ private IList<IndexCommit> WrapCommits<T>(IList<T> commits) where T : IndexCommit
+ {
+ int count = commits.Count;
+ var myCommits = new List<IndexCommit>(count);
+ for (int i = 0; i < count; i++)
+ {
+ myCommits.Add(new MyCommitPoint(this, commits[i]));
+ }
+ return myCommits;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/SortedTermVectorMapper.cs b/src/core/Index/SortedTermVectorMapper.cs
new file mode 100644
index 0000000..3d00b37
--- /dev/null
+++ b/src/core/Index/SortedTermVectorMapper.cs
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> Store a sorted collection of <see cref="Lucene.Net.Index.TermVectorEntry" />s. Collects all term information
+ /// into a single SortedSet.
+ /// <br/>
+ /// NOTE: This Mapper ignores all Field information for the Document. This means that if you are using offsets/positions you will not
+ /// know which Fields they correlate with.
+ /// <br/>
+ /// This is not thread-safe.
+ /// </summary>
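+ /// <example>
+ /// A minimal usage sketch (illustrative only, not part of the original sources; assumes an
+ /// open <c>IndexReader reader</c>, a vectorized "body" field, and the freq-sorted comparator
+ /// shipped with Lucene.Net):
+ /// <code>
+ /// var mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
+ /// reader.GetTermFreqVector(docNumber, "body", mapper);
+ /// foreach (TermVectorEntry entry in mapper.TermVectorEntrySet)
+ /// {
+ ///     System.Console.WriteLine(entry.Term + " x " + entry.Frequency);
+ /// }
+ /// </code>
+ /// </example>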
+ public class SortedTermVectorMapper:TermVectorMapper
+ {
+ private SortedSet<TermVectorEntry> currentSet;
+ private IDictionary<string, TermVectorEntry> termToTVE = new HashMap<string, TermVectorEntry>();
+ private bool storeOffsets;
+ private bool storePositions;
+ /// <summary> Stand-in name for the field in <see cref="TermVectorEntry" />.</summary>
+ public const System.String ALL = "_ALL_";
+
+ /// <summary> </summary>
+ /// <param name="comparator">A Comparator for sorting <see cref="TermVectorEntry" />s
+ /// </param>
+ public SortedTermVectorMapper(IComparer<TermVectorEntry> comparator)
+ : this(false, false, comparator)
+ {
+ }
+
+
+ public SortedTermVectorMapper(bool ignoringPositions, bool ignoringOffsets, IComparer<TermVectorEntry> comparator)
+ : base(ignoringPositions, ignoringOffsets)
+ {
+ currentSet = new SortedSet<TermVectorEntry>(comparator);
+ }
+
+ /// <summary> </summary>
+ /// <param name="term">The term to map
+ /// </param>
+ /// <param name="frequency">The frequency of the term
+ /// </param>
+ /// <param name="offsets">Offset information, may be null
+ /// </param>
+ /// <param name="positions">Position information, may be null
+ /// </param>
+ //We need to combine any previous mentions of the term
+ public override void Map(System.String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions)
+ {
+ TermVectorEntry entry = termToTVE[term];
+ if (entry == null)
+ {
+ entry = new TermVectorEntry(ALL, term, frequency, storeOffsets == true?offsets:null, storePositions == true?positions:null);
+ termToTVE[term] = entry;
+ currentSet.Add(entry);
+ }
+ else
+ {
+ entry.Frequency = entry.Frequency + frequency;
+ if (storeOffsets)
+ {
+ TermVectorOffsetInfo[] existingOffsets = entry.GetOffsets();
+ //A few diff. cases here: offsets is null, existing offsets is null, both are null, same for positions
+ if (existingOffsets != null && offsets != null && offsets.Length > 0)
+ {
+ //copy over the existing offsets
+ TermVectorOffsetInfo[] newOffsets = new TermVectorOffsetInfo[existingOffsets.Length + offsets.Length];
+ Array.Copy(existingOffsets, 0, newOffsets, 0, existingOffsets.Length);
+ Array.Copy(offsets, 0, newOffsets, existingOffsets.Length, offsets.Length);
+ entry.SetOffsets(newOffsets);
+ }
+ else if (existingOffsets == null && offsets != null && offsets.Length > 0)
+ {
+ entry.SetOffsets(offsets);
+ }
+ //else leave it alone
+ }
+ if (storePositions)
+ {
+ int[] existingPositions = entry.GetPositions();
+ if (existingPositions != null && positions != null && positions.Length > 0)
+ {
+ int[] newPositions = new int[existingPositions.Length + positions.Length];
+ Array.Copy(existingPositions, 0, newPositions, 0, existingPositions.Length);
+ Array.Copy(positions, 0, newPositions, existingPositions.Length, positions.Length);
+ entry.SetPositions(newPositions);
+ }
+ else if (existingPositions == null && positions != null && positions.Length > 0)
+ {
+ entry.SetPositions(positions);
+ }
+ }
+ }
+ }
+
+ public override void SetExpectations(System.String field, int numTerms, bool storeOffsets, bool storePositions)
+ {
+
+ this.storeOffsets = storeOffsets;
+ this.storePositions = storePositions;
+ }
+
+ /// <summary> The TermVectorEntrySet. A SortedSet of <see cref="TermVectorEntry" /> objects. Sort is by the comparator passed into the constructor.
+ /// <br/>
+ /// This set will be empty until after the mapping process takes place.
+ ///
+ /// </summary>
+ /// <value> The SortedSet of <see cref="TermVectorEntry" /> objects. </value>
+ public virtual SortedSet<TermVectorEntry> TermVectorEntrySet
+ {
+ get { return currentSet; }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/StaleReaderException.cs b/src/core/Index/StaleReaderException.cs
new file mode 100644
index 0000000..271070f
--- /dev/null
+++ b/src/core/Index/StaleReaderException.cs
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> This exception is thrown when an <see cref="IndexReader" />
+ /// tries to make changes to the index (via <see cref="IndexReader.DeleteDocument" />
+ ///, <see cref="IndexReader.UndeleteAll" />
+ /// or <see cref="IndexReader.SetNorm(int,string,float)" />)
+ /// but changes have already been committed to the index
+ /// since this reader was instantiated. When this happens
+ /// you must open a new reader on the current index to make
+ /// the changes.
+ /// </summary>
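+ /// <example>
+ /// A minimal recovery sketch (illustrative only, not part of the original sources; assumes an
+ /// existing <c>Directory dir</c> and a document id <c>docId</c>):
+ /// <code>
+ /// IndexReader reader = IndexReader.Open(dir, false);
+ /// try
+ /// {
+ ///     reader.DeleteDocument(docId);
+ /// }
+ /// catch (StaleReaderException)
+ /// {
+ ///     // The index changed since this reader was opened; reopen and retry.
+ ///     reader.Dispose();
+ ///     reader = IndexReader.Open(dir, false);
+ ///     reader.DeleteDocument(docId);
+ /// }
+ /// </code>
+ /// </example>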
+ [Serializable]
+ public class StaleReaderException:System.IO.IOException
+ {
+ public StaleReaderException(System.String message):base(message)
+ {
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/StoredFieldsWriter.cs b/src/core/Index/StoredFieldsWriter.cs
new file mode 100644
index 0000000..c4548b4
--- /dev/null
+++ b/src/core/Index/StoredFieldsWriter.cs
@@ -0,0 +1,266 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using RAMOutputStream = Lucene.Net.Store.RAMOutputStream;
+using ArrayUtil = Lucene.Net.Util.ArrayUtil;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary>This is a DocFieldConsumer that writes stored fields. </summary>
+ sealed class StoredFieldsWriter
+ {
+ private void InitBlock()
+ {
+ docFreeList = new PerDoc[1];
+ }
+
+ internal FieldsWriter fieldsWriter;
+ internal DocumentsWriter docWriter;
+ internal FieldInfos fieldInfos;
+ internal int lastDocID;
+
+ internal PerDoc[] docFreeList;
+ internal int freeCount;
+
+ public StoredFieldsWriter(DocumentsWriter docWriter, FieldInfos fieldInfos)
+ {
+ InitBlock();
+ this.docWriter = docWriter;
+ this.fieldInfos = fieldInfos;
+ }
+
+ public StoredFieldsWriterPerThread AddThread(DocumentsWriter.DocState docState)
+ {
+ return new StoredFieldsWriterPerThread(docState, this);
+ }
+
+ public void Flush(SegmentWriteState state)
+ {
+ lock (this)
+ {
+
+ if (state.numDocsInStore > 0)
+ {
+ // It's possible that all documents seen in this segment
+ // hit non-aborting exceptions, in which case we will
+ // not have yet init'd the FieldsWriter:
+ InitFieldsWriter();
+
+ // Fill fdx file to include any final docs that we
+ // skipped because they hit non-aborting exceptions
+ Fill(state.numDocsInStore - docWriter.DocStoreOffset);
+ }
+
+ if (fieldsWriter != null)
+ fieldsWriter.Flush();
+ }
+ }
+
+ private void InitFieldsWriter()
+ {
+ if (fieldsWriter == null)
+ {
+ System.String docStoreSegment = docWriter.DocStoreSegment;
+ if (docStoreSegment != null)
+ {
+ System.Diagnostics.Debug.Assert(docStoreSegment != null);
+ fieldsWriter = new FieldsWriter(docWriter.directory, docStoreSegment, fieldInfos);
+ docWriter.AddOpenFile(docStoreSegment + "." + IndexFileNames.FIELDS_EXTENSION);
+ docWriter.AddOpenFile(docStoreSegment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION);
+ lastDocID = 0;
+ }
+ }
+ }
+
+ public void CloseDocStore(SegmentWriteState state)
+ {
+ lock (this)
+ {
+ int inc = state.numDocsInStore - lastDocID;
+ if (inc > 0)
+ {
+ InitFieldsWriter();
+ Fill(state.numDocsInStore - docWriter.DocStoreOffset);
+ }
+
+ if (fieldsWriter != null)
+ {
+ fieldsWriter.Dispose();
+ fieldsWriter = null;
+ lastDocID = 0;
+ System.Diagnostics.Debug.Assert(state.docStoreSegmentName != null);
+ state.flushedFiles.Add(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_EXTENSION);
+ state.flushedFiles.Add(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION);
+
+ state.docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_EXTENSION);
+ state.docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION);
+
+ System.String fileName = state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;
+
+ if (4 + ((long) state.numDocsInStore) * 8 != state.directory.FileLength(fileName))
+ throw new System.SystemException("after flush: fdx size mismatch: " + state.numDocsInStore + " docs vs " + state.directory.FileLength(fileName) + " length in bytes of " + fileName + " file exists?=" + state.directory.FileExists(fileName));
+ }
+ }
+ }
+
+ internal int allocCount;
+
+ internal PerDoc GetPerDoc()
+ {
+ lock (this)
+ {
+ if (freeCount == 0)
+ {
+ allocCount++;
+ if (allocCount > docFreeList.Length)
+ {
+ // Grow our free list up front to make sure we have
+ // enough space to recycle all outstanding PerDoc
+ // instances
+ System.Diagnostics.Debug.Assert(allocCount == 1 + docFreeList.Length);
+ docFreeList = new PerDoc[ArrayUtil.GetNextSize(allocCount)];
+ }
+ return new PerDoc(this);
+ }
+ else
+ return docFreeList[--freeCount];
+ }
+ }
+
+ internal void Abort()
+ {
+ lock (this)
+ {
+ if (fieldsWriter != null)
+ {
+ try
+ {
+ fieldsWriter.Dispose();
+ }
+ catch (System.Exception)
+ {
+ }
+ fieldsWriter = null;
+ lastDocID = 0;
+ }
+ }
+ }
+
+ /// <summary>Fills in any hole in the docIDs </summary>
+ internal void Fill(int docID)
+ {
+ int docStoreOffset = docWriter.DocStoreOffset;
+
+ // We must "catch up" for all docs before us
+ // that had no stored fields:
+ int end = docID + docStoreOffset;
+ while (lastDocID < end)
+ {
+ fieldsWriter.SkipDocument();
+ lastDocID++;
+ }
+ }
+
+ internal void FinishDocument(PerDoc perDoc)
+ {
+ lock (this)
+ {
+ System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("StoredFieldsWriter.finishDocument start"));
+ InitFieldsWriter();
+
+ Fill(perDoc.docID);
+
+ // Append stored fields to the real FieldsWriter:
+ fieldsWriter.FlushDocument(perDoc.numStoredFields, perDoc.fdt);
+ lastDocID++;
+ perDoc.Reset();
+ Free(perDoc);
+ System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("StoredFieldsWriter.finishDocument end"));
+ }
+ }
+
+ public bool FreeRAM()
+ {
+ return false;
+ }
+
+ internal void Free(PerDoc perDoc)
+ {
+ lock (this)
+ {
+ System.Diagnostics.Debug.Assert(freeCount < docFreeList.Length);
+ System.Diagnostics.Debug.Assert(0 == perDoc.numStoredFields);
+ System.Diagnostics.Debug.Assert(0 == perDoc.fdt.Length);
+ System.Diagnostics.Debug.Assert(0 == perDoc.fdt.FilePointer);
+ docFreeList[freeCount++] = perDoc;
+ }
+ }
+
+ internal class PerDoc:DocumentsWriter.DocWriter
+ {
+ public PerDoc(StoredFieldsWriter enclosingInstance)
+ {
+ InitBlock(enclosingInstance);
+ }
+ private void InitBlock(StoredFieldsWriter enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ buffer = enclosingInstance.docWriter.NewPerDocBuffer();
+ fdt = new RAMOutputStream(buffer);
+ }
+ private StoredFieldsWriter enclosingInstance;
+ public StoredFieldsWriter Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+
+ internal DocumentsWriter.PerDocBuffer buffer;
+ internal RAMOutputStream fdt;
+ internal int numStoredFields;
+
+ internal void Reset()
+ {
+ fdt.Reset();
+ buffer.Recycle();
+ numStoredFields = 0;
+ }
+
+ public override void Abort()
+ {
+ Reset();
+ Enclosing_Instance.Free(this);
+ }
+
+ public override long SizeInBytes()
+ {
+ return buffer.SizeInBytes;
+ }
+
+ public override void Finish()
+ {
+ Enclosing_Instance.FinishDocument(this);
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/StoredFieldsWriterPerThread.cs b/src/core/Index/StoredFieldsWriterPerThread.cs
new file mode 100644
index 0000000..1784125
--- /dev/null
+++ b/src/core/Index/StoredFieldsWriterPerThread.cs
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Documents;
+using IndexOutput = Lucene.Net.Store.IndexOutput;
+
+namespace Lucene.Net.Index
+{
+
+ sealed class StoredFieldsWriterPerThread
+ {
+
+ internal FieldsWriter localFieldsWriter;
+ internal StoredFieldsWriter storedFieldsWriter;
+ internal DocumentsWriter.DocState docState;
+
+ internal StoredFieldsWriter.PerDoc doc;
+
+ public StoredFieldsWriterPerThread(DocumentsWriter.DocState docState, StoredFieldsWriter storedFieldsWriter)
+ {
+ this.storedFieldsWriter = storedFieldsWriter;
+ this.docState = docState;
+ localFieldsWriter = new FieldsWriter((IndexOutput) null, (IndexOutput) null, storedFieldsWriter.fieldInfos);
+ }
+
+ public void StartDocument()
+ {
+ if (doc != null)
+ {
+ // Only happens if previous document hit non-aborting
+ // exception while writing stored fields into
+ // localFieldsWriter:
+ doc.Reset();
+ doc.docID = docState.docID;
+ }
+ }
+
+ public void AddField(IFieldable field, FieldInfo fieldInfo)
+ {
+ if (doc == null)
+ {
+ doc = storedFieldsWriter.GetPerDoc();
+ doc.docID = docState.docID;
+ localFieldsWriter.SetFieldsStream(doc.fdt);
+ System.Diagnostics.Debug.Assert(doc.numStoredFields == 0, "doc.numStoredFields=" + doc.numStoredFields);
+ System.Diagnostics.Debug.Assert(0 == doc.fdt.Length);
+ System.Diagnostics.Debug.Assert(0 == doc.fdt.FilePointer);
+ }
+
+ localFieldsWriter.WriteField(fieldInfo, field);
+ System.Diagnostics.Debug.Assert(docState.TestPoint("StoredFieldsWriterPerThread.processFields.writeField"));
+ doc.numStoredFields++;
+ }
+
+ public DocumentsWriter.DocWriter FinishDocument()
+ {
+ // If there were any stored fields in this doc, doc will
+ // be non-null; else it's null.
+ try
+ {
+ return doc;
+ }
+ finally
+ {
+ doc = null;
+ }
+ }
+
+ public void Abort()
+ {
+ if (doc != null)
+ {
+ doc.Abort();
+ doc = null;
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/Term.cs b/src/core/Index/Term.cs
new file mode 100644
index 0000000..cac6b15
--- /dev/null
+++ b/src/core/Index/Term.cs
@@ -0,0 +1,168 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using StringHelper = Lucene.Net.Util.StringHelper;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary>A Term represents a word from text. This is the unit of search. It is
+ /// composed of two elements, the text of the word, as a string, and the name of
+ /// the field that the text occurred in, an interned string.
+ /// Note that terms may represent more than just words from text fields; they can
+ /// also represent things like dates, email addresses, urls, etc.
+ /// </summary>
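+ /// <example>
+ /// A minimal sketch (illustrative only, not part of the original sources):
+ /// <code>
+ /// Term t = new Term("title", "lucene");                          // field "title", text "lucene"
+ /// Term t2 = t.CreateTerm("search");                              // reuses the already interned field name
+ /// bool sameField = object.ReferenceEquals(t.Field, t2.Field);    // true: field names are interned
+ /// </code>
+ /// </example>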
+ [Serializable]
+ public sealed class Term : System.IComparable<Term>
+ {
+ internal System.String field;
+ internal System.String text;
+
+ /// <summary>Constructs a Term with the given field and text.
+ /// <p/>Note that a null field or null text value results in undefined
+ /// behavior for most Lucene APIs that accept a Term parameter.
+ /// </summary>
+ public Term(System.String fld, System.String txt)
+ {
+ field = StringHelper.Intern(fld);
+ text = txt;
+ }
+
+ /// <summary>Constructs a Term with the given field and empty text.
+ /// This serves two purposes: 1) reuse of a Term with the same field.
+ /// 2) pattern for a query.
+ ///
+ /// </summary>
+ /// <param name="fld">
+ /// </param>
+ public Term(System.String fld):this(fld, "", true)
+ {
+ }
+
+ internal Term(System.String fld, System.String txt, bool intern)
+ {
+ field = intern?StringHelper.Intern(fld):fld; // field names are interned
+ text = txt; // unless already known to be
+ }
+
+ /// <summary>Returns the field of this term, an interned string. The field indicates
+ /// the part of a document which this term came from.
+ /// </summary>
+ public string Field
+ {
+ get { return field; }
+ }
+
+ /// <summary>Returns the text of this term. In the case of words, this is simply the
+ /// text of the word. In the case of dates and other types, this is an
+ /// encoding of the object as a string.
+ /// </summary>
+ public string Text
+ {
+ get { return text; }
+ }
+
+ /// <summary> Optimized construction of new Terms by reusing same field as this Term
+ /// - avoids field.intern() overhead
+ /// </summary>
+ /// <param name="text">The text of the new term (field is implicitly same as this Term instance)
+ /// </param>
+ /// <returns> A new Term
+ /// </returns>
+ public Term CreateTerm(System.String text)
+ {
+ return new Term(field, text, false);
+ }
+
+ //@Override
+ public override bool Equals(System.Object obj)
+ {
+ if (this == obj)
+ return true;
+ if (obj == null)
+ return false;
+ if (GetType() != obj.GetType())
+ return false;
+ Term other = (Term) obj;
+ if (field == null)
+ {
+ if (other.field != null)
+ return false;
+ }
+ else if (!field.Equals(other.field))
+ return false;
+ if (text == null)
+ {
+ if (other.text != null)
+ return false;
+ }
+ else if (!text.Equals(other.text))
+ return false;
+ return true;
+ }
+
+ //@Override
+ public override int GetHashCode()
+ {
+ int prime = 31;
+ int result = 1;
+ result = prime*result + ((field == null) ? 0 : field.GetHashCode());
+ result = prime*result + ((text == null) ? 0 : text.GetHashCode());
+ return result;
+ }
+
+ /// <summary>Compares two terms, returning a negative integer if this
+ /// term belongs before the argument, zero if this term is equal to the
+ /// argument, and a positive integer if this term belongs after the argument.
+ /// The ordering of terms is first by field, then by text.
+ /// </summary>
+ public int CompareTo(Term other)
+ {
+ if ((System.Object) field == (System.Object) other.field)
+ // fields are interned
+ return String.CompareOrdinal(text, other.text);
+ else
+ return String.CompareOrdinal(field, other.field);
+ }
+
+ ///// <summary>Resets the field and text of a Term. </summary>
+ //internal void Set(System.String fld, System.String txt)
+ //{
+ // field = fld;
+ // text = txt;
+ //}
+
+ public override System.String ToString()
+ {
+ return field + ":" + text;
+ }
+
+// private void ReadObject(System.IO.BinaryReader in_Renamed)
+// {
+// in_Renamed.defaultReadObject();
+// field = StringHelper.Intern(field);
+// }
+
+ [System.Runtime.Serialization.OnDeserialized]
+ internal void OnDeserialized(System.Runtime.Serialization.StreamingContext context)
+ {
+ field = StringHelper.Intern(field);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/TermBuffer.cs b/src/core/Index/TermBuffer.cs
new file mode 100644
index 0000000..d97969c
--- /dev/null
+++ b/src/core/Index/TermBuffer.cs
@@ -0,0 +1,166 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Support;
+using IndexInput = Lucene.Net.Store.IndexInput;
+using UnicodeUtil = Lucene.Net.Util.UnicodeUtil;
+
+namespace Lucene.Net.Index
+{
+
+ sealed class TermBuffer : System.ICloneable
+ {
+
+ private System.String field;
+ private Term term; // cached
+ private bool preUTF8Strings; // true if strings are stored in modified UTF8 encoding (LUCENE-510)
+ private bool dirty; // true if text was set externally (ie not read via UTF8 bytes)
+
+ private UnicodeUtil.UTF16Result text = new UnicodeUtil.UTF16Result();
+ private UnicodeUtil.UTF8Result bytes = new UnicodeUtil.UTF8Result();
+
+ public int CompareTo(TermBuffer other)
+ {
+ if ((System.Object) field == (System.Object) other.field)
+ // fields are interned
+ return CompareChars(text.result, text.length, other.text.result, other.text.length);
+ else
+ return String.CompareOrdinal(field, other.field);
+ }
+
+ private static int CompareChars(char[] chars1, int len1, char[] chars2, int len2)
+ {
+ int end = len1 < len2?len1:len2;
+ for (int k = 0; k < end; k++)
+ {
+ char c1 = chars1[k];
+ char c2 = chars2[k];
+ if (c1 != c2)
+ {
+ return c1 - c2;
+ }
+ }
+ return len1 - len2;
+ }
+
+ /// <summary>Call this if the IndexInput passed to <see cref="Read" />
+ /// stores terms in the "modified UTF8" (pre LUCENE-510)
+ /// format.
+ /// </summary>
+ internal void SetPreUTF8Strings()
+ {
+ preUTF8Strings = true;
+ }
+
+ public void Read(IndexInput input, FieldInfos fieldInfos)
+ {
+ this.term = null; // invalidate cache
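+ // Terms are stored with prefix compression: the first VInt is the number of
+ // leading bytes (chars in the pre-LUCENE-510 format) shared with the previous
+ // term, the second VInt is the length of the suffix that follows.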
+ int start = input.ReadVInt();
+ int length = input.ReadVInt();
+ int totalLength = start + length;
+ if (preUTF8Strings)
+ {
+ text.SetLength(totalLength);
+ input.ReadChars(text.result, start, length);
+ }
+ else
+ {
+
+ if (dirty)
+ {
+ // Fully convert all bytes since bytes is dirty
+ UnicodeUtil.UTF16toUTF8(text.result, 0, text.length, bytes);
+ bytes.SetLength(totalLength);
+ input.ReadBytes(bytes.result, start, length);
+ UnicodeUtil.UTF8toUTF16(bytes.result, 0, totalLength, text);
+ dirty = false;
+ }
+ else
+ {
+ // Incrementally convert only the UTF8 bytes that are new:
+ bytes.SetLength(totalLength);
+ input.ReadBytes(bytes.result, start, length);
+ UnicodeUtil.UTF8toUTF16(bytes.result, start, length, text);
+ }
+ }
+ this.field = fieldInfos.FieldName(input.ReadVInt());
+ }
+
+ public void Set(Term term)
+ {
+ if (term == null)
+ {
+ Reset();
+ return ;
+ }
+ System.String termText = term.Text;
+ int termLen = termText.Length;
+ text.SetLength(termLen);
+ TextSupport.GetCharsFromString(termText, 0, termLen, text.result, 0);
+ dirty = true;
+ field = term.Field;
+ this.term = term;
+ }
+
+ public void Set(TermBuffer other)
+ {
+ text.CopyText(other.text);
+ dirty = true;
+ field = other.field;
+ term = other.term;
+ }
+
+ public void Reset()
+ {
+ field = null;
+ text.SetLength(0);
+ term = null;
+ dirty = true;
+ }
+
+ public Term ToTerm()
+ {
+ if (field == null)
+ // unset
+ return null;
+
+ if (term == null)
+ term = new Term(field, new System.String(text.result, 0, text.length), false);
+
+ return term;
+ }
+
+ public System.Object Clone()
+ {
+ TermBuffer clone = null;
+ try
+ {
+ clone = (TermBuffer) base.MemberwiseClone();
+ }
+ catch (System.Exception)
+ {
+ }
+
+ clone.dirty = true;
+ clone.bytes = new UnicodeUtil.UTF8Result();
+ clone.text = new UnicodeUtil.UTF16Result();
+ clone.text.CopyText(text);
+ return clone;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/TermDocs.cs b/src/core/Index/TermDocs.cs
new file mode 100644
index 0000000..0ffdc28
--- /dev/null
+++ b/src/core/Index/TermDocs.cs
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+ /// <summary>TermDocs provides an interface for enumerating &lt;document, frequency&gt;
+ /// pairs for a term. <p/> The document portion names each document containing
+ /// the term. Documents are indicated by number. The frequency portion gives
+ /// the number of times the term occurred in each document. <p/> The pairs are
+ /// ordered by document number.
+ /// </summary>
+ /// <seealso cref="IndexReader.TermDocs()" />
+ public interface TermDocs : IDisposable
+ {
+ /// <summary>Sets this to the data for a term.
+ /// The enumeration is reset to the start of the data for this term.
+ /// </summary>
+ void Seek(Term term);
+
+ /// <summary>Sets this to the data for the current term in a <see cref="TermEnum" />.
+ /// This may be optimized in some implementations.
+ /// </summary>
+ void Seek(TermEnum termEnum);
+
+ /// <summary>Returns the current document number. <p/> This is invalid until <see cref="Next()" />
+ /// is called for the first time.
+ /// </summary>
+ int Doc { get; }
+
+ /// <summary>Returns the frequency of the term within the current document. <p/> This
+ /// is invalid until <see cref="Next()" /> is called for the first time.
+ /// </summary>
+ int Freq { get; }
+
+ /// <summary>Moves to the next pair in the enumeration. <p/> Returns true iff there is
+ /// such a next pair in the enumeration.
+ /// </summary>
+ bool Next();
+
+ /// <summary>Attempts to read multiple entries from the enumeration, up to the length of
+ /// <i>docs</i>. Document numbers are stored in <i>docs</i>, and term
+ /// frequencies are stored in <i>freqs</i>. The <i>freqs</i> array must be as
+ /// long as the <i>docs</i> array.
+ ///
+ /// <p/>Returns the number of entries read. Zero is only returned when the
+ /// stream has been exhausted.
+ /// </summary>
+ int Read(int[] docs, int[] freqs);
+
+ /// <summary>Skips entries to the first beyond the current whose document number is
+ /// greater than or equal to <i>target</i>. <p/>Returns true iff there is such
+ /// an entry. <p/>Behaves as if written: <code>
+ /// boolean skipTo(int target) {
+ /// do {
+ /// if (!next())
+ /// return false;
+ /// } while (target > doc());
+ /// return true;
+ /// }
+ /// </code>
+ /// Some implementations are considerably more efficient than that.
+ /// </summary>
+ bool SkipTo(int target);
+
+ // TODO: Determine which release this will be removed from
+ /// <summary>Frees associated resources. </summary>
+ [Obsolete("Use Dispose() instead")]
+ void Close();
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/TermEnum.cs b/src/core/Index/TermEnum.cs
new file mode 100644
index 0000000..e663bd1
--- /dev/null
+++ b/src/core/Index/TermEnum.cs
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary>Abstract class for enumerating terms.
+ /// <p/>Term enumerations are always ordered by Term.compareTo(). Each term in
+ /// the enumeration is greater than all that precede it.
+ /// </summary>
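+ /// <example>
+ /// A minimal usage sketch (assumes an already opened IndexReader named <c>reader</c>):
+ /// <code>
+ /// TermEnum te = reader.Terms();
+ /// while (te.Next())
+ /// {
+ ///     Term term = te.Term;        // current term
+ ///     int docFreq = te.DocFreq(); // number of documents containing it
+ /// }
+ /// te.Dispose();
+ /// </code>
+ /// </example>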
+ public abstract class TermEnum : IDisposable
+ {
+ /// <summary>Increments the enumeration to the next element. True if one exists.</summary>
+ public abstract bool Next();
+
+ /// <summary>Returns the current Term in the enumeration.</summary>
+ public abstract Term Term { get; }
+
+ /// <summary>Returns the docFreq of the current Term in the enumeration.</summary>
+ public abstract int DocFreq();
+
+ /// <summary>Closes the enumeration to further activity, freeing resources. </summary>
+ [Obsolete("Use Dispose() instead")]
+ public void Close()
+ {
+ Dispose();
+ }
+
+ /// <summary>Closes the enumeration to further activity, freeing resources. </summary>
+ public void Dispose()
+ {
+ Dispose(true);
+ }
+
+ protected abstract void Dispose(bool disposing);
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/TermFreqVector.cs b/src/core/Index/TermFreqVector.cs
new file mode 100644
index 0000000..ea0eb43
--- /dev/null
+++ b/src/core/Index/TermFreqVector.cs
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Documents;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary>Provides access to the stored term vector of
+ /// a document field. The vector consists of the name of the field, an array of the terms that occur in the field of the
+ /// <see cref="Lucene.Net.Documents.Document" /> and a parallel array of frequencies. Thus, getTermFrequencies()[5] corresponds with the
+ /// frequency of getTerms()[5], assuming there are at least 5 terms in the Document.
+ /// </summary>
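+ /// <example>
+ /// A minimal usage sketch (assumes an IndexReader named <c>reader</c>, a document
+ /// id <c>docId</c>, and a hypothetical "body" field that was indexed with term vectors):
+ /// <code>
+ /// ITermFreqVector tfv = reader.GetTermFreqVector(docId, "body");
+ /// System.String[] terms = tfv.GetTerms();
+ /// int[] freqs = tfv.GetTermFrequencies();
+ /// // terms[i] occurred freqs[i] times in the "body" field of docId
+ /// </code>
+ /// </example>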
+ public interface ITermFreqVector
+ {
+ /// <summary> The <see cref="IFieldable" /> name. </summary>
+ /// <value> The name of the field this vector is associated with. </value>
+ string Field { get; }
+
+ /// <value> The number of terms in the term vector. </value>
+ int Size { get; }
+
+ /// <returns> An Array of term texts in ascending order.
+ /// </returns>
+ System.String[] GetTerms();
+
+
+ /// <summary>Array of term frequencies. Locations of the array correspond one to one
+ /// to the terms in the array obtained from <c>getTerms</c>
+ /// method. Each location in the array contains the number of times this
+ /// term occurs in the document or the document field.
+ /// </summary>
+ int[] GetTermFrequencies();
+
+
+ /// <summary>Return an index in the term numbers array returned from
+ /// <c>getTerms</c> at which the term with the specified
+ /// <c>term</c> appears. If this term does not appear in the array,
+ /// return -1.
+ /// </summary>
+ int IndexOf(System.String term);
+
+
+ /// <summary>Just like <c>indexOf(int)</c> but searches for a number of terms
+ /// at the same time. Returns an array that has the same size as the number
+ /// of terms searched for, each slot containing the result of searching for
+ /// that term number.
+ ///
+ /// </summary>
+ /// <param name="terms">array containing terms to look for
+ /// </param>
+ /// <param name="start">index in the array where the list of terms starts
+ /// </param>
+ /// <param name="len">the number of terms in the list
+ /// </param>
+ int[] IndexesOf(System.String[] terms, int start, int len);
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/TermInfo.cs b/src/core/Index/TermInfo.cs
new file mode 100644
index 0000000..5869f6f
--- /dev/null
+++ b/src/core/Index/TermInfo.cs
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary>A TermInfo is the record of information stored for a term.</summary>
+
+ sealed class TermInfo
+ {
+ /// <summary>The number of documents which contain the term. </summary>
+ internal int docFreq = 0;
+
+ internal long freqPointer = 0;
+ internal long proxPointer = 0;
+ internal int skipOffset;
+
+ internal TermInfo()
+ {
+ }
+
+ internal TermInfo(int df, long fp, long pp)
+ {
+ docFreq = df;
+ freqPointer = fp;
+ proxPointer = pp;
+ }
+
+ internal TermInfo(TermInfo ti)
+ {
+ docFreq = ti.docFreq;
+ freqPointer = ti.freqPointer;
+ proxPointer = ti.proxPointer;
+ skipOffset = ti.skipOffset;
+ }
+
+ internal void Set(int docFreq, long freqPointer, long proxPointer, int skipOffset)
+ {
+ this.docFreq = docFreq;
+ this.freqPointer = freqPointer;
+ this.proxPointer = proxPointer;
+ this.skipOffset = skipOffset;
+ }
+
+ internal void Set(TermInfo ti)
+ {
+ docFreq = ti.docFreq;
+ freqPointer = ti.freqPointer;
+ proxPointer = ti.proxPointer;
+ skipOffset = ti.skipOffset;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/TermInfosReader.cs b/src/core/Index/TermInfosReader.cs
new file mode 100644
index 0000000..044a7c3
--- /dev/null
+++ b/src/core/Index/TermInfosReader.cs
@@ -0,0 +1,325 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using Lucene.Net.Util.Cache;
+using Directory = Lucene.Net.Store.Directory;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary>This stores a monotonically increasing set of &lt;Term, TermInfo&gt; pairs in a
+ /// Directory. Pairs are accessed either by Term or by ordinal position in
+ /// the set.
+ /// </summary>
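+ /// <remarks>
+ /// When <c>indexDivisor != -1</c>, every (indexInterval * indexDivisor)'th term is kept
+ /// in the in-memory arrays <c>indexTerms</c>/<c>indexInfos</c>/<c>indexPointers</c>;
+ /// lookups binary-search this index and then scan the on-disk enumerator forward.
+ /// </remarks>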
+
+ sealed class TermInfosReader : IDisposable
+ {
+ private readonly Directory directory;
+ private readonly String segment;
+ private readonly FieldInfos fieldInfos;
+
+ private bool isDisposed;
+
+ private readonly CloseableThreadLocal<ThreadResources> threadResources = new CloseableThreadLocal<ThreadResources>();
+ private readonly SegmentTermEnum origEnum;
+ private readonly long size;
+
+ private readonly Term[] indexTerms;
+ private readonly TermInfo[] indexInfos;
+ private readonly long[] indexPointers;
+
+ private readonly int totalIndexInterval;
+
+ private const int DEFAULT_CACHE_SIZE = 1024;
+
+ /// <summary> Per-thread resources managed by ThreadLocal</summary>
+ private sealed class ThreadResources
+ {
+ internal SegmentTermEnum termEnum;
+
+ // Used for caching the least recently looked-up Terms
+ internal Cache<Term, TermInfo> termInfoCache;
+ }
+
+ internal TermInfosReader(Directory dir, System.String seg, FieldInfos fis, int readBufferSize, int indexDivisor)
+ {
+ bool success = false;
+
+ if (indexDivisor < 1 && indexDivisor != - 1)
+ {
+ throw new System.ArgumentException("indexDivisor must be -1 (don't load terms index) or greater than 0: got " + indexDivisor);
+ }
+
+ try
+ {
+ directory = dir;
+ segment = seg;
+ fieldInfos = fis;
+
+ origEnum = new SegmentTermEnum(directory.OpenInput(segment + "." + IndexFileNames.TERMS_EXTENSION, readBufferSize), fieldInfos, false);
+ size = origEnum.size;
+
+
+ if (indexDivisor != - 1)
+ {
+ // Load terms index
+ totalIndexInterval = origEnum.indexInterval * indexDivisor;
+ var indexEnum = new SegmentTermEnum(directory.OpenInput(segment + "." + IndexFileNames.TERMS_INDEX_EXTENSION, readBufferSize), fieldInfos, true);
+
+ try
+ {
+ int indexSize = 1 + ((int) indexEnum.size - 1) / indexDivisor; // otherwise read index
+
+ indexTerms = new Term[indexSize];
+ indexInfos = new TermInfo[indexSize];
+ indexPointers = new long[indexSize];
+
+ for (int i = 0; indexEnum.Next(); i++)
+ {
+ indexTerms[i] = indexEnum.Term;
+ indexInfos[i] = indexEnum.TermInfo();
+ indexPointers[i] = indexEnum.indexPointer;
+
+ for (int j = 1; j < indexDivisor; j++)
+ if (!indexEnum.Next())
+ break;
+ }
+ }
+ finally
+ {
+ indexEnum.Close();
+ }
+ }
+ else
+ {
+ // Do not load terms index:
+ totalIndexInterval = - 1;
+ indexTerms = null;
+ indexInfos = null;
+ indexPointers = null;
+ }
+ success = true;
+ }
+ finally
+ {
+ // With lock-less commits, it's entirely possible (and
+ // fine) to hit a FileNotFound exception above. In
+ // this case, we want to explicitly close any subset
+ // of things that were opened so that we don't have to
+ // wait for a GC to do so.
+ if (!success)
+ {
+ Dispose();
+ }
+ }
+ }
+
+ public int SkipInterval
+ {
+ get { return origEnum.skipInterval; }
+ }
+
+ public int MaxSkipLevels
+ {
+ get { return origEnum.maxSkipLevels; }
+ }
+
+ public void Dispose()
+ {
+ if (isDisposed) return;
+
+ // Move to protected method if class becomes unsealed
+ if (origEnum != null)
+ origEnum.Dispose();
+ threadResources.Dispose();
+
+ isDisposed = true;
+ }
+
+ /// <summary>Returns the number of term/value pairs in the set. </summary>
+ internal long Size()
+ {
+ return size;
+ }
+
+ private ThreadResources GetThreadResources()
+ {
+ ThreadResources resources = threadResources.Get();
+ if (resources == null)
+ {
+ resources = new ThreadResources
+ {termEnum = Terms(), termInfoCache = new SimpleLRUCache<Term, TermInfo>(DEFAULT_CACHE_SIZE)};
+ // Cache does not have to be thread-safe, it is only used by one thread at the same time
+ threadResources.Set(resources);
+ }
+ return resources;
+ }
+
+
+ /// <summary>Returns the offset of the greatest index entry which is less than or equal to term.</summary>
+ private int GetIndexOffset(Term term)
+ {
+ int lo = 0; // binary search indexTerms[]
+ int hi = indexTerms.Length - 1;
+
+ while (hi >= lo)
+ {
+ int mid = Number.URShift((lo + hi), 1);
+ int delta = term.CompareTo(indexTerms[mid]);
+ if (delta < 0)
+ hi = mid - 1;
+ else if (delta > 0)
+ lo = mid + 1;
+ else
+ return mid;
+ }
+ return hi;
+ }
+
+ private void SeekEnum(SegmentTermEnum enumerator, int indexOffset)
+ {
+ enumerator.Seek(indexPointers[indexOffset], ((long)indexOffset * totalIndexInterval) - 1, indexTerms[indexOffset], indexInfos[indexOffset]);
+ }
+
+ /// <summary>Returns the TermInfo for a Term in the set, or null. </summary>
+ internal TermInfo Get(Term term)
+ {
+ return Get(term, true);
+ }
+
+ /// <summary>Returns the TermInfo for a Term in the set, or null. </summary>
+ private TermInfo Get(Term term, bool useCache)
+ {
+ if (size == 0)
+ return null;
+
+ EnsureIndexIsRead();
+
+ TermInfo ti;
+ ThreadResources resources = GetThreadResources();
+ Cache<Term, TermInfo> cache = null;
+
+ if (useCache)
+ {
+ cache = resources.termInfoCache;
+ // check the cache first if the term was recently looked up
+ ti = cache.Get(term);
+ if (ti != null)
+ {
+ return ti;
+ }
+ }
+
+ // optimize sequential access: first try scanning cached enum w/o seeking
+ SegmentTermEnum enumerator = resources.termEnum;
+ if (enumerator.Term != null && ((enumerator.Prev() != null && term.CompareTo(enumerator.Prev()) > 0) || term.CompareTo(enumerator.Term) >= 0))
+ {
+ int enumOffset = (int) (enumerator.position / totalIndexInterval) + 1;
+ if (indexTerms.Length == enumOffset || term.CompareTo(indexTerms[enumOffset]) < 0)
+ {
+ // no need to seek
+
+ int numScans = enumerator.ScanTo(term);
+ if (enumerator.Term != null && term.CompareTo(enumerator.Term) == 0)
+ {
+ ti = enumerator.TermInfo();
+ if (cache != null && numScans > 1)
+ {
+ // we only want to put this TermInfo into the cache if
+ // scanEnum skipped more than one dictionary entry.
+ // This prevents RangeQueries or WildcardQueries from
+ // wiping out the cache when they iterate over large
+ // numbers of terms in order.
+ cache.Put(term, ti);
+ }
+ }
+ else
+ {
+ ti = null;
+ }
+
+ return ti;
+ }
+ }
+
+ // random-access: must seek
+ SeekEnum(enumerator, GetIndexOffset(term));
+ enumerator.ScanTo(term);
+ if (enumerator.Term != null && term.CompareTo(enumerator.Term) == 0)
+ {
+ ti = enumerator.TermInfo();
+ if (cache != null)
+ {
+ cache.Put(term, ti);
+ }
+ }
+ else
+ {
+ ti = null;
+ }
+ return ti;
+ }
+
+ private void EnsureIndexIsRead()
+ {
+ if (indexTerms == null)
+ {
+ throw new SystemException("terms index was not loaded when this reader was created");
+ }
+ }
+
+ /// <summary>Returns the position of a Term in the set or -1. </summary>
+ internal long GetPosition(Term term)
+ {
+ if (size == 0)
+ return - 1;
+
+ EnsureIndexIsRead();
+ int indexOffset = GetIndexOffset(term);
+
+ SegmentTermEnum enumerator = GetThreadResources().termEnum;
+ SeekEnum(enumerator, indexOffset);
+
+ while (term.CompareTo(enumerator.Term) > 0 && enumerator.Next())
+ {
+ }
+
+ if (term.CompareTo(enumerator.Term) == 0)
+ return enumerator.position;
+ else
+ return - 1;
+ }
+
+ /// <summary>Returns an enumeration of all the Terms and TermInfos in the set. </summary>
+ public SegmentTermEnum Terms()
+ {
+ return (SegmentTermEnum) origEnum.Clone();
+ }
+
+ /// <summary>Returns an enumeration of terms starting at or after the named term. </summary>
+ public SegmentTermEnum Terms(Term term)
+ {
+ // don't use the cache in this call because we want to reposition the
+ // enumeration
+ Get(term, false);
+ return (SegmentTermEnum) GetThreadResources().termEnum.Clone();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/TermInfosWriter.cs b/src/core/Index/TermInfosWriter.cs
new file mode 100644
index 0000000..c2512c3
--- /dev/null
+++ b/src/core/Index/TermInfosWriter.cs
@@ -0,0 +1,250 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using Directory = Lucene.Net.Store.Directory;
+using IndexOutput = Lucene.Net.Store.IndexOutput;
+using UnicodeUtil = Lucene.Net.Util.UnicodeUtil;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary>This stores a monotonically increasing set of &lt;Term, TermInfo&gt; pairs in a
+ /// Directory. A TermInfos can be written once, in order.
+ /// </summary>
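+ /// <remarks>
+ /// Two linked writers are used: one for the term dictionary (".tis") and one, flagged
+ /// with <c>isIndex</c>, for the sparser terms index (".tii") that receives every
+ /// <c>indexInterval</c>'th entry (see the constructor and <c>Add</c> below).
+ /// </remarks>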
+
+ sealed class TermInfosWriter : IDisposable
+ {
+ /// <summary>The file format version, a negative number. </summary>
+ public const int FORMAT = - 3;
+
+ // Changed strings to true utf8 with length-in-bytes not
+ // length-in-chars
+ public const int FORMAT_VERSION_UTF8_LENGTH_IN_BYTES = - 4;
+
+ // NOTE: always change this if you switch to a new format!
+ public static readonly int FORMAT_CURRENT = FORMAT_VERSION_UTF8_LENGTH_IN_BYTES;
+
+ private bool isDisposed;
+
+ private FieldInfos fieldInfos;
+ private IndexOutput output;
+ private TermInfo lastTi = new TermInfo();
+ private long size;
+
+ // TODO: the default values for these two parameters should be settable from
+ // IndexWriter. However, once that's done, folks will start setting them to
+ // ridiculous values and complaining that things don't work well, as with
+ // mergeFactor. So, let's wait until a number of folks find that alternate
+ // values work better. Note that both of these values are stored in the
+ // segment, so that it's safe to change these w/o rebuilding all indexes.
+
+ /// <summary>Expert: The fraction of terms in the "dictionary" which should be stored
+ /// in RAM. Smaller values use more memory, but make searching slightly
+ /// faster, while larger values use less memory and make searching slightly
+ /// slower. Searching is typically not dominated by dictionary lookup, so
+ /// tweaking this is rarely useful.
+ /// </summary>
+ internal int indexInterval = 128;
+
+ /// <summary>Expert: The fraction of <see cref="TermDocs" /> entries stored in skip tables,
+ /// used to accelerate <see cref="TermDocs.SkipTo(int)" />. Larger values result in
+ /// smaller indexes, greater acceleration, but fewer accelerable cases, while
+ /// smaller values result in bigger indexes, less acceleration and more
+ /// accelerable cases. More detailed experiments would be useful here.
+ /// </summary>
+ internal int skipInterval = 16;
+
+ /// <summary>Expert: The maximum number of skip levels. Smaller values result in
+ /// slightly smaller indexes, but slower skipping in big posting lists.
+ /// </summary>
+ internal int maxSkipLevels = 10;
+
+ private long lastIndexPointer;
+ private bool isIndex;
+ private byte[] lastTermBytes = new byte[10];
+ private int lastTermBytesLength = 0;
+ private int lastFieldNumber = - 1;
+
+ private TermInfosWriter other;
+ private UnicodeUtil.UTF8Result utf8Result = new UnicodeUtil.UTF8Result();
+
+ internal TermInfosWriter(Directory directory, System.String segment, FieldInfos fis, int interval)
+ {
+ Initialize(directory, segment, fis, interval, false);
+ other = new TermInfosWriter(directory, segment, fis, interval, true);
+ other.other = this;
+ }
+
+ private TermInfosWriter(Directory directory, System.String segment, FieldInfos fis, int interval, bool isIndex)
+ {
+ Initialize(directory, segment, fis, interval, isIndex);
+ }
+
+ private void Initialize(Directory directory, System.String segment, FieldInfos fis, int interval, bool isi)
+ {
+ indexInterval = interval;
+ fieldInfos = fis;
+ isIndex = isi;
+ output = directory.CreateOutput(segment + (isIndex?".tii":".tis"));
+ output.WriteInt(FORMAT_CURRENT); // write format
+ output.WriteLong(0); // leave space for size
+ output.WriteInt(indexInterval); // write indexInterval
+ output.WriteInt(skipInterval); // write skipInterval
+ output.WriteInt(maxSkipLevels); // write maxSkipLevels
+ System.Diagnostics.Debug.Assert(InitUTF16Results());
+ }
+
+ internal void Add(Term term, TermInfo ti)
+ {
+ UnicodeUtil.UTF16toUTF8(term.Text, 0, term.Text.Length, utf8Result);
+ Add(fieldInfos.FieldNumber(term.Field), utf8Result.result, utf8Result.length, ti);
+ }
+
+ // Currently used only by assert statements
+ internal UnicodeUtil.UTF16Result utf16Result1;
+ internal UnicodeUtil.UTF16Result utf16Result2;
+
+ // Currently used only by assert statements
+ private bool InitUTF16Results()
+ {
+ utf16Result1 = new UnicodeUtil.UTF16Result();
+ utf16Result2 = new UnicodeUtil.UTF16Result();
+ return true;
+ }
+
+ // Currently used only by assert statement
+ private int CompareToLastTerm(int fieldNumber, byte[] termBytes, int termBytesLength)
+ {
+
+ if (lastFieldNumber != fieldNumber)
+ {
+ int cmp = String.CompareOrdinal(fieldInfos.FieldName(lastFieldNumber), fieldInfos.FieldName(fieldNumber));
+ // If there is a field named "" (empty string) then we
+ // will get 0 on this comparison, yet, it's "OK". But
+ // it's not OK if two different field numbers map to
+ // the same name.
+ if (cmp != 0 || lastFieldNumber != - 1)
+ return cmp;
+ }
+
+ UnicodeUtil.UTF8toUTF16(lastTermBytes, 0, lastTermBytesLength, utf16Result1);
+ UnicodeUtil.UTF8toUTF16(termBytes, 0, termBytesLength, utf16Result2);
+ int len;
+ if (utf16Result1.length < utf16Result2.length)
+ len = utf16Result1.length;
+ else
+ len = utf16Result2.length;
+
+ for (int i = 0; i < len; i++)
+ {
+ char ch1 = utf16Result1.result[i];
+ char ch2 = utf16Result2.result[i];
+ if (ch1 != ch2)
+ return ch1 - ch2;
+ }
+ return utf16Result1.length - utf16Result2.length;
+ }
+
+ /// <summary>Adds a new &lt;&lt;fieldNumber, termBytes&gt;, TermInfo&gt; pair to the set.
+ /// Term must be lexicographically greater than all previous Terms added.
+ /// TermInfo pointers must be positive and greater than all previous.
+ /// </summary>
+ internal void Add(int fieldNumber, byte[] termBytes, int termBytesLength, TermInfo ti)
+ {
+
+ System.Diagnostics.Debug.Assert(CompareToLastTerm(fieldNumber, termBytes, termBytesLength) < 0 ||
+ (isIndex && termBytesLength == 0 && lastTermBytesLength == 0),
+ "Terms are out of order: field=" + fieldInfos.FieldName(fieldNumber) + " (number " + fieldNumber + ")" +
+ " lastField=" + fieldInfos.FieldName(lastFieldNumber) + " (number " + lastFieldNumber + ")" +
+ " text=" + System.Text.Encoding.UTF8.GetString(termBytes, 0, termBytesLength) + " lastText=" + System.Text.Encoding.UTF8.GetString(lastTermBytes, 0, lastTermBytesLength));
+
+ System.Diagnostics.Debug.Assert(ti.freqPointer >= lastTi.freqPointer, "freqPointer out of order (" + ti.freqPointer + " < " + lastTi.freqPointer + ")");
+ System.Diagnostics.Debug.Assert(ti.proxPointer >= lastTi.proxPointer, "proxPointer out of order (" + ti.proxPointer + " < " + lastTi.proxPointer + ")");
+
+ if (!isIndex && size % indexInterval == 0)
+ other.Add(lastFieldNumber, lastTermBytes, lastTermBytesLength, lastTi); // add an index term
+
+ WriteTerm(fieldNumber, termBytes, termBytesLength); // write term
+
+ output.WriteVInt(ti.docFreq); // write doc freq
+ output.WriteVLong(ti.freqPointer - lastTi.freqPointer); // write pointers
+ output.WriteVLong(ti.proxPointer - lastTi.proxPointer);
+
+ if (ti.docFreq >= skipInterval)
+ {
+ output.WriteVInt(ti.skipOffset);
+ }
+
+ if (isIndex)
+ {
+ output.WriteVLong(other.output.FilePointer - lastIndexPointer);
+ lastIndexPointer = other.output.FilePointer; // write pointer
+ }
+
+ lastFieldNumber = fieldNumber;
+ lastTi.Set(ti);
+ size++;
+ }
+
+ private void WriteTerm(int fieldNumber, byte[] termBytes, int termBytesLength)
+ {
+
+ // TODO: UTF16toUTF8 could tell us this prefix
+ // Compute prefix in common with last term:
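+ // (e.g. if the previous term bytes were "apple" and the new term is "apply",
+ // start ends up as 4 and only the suffix byte "y" is written as the delta)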
+ int start = 0;
+ int limit = termBytesLength < lastTermBytesLength?termBytesLength:lastTermBytesLength;
+ while (start < limit)
+ {
+ if (termBytes[start] != lastTermBytes[start])
+ break;
+ start++;
+ }
+
+ int length = termBytesLength - start;
+ output.WriteVInt(start); // write shared prefix length
+ output.WriteVInt(length); // write delta length
+ output.WriteBytes(termBytes, start, length); // write delta bytes
+ output.WriteVInt(fieldNumber); // write field num
+ if (lastTermBytes.Length < termBytesLength)
+ {
+ byte[] newArray = new byte[(int) (termBytesLength * 1.5)];
+ Array.Copy(lastTermBytes, 0, newArray, 0, start);
+ lastTermBytes = newArray;
+ }
+ Array.Copy(termBytes, start, lastTermBytes, start, length);
+ lastTermBytesLength = termBytesLength;
+ }
+
+ /// <summary>Called to complete TermInfos creation. </summary>
+ public void Dispose()
+ {
+ // Move to protected method if class becomes unsealed
+ if (isDisposed) return;
+
+ output.Seek(4); // write size after format
+ output.WriteLong(size);
+ output.Dispose();
+
+ if (!isIndex)
+ other.Dispose();
+
+ isDisposed = true;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/TermPositionVector.cs b/src/core/Index/TermPositionVector.cs
new file mode 100644
index 0000000..fe57719
--- /dev/null
+++ b/src/core/Index/TermPositionVector.cs
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary>Extends <c>TermFreqVector</c> to provide additional information about
+ /// positions in which each of the terms is found. A TermPositionVector does not
+ /// necessarily contain both positions and offsets, but at least one of these arrays exists.
+ /// </summary>
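+ /// <example>
+ /// A minimal usage sketch (assumes an IndexReader named <c>reader</c>, a document id
+ /// <c>docId</c>, and a hypothetical "body" field stored with positions/offsets):
+ /// <code>
+ /// var tpv = (TermPositionVector) reader.GetTermFreqVector(docId, "body");
+ /// int idx = tpv.IndexOf("lucene");              // index of the term, or -1
+ /// int[] positions = tpv.GetTermPositions(idx);  // null if positions were not stored
+ /// TermVectorOffsetInfo[] offsets = tpv.GetOffsets(idx);
+ /// </code>
+ /// </example>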
+ public interface TermPositionVector:ITermFreqVector
+ {
+
+ /// <summary>Returns an array of positions in which the term is found.
+ /// Terms are identified by the index at which their number appears in the
+ /// term String array obtained from the <c>indexOf</c> method.
+ /// May return null if positions have not been stored.
+ /// </summary>
+ int[] GetTermPositions(int index);
+
+ /// <summary> Returns an array of TermVectorOffsetInfo in which the term is found.
+ /// May return null if offsets have not been stored.
+ ///
+ /// </summary>
+ /// <seealso cref="Lucene.Net.Analysis.Token">
+ ///
+ /// </seealso>
+ /// <param name="index">The position in the array to get the offsets from
+ /// </param>
+ /// <returns> An array of TermVectorOffsetInfo objects or the empty list
+ /// </returns>
+ TermVectorOffsetInfo[] GetOffsets(int index);
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/TermPositions.cs b/src/core/Index/TermPositions.cs
new file mode 100644
index 0000000..ff58a5c
--- /dev/null
+++ b/src/core/Index/TermPositions.cs
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> TermPositions provides an interface for enumerating the &lt;document,
+ /// frequency, &lt;position&gt;* &gt; tuples for a term. <p/> The document and
+ /// frequency are the same as for a TermDocs. The positions portion lists the ordinal
+ /// positions of each occurrence of a term in a document.
+ ///
+ /// </summary>
+ /// <seealso cref="IndexReader.TermPositions()">
+ /// </seealso>
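+ /// <example>
+ /// A minimal usage sketch (assumes an already opened IndexReader named <c>reader</c>
+ /// and a hypothetical "body" field):
+ /// <code>
+ /// TermPositions tp = reader.TermPositions(new Term("body", "lucene"));
+ /// while (tp.Next())
+ /// {
+ ///     for (int i = 0; i &lt; tp.Freq; i++)
+ ///     {
+ ///         int position = tp.NextPosition();
+ ///     }
+ /// }
+ /// tp.Dispose();
+ /// </code>
+ /// </example>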
+
+ public interface TermPositions : TermDocs
+ {
+ /// <summary>Returns next position in the current document. It is an error to call
+ /// this more than <see cref="TermDocs.Freq()" /> times
+ /// without calling <see cref="TermDocs.Next()" /><p/> This is
+ /// invalid until <see cref="TermDocs.Next()" /> is called for
+ /// the first time.
+ /// </summary>
+ int NextPosition();
+
+ /// <summary> Returns the length of the payload at the current term position.
+ /// This is invalid until <see cref="NextPosition()" /> is called for
+ /// the first time.<br/>
+ /// </summary>
+ /// <value> length of the current payload in number of bytes </value>
+ int PayloadLength { get; }
+
+ /// <summary> Returns the payload data at the current term position.
+ /// This is invalid until <see cref="NextPosition()" /> is called for
+ /// the first time.
+ /// This method must not be called more than once after each call
+ /// of <see cref="NextPosition()" />. However, payloads are loaded lazily,
+ /// so if the payload data for the current position is not needed,
+ /// this method may not be called at all for performance reasons.<br/>
+ ///
+ /// </summary>
+ /// <param name="data">the array into which the data of this payload is to be
+ /// stored, if it is big enough; otherwise, a new byte[] array
+ /// is allocated for this purpose.
+ /// </param>
+ /// <param name="offset">the offset in the array into which the data of this payload
+ /// is to be stored.
+ /// </param>
+ /// <returns> a byte[] array containing the data of this payload
+ /// </returns>
+ /// <throws> IOException </throws>
+ byte[] GetPayload(byte[] data, int offset);
+
+ /// <summary> Checks if a payload can be loaded at this position.
+ /// <p/>
+ /// Payloads can only be loaded once per call to
+ /// <see cref="NextPosition()" />.
+ ///
+ /// </summary>
+ /// <value> true if there is a payload available at this position that can be loaded </value>
+ bool IsPayloadAvailable { get; }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/TermVectorEntry.cs b/src/core/Index/TermVectorEntry.cs
new file mode 100644
index 0000000..cfdc57d
--- /dev/null
+++ b/src/core/Index/TermVectorEntry.cs
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> Convenience class for holding TermVector information.</summary>
+ public class TermVectorEntry
+ {
+ private System.String field;
+ private System.String term;
+ private int frequency;
+ private TermVectorOffsetInfo[] offsets;
+ private int[] positions;
+
+
+ public TermVectorEntry()
+ {
+ }
+
+ public TermVectorEntry(System.String field, System.String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions)
+ {
+ this.field = field;
+ this.term = term;
+ this.frequency = frequency;
+ this.offsets = offsets;
+ this.positions = positions;
+ }
+
+
+ public virtual string Field
+ {
+ get { return field; }
+ }
+
+ public virtual int Frequency
+ {
+ get { return frequency; }
+ internal set { this.frequency = value; }
+ }
+
+ internal virtual void SetOffsets(TermVectorOffsetInfo[] value)
+ {
+ offsets = value;
+ }
+
+ public virtual TermVectorOffsetInfo[] GetOffsets()
+ {
+ return offsets;
+ }
+
+ internal virtual void SetPositions(int[] value)
+ {
+ positions = value;
+ }
+
+ public virtual int[] GetPositions()
+ {
+ return positions;
+ }
+
+ public virtual string Term
+ {
+ get { return term; }
+ }
+
+ public override bool Equals(System.Object o)
+ {
+ if (this == o)
+ return true;
+ if (o == null || GetType() != o.GetType())
+ return false;
+
+ TermVectorEntry that = (TermVectorEntry) o;
+
+ if (term != null?!term.Equals(that.term):that.term != null)
+ return false;
+
+ return true;
+ }
+
+ public override int GetHashCode()
+ {
+ return (term != null?term.GetHashCode():0);
+ }
+
+ public override System.String ToString()
+ {
+ return "TermVectorEntry{" + "field='" + field + '\'' + ", term='" + term + '\'' + ", frequency=" + frequency + '}';
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/TermVectorEntryFreqSortedComparator.cs b/src/core/Index/TermVectorEntryFreqSortedComparator.cs
new file mode 100644
index 0000000..e5de075
--- /dev/null
+++ b/src/core/Index/TermVectorEntryFreqSortedComparator.cs
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> Compares <see cref="Lucene.Net.Index.TermVectorEntry" />s first by frequency and then by
+ /// the term (case-sensitive)
+ ///
+ ///
+ /// </summary>
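+ /// <example>
+ /// A minimal usage sketch (assumes <c>entries</c> has been populated elsewhere,
+ /// e.g. by a custom <see cref="TermVectorMapper" />):
+ /// <code>
+ /// var entries = new System.Collections.Generic.List&lt;TermVectorEntry&gt;();
+ /// // ... populate entries ...
+ /// entries.Sort(new TermVectorEntryFreqSortedComparator());
+ /// // entries are now ordered from highest to lowest frequency
+ /// </code>
+ /// </example>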
+ public class TermVectorEntryFreqSortedComparator : System.Collections.Generic.IComparer<TermVectorEntry>
+ {
+ public virtual int Compare(TermVectorEntry entry, TermVectorEntry entry1)
+ {
+ int result = 0;
+ result = entry1.Frequency - entry.Frequency;
+ if (result == 0)
+ {
+ result = String.CompareOrdinal(entry.Term, entry1.Term);
+ if (result == 0)
+ {
+ result = String.CompareOrdinal(entry.Field, entry1.Field);
+ }
+ }
+ return result;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/TermVectorMapper.cs b/src/core/Index/TermVectorMapper.cs
new file mode 100644
index 0000000..5ff6603
--- /dev/null
+++ b/src/core/Index/TermVectorMapper.cs
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> The TermVectorMapper can be used to map Term Vectors into your own
+ /// structure instead of the parallel array structure used by
+ /// <see cref="Lucene.Net.Index.IndexReader.GetTermFreqVector(int,String)" />.
+ /// <p/>
+ /// It is up to the implementation to make sure it is thread-safe.
+ ///
+ ///
+ ///
+ /// </summary>
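+ /// <example>
+ /// A minimal mapper sketch (the counting logic is illustrative only):
+ /// <code>
+ /// class CountingMapper : TermVectorMapper
+ /// {
+ ///     public int TermCount;
+ ///
+ ///     public override void SetExpectations(string field, int numTerms,
+ ///                                          bool storeOffsets, bool storePositions)
+ ///     {
+ ///         // called once per field, before any Map() calls
+ ///     }
+ ///
+ ///     public override void Map(string term, int frequency,
+ ///                              TermVectorOffsetInfo[] offsets, int[] positions)
+ ///     {
+ ///         TermCount++; // collect into your own structure here
+ ///     }
+ /// }
+ /// </code>
+ /// </example>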
+ public abstract class TermVectorMapper
+ {
+
+ private bool ignoringPositions;
+ private bool ignoringOffsets;
+
+
+ protected internal TermVectorMapper()
+ {
+ }
+
+ /// <summary> </summary>
+ /// <param name="ignoringPositions">true if this mapper should tell Lucene to ignore positions even if they are stored
+ /// </param>
+ /// <param name="ignoringOffsets">similar to ignoringPositions
+ /// </param>
+ protected internal TermVectorMapper(bool ignoringPositions, bool ignoringOffsets)
+ {
+ this.ignoringPositions = ignoringPositions;
+ this.ignoringOffsets = ignoringOffsets;
+ }
+
+ /// <summary> Tell the mapper what to expect in regards to field, number of terms, offset and position storage.
+ /// This method will be called once before retrieving the vector for a field.
+ ///
+ /// This method will be called before <see cref="Map(String,int,TermVectorOffsetInfo[],int[])" />.
+ /// </summary>
+ /// <param name="field">The field the vector is for
+ /// </param>
+ /// <param name="numTerms">The number of terms that need to be mapped
+ /// </param>
+ /// <param name="storeOffsets">true if the mapper should expect offset information
+ /// </param>
+ /// <param name="storePositions">true if the mapper should expect positions info
+ /// </param>
+ public abstract void SetExpectations(System.String field, int numTerms, bool storeOffsets, bool storePositions);
+ /// <summary> Map the Term Vector information into your own structure</summary>
+ /// <param name="term">The term to add to the vector
+ /// </param>
+ /// <param name="frequency">The frequency of the term in the document
+ /// </param>
+ /// <param name="offsets">null if the offset is not specified, otherwise the offset into the field of the term
+ /// </param>
+ /// <param name="positions">null if the position is not specified, otherwise the position in the field of the term
+ /// </param>
+ public abstract void Map(System.String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions);
+
+ /// <summary> Indicate to Lucene that even if there are positions stored, this mapper is not interested in them and they
+ /// can be skipped over. Derived classes that want to ignore positions should pass true for ignoringPositions to the protected constructor. The default
+ /// is false, meaning positions will be loaded if they are stored.
+ /// </summary>
+ /// <value> false </value>
+ public virtual bool IsIgnoringPositions
+ {
+ get { return ignoringPositions; }
+ }
+
+ /// <summary> </summary>
+ /// <seealso cref="IsIgnoringPositions()"> Same principal as <see cref="IsIgnoringPositions()" />, but applied to offsets. false by default.
+ /// </seealso>
+ /// <value> false </value>
+ public virtual bool IsIgnoringOffsets
+ {
+ get { return ignoringOffsets; }
+ }
+
+ /// <summary> Passes down the index of the document whose term vector is currently being mapped,
+ /// once for each top level call to a term vector reader.
+ /// <p/>
+ /// Default implementation IGNORES the document number. Override if your implementation needs the document number.
+ /// <p/>
+ /// NOTE: Document numbers are internal to Lucene and subject to change depending on indexing operations.
+ ///
+ /// </summary>
+ /// <param name="documentNumber">index of document currently being mapped
+ /// </param>
+ public virtual void SetDocumentNumber(int documentNumber)
+ {
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/TermVectorOffsetInfo.cs b/src/core/Index/TermVectorOffsetInfo.cs
new file mode 100644
index 0000000..3e7f885
--- /dev/null
+++ b/src/core/Index/TermVectorOffsetInfo.cs
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using System.Runtime.InteropServices;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> The TermVectorOffsetInfo class holds information pertaining to a Term in a <see cref="Lucene.Net.Index.TermPositionVector" />'s
+ /// offset information. This offset information is the character offset as set during the Analysis phase (and thus may not be the actual offset in the
+ /// original content).
+ /// </summary>
+ [Serializable]
+ public struct TermVectorOffsetInfo : IEquatable<TermVectorOffsetInfo>
+ {
+ /// <summary> Convenience declaration when creating a <see cref="Lucene.Net.Index.TermPositionVector" /> that stores only position information.</summary>
+ [NonSerialized]
+ public static readonly TermVectorOffsetInfo[] EMPTY_OFFSET_INFO = new TermVectorOffsetInfo[0];
+
+ [NonSerialized]
+ public static readonly TermVectorOffsetInfo Null = new TermVectorOffsetInfo(int.MinValue, int.MinValue);
+
+ private int startOffset;
+ private int endOffset;
+
+ //public TermVectorOffsetInfo()
+ //{
+ //}
+
+ public TermVectorOffsetInfo(int startOffset, int endOffset)
+ {
+ this.endOffset = endOffset;
+ this.startOffset = startOffset;
+ }
+
+ /// <summary> The accessor for the ending offset for the term</summary>
+ /// <value> The offset </value>
+ public int EndOffset
+ {
+ get { return endOffset; }
+ set { this.endOffset = value; }
+ }
+
+ /// <summary> The accessor for the starting offset of the term.
+ ///
+ /// </summary>
+ /// <value> The offset </value>
+ public int StartOffset
+ {
+ get { return startOffset; }
+ set { this.startOffset = value; }
+ }
+
+ ///// <summary> Two TermVectorOffsetInfos are equals if both the start and end offsets are the same</summary>
+ ///// <param name="o">The comparison Object
+ ///// </param>
+ ///// <returns> true if both <see cref="GetStartOffset()" /> and <see cref="GetEndOffset()" /> are the same for both objects.
+ ///// </returns>
+ //public override bool Equals(System.Object o)
+ //{
+ // if (this == o)
+ // return true;
+ // if (!(o is TermVectorOffsetInfo))
+ // return false;
+
+ // TermVectorOffsetInfo termVectorOffsetInfo = (TermVectorOffsetInfo) o;
+
+ // if (endOffset != termVectorOffsetInfo.endOffset)
+ // return false;
+ // if (startOffset != termVectorOffsetInfo.startOffset)
+ // return false;
+
+ // return true;
+ //}
+
+ //public override int GetHashCode()
+ //{
+ // int result;
+ // result = startOffset;
+ // result = 29 * result + endOffset;
+ // return result;
+ //}
+
+
+ public bool Equals(TermVectorOffsetInfo other)
+ {
+ return startOffset == other.startOffset && endOffset == other.endOffset;
+ }
+
+ public override bool Equals(object obj)
+ {
+ if (ReferenceEquals(null, obj))
+ {
+ return EndOffset == int.MinValue && StartOffset == int.MinValue;
+ }
+ if (obj.GetType() != typeof (TermVectorOffsetInfo)) return false;
+ return Equals((TermVectorOffsetInfo) obj);
+ }
+
+ public override int GetHashCode()
+ {
+ unchecked
+ {
+ return (startOffset*397) ^ endOffset;
+ }
+ }
+
+ public static bool operator ==(TermVectorOffsetInfo left, object right)
+ {
+ return left.Equals(right);
+ }
+
+ public static bool operator !=(TermVectorOffsetInfo left, object right)
+ {
+ return !left.Equals(right);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/TermVectorsReader.cs b/src/core/Index/TermVectorsReader.cs
new file mode 100644
index 0000000..56cf764
--- /dev/null
+++ b/src/core/Index/TermVectorsReader.cs
@@ -0,0 +1,731 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using BufferedIndexInput = Lucene.Net.Store.BufferedIndexInput;
+using Directory = Lucene.Net.Store.Directory;
+using IndexInput = Lucene.Net.Store.IndexInput;
+
+namespace Lucene.Net.Index
+{
+ class TermVectorsReader : System.ICloneable, IDisposable
+ {
+
+ // NOTE: if you make a new format, it must be larger than
+ // the current format
+ internal const int FORMAT_VERSION = 2;
+
+ // Changes to speed up bulk merging of term vectors:
+ internal const int FORMAT_VERSION2 = 3;
+
+ // Changed strings to UTF8 with length-in-bytes not length-in-chars
+ internal const int FORMAT_UTF8_LENGTH_IN_BYTES = 4;
+
+ // NOTE: always change this if you switch to a new format!
+ internal static readonly int FORMAT_CURRENT = FORMAT_UTF8_LENGTH_IN_BYTES;
+
+ //The size in bytes that the FORMAT_VERSION will take up at the beginning of each file
+ internal const int FORMAT_SIZE = 4;
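+ // With the current format, each tvx entry after this 4-byte header is two longs
+ // (a pointer into tvd and a pointer into tvf), so for example document n (plus
+ // docStoreOffset) starts at byte 4 + 16 * n of the tvx file; see SeekTvx below.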
+
+ internal const byte STORE_POSITIONS_WITH_TERMVECTOR = (byte) (0x1);
+ internal const byte STORE_OFFSET_WITH_TERMVECTOR = (byte) (0x2);
+
+ private FieldInfos fieldInfos;
+
+ private IndexInput tvx;
+ private IndexInput tvd;
+ private IndexInput tvf;
+ private int size;
+ private int numTotalDocs;
+
+ // The docID offset where our docs begin in the index
+ // file. This will be 0 if we have our own private file.
+ private int docStoreOffset;
+
+ private int format;
+ private bool isDisposed;
+
+ internal TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos):this(d, segment, fieldInfos, BufferedIndexInput.BUFFER_SIZE)
+ {
+ }
+
+ internal TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos, int readBufferSize):this(d, segment, fieldInfos, readBufferSize, - 1, 0)
+ {
+ }
+
+ internal TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos, int readBufferSize, int docStoreOffset, int size)
+ {
+ bool success = false;
+
+ try
+ {
+ if (d.FileExists(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION))
+ {
+ tvx = d.OpenInput(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION, readBufferSize);
+ format = CheckValidFormat(tvx);
+ tvd = d.OpenInput(segment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION, readBufferSize);
+ int tvdFormat = CheckValidFormat(tvd);
+ tvf = d.OpenInput(segment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION, readBufferSize);
+ int tvfFormat = CheckValidFormat(tvf);
+
+ System.Diagnostics.Debug.Assert(format == tvdFormat);
+ System.Diagnostics.Debug.Assert(format == tvfFormat);
+
+ if (format >= FORMAT_VERSION2)
+ {
+ System.Diagnostics.Debug.Assert((tvx.Length() - FORMAT_SIZE) % 16 == 0);
+ numTotalDocs = (int)(tvx.Length() >> 4);
+ }
+ else
+ {
+ System.Diagnostics.Debug.Assert((tvx.Length() - FORMAT_SIZE) % 8 == 0);
+ numTotalDocs = (int)(tvx.Length() >> 3);
+ }
+
+ if (-1 == docStoreOffset)
+ {
+ this.docStoreOffset = 0;
+ this.size = numTotalDocs;
+ System.Diagnostics.Debug.Assert(size == 0 || numTotalDocs == size);
+ }
+ else
+ {
+ this.docStoreOffset = docStoreOffset;
+ this.size = size;
+ // Verify the file is long enough to hold all of our
+ // docs
+ System.Diagnostics.Debug.Assert(numTotalDocs >= size + docStoreOffset, "numTotalDocs=" + numTotalDocs + " size=" + size + " docStoreOffset=" + docStoreOffset);
+ }
+ }
+ else
+ {
+ // If all documents flushed in a segment had hit
+ // non-aborting exceptions, it's possible that
+ // FieldInfos.hasVectors returns true yet the term
+ // vector files don't exist.
+ format = 0;
+ }
+
+
+ this.fieldInfos = fieldInfos;
+ success = true;
+ }
+ finally
+ {
+ // With lock-less commits, it's entirely possible (and
+ // fine) to hit a FileNotFound exception above. In
+ // this case, we want to explicitly close any subset
+ // of things that were opened so that we don't have to
+ // wait for a GC to do so.
+ if (!success)
+ {
+ Dispose();
+ }
+ }
+ }
+
+ // Used for bulk copy when merging
+ internal virtual IndexInput GetTvdStream()
+ {
+ return tvd;
+ }
+
+ // Used for bulk copy when merging
+ internal virtual IndexInput GetTvfStream()
+ {
+ return tvf;
+ }
+
+ private void SeekTvx(int docNum)
+ {
+ if (format < FORMAT_VERSION2)
+ tvx.Seek((docNum + docStoreOffset) * 8L + FORMAT_SIZE);
+ else
+ tvx.Seek((docNum + docStoreOffset) * 16L + FORMAT_SIZE);
+ }
+
+ internal virtual bool CanReadRawDocs()
+ {
+ return format >= FORMAT_UTF8_LENGTH_IN_BYTES;
+ }
+
+ /// <summary>Retrieve the length (in bytes) of the tvd and tvf
+ /// entries for the next numDocs starting with
+ /// startDocID. This is used for bulk copying when
+ /// merging segments, if the field numbers are
+ /// congruent. Once this returns, the tvf &amp; tvd streams
+ /// are seeked to the startDocID.
+ /// </summary>
+ internal void RawDocs(int[] tvdLengths, int[] tvfLengths, int startDocID, int numDocs)
+ {
+
+ if (tvx == null)
+ {
+ for (int i = 0; i < tvdLengths.Length; i++)
+ {
+ tvdLengths[i] = 0;
+ }
+ for (int i = 0; i < tvfLengths.Length; i++)
+ {
+ tvfLengths[i] = 0;
+ }
+ return ;
+ }
+
+ // SegmentMerger calls canReadRawDocs() first and should
+ // not call us if that returns false.
+ if (format < FORMAT_VERSION2)
+ throw new System.SystemException("cannot read raw docs with older term vector formats");
+
+ SeekTvx(startDocID);
+
+ long tvdPosition = tvx.ReadLong();
+ tvd.Seek(tvdPosition);
+
+ long tvfPosition = tvx.ReadLong();
+ tvf.Seek(tvfPosition);
+
+ long lastTvdPosition = tvdPosition;
+ long lastTvfPosition = tvfPosition;
+
+ int count = 0;
+ while (count < numDocs)
+ {
+ int docID = docStoreOffset + startDocID + count + 1;
+ System.Diagnostics.Debug.Assert(docID <= numTotalDocs);
+ if (docID < numTotalDocs)
+ {
+ tvdPosition = tvx.ReadLong();
+ tvfPosition = tvx.ReadLong();
+ }
+ else
+ {
+ tvdPosition = tvd.Length();
+ tvfPosition = tvf.Length();
+ System.Diagnostics.Debug.Assert(count == numDocs - 1);
+ }
+ tvdLengths[count] = (int) (tvdPosition - lastTvdPosition);
+ tvfLengths[count] = (int) (tvfPosition - lastTvfPosition);
+ count++;
+ lastTvdPosition = tvdPosition;
+ lastTvfPosition = tvfPosition;
+ }
+ }
+
+ private int CheckValidFormat(IndexInput in_Renamed)
+ {
+ int format = in_Renamed.ReadInt();
+ if (format > FORMAT_CURRENT)
+ {
+ throw new CorruptIndexException("Incompatible format version: " + format + " expected " + FORMAT_CURRENT + " or less");
+ }
+ return format;
+ }
+
+ public void Dispose()
+ {
+ Dispose(true);
+ }
+
+ protected virtual void Dispose(bool disposing)
+ {
+ if (isDisposed) return;
+
+ if (disposing)
+ {
+ // make every effort to close up. Keep the first exception
+ // and throw it as a new one.
+ System.IO.IOException keep = null;
+ if (tvx != null)
+ try
+ {
+ tvx.Close();
+ }
+ catch (System.IO.IOException e)
+ {
+ if (keep == null)
+ keep = e;
+ }
+ if (tvd != null)
+ try
+ {
+ tvd.Close();
+ }
+ catch (System.IO.IOException e)
+ {
+ if (keep == null)
+ keep = e;
+ }
+ if (tvf != null)
+ try
+ {
+ tvf.Close();
+ }
+ catch (System.IO.IOException e)
+ {
+ if (keep == null)
+ keep = e;
+ }
+ if (keep != null)
+ {
+ throw new System.IO.IOException(keep.StackTrace);
+ }
+ }
+
+ isDisposed = true;
+ }
+
+ /// <summary> </summary>
+ /// <returns> The number of documents in the reader
+ /// </returns>
+ internal virtual int Size()
+ {
+ return size;
+ }
+
+ public virtual void Get(int docNum, System.String field, TermVectorMapper mapper)
+ {
+ if (tvx != null)
+ {
+ int fieldNumber = fieldInfos.FieldNumber(field);
+ //We need to account for the FORMAT_SIZE when seeking in the tvx.
+ //We don't need to do this in other seeks because we already have the
+ //file pointer that was written in another file.
+ SeekTvx(docNum);
+ //System.out.println("TVX Pointer: " + tvx.getFilePointer());
+ long tvdPosition = tvx.ReadLong();
+
+ tvd.Seek(tvdPosition);
+ int fieldCount = tvd.ReadVInt();
+ //System.out.println("Num Fields: " + fieldCount);
+ // There are only a few fields per document. We opt for a full scan
+ // rather than requiring that they be ordered. We need to read through
+ // all of the fields anyway to get to the tvf pointers.
+ int number = 0;
+ int found = - 1;
+ for (int i = 0; i < fieldCount; i++)
+ {
+ if (format >= FORMAT_VERSION)
+ number = tvd.ReadVInt();
+ else
+ number += tvd.ReadVInt();
+
+ if (number == fieldNumber)
+ found = i;
+ }
+
+ // The field, although valid in the segment, may not appear in this
+ // document; only read the vector if it was found
+ if (found != - 1)
+ {
+ // Compute position in the tvf file
+ long position;
+ if (format >= FORMAT_VERSION2)
+ position = tvx.ReadLong();
+ else
+ position = tvd.ReadVLong();
+ for (int i = 1; i <= found; i++)
+ position += tvd.ReadVLong();
+
+ mapper.SetDocumentNumber(docNum);
+ ReadTermVector(field, position, mapper);
+ }
+ else
+ {
+ //System.out.println("Fieldable not found");
+ }
+ }
+ else
+ {
+ //System.out.println("No tvx file");
+ }
+ }
+
+
+
+ /// <summary> Retrieve the term vector for the given document and field</summary>
+ /// <param name="docNum">The document number to retrieve the vector for
+ /// </param>
+ /// <param name="field">The field within the document to retrieve
+ /// </param>
+ /// <returns> The TermFreqVector for the document and field or null if there is no termVector for this field.
+ /// </returns>
+ /// <throws> IOException if there is an error reading the term vector files </throws>
+ public /*internal*/ virtual ITermFreqVector Get(int docNum, System.String field)
+ {
+ // Check if no term vectors are available for this segment at all
+ ParallelArrayTermVectorMapper mapper = new ParallelArrayTermVectorMapper();
+ Get(docNum, field, mapper);
+
+ return mapper.MaterializeVector();
+ }
+
+ // Reads the String[] fields; you have to pre-seek tvd to
+ // the right point
+ private System.String[] ReadFields(int fieldCount)
+ {
+ int number = 0;
+ System.String[] fields = new System.String[fieldCount];
+
+ for (int i = 0; i < fieldCount; i++)
+ {
+ if (format >= FORMAT_VERSION)
+ number = tvd.ReadVInt();
+ else
+ number += tvd.ReadVInt();
+
+ fields[i] = fieldInfos.FieldName(number);
+ }
+
+ return fields;
+ }
+
+ // Reads the long[] offsets into TVF; you have to pre-seek
+ // tvx/tvd to the right point
+ private long[] ReadTvfPointers(int fieldCount)
+ {
+ // Compute position in the tvf file
+ long position;
+ if (format >= FORMAT_VERSION2)
+ position = tvx.ReadLong();
+ else
+ position = tvd.ReadVLong();
+
+ long[] tvfPointers = new long[fieldCount];
+ tvfPointers[0] = position;
+
+ for (int i = 1; i < fieldCount; i++)
+ {
+ position += tvd.ReadVLong();
+ tvfPointers[i] = position;
+ }
+
+ return tvfPointers;
+ }
+
+ /// <summary> Return all term vectors stored for this document, or null if they could not be read in.</summary>
+ /// <param name="docNum">The document number to retrieve the vector for
+ /// </param>
+ /// <returns> All term frequency vectors
+ /// </returns>
+ /// <throws> IOException if there is an error reading the term vector files </throws>
+ public /*internal*/ virtual ITermFreqVector[] Get(int docNum)
+ {
+ ITermFreqVector[] result = null;
+ if (tvx != null)
+ {
+ //We need to offset by FORMAT_SIZE and the doc store offset; SeekTvx handles both
+ SeekTvx(docNum);
+ long tvdPosition = tvx.ReadLong();
+
+ tvd.Seek(tvdPosition);
+ int fieldCount = tvd.ReadVInt();
+
+ // No fields are vectorized for this document
+ if (fieldCount != 0)
+ {
+ System.String[] fields = ReadFields(fieldCount);
+ long[] tvfPointers = ReadTvfPointers(fieldCount);
+ result = ReadTermVectors(docNum, fields, tvfPointers);
+ }
+ }
+ else
+ {
+ //System.out.println("No tvx file");
+ }
+ return result;
+ }
+
+ public virtual void Get(int docNumber, TermVectorMapper mapper)
+ {
+ // Check if no term vectors are available for this segment at all
+ if (tvx != null)
+ {
+ //We need to offset by FORMAT_SIZE and the doc store offset; SeekTvx handles both
+
+ SeekTvx(docNumber);
+ long tvdPosition = tvx.ReadLong();
+
+ tvd.Seek(tvdPosition);
+ int fieldCount = tvd.ReadVInt();
+
+ // No fields are vectorized for this document
+ if (fieldCount != 0)
+ {
+ System.String[] fields = ReadFields(fieldCount);
+ long[] tvfPointers = ReadTvfPointers(fieldCount);
+ mapper.SetDocumentNumber(docNumber);
+ ReadTermVectors(fields, tvfPointers, mapper);
+ }
+ }
+ else
+ {
+ //System.out.println("No tvx file");
+ }
+ }
+
+
+ private SegmentTermVector[] ReadTermVectors(int docNum, System.String[] fields, long[] tvfPointers)
+ {
+ SegmentTermVector[] res = new SegmentTermVector[fields.Length];
+ for (int i = 0; i < fields.Length; i++)
+ {
+ var mapper = new ParallelArrayTermVectorMapper();
+ mapper.SetDocumentNumber(docNum);
+ ReadTermVector(fields[i], tvfPointers[i], mapper);
+ res[i] = (SegmentTermVector) mapper.MaterializeVector();
+ }
+ return res;
+ }
+
+ private void ReadTermVectors(System.String[] fields, long[] tvfPointers, TermVectorMapper mapper)
+ {
+ for (int i = 0; i < fields.Length; i++)
+ {
+ ReadTermVector(fields[i], tvfPointers[i], mapper);
+ }
+ }
+
+
+ /// <summary> </summary>
+ /// <param name="field">The field to read in
+ /// </param>
+ /// <param name="tvfPointer">The pointer within the tvf file where we should start reading
+ /// </param>
+ /// <param name="mapper">The mapper used to map the TermVector
+ /// </param>
+ /// <throws> IOException </throws>
+ private void ReadTermVector(System.String field, long tvfPointer, TermVectorMapper mapper)
+ {
+
+ // Now read the data from specified position
+ //We don't need to offset by the FORMAT here since the pointer already includes the offset
+ tvf.Seek(tvfPointer);
+
+ int numTerms = tvf.ReadVInt();
+ //System.out.println("Num Terms: " + numTerms);
+ // If there are no terms there is nothing to map - however, this should never occur!
+ if (numTerms == 0)
+ return ;
+
+ bool storePositions;
+ bool storeOffsets;
+
+ if (format >= FORMAT_VERSION)
+ {
+ byte bits = tvf.ReadByte();
+ storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
+ storeOffsets = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
+ }
+ else
+ {
+ tvf.ReadVInt();
+ storePositions = false;
+ storeOffsets = false;
+ }
+ mapper.SetExpectations(field, numTerms, storeOffsets, storePositions);
+ int start = 0;
+ int deltaLength = 0;
+ int totalLength = 0;
+ byte[] byteBuffer;
+ char[] charBuffer;
+ bool preUTF8 = format < FORMAT_UTF8_LENGTH_IN_BYTES;
+
+ // init the buffers
+ if (preUTF8)
+ {
+ charBuffer = new char[10];
+ byteBuffer = null;
+ }
+ else
+ {
+ charBuffer = null;
+ byteBuffer = new byte[20];
+ }
+
+ for (int i = 0; i < numTerms; i++)
+ {
+ start = tvf.ReadVInt();
+ deltaLength = tvf.ReadVInt();
+ totalLength = start + deltaLength;
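+ // e.g. if the previous term was "apple" and this term is "apply", the file holds
+ // start = 4 (shared prefix length) and deltaLength = 1, so only the trailing 'y'
+ // is stored and totalLength = 5 is the length of the full term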
+
+ System.String term;
+
+ if (preUTF8)
+ {
+ // Term stored as java chars
+ if (charBuffer.Length < totalLength)
+ {
+ char[] newCharBuffer = new char[(int) (1.5 * totalLength)];
+ Array.Copy(charBuffer, 0, newCharBuffer, 0, start);
+ charBuffer = newCharBuffer;
+ }
+ tvf.ReadChars(charBuffer, start, deltaLength);
+ term = new System.String(charBuffer, 0, totalLength);
+ }
+ else
+ {
+ // Term stored as utf8 bytes
+ if (byteBuffer.Length < totalLength)
+ {
+ byte[] newByteBuffer = new byte[(int) (1.5 * totalLength)];
+ Array.Copy(byteBuffer, 0, newByteBuffer, 0, start);
+ byteBuffer = newByteBuffer;
+ }
+ tvf.ReadBytes(byteBuffer, start, deltaLength);
+ term = System.Text.Encoding.UTF8.GetString(byteBuffer, 0, totalLength);
+ }
+ int freq = tvf.ReadVInt();
+ int[] positions = null;
+ if (storePositions)
+ {
+ //read in the positions
+ //does the mapper even care about positions?
+ if (mapper.IsIgnoringPositions == false)
+ {
+ positions = new int[freq];
+ int prevPosition = 0;
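+ // positions are stored as deltas from the previous position, so e.g.
+ // stored gaps 3, 2, 7 decode to absolute positions 3, 5, 12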
+ for (int j = 0; j < freq; j++)
+ {
+ positions[j] = prevPosition + tvf.ReadVInt();
+ prevPosition = positions[j];
+ }
+ }
+ else
+ {
+ //we need to skip over the positions. Since these are VInts there is no
+ //way to know in advance how far to skip, so read and discard them
+ for (int j = 0; j < freq; j++)
+ {
+ tvf.ReadVInt();
+ }
+ }
+ }
+ TermVectorOffsetInfo[] offsets = null;
+ if (storeOffsets)
+ {
+ //does the mapper even care about offsets?
+ if (mapper.IsIgnoringOffsets == false)
+ {
+ offsets = new TermVectorOffsetInfo[freq];
+ int prevOffset = 0;
+ for (int j = 0; j < freq; j++)
+ {
+ int startOffset = prevOffset + tvf.ReadVInt();
+ int endOffset = startOffset + tvf.ReadVInt();
+ offsets[j] = new TermVectorOffsetInfo(startOffset, endOffset);
+ prevOffset = endOffset;
+ }
+ }
+ else
+ {
+ for (int j = 0; j < freq; j++)
+ {
+ tvf.ReadVInt();
+ tvf.ReadVInt();
+ }
+ }
+ }
+ mapper.Map(term, freq, offsets, positions);
+ }
+ }
+
+ public virtual System.Object Clone()
+ {
+
+ TermVectorsReader clone = (TermVectorsReader) base.MemberwiseClone();
+
+ // These are null when a TermVectorsReader was created
+ // on a segment that did not have term vectors saved
+ if (tvx != null && tvd != null && tvf != null)
+ {
+ clone.tvx = (IndexInput) tvx.Clone();
+ clone.tvd = (IndexInput) tvd.Clone();
+ clone.tvf = (IndexInput) tvf.Clone();
+ }
+
+ return clone;
+ }
+ }
+
+
+ /// <summary> Models the existing parallel array structure</summary>
+ class ParallelArrayTermVectorMapper:TermVectorMapper
+ {
+
+ private System.String[] terms;
+ private int[] termFreqs;
+ private int[][] positions;
+ private TermVectorOffsetInfo[][] offsets;
+ private int currentPosition;
+ private bool storingOffsets;
+ private bool storingPositions;
+ private System.String field;
+
+ public override void SetExpectations(System.String field, int numTerms, bool storeOffsets, bool storePositions)
+ {
+ this.field = field;
+ terms = new System.String[numTerms];
+ termFreqs = new int[numTerms];
+ this.storingOffsets = storeOffsets;
+ this.storingPositions = storePositions;
+ if (storePositions)
+ this.positions = new int[numTerms][];
+ if (storeOffsets)
+ this.offsets = new TermVectorOffsetInfo[numTerms][];
+ }
+
+ public override void Map(System.String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions)
+ {
+ terms[currentPosition] = term;
+ termFreqs[currentPosition] = frequency;
+ if (storingOffsets)
+ {
+ this.offsets[currentPosition] = offsets;
+ }
+ if (storingPositions)
+ {
+ this.positions[currentPosition] = positions;
+ }
+ currentPosition++;
+ }
+
+ /// <summary> Construct the vector</summary>
+ /// <returns> The <see cref="ITermFreqVector" /> based on the mappings.
+ /// </returns>
+ public virtual ITermFreqVector MaterializeVector()
+ {
+ SegmentTermVector tv = null;
+ if (field != null && terms != null)
+ {
+ if (storingPositions || storingOffsets)
+ {
+ tv = new SegmentTermPositionVector(field, terms, termFreqs, positions, offsets);
+ }
+ else
+ {
+ tv = new SegmentTermVector(field, terms, termFreqs);
+ }
+ }
+ return tv;
+ }
+ }
+} \ No newline at end of file
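
A hypothetical caller, to show how the pieces above fit together; the directory, fieldInfos and the segment/field names are illustrative, and the class is internal, so this assumes code inside Lucene.Net.Index:

    using (var reader = new TermVectorsReader(directory, "_0", fieldInfos))
    {
        // Returns null when no term vector was stored for this doc/field.
        ITermFreqVector vector = reader.Get(0, "body");
        if (vector != null)
        {
            System.String[] terms = vector.GetTerms();
            int[] freqs = vector.GetTermFrequencies();
        }
    }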
diff --git a/src/core/Index/TermVectorsTermsWriter.cs b/src/core/Index/TermVectorsTermsWriter.cs
new file mode 100644
index 0000000..d128a75
--- /dev/null
+++ b/src/core/Index/TermVectorsTermsWriter.cs
@@ -0,0 +1,380 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using IndexOutput = Lucene.Net.Store.IndexOutput;
+using RAMOutputStream = Lucene.Net.Store.RAMOutputStream;
+using ArrayUtil = Lucene.Net.Util.ArrayUtil;
+
+namespace Lucene.Net.Index
+{
+ sealed class TermVectorsTermsWriter:TermsHashConsumer
+ {
+ private void InitBlock()
+ {
+ docFreeList = new PerDoc[1];
+ }
+
+ internal DocumentsWriter docWriter;
+ internal TermVectorsWriter termVectorsWriter;
+ internal PerDoc[] docFreeList;
+ internal int freeCount;
+ internal IndexOutput tvx;
+ internal IndexOutput tvd;
+ internal IndexOutput tvf;
+ internal int lastDocID;
+
+ public TermVectorsTermsWriter(DocumentsWriter docWriter)
+ {
+ InitBlock();
+ this.docWriter = docWriter;
+ }
+
+ public override TermsHashConsumerPerThread AddThread(TermsHashPerThread termsHashPerThread)
+ {
+ return new TermVectorsTermsWriterPerThread(termsHashPerThread, this);
+ }
+
+ internal override void CreatePostings(RawPostingList[] postings, int start, int count)
+ {
+ int end = start + count;
+ for (int i = start; i < end; i++)
+ postings[i] = new PostingList();
+ }
+
+ public override void Flush(IDictionary<TermsHashConsumerPerThread, ICollection<TermsHashConsumerPerField>> threadsAndFields, SegmentWriteState state)
+ {
+ lock (this)
+ {
+ // NOTE: it's possible that all documents seen in this segment
+ // hit non-aborting exceptions, in which case we will
+ // not have yet init'd the TermVectorsWriter. This is
+ // actually OK (unlike in the stored fields case)
+ // because, although FieldInfos.hasVectors() will return
+ // true, the TermVectorsReader gracefully handles
+ // non-existence of the term vectors files.
+ if (tvx != null)
+ {
+
+ if (state.numDocsInStore > 0)
+ // In case there are some final documents that we
+ // didn't see (because they hit a non-aborting exception):
+ Fill(state.numDocsInStore - docWriter.DocStoreOffset);
+
+ tvx.Flush();
+ tvd.Flush();
+ tvf.Flush();
+ }
+
+ foreach(var entry in threadsAndFields)
+ {
+ foreach(var field in entry.Value)
+ {
+ TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField)field;
+ perField.termsHashPerField.Reset();
+ perField.ShrinkHash();
+ }
+
+ TermVectorsTermsWriterPerThread perThread = (TermVectorsTermsWriterPerThread) entry.Key;
+ perThread.termsHashPerThread.Reset(true);
+ }
+ }
+ }
+
+ internal override void CloseDocStore(SegmentWriteState state)
+ {
+ lock (this)
+ {
+ if (tvx != null)
+ {
+ // At least one doc in this run had term vectors
+ // enabled
+ Fill(state.numDocsInStore - docWriter.DocStoreOffset);
+ tvx.Close();
+ tvf.Close();
+ tvd.Close();
+ tvx = null;
+ System.Diagnostics.Debug.Assert(state.docStoreSegmentName != null);
+ System.String fileName = state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION;
+ if (4 + ((long) state.numDocsInStore) * 16 != state.directory.FileLength(fileName))
+ throw new System.SystemException("after flush: tvx size mismatch: " + state.numDocsInStore + " docs vs " + state.directory.FileLength(fileName) + " length in bytes of " + fileName + " file exists?=" + state.directory.FileExists(fileName));
+
+ state.flushedFiles.Add(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
+ state.flushedFiles.Add(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
+ state.flushedFiles.Add(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
+
+ docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
+ docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
+ docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
+
+ lastDocID = 0;
+ }
+ }
+ }
+
+ internal int allocCount;
+
+ internal PerDoc GetPerDoc()
+ {
+ lock (this)
+ {
+ if (freeCount == 0)
+ {
+ allocCount++;
+ if (allocCount > docFreeList.Length)
+ {
+ // Grow our free list up front to make sure we have
+ // enough space to recycle all outstanding PerDoc
+ // instances
+ System.Diagnostics.Debug.Assert(allocCount == 1 + docFreeList.Length);
+ docFreeList = new PerDoc[ArrayUtil.GetNextSize(allocCount)];
+ }
+ return new PerDoc(this);
+ }
+ else
+ return docFreeList[--freeCount];
+ }
+ }
+
+ /// <summary>Fills in no-term-vectors for all docs we haven't seen
+ /// since the last doc that had term vectors.
+ /// </summary>
+ internal void Fill(int docID)
+ {
+ int docStoreOffset = docWriter.DocStoreOffset;
+ int end = docID + docStoreOffset;
+ if (lastDocID < end)
+ {
+ long tvfPosition = tvf.FilePointer;
+ while (lastDocID < end)
+ {
+ tvx.WriteLong(tvd.FilePointer);
+ tvd.WriteVInt(0);
+ tvx.WriteLong(tvfPosition);
+ lastDocID++;
+ }
+ }
+ }
+
+ internal void InitTermVectorsWriter()
+ {
+ lock (this)
+ {
+ if (tvx == null)
+ {
+
+ System.String docStoreSegment = docWriter.DocStoreSegment;
+
+ if (docStoreSegment == null)
+ return ;
+
+ System.Diagnostics.Debug.Assert(docStoreSegment != null);
+
+ // If we hit an exception while init'ing the term
+ // vector output files, we must abort this segment
+ // because those files will be in an unknown
+ // state:
+ tvx = docWriter.directory.CreateOutput(docStoreSegment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
+ tvd = docWriter.directory.CreateOutput(docStoreSegment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
+ tvf = docWriter.directory.CreateOutput(docStoreSegment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
+
+ tvx.WriteInt(TermVectorsReader.FORMAT_CURRENT);
+ tvd.WriteInt(TermVectorsReader.FORMAT_CURRENT);
+ tvf.WriteInt(TermVectorsReader.FORMAT_CURRENT);
+
+ docWriter.AddOpenFile(docStoreSegment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
+ docWriter.AddOpenFile(docStoreSegment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
+ docWriter.AddOpenFile(docStoreSegment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
+
+ lastDocID = 0;
+ }
+ }
+ }
+
+ internal void FinishDocument(PerDoc perDoc)
+ {
+ lock (this)
+ {
+
+ System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("TermVectorsTermsWriter.finishDocument start"));
+
+ InitTermVectorsWriter();
+
+ Fill(perDoc.docID);
+
+ // Append term vectors to the real outputs:
+ tvx.WriteLong(tvd.FilePointer);
+ tvx.WriteLong(tvf.FilePointer);
+ tvd.WriteVInt(perDoc.numVectorFields);
+ if (perDoc.numVectorFields > 0)
+ {
+ for (int i = 0; i < perDoc.numVectorFields; i++)
+ tvd.WriteVInt(perDoc.fieldNumbers[i]);
+ System.Diagnostics.Debug.Assert(0 == perDoc.fieldPointers[0]);
+ long lastPos = perDoc.fieldPointers[0];
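+ // field pointers are written as deltas: e.g. pointers 0, 120, 300 are stored
+ // as the VLongs 120 and 180 (fieldPointers[0] is always 0, per the assert above)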
+ for (int i = 1; i < perDoc.numVectorFields; i++)
+ {
+ long pos = perDoc.fieldPointers[i];
+ tvd.WriteVLong(pos - lastPos);
+ lastPos = pos;
+ }
+ perDoc.perDocTvf.WriteTo(tvf);
+ perDoc.numVectorFields = 0;
+ }
+
+ System.Diagnostics.Debug.Assert(lastDocID == perDoc.docID + docWriter.DocStoreOffset);
+
+ lastDocID++;
+ perDoc.Reset();
+ Free(perDoc);
+ System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("TermVectorsTermsWriter.finishDocument end"));
+ }
+ }
+
+ public bool FreeRAM()
+ {
+ // We don't hold any state beyond one doc, so we don't
+ // free persistent RAM here
+ return false;
+ }
+
+ public override void Abort()
+ {
+ if (tvx != null)
+ {
+ try
+ {
+ tvx.Close();
+ }
+ catch (System.Exception)
+ {
+ }
+ tvx = null;
+ }
+ if (tvd != null)
+ {
+ try
+ {
+ tvd.Close();
+ }
+ catch (System.Exception)
+ {
+ }
+ tvd = null;
+ }
+ if (tvf != null)
+ {
+ try
+ {
+ tvf.Close();
+ }
+ catch (System.Exception)
+ {
+ }
+ tvf = null;
+ }
+ lastDocID = 0;
+ }
+
+ internal void Free(PerDoc doc)
+ {
+ lock (this)
+ {
+ System.Diagnostics.Debug.Assert(freeCount < docFreeList.Length);
+ docFreeList[freeCount++] = doc;
+ }
+ }
+
+ internal class PerDoc:DocumentsWriter.DocWriter
+ {
+ public PerDoc(TermVectorsTermsWriter enclosingInstance)
+ {
+ InitBlock(enclosingInstance);
+ }
+ private void InitBlock(TermVectorsTermsWriter enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ buffer = enclosingInstance.docWriter.NewPerDocBuffer();
+ perDocTvf = new RAMOutputStream(buffer);
+ }
+ private TermVectorsTermsWriter enclosingInstance;
+ public TermVectorsTermsWriter Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+
+ internal DocumentsWriter.PerDocBuffer buffer;
+ internal RAMOutputStream perDocTvf;
+ internal int numVectorFields;
+
+ internal int[] fieldNumbers = new int[1];
+ internal long[] fieldPointers = new long[1];
+
+ internal void Reset()
+ {
+ perDocTvf.Reset();
+ buffer.Recycle();
+ numVectorFields = 0;
+ }
+
+ public override void Abort()
+ {
+ Reset();
+ Enclosing_Instance.Free(this);
+ }
+
+ internal void AddField(int fieldNumber)
+ {
+ if (numVectorFields == fieldNumbers.Length)
+ {
+ fieldNumbers = ArrayUtil.Grow(fieldNumbers);
+ fieldPointers = ArrayUtil.Grow(fieldPointers);
+ }
+ fieldNumbers[numVectorFields] = fieldNumber;
+ fieldPointers[numVectorFields] = perDocTvf.FilePointer;
+ numVectorFields++;
+ }
+
+ public override long SizeInBytes()
+ {
+ return buffer.SizeInBytes;
+ }
+
+ public override void Finish()
+ {
+ Enclosing_Instance.FinishDocument(this);
+ }
+ }
+
+ internal sealed class PostingList:RawPostingList
+ {
+ internal int freq; // How many times this term occurred in the current doc
+ internal int lastOffset; // Last offset we saw
+ internal int lastPosition; // Last position where this term occurred
+ }
+
+ internal override int BytesPerPosting()
+ {
+ return RawPostingList.BYTES_SIZE + 3 * DocumentsWriter.INT_NUM_BYTE;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/TermVectorsTermsWriterPerField.cs b/src/core/Index/TermVectorsTermsWriterPerField.cs
new file mode 100644
index 0000000..e6bb827
--- /dev/null
+++ b/src/core/Index/TermVectorsTermsWriterPerField.cs
@@ -0,0 +1,290 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Documents;
+using IndexOutput = Lucene.Net.Store.IndexOutput;
+using UnicodeUtil = Lucene.Net.Util.UnicodeUtil;
+
+namespace Lucene.Net.Index
+{
+
+ sealed class TermVectorsTermsWriterPerField:TermsHashConsumerPerField
+ {
+
+ internal TermVectorsTermsWriterPerThread perThread;
+ internal TermsHashPerField termsHashPerField;
+ internal TermVectorsTermsWriter termsWriter;
+ internal FieldInfo fieldInfo;
+ internal DocumentsWriter.DocState docState;
+ internal FieldInvertState fieldState;
+
+ internal bool doVectors;
+ internal bool doVectorPositions;
+ internal bool doVectorOffsets;
+
+ internal int maxNumPostings;
+ internal IOffsetAttribute offsetAttribute = null;
+
+ public TermVectorsTermsWriterPerField(TermsHashPerField termsHashPerField, TermVectorsTermsWriterPerThread perThread, FieldInfo fieldInfo)
+ {
+ this.termsHashPerField = termsHashPerField;
+ this.perThread = perThread;
+ this.termsWriter = perThread.termsWriter;
+ this.fieldInfo = fieldInfo;
+ docState = termsHashPerField.docState;
+ fieldState = termsHashPerField.fieldState;
+ }
+
+ internal override int GetStreamCount()
+ {
+ return 2;
+ }
+
+ internal override bool Start(IFieldable[] fields, int count)
+ {
+ doVectors = false;
+ doVectorPositions = false;
+ doVectorOffsets = false;
+
+ for (int i = 0; i < count; i++)
+ {
+ IFieldable field = fields[i];
+ if (field.IsIndexed && field.IsTermVectorStored)
+ {
+ doVectors = true;
+ doVectorPositions |= field.IsStorePositionWithTermVector;
+ doVectorOffsets |= field.IsStoreOffsetWithTermVector;
+ }
+ }
+
+ if (doVectors)
+ {
+ if (perThread.doc == null)
+ {
+ perThread.doc = termsWriter.GetPerDoc();
+ perThread.doc.docID = docState.docID;
+ System.Diagnostics.Debug.Assert(perThread.doc.numVectorFields == 0);
+ System.Diagnostics.Debug.Assert(0 == perThread.doc.perDocTvf.Length);
+ System.Diagnostics.Debug.Assert(0 == perThread.doc.perDocTvf.FilePointer);
+ }
+
+ System.Diagnostics.Debug.Assert(perThread.doc.docID == docState.docID);
+ if (termsHashPerField.numPostings != 0)
+ {
+ // Only necessary if previous doc hit a
+ // non-aborting exception while writing vectors in
+ // this field:
+ termsHashPerField.Reset();
+ perThread.termsHashPerThread.Reset(false);
+ }
+ }
+
+ // TODO: only if needed for performance
+ //perThread.postingsCount = 0;
+
+ return doVectors;
+ }
+
+ public void Abort()
+ {
+ }
+
+ /// <summary>Called once per field per document if term vectors
+ /// are enabled, to write the vectors to
+ /// RAMOutputStream, which is then quickly flushed to
+ /// the real term vectors files in the Directory.
+ /// </summary>
+ internal override void Finish()
+ {
+
+ System.Diagnostics.Debug.Assert(docState.TestPoint("TermVectorsTermsWriterPerField.finish start"));
+
+ int numPostings = termsHashPerField.numPostings;
+
+ System.Diagnostics.Debug.Assert(numPostings >= 0);
+
+ if (!doVectors || numPostings == 0)
+ return ;
+
+ if (numPostings > maxNumPostings)
+ maxNumPostings = numPostings;
+
+ IndexOutput tvf = perThread.doc.perDocTvf;
+
+ // This is called once, after inverting all occurrences
+ // of a given field in the doc. At this point we flush
+ // our hash into the DocWriter.
+
+ System.Diagnostics.Debug.Assert(fieldInfo.storeTermVector);
+ System.Diagnostics.Debug.Assert(perThread.VectorFieldsInOrder(fieldInfo));
+
+ perThread.doc.AddField(termsHashPerField.fieldInfo.number);
+
+ RawPostingList[] postings = termsHashPerField.SortPostings();
+
+ tvf.WriteVInt(numPostings);
+ byte bits = (byte) (0x0);
+ if (doVectorPositions)
+ bits |= TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR;
+ if (doVectorOffsets)
+ bits |= TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR;
+ tvf.WriteByte(bits);
+
+ int encoderUpto = 0;
+ int lastTermBytesCount = 0;
+
+ ByteSliceReader reader = perThread.vectorSliceReader;
+ char[][] charBuffers = perThread.termsHashPerThread.charPool.buffers;
+ for (int j = 0; j < numPostings; j++)
+ {
+ TermVectorsTermsWriter.PostingList posting = (TermVectorsTermsWriter.PostingList) postings[j];
+ int freq = posting.freq;
+
+ char[] text2 = charBuffers[posting.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
+ int start2 = posting.textStart & DocumentsWriter.CHAR_BLOCK_MASK;
+
+ // We swap between two encoders to save copying
+ // last Term's byte array
+ UnicodeUtil.UTF8Result utf8Result = perThread.utf8Results[encoderUpto];
+
+ // TODO: we could do this incrementally
+ UnicodeUtil.UTF16toUTF8(text2, start2, utf8Result);
+ int termBytesCount = utf8Result.length;
+
+ // TODO: UTF16toUTF8 could tell us this prefix
+ // Compute common prefix between last term and
+ // this term
+ int prefix = 0;
+ if (j > 0)
+ {
+ byte[] lastTermBytes = perThread.utf8Results[1 - encoderUpto].result;
+ byte[] termBytes = perThread.utf8Results[encoderUpto].result;
+ while (prefix < lastTermBytesCount && prefix < termBytesCount)
+ {
+ if (lastTermBytes[prefix] != termBytes[prefix])
+ break;
+ prefix++;
+ }
+ }
+ encoderUpto = 1 - encoderUpto;
+ lastTermBytesCount = termBytesCount;
+
+ int suffix = termBytesCount - prefix;
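+ // e.g. previous term "index" and current term "indexes" share a 5-byte prefix,
+ // so we write VInt 5, VInt 2 and then only the two suffix bytes "es"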
+ tvf.WriteVInt(prefix);
+ tvf.WriteVInt(suffix);
+ tvf.WriteBytes(utf8Result.result, prefix, suffix);
+ tvf.WriteVInt(freq);
+
+ if (doVectorPositions)
+ {
+ termsHashPerField.InitReader(reader, posting, 0);
+ reader.WriteTo(tvf);
+ }
+
+ if (doVectorOffsets)
+ {
+ termsHashPerField.InitReader(reader, posting, 1);
+ reader.WriteTo(tvf);
+ }
+ }
+
+ termsHashPerField.Reset();
+
+ // NOTE: we clear, per-field, at the thread level,
+ // because term vectors fully write themselves on each
+ // field; this saves RAM (eg if large doc has two large
+ // fields w/ term vectors on) because we recycle/reuse
+ // all RAM after each field:
+ perThread.termsHashPerThread.Reset(false);
+ }
+
+ internal void ShrinkHash()
+ {
+ termsHashPerField.ShrinkHash(maxNumPostings);
+ maxNumPostings = 0;
+ }
+
+ internal override void Start(IFieldable f)
+ {
+ if (doVectorOffsets)
+ {
+ offsetAttribute = fieldState.attributeSource.AddAttribute<IOffsetAttribute>();
+ }
+ else
+ {
+ offsetAttribute = null;
+ }
+ }
+
+ internal override void NewTerm(RawPostingList p0)
+ {
+
+ System.Diagnostics.Debug.Assert(docState.TestPoint("TermVectorsTermsWriterPerField.newTerm start"));
+
+ TermVectorsTermsWriter.PostingList p = (TermVectorsTermsWriter.PostingList) p0;
+
+ p.freq = 1;
+
+ if (doVectorOffsets)
+ {
+ int startOffset = fieldState.offset + offsetAttribute.StartOffset;
+ int endOffset = fieldState.offset + offsetAttribute.EndOffset;
+
+ termsHashPerField.WriteVInt(1, startOffset);
+ termsHashPerField.WriteVInt(1, endOffset - startOffset);
+ p.lastOffset = endOffset;
+ }
+
+ if (doVectorPositions)
+ {
+ termsHashPerField.WriteVInt(0, fieldState.position);
+ p.lastPosition = fieldState.position;
+ }
+ }
+
+ internal override void AddTerm(RawPostingList p0)
+ {
+
+ System.Diagnostics.Debug.Assert(docState.TestPoint("TermVectorsTermsWriterPerField.addTerm start"));
+
+ TermVectorsTermsWriter.PostingList p = (TermVectorsTermsWriter.PostingList) p0;
+ p.freq++;
+
+ if (doVectorOffsets)
+ {
+ int startOffset = fieldState.offset + offsetAttribute.StartOffset;
+ int endOffset = fieldState.offset + offsetAttribute.EndOffset;
+
+ termsHashPerField.WriteVInt(1, startOffset - p.lastOffset);
+ termsHashPerField.WriteVInt(1, endOffset - startOffset);
+ p.lastOffset = endOffset;
+ }
+
+ if (doVectorPositions)
+ {
+ termsHashPerField.WriteVInt(0, fieldState.position - p.lastPosition);
+ p.lastPosition = fieldState.position;
+ }
+ }
+
+ internal override void SkippingLongTerm()
+ {
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/TermVectorsTermsWriterPerThread.cs b/src/core/Index/TermVectorsTermsWriterPerThread.cs
new file mode 100644
index 0000000..b08b920
--- /dev/null
+++ b/src/core/Index/TermVectorsTermsWriterPerThread.cs
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using UnicodeUtil = Lucene.Net.Util.UnicodeUtil;
+
+namespace Lucene.Net.Index
+{
+
+ sealed class TermVectorsTermsWriterPerThread:TermsHashConsumerPerThread
+ {
+
+ internal TermVectorsTermsWriter termsWriter;
+ internal TermsHashPerThread termsHashPerThread;
+ internal DocumentsWriter.DocState docState;
+
+ internal TermVectorsTermsWriter.PerDoc doc;
+
+ public TermVectorsTermsWriterPerThread(TermsHashPerThread termsHashPerThread, TermVectorsTermsWriter termsWriter)
+ {
+ this.termsWriter = termsWriter;
+ this.termsHashPerThread = termsHashPerThread;
+ docState = termsHashPerThread.docState;
+ }
+
+ // Used by perField when serializing the term vectors
+ internal ByteSliceReader vectorSliceReader = new ByteSliceReader();
+
+ internal UnicodeUtil.UTF8Result[] utf8Results = new UnicodeUtil.UTF8Result[]{new UnicodeUtil.UTF8Result(), new UnicodeUtil.UTF8Result()};
+
+ public override void StartDocument()
+ {
+ System.Diagnostics.Debug.Assert(ClearLastVectorFieldName());
+ if (doc != null)
+ {
+ doc.Reset();
+ doc.docID = docState.docID;
+ }
+ }
+
+ public override DocumentsWriter.DocWriter FinishDocument()
+ {
+ try
+ {
+ return doc;
+ }
+ finally
+ {
+ doc = null;
+ }
+ }
+
+ public override TermsHashConsumerPerField AddField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo)
+ {
+ return new TermVectorsTermsWriterPerField(termsHashPerField, this, fieldInfo);
+ }
+
+ public override void Abort()
+ {
+ if (doc != null)
+ {
+ doc.Abort();
+ doc = null;
+ }
+ }
+
+ // Called only by assert
+ internal bool ClearLastVectorFieldName()
+ {
+ lastVectorFieldName = null;
+ return true;
+ }
+
+ // Called only by assert
+ internal System.String lastVectorFieldName;
+ internal bool VectorFieldsInOrder(FieldInfo fi)
+ {
+ try
+ {
+ if (lastVectorFieldName != null)
+ return String.CompareOrdinal(lastVectorFieldName, fi.name) < 0;
+ else
+ return true;
+ }
+ finally
+ {
+ lastVectorFieldName = fi.name;
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/TermVectorsWriter.cs b/src/core/Index/TermVectorsWriter.cs
new file mode 100644
index 0000000..ebaa4f4
--- /dev/null
+++ b/src/core/Index/TermVectorsWriter.cs
@@ -0,0 +1,246 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using Directory = Lucene.Net.Store.Directory;
+using IndexOutput = Lucene.Net.Store.IndexOutput;
+using StringHelper = Lucene.Net.Util.StringHelper;
+using UnicodeUtil = Lucene.Net.Util.UnicodeUtil;
+
+namespace Lucene.Net.Index
+{
+ sealed class TermVectorsWriter : IDisposable
+ {
+
+ private readonly IndexOutput tvx = null;
+ private readonly IndexOutput tvd = null;
+ private readonly IndexOutput tvf = null;
+ private readonly FieldInfos fieldInfos;
+ internal UnicodeUtil.UTF8Result[] utf8Results = new[]{new UnicodeUtil.UTF8Result(), new UnicodeUtil.UTF8Result()};
+
+ public TermVectorsWriter(Directory directory, System.String segment, FieldInfos fieldInfos)
+ {
+ // Open files for TermVector storage
+ tvx = directory.CreateOutput(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
+ tvx.WriteInt(TermVectorsReader.FORMAT_CURRENT);
+ tvd = directory.CreateOutput(segment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
+ tvd.WriteInt(TermVectorsReader.FORMAT_CURRENT);
+ tvf = directory.CreateOutput(segment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
+ tvf.WriteInt(TermVectorsReader.FORMAT_CURRENT);
+
+ this.fieldInfos = fieldInfos;
+ }
+
+ /// <summary> Add a complete document specified by all its term vectors. If the document has no
+ /// term vectors, an entry is still written to tvx.
+ ///
+ /// </summary>
+ /// <param name="vectors">
+ /// </param>
+ /// <throws> IOException </throws>
+ public void AddAllDocVectors(ITermFreqVector[] vectors)
+ {
+
+ tvx.WriteLong(tvd.FilePointer);
+ tvx.WriteLong(tvf.FilePointer);
+
+ if (vectors != null)
+ {
+ int numFields = vectors.Length;
+ tvd.WriteVInt(numFields);
+
+ var fieldPointers = new long[numFields];
+
+ for (int i = 0; i < numFields; i++)
+ {
+ fieldPointers[i] = tvf.FilePointer;
+
+ int fieldNumber = fieldInfos.FieldNumber(vectors[i].Field);
+
+ // 1st pass: write field numbers to tvd
+ tvd.WriteVInt(fieldNumber);
+
+ int numTerms = vectors[i].Size;
+ tvf.WriteVInt(numTerms);
+
+ TermPositionVector tpVector;
+
+ byte bits;
+ bool storePositions;
+ bool storeOffsets;
+
+ if (vectors[i] is TermPositionVector)
+ {
+ // May have positions & offsets
+ tpVector = (TermPositionVector) vectors[i];
+ storePositions = tpVector.Size > 0 && tpVector.GetTermPositions(0) != null;
+ storeOffsets = tpVector.Size > 0 && tpVector.GetOffsets(0) != null;
+ bits = (byte) ((storePositions?TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR: (byte) 0) + (storeOffsets?TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR: (byte) 0));
+ }
+ else
+ {
+ tpVector = null;
+ bits = 0;
+ storePositions = false;
+ storeOffsets = false;
+ }
+
+ tvf.WriteVInt(bits);
+
+ System.String[] terms = vectors[i].GetTerms();
+ int[] freqs = vectors[i].GetTermFrequencies();
+
+ int utf8Upto = 0;
+ utf8Results[1].length = 0;
+
+ for (int j = 0; j < numTerms; j++)
+ {
+
+ UnicodeUtil.UTF16toUTF8(terms[j], 0, terms[j].Length, utf8Results[utf8Upto]);
+
+ int start = StringHelper.BytesDifference(utf8Results[1 - utf8Upto].result, utf8Results[1 - utf8Upto].length, utf8Results[utf8Upto].result, utf8Results[utf8Upto].length);
+ int length = utf8Results[utf8Upto].length - start;
+ tvf.WriteVInt(start); // write shared prefix length
+ tvf.WriteVInt(length); // write delta length
+ tvf.WriteBytes(utf8Results[utf8Upto].result, start, length); // write delta bytes
+ utf8Upto = 1 - utf8Upto;
+
+ int termFreq = freqs[j];
+
+ tvf.WriteVInt(termFreq);
+
+ if (storePositions)
+ {
+ int[] positions = tpVector.GetTermPositions(j);
+ if (positions == null)
+ throw new System.SystemException("Trying to write positions that are null!");
+ System.Diagnostics.Debug.Assert(positions.Length == termFreq);
+
+ // use delta encoding for positions
+ int lastPosition = 0;
+ foreach (int position in positions)
+ {
+ tvf.WriteVInt(position - lastPosition);
+ lastPosition = position;
+ }
+ }
+
+ if (storeOffsets)
+ {
+ TermVectorOffsetInfo[] offsets = tpVector.GetOffsets(j);
+ if (offsets == null)
+ throw new System.SystemException("Trying to write offsets that are null!");
+ System.Diagnostics.Debug.Assert(offsets.Length == termFreq);
+
+ // use delta encoding for offsets
+ int lastEndOffset = 0;
+ foreach (TermVectorOffsetInfo t in offsets)
+ {
+ int startOffset = t.StartOffset;
+ int endOffset = t.EndOffset;
+ tvf.WriteVInt(startOffset - lastEndOffset);
+ tvf.WriteVInt(endOffset - startOffset);
+ lastEndOffset = endOffset;
+ }
+ }
+ }
+ }
+
+ // 2nd pass: write field pointers to tvd
+ if (numFields > 1)
+ {
+ long lastFieldPointer = fieldPointers[0];
+ for (int i = 1; i < numFields; i++)
+ {
+ long fieldPointer = fieldPointers[i];
+ tvd.WriteVLong(fieldPointer - lastFieldPointer);
+ lastFieldPointer = fieldPointer;
+ }
+ }
+ }
+ else
+ tvd.WriteVInt(0);
+ }
+
+ /// <summary> Do a bulk copy of numDocs documents from reader to our
+ /// streams. This is used to expedite merging, if the
+ /// field numbers are congruent.
+ /// </summary>
+ internal void AddRawDocuments(TermVectorsReader reader, int[] tvdLengths, int[] tvfLengths, int numDocs)
+ {
+ long tvdPosition = tvd.FilePointer;
+ long tvfPosition = tvf.FilePointer;
+ long tvdStart = tvdPosition;
+ long tvfStart = tvfPosition;
+ for (int i = 0; i < numDocs; i++)
+ {
+ tvx.WriteLong(tvdPosition);
+ tvdPosition += tvdLengths[i];
+ tvx.WriteLong(tvfPosition);
+ tvfPosition += tvfLengths[i];
+ }
+ tvd.CopyBytes(reader.GetTvdStream(), tvdPosition - tvdStart);
+ tvf.CopyBytes(reader.GetTvfStream(), tvfPosition - tvfStart);
+ System.Diagnostics.Debug.Assert(tvd.FilePointer == tvdPosition);
+ System.Diagnostics.Debug.Assert(tvf.FilePointer == tvfPosition);
+ }
+
+ /// <summary>Close all streams. </summary>
+ public void Dispose()
+ {
+ // Move to a protected method if class becomes unsealed
+
+ // make an effort to close all streams we can but remember and re-throw
+ // the first exception encountered in this process
+ System.IO.IOException keep = null;
+ if (tvx != null)
+ try
+ {
+ tvx.Close();
+ }
+ catch (System.IO.IOException e)
+ {
+ keep = e;
+ }
+ if (tvd != null)
+ try
+ {
+ tvd.Close();
+ }
+ catch (System.IO.IOException e)
+ {
+ if (keep == null)
+ keep = e;
+ }
+ if (tvf != null)
+ try
+ {
+ tvf.Close();
+ }
+ catch (System.IO.IOException e)
+ {
+ if (keep == null)
+ keep = e;
+ }
+ if (keep != null)
+ {
+ throw new System.IO.IOException(keep.StackTrace);
+ }
+ }
+ }
+} \ No newline at end of file
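
A sketch of the non-bulk merge path this writer is designed for; termVectorsReader, termVectorsWriter and maxDoc are assumptions standing in for SegmentMerger state, not code from the commit:

    for (int docNum = 0; docNum < maxDoc; docNum++)
    {
        // Read every vector stored for the document...
        ITermFreqVector[] vectors = termVectorsReader.Get(docNum);
        // ...and write them back out; AddAllDocVectors also handles vectors == null.
        termVectorsWriter.AddAllDocVectors(vectors);
    }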
diff --git a/src/core/Index/TermsHash.cs b/src/core/Index/TermsHash.cs
new file mode 100644
index 0000000..97ae1eb
--- /dev/null
+++ b/src/core/Index/TermsHash.cs
@@ -0,0 +1,278 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using ArrayUtil = Lucene.Net.Util.ArrayUtil;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary>This class implements <see cref="InvertedDocConsumer" />, which
+ /// is passed each token produced by the analyzer on each
+ /// field. It stores these tokens in a hash table, and
+ /// allocates separate byte streams per token. Consumers of
+ /// this class, eg <see cref="FreqProxTermsWriter" /> and <see cref="TermVectorsTermsWriter" />,
+ /// write their own byte streams under each term.
+ /// </summary>
+ sealed class TermsHash : InvertedDocConsumer
+ {
+
+ internal TermsHashConsumer consumer;
+ internal TermsHash nextTermsHash;
+ internal int bytesPerPosting;
+ internal int postingsFreeChunk;
+ internal DocumentsWriter docWriter;
+ private RawPostingList[] postingsFreeList = new RawPostingList[1];
+ private int postingsFreeCount;
+ private int postingsAllocCount;
+ internal bool trackAllocations;
+
+ public TermsHash(DocumentsWriter docWriter, bool trackAllocations, TermsHashConsumer consumer, TermsHash nextTermsHash)
+ {
+ this.docWriter = docWriter;
+ this.consumer = consumer;
+ this.nextTermsHash = nextTermsHash;
+ this.trackAllocations = trackAllocations;
+
+ // Why + 4*POINTER_NUM_BYTE below?
+ // +1: Posting is referenced by postingsFreeList array
+ // +3: Posting is referenced by hash, which
+ // targets 25-50% fill factor; approximate this
+ // as 3X # pointers
+ bytesPerPosting = consumer.BytesPerPosting() + 4 * DocumentsWriter.POINTER_NUM_BYTE;
+ postingsFreeChunk = (int) (DocumentsWriter.BYTE_BLOCK_SIZE / bytesPerPosting);
+ }
+
+ internal override InvertedDocConsumerPerThread AddThread(DocInverterPerThread docInverterPerThread)
+ {
+ return new TermsHashPerThread(docInverterPerThread, this, nextTermsHash, null);
+ }
+
+ internal TermsHashPerThread AddThread(DocInverterPerThread docInverterPerThread, TermsHashPerThread primaryPerThread)
+ {
+ return new TermsHashPerThread(docInverterPerThread, this, nextTermsHash, primaryPerThread);
+ }
+
+ internal override void SetFieldInfos(FieldInfos fieldInfos)
+ {
+ this.fieldInfos = fieldInfos;
+ consumer.SetFieldInfos(fieldInfos);
+ }
+
+ // NOTE: do not make this sync'd; it's not necessary (DW
+ // ensures all other threads are idle), and it leads to
+ // deadlock
+ public override void Abort()
+ {
+ consumer.Abort();
+ if (nextTermsHash != null)
+ nextTermsHash.Abort();
+ }
+
+ internal void ShrinkFreePostings(IDictionary<InvertedDocConsumerPerThread, ICollection<InvertedDocConsumerPerField>> threadsAndFields, SegmentWriteState state)
+ {
+
+ System.Diagnostics.Debug.Assert(postingsFreeCount == postingsAllocCount, "Thread.currentThread().getName()" + ": postingsFreeCount=" + postingsFreeCount + " postingsAllocCount=" + postingsAllocCount + " consumer=" + consumer);
+
+ int newSize = 1;
+ if (newSize != postingsFreeList.Length)
+ {
+ if (postingsFreeCount > newSize)
+ {
+ if (trackAllocations)
+ {
+ docWriter.BytesAllocated(-(postingsFreeCount - newSize) * bytesPerPosting);
+ }
+ postingsFreeCount = newSize;
+ postingsAllocCount = newSize;
+ }
+
+ RawPostingList[] newArray = new RawPostingList[newSize];
+ Array.Copy(postingsFreeList, 0, newArray, 0, postingsFreeCount);
+ postingsFreeList = newArray;
+ }
+ }
+
+ internal override void CloseDocStore(SegmentWriteState state)
+ {
+ lock (this)
+ {
+ consumer.CloseDocStore(state);
+ if (nextTermsHash != null)
+ nextTermsHash.CloseDocStore(state);
+ }
+ }
+
+ internal override void Flush(IDictionary<InvertedDocConsumerPerThread, ICollection<InvertedDocConsumerPerField>> threadsAndFields, SegmentWriteState state)
+ {
+ lock (this)
+ {
+ var childThreadsAndFields = new Dictionary<TermsHashConsumerPerThread, ICollection<TermsHashConsumerPerField>>();
+ Dictionary<InvertedDocConsumerPerThread, ICollection<InvertedDocConsumerPerField>> nextThreadsAndFields;
+
+ if (nextTermsHash != null)
+ {
+ nextThreadsAndFields = new Dictionary<InvertedDocConsumerPerThread, ICollection<InvertedDocConsumerPerField>>();
+ }
+ else
+ nextThreadsAndFields = null;
+
+ foreach (var entry in threadsAndFields)
+ {
+ TermsHashPerThread perThread = (TermsHashPerThread) entry.Key;
+
+ ICollection<InvertedDocConsumerPerField> fields = entry.Value;
+
+ var fieldsIt = fields.GetEnumerator();
+ ICollection<TermsHashConsumerPerField> childFields = new HashSet<TermsHashConsumerPerField>();
+ ICollection<InvertedDocConsumerPerField> nextChildFields;
+
+ if (nextTermsHash != null)
+ {
+ nextChildFields = new HashSet<InvertedDocConsumerPerField>();
+ }
+ else
+ nextChildFields = null;
+
+ while (fieldsIt.MoveNext())
+ {
+ TermsHashPerField perField = (TermsHashPerField) fieldsIt.Current;
+ childFields.Add(perField.consumer);
+ if (nextTermsHash != null)
+ nextChildFields.Add(perField.nextPerField);
+ }
+
+ childThreadsAndFields[perThread.consumer] = childFields;
+ if (nextTermsHash != null)
+ nextThreadsAndFields[perThread.nextPerThread] = nextChildFields;
+ }
+
+ consumer.Flush(childThreadsAndFields, state);
+
+ ShrinkFreePostings(threadsAndFields, state);
+
+ if (nextTermsHash != null)
+ nextTermsHash.Flush(nextThreadsAndFields, state);
+ }
+ }
+
+ public override bool FreeRAM()
+ {
+ if (!trackAllocations)
+ return false;
+
+ bool any;
+ long bytesFreed = 0;
+ lock (this)
+ {
+ int numToFree;
+ if (postingsFreeCount >= postingsFreeChunk)
+ numToFree = postingsFreeChunk;
+ else
+ numToFree = postingsFreeCount;
+ any = numToFree > 0;
+ if (any)
+ {
+ for (int i = postingsFreeCount - numToFree; i < postingsFreeCount; i++)
+ {
+ postingsFreeList[i] = null;
+ }
+ //Arrays.fill(postingsFreeList, postingsFreeCount - numToFree, postingsFreeCount, null);
+ postingsFreeCount -= numToFree;
+ postingsAllocCount -= numToFree;
+ bytesFreed = -numToFree * bytesPerPosting;
+ any = true;
+ }
+ }
+
+ if (any)
+ {
+ docWriter.BytesAllocated(bytesFreed);
+ }
+
+ if (nextTermsHash != null)
+ any |= nextTermsHash.FreeRAM();
+
+ return any;
+ }
+
+ public void RecyclePostings(RawPostingList[] postings, int numPostings)
+ {
+ lock (this)
+ {
+
+ System.Diagnostics.Debug.Assert(postings.Length >= numPostings);
+
+ // Move all Postings from this ThreadState back to our
+ // free list. We pre-allocated this array while we were
+ // creating Postings to make sure it's large enough
+ System.Diagnostics.Debug.Assert(postingsFreeCount + numPostings <= postingsFreeList.Length);
+ Array.Copy(postings, 0, postingsFreeList, postingsFreeCount, numPostings);
+ postingsFreeCount += numPostings;
+ }
+ }
+
+ public void GetPostings(RawPostingList[] postings)
+ {
+ lock (this)
+ {
+
+ System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("TermsHash.getPostings start"));
+
+ System.Diagnostics.Debug.Assert(postingsFreeCount <= postingsFreeList.Length);
+ System.Diagnostics.Debug.Assert(postingsFreeCount <= postingsAllocCount, "postingsFreeCount=" + postingsFreeCount + " postingsAllocCount=" + postingsAllocCount);
+
+ int numToCopy;
+ if (postingsFreeCount < postings.Length)
+ numToCopy = postingsFreeCount;
+ else
+ numToCopy = postings.Length;
+ int start = postingsFreeCount - numToCopy;
+ System.Diagnostics.Debug.Assert(start >= 0);
+ System.Diagnostics.Debug.Assert(start + numToCopy <= postingsFreeList.Length);
+ System.Diagnostics.Debug.Assert(numToCopy <= postings.Length);
+ Array.Copy(postingsFreeList, start, postings, 0, numToCopy);
+
+ // Directly allocate the remainder if any
+ if (numToCopy != postings.Length)
+ {
+ int extra = postings.Length - numToCopy;
+ int newPostingsAllocCount = postingsAllocCount + extra;
+
+ consumer.CreatePostings(postings, numToCopy, extra);
+ System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("TermsHash.getPostings after create"));
+ postingsAllocCount += extra;
+
+ if (trackAllocations)
+ docWriter.BytesAllocated(extra * bytesPerPosting);
+
+ if (newPostingsAllocCount > postingsFreeList.Length)
+ // Pre-allocate the postingsFreeList so it's large
+ // enough to hold all postings we've given out
+ postingsFreeList = new RawPostingList[ArrayUtil.GetNextSize(newPostingsAllocCount)];
+ }
+
+ postingsFreeCount -= numToCopy;
+
+ if (trackAllocations)
+ docWriter.BytesUsed(postings.Length * bytesPerPosting);
+ }
+ }
+ }
+} \ No newline at end of file
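GetPostings and RecyclePostings above implement a pooled free list: RecyclePostings returns RawPostingList instances to a shared array, and GetPostings refills a caller's array from that pool before directly allocating only the remainder through consumer.CreatePostings. The following is a minimal standalone sketch of that fill-then-allocate pattern; it is not part of this patch, and the PostingPool name and members are illustrative only.

    using System;

    // Simplified stand-in for the TermsHash free list above (illustrative only).
    class PostingPool<T> where T : class, new()
    {
        private T[] free = new T[16];
        private int freeCount;

        public void Get(T[] dest)
        {
            // Copy as many pooled instances as possible from the end of the free list.
            int numToCopy = Math.Min(freeCount, dest.Length);
            Array.Copy(free, freeCount - numToCopy, dest, 0, numToCopy);
            freeCount -= numToCopy;

            // Directly allocate the remainder, as TermsHash does via consumer.CreatePostings.
            for (int i = numToCopy; i < dest.Length; i++)
                dest[i] = new T();
        }

        public void Recycle(T[] src, int count)
        {
            // Grow the free list if needed, then move the instances back into it.
            if (freeCount + count > free.Length)
                Array.Resize(ref free, (freeCount + count) * 2);
            Array.Copy(src, 0, free, freeCount, count);
            freeCount += count;
        }
    }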
diff --git a/src/core/Index/TermsHashConsumer.cs b/src/core/Index/TermsHashConsumer.cs
new file mode 100644
index 0000000..2107375
--- /dev/null
+++ b/src/core/Index/TermsHashConsumer.cs
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Index
+{
+
+ abstract class TermsHashConsumer
+ {
+ internal abstract int BytesPerPosting();
+ internal abstract void CreatePostings(RawPostingList[] postings, int start, int count);
+ public abstract TermsHashConsumerPerThread AddThread(TermsHashPerThread perThread);
+ public abstract void Flush(IDictionary<TermsHashConsumerPerThread, ICollection<TermsHashConsumerPerField>> threadsAndFields, SegmentWriteState state);
+ public abstract void Abort();
+ internal abstract void CloseDocStore(SegmentWriteState state);
+
+ internal FieldInfos fieldInfos;
+
+ internal virtual void SetFieldInfos(FieldInfos fieldInfos)
+ {
+ this.fieldInfos = fieldInfos;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/TermsHashConsumerPerField.cs b/src/core/Index/TermsHashConsumerPerField.cs
new file mode 100644
index 0000000..11002cb
--- /dev/null
+++ b/src/core/Index/TermsHashConsumerPerField.cs
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Documents;
+
+namespace Lucene.Net.Index
+{
+ /// <summary>Implement this class to plug into the TermsHash
+ /// processor, which inverts and stores Tokens into a hash
+ /// table and provides an API for writing bytes into
+ /// multiple streams for each unique Token.
+ /// </summary>
+ abstract class TermsHashConsumerPerField
+ {
+ internal abstract bool Start(IFieldable[] fields, int count);
+ internal abstract void Finish();
+ internal abstract void SkippingLongTerm();
+ internal abstract void Start(IFieldable field);
+ internal abstract void NewTerm(RawPostingList p);
+ internal abstract void AddTerm(RawPostingList p);
+ internal abstract int GetStreamCount();
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/TermsHashConsumerPerThread.cs b/src/core/Index/TermsHashConsumerPerThread.cs
new file mode 100644
index 0000000..fd98eaa
--- /dev/null
+++ b/src/core/Index/TermsHashConsumerPerThread.cs
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ abstract class TermsHashConsumerPerThread
+ {
+ public abstract void StartDocument();
+ public abstract DocumentsWriter.DocWriter FinishDocument();
+ abstract public TermsHashConsumerPerField AddField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo);
+ abstract public void Abort();
+ }
+} \ No newline at end of file
diff --git a/src/core/Index/TermsHashPerField.cs b/src/core/Index/TermsHashPerField.cs
new file mode 100644
index 0000000..87c1352
--- /dev/null
+++ b/src/core/Index/TermsHashPerField.cs
@@ -0,0 +1,639 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Documents;
+using Lucene.Net.Support;
+using UnicodeUtil = Lucene.Net.Util.UnicodeUtil;
+
+namespace Lucene.Net.Index
+{
+
+ sealed class TermsHashPerField:InvertedDocConsumerPerField
+ {
+ private void InitBlock()
+ {
+ postingsHashHalfSize = postingsHashSize / 2;
+ postingsHashMask = postingsHashSize - 1;
+ postingsHash = new RawPostingList[postingsHashSize];
+ }
+
+ internal TermsHashConsumerPerField consumer;
+ internal TermsHashPerField nextPerField;
+ internal TermsHashPerThread perThread;
+ internal DocumentsWriter.DocState docState;
+ internal FieldInvertState fieldState;
+ internal ITermAttribute termAtt;
+
+ // Copied from our perThread
+ internal CharBlockPool charPool;
+ internal IntBlockPool intPool;
+ internal ByteBlockPool bytePool;
+
+ internal int streamCount;
+ internal int numPostingInt;
+
+ internal FieldInfo fieldInfo;
+
+ internal bool postingsCompacted;
+ internal int numPostings;
+ private int postingsHashSize = 4;
+ private int postingsHashHalfSize;
+ private int postingsHashMask;
+ private RawPostingList[] postingsHash;
+ private RawPostingList p;
+
+ public TermsHashPerField(DocInverterPerField docInverterPerField, TermsHashPerThread perThread, TermsHashPerThread nextPerThread, FieldInfo fieldInfo)
+ {
+ InitBlock();
+ this.perThread = perThread;
+ intPool = perThread.intPool;
+ charPool = perThread.charPool;
+ bytePool = perThread.bytePool;
+ docState = perThread.docState;
+ fieldState = docInverterPerField.fieldState;
+ this.consumer = perThread.consumer.AddField(this, fieldInfo);
+ streamCount = consumer.GetStreamCount();
+ numPostingInt = 2 * streamCount;
+ this.fieldInfo = fieldInfo;
+ if (nextPerThread != null)
+ nextPerField = (TermsHashPerField) nextPerThread.AddField(docInverterPerField, fieldInfo);
+ else
+ nextPerField = null;
+ }
+
+ internal void ShrinkHash(int targetSize)
+ {
+ System.Diagnostics.Debug.Assert(postingsCompacted || numPostings == 0);
+
+ int newSize = 4;
+
+ if (newSize != postingsHash.Length)
+ {
+ postingsHash = new RawPostingList[newSize];
+ postingsHashSize = newSize;
+ postingsHashHalfSize = newSize / 2;
+ postingsHashMask = newSize - 1;
+ }
+ System.Array.Clear(postingsHash,0,postingsHash.Length);
+ }
+
+ public void Reset()
+ {
+ if (!postingsCompacted)
+ CompactPostings();
+ System.Diagnostics.Debug.Assert(numPostings <= postingsHash.Length);
+ if (numPostings > 0)
+ {
+ perThread.termsHash.RecyclePostings(postingsHash, numPostings);
+ Array.Clear(postingsHash, 0, numPostings);
+ numPostings = 0;
+ }
+ postingsCompacted = false;
+ if (nextPerField != null)
+ nextPerField.Reset();
+ }
+
+ public override void Abort()
+ {
+ lock (this)
+ {
+ Reset();
+ if (nextPerField != null)
+ nextPerField.Abort();
+ }
+ }
+
+ public void InitReader(ByteSliceReader reader, RawPostingList p, int stream)
+ {
+ System.Diagnostics.Debug.Assert(stream < streamCount);
+ int[] ints = intPool.buffers[p.intStart >> DocumentsWriter.INT_BLOCK_SHIFT];
+ int upto = p.intStart & DocumentsWriter.INT_BLOCK_MASK;
+ reader.Init(bytePool, p.byteStart + stream * ByteBlockPool.FIRST_LEVEL_SIZE, ints[upto + stream]);
+ }
+
+ private void CompactPostings()
+ {
+ lock (this)
+ {
+ int upto = 0;
+ for (int i = 0; i < postingsHashSize; i++)
+ {
+ if (postingsHash[i] != null)
+ {
+ if (upto < i)
+ {
+ postingsHash[upto] = postingsHash[i];
+ postingsHash[i] = null;
+ }
+ upto++;
+ }
+ }
+
+ System.Diagnostics.Debug.Assert(upto == numPostings);
+ postingsCompacted = true;
+ }
+ }
+
+ /// <summary>Collapse the hash table &amp; sort in-place. </summary>
+ public RawPostingList[] SortPostings()
+ {
+ CompactPostings();
+ QuickSort(postingsHash, 0, numPostings - 1);
+ return postingsHash;
+ }
+
+ internal void QuickSort(RawPostingList[] postings, int lo, int hi)
+ {
+ if (lo >= hi)
+ return ;
+ else if (hi == 1 + lo)
+ {
+ if (ComparePostings(postings[lo], postings[hi]) > 0)
+ {
+ RawPostingList tmp = postings[lo];
+ postings[lo] = postings[hi];
+ postings[hi] = tmp;
+ }
+ return ;
+ }
+
+ int mid = Number.URShift((lo + hi), 1);
+
+ if (ComparePostings(postings[lo], postings[mid]) > 0)
+ {
+ RawPostingList tmp = postings[lo];
+ postings[lo] = postings[mid];
+ postings[mid] = tmp;
+ }
+
+ if (ComparePostings(postings[mid], postings[hi]) > 0)
+ {
+ RawPostingList tmp = postings[mid];
+ postings[mid] = postings[hi];
+ postings[hi] = tmp;
+
+ if (ComparePostings(postings[lo], postings[mid]) > 0)
+ {
+ RawPostingList tmp2 = postings[lo];
+ postings[lo] = postings[mid];
+ postings[mid] = tmp2;
+ }
+ }
+
+ int left = lo + 1;
+ int right = hi - 1;
+
+ if (left >= right)
+ return ;
+
+ RawPostingList partition = postings[mid];
+
+ for (; ; )
+ {
+ while (ComparePostings(postings[right], partition) > 0)
+ --right;
+
+ while (left < right && ComparePostings(postings[left], partition) <= 0)
+ ++left;
+
+ if (left < right)
+ {
+ RawPostingList tmp = postings[left];
+ postings[left] = postings[right];
+ postings[right] = tmp;
+ --right;
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ QuickSort(postings, lo, left);
+ QuickSort(postings, left + 1, hi);
+ }
+
+ /// <summary>Compares term text for two Posting instances and
+ /// returns -1 if p1 &lt; p2; 1 if p1 &gt; p2; else 0.
+ /// </summary>
+ internal int ComparePostings(RawPostingList p1, RawPostingList p2)
+ {
+
+ if (p1 == p2)
+ return 0;
+
+ char[] text1 = charPool.buffers[p1.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
+ int pos1 = p1.textStart & DocumentsWriter.CHAR_BLOCK_MASK;
+ char[] text2 = charPool.buffers[p2.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
+ int pos2 = p2.textStart & DocumentsWriter.CHAR_BLOCK_MASK;
+
+ System.Diagnostics.Debug.Assert(text1 != text2 || pos1 != pos2);
+
+ while (true)
+ {
+ char c1 = text1[pos1++];
+ char c2 = text2[pos2++];
+ if (c1 != c2)
+ {
+ if (0xffff == c2)
+ return 1;
+ else if (0xffff == c1)
+ return - 1;
+ else
+ return c1 - c2;
+ }
+ else
+ // This method should never compare equal postings
+ // unless p1==p2
+ System.Diagnostics.Debug.Assert(c1 != 0xffff);
+ }
+ }
+
+ /// <summary>Test whether the text for the current RawPostingList p equals
+ /// the current tokenText.
+ /// </summary>
+ private bool PostingEquals(char[] tokenText, int tokenTextLen)
+ {
+
+ char[] text = perThread.charPool.buffers[p.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
+ System.Diagnostics.Debug.Assert(text != null);
+ int pos = p.textStart & DocumentsWriter.CHAR_BLOCK_MASK;
+
+ int tokenPos = 0;
+ for (; tokenPos < tokenTextLen; pos++, tokenPos++)
+ if (tokenText[tokenPos] != text[pos])
+ return false;
+ return 0xffff == text[pos];
+ }
+
+ private bool doCall;
+ private bool doNextCall;
+
+ internal override void Start(IFieldable f)
+ {
+ termAtt = fieldState.attributeSource.AddAttribute<ITermAttribute>();
+ consumer.Start(f);
+ if (nextPerField != null)
+ {
+ nextPerField.Start(f);
+ }
+ }
+
+ internal override bool Start(IFieldable[] fields, int count)
+ {
+ doCall = consumer.Start(fields, count);
+ if (nextPerField != null)
+ doNextCall = nextPerField.Start(fields, count);
+ return doCall || doNextCall;
+ }
+
+ // Secondary entry point (for 2nd & subsequent TermsHash),
+ // because token text has already been "interned" into
+ // textStart, so we hash by textStart
+ public void Add(int textStart)
+ {
+
+ int code = textStart;
+
+ int hashPos = code & postingsHashMask;
+
+ System.Diagnostics.Debug.Assert(!postingsCompacted);
+
+ // Locate RawPostingList in hash
+ p = postingsHash[hashPos];
+
+ if (p != null && p.textStart != textStart)
+ {
+ // Conflict: keep searching different locations in
+ // the hash table.
+ int inc = ((code >> 8) + code) | 1;
+ do
+ {
+ code += inc;
+ hashPos = code & postingsHashMask;
+ p = postingsHash[hashPos];
+ }
+ while (p != null && p.textStart != textStart);
+ }
+
+ if (p == null)
+ {
+
+ // First time we are seeing this token since we last
+ // flushed the hash.
+
+ // Refill?
+ if (0 == perThread.freePostingsCount)
+ perThread.MorePostings();
+
+ // Pull next free RawPostingList from free list
+ p = perThread.freePostings[--perThread.freePostingsCount];
+ System.Diagnostics.Debug.Assert(p != null);
+
+ p.textStart = textStart;
+
+ System.Diagnostics.Debug.Assert(postingsHash [hashPos] == null);
+ postingsHash[hashPos] = p;
+ numPostings++;
+
+ if (numPostings == postingsHashHalfSize)
+ RehashPostings(2 * postingsHashSize);
+
+ // Init stream slices
+ if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE)
+ intPool.NextBuffer();
+
+ if (DocumentsWriter.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt * ByteBlockPool.FIRST_LEVEL_SIZE)
+ bytePool.NextBuffer();
+
+ intUptos = intPool.buffer;
+ intUptoStart = intPool.intUpto;
+ intPool.intUpto += streamCount;
+
+ p.intStart = intUptoStart + intPool.intOffset;
+
+ for (int i = 0; i < streamCount; i++)
+ {
+ int upto = bytePool.NewSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
+ intUptos[intUptoStart + i] = upto + bytePool.byteOffset;
+ }
+ p.byteStart = intUptos[intUptoStart];
+
+ consumer.NewTerm(p);
+ }
+ else
+ {
+ intUptos = intPool.buffers[p.intStart >> DocumentsWriter.INT_BLOCK_SHIFT];
+ intUptoStart = p.intStart & DocumentsWriter.INT_BLOCK_MASK;
+ consumer.AddTerm(p);
+ }
+ }
+
+ // Primary entry point (for first TermsHash)
+ internal override void Add()
+ {
+
+ System.Diagnostics.Debug.Assert(!postingsCompacted);
+
+ // We are first in the chain so we must "intern" the
+ // term text into textStart address
+
+ // Get the text of this term.
+ char[] tokenText = termAtt.TermBuffer();
+ int tokenTextLen = termAtt.TermLength();
+
+ // Compute hashcode & replace any invalid UTF16 sequences
+ int downto = tokenTextLen;
+ int code = 0;
+ while (downto > 0)
+ {
+ char ch = tokenText[--downto];
+
+ if (ch >= UnicodeUtil.UNI_SUR_LOW_START && ch <= UnicodeUtil.UNI_SUR_LOW_END)
+ {
+ if (0 == downto)
+ {
+ // Unpaired
+ ch = tokenText[downto] = (char) (UnicodeUtil.UNI_REPLACEMENT_CHAR);
+ }
+ else
+ {
+ char ch2 = tokenText[downto - 1];
+ if (ch2 >= UnicodeUtil.UNI_SUR_HIGH_START && ch2 <= UnicodeUtil.UNI_SUR_HIGH_END)
+ {
+ // OK: high followed by low. This is a valid
+ // surrogate pair.
+ code = ((code * 31) + ch) * 31 + ch2;
+ downto--;
+ continue;
+ }
+ else
+ {
+ // Unpaired
+ ch = tokenText[downto] = (char) (UnicodeUtil.UNI_REPLACEMENT_CHAR);
+ }
+ }
+ }
+ else if (ch >= UnicodeUtil.UNI_SUR_HIGH_START && (ch <= UnicodeUtil.UNI_SUR_HIGH_END || ch == 0xffff))
+ {
+ // Unpaired or 0xffff
+ ch = tokenText[downto] = (char) (UnicodeUtil.UNI_REPLACEMENT_CHAR);
+ }
+
+ code = (code * 31) + ch;
+ }
+
+ int hashPos = code & postingsHashMask;
+
+ // Locate RawPostingList in hash
+ p = postingsHash[hashPos];
+
+ if (p != null && !PostingEquals(tokenText, tokenTextLen))
+ {
+ // Conflict: keep searching different locations in
+ // the hash table.
+ int inc = ((code >> 8) + code) | 1;
+ do
+ {
+ code += inc;
+ hashPos = code & postingsHashMask;
+ p = postingsHash[hashPos];
+ }
+ while (p != null && !PostingEquals(tokenText, tokenTextLen));
+ }
+
+ if (p == null)
+ {
+
+ // First time we are seeing this token since we last
+ // flushed the hash.
+ int textLen1 = 1 + tokenTextLen;
+ if (textLen1 + charPool.charUpto > DocumentsWriter.CHAR_BLOCK_SIZE)
+ {
+ if (textLen1 > DocumentsWriter.CHAR_BLOCK_SIZE)
+ {
+ // Just skip this term, to remain as robust as
+ // possible during indexing. A TokenFilter
+ // can be inserted into the analyzer chain if
+ // other behavior is wanted (pruning the term
+ // to a prefix, throwing an exception, etc).
+
+ if (docState.maxTermPrefix == null)
+ docState.maxTermPrefix = new System.String(tokenText, 0, 30);
+
+ consumer.SkippingLongTerm();
+ return ;
+ }
+ charPool.NextBuffer();
+ }
+
+ // Refill?
+ if (0 == perThread.freePostingsCount)
+ perThread.MorePostings();
+
+ // Pull next free RawPostingList from free list
+ p = perThread.freePostings[--perThread.freePostingsCount];
+ System.Diagnostics.Debug.Assert(p != null);
+
+ char[] text = charPool.buffer;
+ int textUpto = charPool.charUpto;
+ p.textStart = textUpto + charPool.charOffset;
+ charPool.charUpto += textLen1;
+ Array.Copy(tokenText, 0, text, textUpto, tokenTextLen);
+ text[textUpto + tokenTextLen] = (char) (0xffff);
+
+ System.Diagnostics.Debug.Assert(postingsHash [hashPos] == null);
+ postingsHash[hashPos] = p;
+ numPostings++;
+
+ if (numPostings == postingsHashHalfSize)
+ RehashPostings(2 * postingsHashSize);
+
+ // Init stream slices
+ if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE)
+ intPool.NextBuffer();
+
+ if (DocumentsWriter.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt * ByteBlockPool.FIRST_LEVEL_SIZE)
+ bytePool.NextBuffer();
+
+ intUptos = intPool.buffer;
+ intUptoStart = intPool.intUpto;
+ intPool.intUpto += streamCount;
+
+ p.intStart = intUptoStart + intPool.intOffset;
+
+ for (int i = 0; i < streamCount; i++)
+ {
+ int upto = bytePool.NewSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
+ intUptos[intUptoStart + i] = upto + bytePool.byteOffset;
+ }
+ p.byteStart = intUptos[intUptoStart];
+
+ consumer.NewTerm(p);
+ }
+ else
+ {
+ intUptos = intPool.buffers[p.intStart >> DocumentsWriter.INT_BLOCK_SHIFT];
+ intUptoStart = p.intStart & DocumentsWriter.INT_BLOCK_MASK;
+ consumer.AddTerm(p);
+ }
+
+ if (doNextCall)
+ nextPerField.Add(p.textStart);
+ }
+
+ internal int[] intUptos;
+ internal int intUptoStart;
+
+ internal void WriteByte(int stream, byte b)
+ {
+ int upto = intUptos[intUptoStart + stream];
+ byte[] bytes = bytePool.buffers[upto >> DocumentsWriter.BYTE_BLOCK_SHIFT];
+ System.Diagnostics.Debug.Assert(bytes != null);
+ int offset = upto & DocumentsWriter.BYTE_BLOCK_MASK;
+ if (bytes[offset] != 0)
+ {
+ // End of slice; allocate a new one
+ offset = bytePool.AllocSlice(bytes, offset);
+ bytes = bytePool.buffer;
+ intUptos[intUptoStart + stream] = offset + bytePool.byteOffset;
+ }
+ bytes[offset] = b;
+ (intUptos[intUptoStart + stream])++;
+ }
+
+ public void WriteBytes(int stream, byte[] b, int offset, int len)
+ {
+ // TODO: optimize
+ int end = offset + len;
+ for (int i = offset; i < end; i++)
+ WriteByte(stream, b[i]);
+ }
+
+ internal void WriteVInt(int stream, int i)
+ {
+ System.Diagnostics.Debug.Assert(stream < streamCount);
+ while ((i & ~ 0x7F) != 0)
+ {
+ WriteByte(stream, (byte) ((i & 0x7f) | 0x80));
+ i = Number.URShift(i, 7);
+ }
+ WriteByte(stream, (byte) i);
+ }
+
+ internal override void Finish()
+ {
+ consumer.Finish();
+ if (nextPerField != null)
+ nextPerField.Finish();
+ }
+
+ /// <summary>Called when the postings hash is too small (&gt; 50%
+ /// occupied) or too large (&lt; 20% occupied).
+ /// </summary>
+ internal void RehashPostings(int newSize)
+ {
+
+ int newMask = newSize - 1;
+
+ RawPostingList[] newHash = new RawPostingList[newSize];
+ for (int i = 0; i < postingsHashSize; i++)
+ {
+ RawPostingList p0 = postingsHash[i];
+ if (p0 != null)
+ {
+ int code;
+ if (perThread.primary)
+ {
+ int start = p0.textStart & DocumentsWriter.CHAR_BLOCK_MASK;
+ char[] text = charPool.buffers[p0.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
+ int pos = start;
+ while (text[pos] != 0xffff)
+ pos++;
+ code = 0;
+ while (pos > start)
+ code = (code * 31) + text[--pos];
+ }
+ else
+ code = p0.textStart;
+
+ int hashPos = code & newMask;
+ System.Diagnostics.Debug.Assert(hashPos >= 0);
+ if (newHash[hashPos] != null)
+ {
+ int inc = ((code >> 8) + code) | 1;
+ do
+ {
+ code += inc;
+ hashPos = code & newMask;
+ }
+ while (newHash[hashPos] != null);
+ }
+ newHash[hashPos] = p0;
+ }
+ }
+
+ postingsHashMask = newMask;
+ postingsHash = newHash;
+ postingsHashSize = newSize;
+ postingsHashHalfSize = newSize >> 1;
+ }
+ }
+} \ No newline at end of file
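TermsHashPerField.Add and RehashPostings above use open addressing over a power-of-two table: the term's hash is masked to a slot, collisions step by an odd increment derived from the hash, and the table doubles once it is half full. Below is a minimal standalone sketch of the same probing scheme, not part of this patch; the ProbingSet type and its use of string keys are illustrative stand-ins for the interned term text.

    using System;

    // Illustrative open-addressing set mirroring the probing used above.
    class ProbingSet
    {
        private string[] slots = new string[4];
        private int count;

        public void Add(string term)
        {
            // Rehash once the table is 50% occupied, as TermsHashPerField does.
            if (count == slots.Length / 2)
                Rehash(slots.Length * 2);

            int mask = slots.Length - 1;
            int code = term.GetHashCode();
            int pos = code & mask;
            if (slots[pos] != null && slots[pos] != term)
            {
                // An odd increment visits every slot of a power-of-two table.
                int inc = ((code >> 8) + code) | 1;
                do
                {
                    code += inc;
                    pos = code & mask;
                }
                while (slots[pos] != null && slots[pos] != term);
            }
            if (slots[pos] == null)
            {
                slots[pos] = term;
                count++;
            }
        }

        private void Rehash(int newSize)
        {
            string[] old = slots;
            slots = new string[newSize];
            count = 0;
            foreach (string t in old)
                if (t != null)
                    Add(t);
        }
    }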
diff --git a/src/core/Index/TermsHashPerThread.cs b/src/core/Index/TermsHashPerThread.cs
new file mode 100644
index 0000000..f7f550c
--- /dev/null
+++ b/src/core/Index/TermsHashPerThread.cs
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ sealed class TermsHashPerThread:InvertedDocConsumerPerThread
+ {
+
+ internal TermsHash termsHash;
+ internal TermsHashConsumerPerThread consumer;
+ internal TermsHashPerThread nextPerThread;
+
+ internal CharBlockPool charPool;
+ internal IntBlockPool intPool;
+ internal ByteBlockPool bytePool;
+ internal bool primary;
+ internal DocumentsWriter.DocState docState;
+
+ internal RawPostingList[] freePostings = new RawPostingList[256];
+ internal int freePostingsCount;
+
+ public TermsHashPerThread(DocInverterPerThread docInverterPerThread, TermsHash termsHash, TermsHash nextTermsHash, TermsHashPerThread primaryPerThread)
+ {
+ docState = docInverterPerThread.docState;
+
+ this.termsHash = termsHash;
+ this.consumer = termsHash.consumer.AddThread(this);
+
+ if (nextTermsHash != null)
+ {
+ // We are primary
+ charPool = new CharBlockPool(termsHash.docWriter);
+ primary = true;
+ }
+ else
+ {
+ charPool = primaryPerThread.charPool;
+ primary = false;
+ }
+
+ intPool = new IntBlockPool(termsHash.docWriter, termsHash.trackAllocations);
+ bytePool = new ByteBlockPool(termsHash.docWriter.byteBlockAllocator, termsHash.trackAllocations);
+
+ if (nextTermsHash != null)
+ nextPerThread = nextTermsHash.AddThread(docInverterPerThread, this);
+ else
+ nextPerThread = null;
+ }
+
+ internal override InvertedDocConsumerPerField AddField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo)
+ {
+ return new TermsHashPerField(docInverterPerField, this, nextPerThread, fieldInfo);
+ }
+
+ public override void Abort()
+ {
+ lock (this)
+ {
+ Reset(true);
+ consumer.Abort();
+ if (nextPerThread != null)
+ nextPerThread.Abort();
+ }
+ }
+
+ // perField calls this when it needs more postings:
+ internal void MorePostings()
+ {
+ System.Diagnostics.Debug.Assert(freePostingsCount == 0);
+ termsHash.GetPostings(freePostings);
+ freePostingsCount = freePostings.Length;
+ System.Diagnostics.Debug.Assert(noNullPostings(freePostings, freePostingsCount, "consumer=" + consumer));
+ }
+
+ private static bool noNullPostings(RawPostingList[] postings, int count, System.String details)
+ {
+ for (int i = 0; i < count; i++)
+ System.Diagnostics.Debug.Assert(postings[i] != null, "postings[" + i + "] of " + count + " is null: " + details);
+ return true;
+ }
+
+ public override void StartDocument()
+ {
+ consumer.StartDocument();
+ if (nextPerThread != null)
+ nextPerThread.consumer.StartDocument();
+ }
+
+ public override DocumentsWriter.DocWriter FinishDocument()
+ {
+ DocumentsWriter.DocWriter doc = consumer.FinishDocument();
+
+ DocumentsWriter.DocWriter doc2;
+ if (nextPerThread != null)
+ doc2 = nextPerThread.consumer.FinishDocument();
+ else
+ doc2 = null;
+ if (doc == null)
+ return doc2;
+ else
+ {
+ doc.SetNext(doc2);
+ return doc;
+ }
+ }
+
+ // Clear all state
+ internal void Reset(bool recyclePostings)
+ {
+ intPool.Reset();
+ bytePool.Reset();
+
+ if (primary)
+ charPool.Reset();
+
+ if (recyclePostings)
+ {
+ termsHash.RecyclePostings(freePostings, freePostingsCount);
+ freePostingsCount = 0;
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/LZOCompressor.cs b/src/core/LZOCompressor.cs
new file mode 100644
index 0000000..da0d715
--- /dev/null
+++ b/src/core/LZOCompressor.cs
@@ -0,0 +1,135 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+// LZO.Net
+// $Id: LZOCompressor.cs,v 1.1 2004/02/22 17:44:04 laptop Exp $
+
+namespace Simplicit.Net.Lzo {
+ using System;
+ using System.Diagnostics;
+ using System.Runtime.InteropServices;
+
+ /// <summary>
+ /// Wrapper class for the highly performant LZO compression library
+ /// </summary>
+ public class LZOCompressor {
+ private static TraceSwitch _traceSwitch = new TraceSwitch("Simplicit.Net.Lzo", "Switch for tracing of the LZOCompressor-Class");
+
+ #region Dll-Imports
+ [DllImport("lzo.dll")]
+ private static extern int __lzo_init3();
+ [DllImport("lzo.dll")]
+ private static extern string lzo_version_string();
+ [DllImport("lzo.dll")]
+ private static extern string lzo_version_date();
+ [DllImport("lzo.dll")]
+ private static extern int lzo1x_1_compress(
+ byte[] src,
+ int src_len,
+ byte[] dst,
+ ref int dst_len,
+ byte[] wrkmem
+ );
+ [DllImport("lzo.dll")]
+ private static extern int lzo1x_decompress(
+ byte[] src,
+ int src_len,
+ byte[] dst,
+ ref int dst_len,
+ byte[] wrkmem);
+ #endregion
+
+ private byte[] _workMemory = new byte[16384L * 4];
+
+ static LZOCompressor() {
+ int init = __lzo_init3();
+ if(init != 0) {
+ throw new Exception("Initialization of LZO-Compressor failed !");
+ }
+ }
+
+ /// <summary>
+ /// Constructor.
+ /// </summary>
+ public LZOCompressor() {
+ }
+
+ /// <summary>
+ /// Version string of the compression library.
+ /// </summary>
+ public string Version {
+ get {
+ return lzo_version_string();
+ }
+ }
+
+ /// <summary>
+ /// Version date of the compression library
+ /// </summary>
+ public string VersionDate {
+ get {
+ return lzo_version_date();
+ }
+ }
+
+ /// <summary>
+ /// Compresses a byte array and returns the compressed data in a new
+ /// array. The original length of the array is appended to the result, so
+ /// Decompress can restore it without needing the length separately.
+ /// </summary>
+ /// <param name="src">Source array for compression</param>
+ /// <returns>Byte array containing the compressed data</returns>
+ public byte[] Compress(byte[] src) {
+ if(_traceSwitch.TraceVerbose) {
+ Trace.WriteLine(String.Format("LZOCompressor: trying to compress {0}", src.Length));
+ }
+ byte[] dst = new byte[src.Length + src.Length / 64 + 16 + 3 + 4];
+ int outlen = 0;
+ lzo1x_1_compress(src, src.Length, dst, ref outlen, _workMemory);
+ if(_traceSwitch.TraceVerbose) {
+ Trace.WriteLine(String.Format("LZOCompressor: compressed {0} to {1} bytes", src.Length, outlen));
+ }
+ byte[] ret = new byte[outlen + 4];
+ Array.Copy(dst, 0, ret, 0, outlen);
+ byte[] outlenarr = BitConverter.GetBytes(src.Length);
+ Array.Copy(outlenarr, 0, ret, outlen, 4);
+ return ret;
+ }
+
+ /// <summary>
+ /// Decompresses compressed data to its original state.
+ /// </summary>
+ /// <param name="src">Source array to be decompressed</param>
+ /// <returns>Decompressed data</returns>
+ public byte[] Decompress(byte[] src) {
+ if(_traceSwitch.TraceVerbose) {
+ Trace.WriteLine(String.Format("LZOCompressor: trying to decompress {0}", src.Length));
+ }
+ int origlen = BitConverter.ToInt32(src, src.Length - 4);
+ byte[] dst = new byte[origlen];
+ int outlen = origlen;
+ lzo1x_decompress(src, src.Length - 4, dst, ref outlen, _workMemory);
+ if(_traceSwitch.TraceVerbose) {
+ Trace.WriteLine(String.Format("LZOCompressor: decompressed {0} to {1} bytes", src.Length, origlen));
+ }
+ return dst;
+ }
+ }
+}
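Compress stores the original length in the last four bytes of its result and Decompress reads it back, so a round trip needs nothing beyond the two calls. The following usage sketch is not part of this patch and assumes the native lzo.dll that the class P/Invokes is available on the machine; the sample data is illustrative.

    using System;
    using System.Text;
    using Simplicit.Net.Lzo;

    class LzoRoundTrip
    {
        static void Main()
        {
            byte[] original = Encoding.UTF8.GetBytes(new string('a', 1024));

            var lzo = new LZOCompressor();
            byte[] compressed = lzo.Compress(original);   // original length is stored in the last 4 bytes
            byte[] restored = lzo.Decompress(compressed); // length is read back, no extra argument needed

            Console.WriteLine("{0} -> {1} -> {2} bytes", original.Length, compressed.Length, restored.Length);
        }
    }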
diff --git a/src/core/LucenePackage.cs b/src/core/LucenePackage.cs
new file mode 100644
index 0000000..9904b41
--- /dev/null
+++ b/src/core/LucenePackage.cs
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net
+{
+
+ /// <summary>Lucene's package information, including version.</summary>
+ public sealed class LucenePackage
+ {
+
+ private LucenePackage()
+ {
+ } // can't construct
+
+ /*
+ * /// <summary>Return Lucene's package, including version information. </summary>
+ // {{Aroush-1.9}}
+ public static Package Get()
+ {
+ return typeof(LucenePackage).getPackage();
+ }
+ */
+ }
+} \ No newline at end of file
diff --git a/src/core/Messages/INLSException.cs b/src/core/Messages/INLSException.cs
new file mode 100644
index 0000000..99c3248
--- /dev/null
+++ b/src/core/Messages/INLSException.cs
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Messages
+{
+
+ /// <summary> Interface that exceptions should implement to support lazy loading of messages.
+ ///
+ /// For Native Language Support (NLS), system of software internationalization.
+ ///
+ /// This interface should be implemented by all exceptions that require
+ /// translation.
+ ///
+ /// </summary>
+ public interface INLSException
+ {
+ /// <value> an instance of a class that implements the Message interface </value>
+ Message MessageObject { get; }
+ }
+} \ No newline at end of file
diff --git a/src/core/Messages/Message.cs b/src/core/Messages/Message.cs
new file mode 100644
index 0000000..d9c2b03
--- /dev/null
+++ b/src/core/Messages/Message.cs
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Messages
+{
+
+ /// <summary> Message interface for lazy loading of messages.
+ /// For Native Language Support (NLS), system of software internationalization.
+ /// </summary>
+ public interface Message
+ {
+ string Key { get; }
+
+ object[] GetArguments();
+
+ System.String GetLocalizedMessage();
+
+ System.String GetLocalizedMessage(System.Globalization.CultureInfo locale);
+ }
+} \ No newline at end of file
diff --git a/src/core/Messages/MessageImpl.cs b/src/core/Messages/MessageImpl.cs
new file mode 100644
index 0000000..8e53215
--- /dev/null
+++ b/src/core/Messages/MessageImpl.cs
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Text;
+
+namespace Lucene.Net.Messages
+{
+
+ /// <summary> Default implementation of Message interface.
+ /// For Native Language Support (NLS), system of software internationalization.
+ /// </summary>
+ [Serializable]
+ public class MessageImpl : Message
+ {
+
+ private const long serialVersionUID = - 3077643314630884523L;
+
+ private System.String key;
+
+ private System.Object[] arguments = new System.Object[0];
+
+ public MessageImpl(System.String key)
+ {
+ this.key = key;
+ }
+
+ public MessageImpl(System.String key, params System.Object[] args):this(key)
+ {
+ this.arguments = args;
+ }
+
+ public virtual object[] GetArguments()
+ {
+ return this.arguments;
+ }
+
+ public virtual string Key
+ {
+ get { return this.key; }
+ }
+
+ public virtual string GetLocalizedMessage()
+ {
+ return GetLocalizedMessage(System.Threading.Thread.CurrentThread.CurrentCulture);
+ }
+
+ public virtual string GetLocalizedMessage(System.Globalization.CultureInfo locale)
+ {
+ return NLS.GetLocalizedMessage(Key, locale, GetArguments());
+ }
+
+ public override string ToString()
+ {
+ System.Object[] args = GetArguments();
+ StringBuilder argsString = new StringBuilder();
+ if (args != null)
+ {
+ for (int i = 0; i < args.Length; i++)
+ {
+ argsString.Append(i == 0 ? " " : ", ").Append(args[i]);
+ }
+ }
+ return argsString.ToString();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Messages/NLS.cs b/src/core/Messages/NLS.cs
new file mode 100644
index 0000000..9677de2
--- /dev/null
+++ b/src/core/Messages/NLS.cs
@@ -0,0 +1,254 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Messages
+{
+
+ /// <summary> MessageBundle classes extend this class to implement a bundle.
+ ///
+ /// For Native Language Support (NLS), system of software internationalization.
+ ///
+ /// This class is similar to the NLS class in eclipse.osgi.util: the
+ /// initializeMessages() method resets the values of all static strings and should
+ /// only be called by classes that extend NLS (see TestMessages.java for
+ /// reference); it performs validation of all messages in a bundle at class load
+ /// time and per-message validation at runtime (see NLSTest.java for
+ /// usage reference).
+ ///
+ /// MessageBundle classes may subclass this type.
+ /// </summary>
+ public class NLS
+ {
+ public interface IPriviligedAction
+ {
+ /// <summary>
+ /// Performs the privileged action.
+ /// </summary>
+ /// <returns>A value that may represent the result of the action.</returns>
+ System.Object Run();
+ }
+
+ private class AnonymousClassPrivilegedAction : IPriviligedAction
+ {
+ public AnonymousClassPrivilegedAction(System.Reflection.FieldInfo field)
+ {
+ InitBlock(field);
+ }
+ private void InitBlock(System.Reflection.FieldInfo field)
+ {
+ this.field = field;
+ }
+ private System.Reflection.FieldInfo field;
+ public virtual System.Object Run()
+ {
+ // field.setAccessible(true); // {{Aroush-2.9}} java.lang.reflect.AccessibleObject.setAccessible
+ return null;
+ }
+ }
+
+ private static IDictionary<string, Type> bundles = new HashMap<string, Type>(0);
+
+ protected internal NLS()
+ {
+ // Do not instantiate
+ }
+
+ public static System.String GetLocalizedMessage(System.String key)
+ {
+ return GetLocalizedMessage(key, System.Threading.Thread.CurrentThread.CurrentCulture);
+ }
+
+ public static System.String GetLocalizedMessage(System.String key, System.Globalization.CultureInfo locale)
+ {
+ System.Object message = GetResourceBundleObject(key, locale);
+ if (message == null)
+ {
+ return "Message with key:" + key + " and locale: " + locale + " not found.";
+ }
+ return message.ToString();
+ }
+
+ public static System.String GetLocalizedMessage(System.String key, System.Globalization.CultureInfo locale, params System.Object[] args)
+ {
+ System.String str = GetLocalizedMessage(key, locale);
+
+ if (args.Length > 0)
+ {
+ str = System.String.Format(str, args);
+ }
+
+ return str;
+ }
+
+ public static System.String GetLocalizedMessage(System.String key, params System.Object[] args)
+ {
+ return GetLocalizedMessage(key, System.Threading.Thread.CurrentThread.CurrentCulture, args);
+ }
+
+ /// <summary> Initializes a given class with the message bundle keys. Should be called from
+ /// a class that extends NLS in a static block at class load time.
+ /// </summary>
+ /// <param name="bundleName">Property file that contains the message bundle
+ /// </param>
+ /// <typeparam name="T">Class where the message constants will reside
+ /// </typeparam>
+ //@SuppressWarnings("unchecked")
+ protected internal static void InitializeMessages<T>(System.String bundleName)
+ {
+ try
+ {
+ Load<T>();
+ if (!bundles.ContainsKey(bundleName))
+ bundles[bundleName] = typeof(T);
+ }
+ catch (System.Exception)
+ {
+ // ignore all errors and exceptions
+ // because this function is supposed to be called at class load time.
+ }
+ }
+
+ private static System.Object GetResourceBundleObject(System.String messageKey, System.Globalization.CultureInfo locale)
+ {
+
+ // slow resource checking
+ // need to loop thru all registered resource bundles
+ for (var it = bundles.Keys.GetEnumerator(); it.MoveNext(); )
+ {
+ System.Type clazz = bundles[it.Current];
+ System.Threading.Thread.CurrentThread.CurrentUICulture = locale;
+ System.Resources.ResourceManager resourceBundle = System.Resources.ResourceManager.CreateFileBasedResourceManager(clazz.Name, "Messages", null); //{{Lucene.Net-2.9.1}} Can we make resourceDir "Messages" more general?
+ if (resourceBundle != null)
+ {
+ try
+ {
+ System.Object obj = resourceBundle.GetObject(messageKey);
+ if (obj != null)
+ return obj;
+ }
+ catch (System.Resources.MissingManifestResourceException)
+ {
+ // just continue it might be on the next resource bundle
+ }
+ }
+ }
+ // if resource is not found
+ return null;
+ }
+
+ private static void Load<T>()
+ {
+ var clazz = typeof (T);
+ System.Reflection.FieldInfo[] fieldArray = clazz.GetFields(System.Reflection.BindingFlags.Instance | System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Public | System.Reflection.BindingFlags.DeclaredOnly | System.Reflection.BindingFlags.Static);
+
+ bool isFieldAccessible = clazz.IsPublic;
+
+ // build a map of field names to Field objects
+ int len = fieldArray.Length;
+ var fields = new HashMap<string, System.Reflection.FieldInfo>(len * 2);
+ for (int i = 0; i < len; i++)
+ {
+ fields[fieldArray[i].Name] = fieldArray[i];
+ LoadfieldValue<T>(fieldArray[i], isFieldAccessible);
+ }
+ }
+
+ /// <param name="field"></param>
+ /// <param name="isFieldAccessible"></param>
+ private static void LoadfieldValue<T>(System.Reflection.FieldInfo field, bool isFieldAccessible)
+ {
+ var clazz = typeof (T);
+ /*
+ int MOD_EXPECTED = Modifier.PUBLIC | Modifier.STATIC;
+ int MOD_MASK = MOD_EXPECTED | Modifier.FINAL;
+ if ((field.getModifiers() & MOD_MASK) != MOD_EXPECTED)
+ return ;
+ */
+ if (!(field.IsPublic && field.IsStatic))
+ return ;
+
+ // Set a value for this empty field.
+ if (!isFieldAccessible)
+ MakeAccessible(field);
+ try
+ {
+ field.SetValue(null, field.Name);
+ ValidateMessage<T>(field.Name);
+ }
+ catch (System.ArgumentException)
+ {
+ // should not happen
+ }
+ catch (System.UnauthorizedAccessException)
+ {
+ // should not happen
+ }
+ }
+
+ /// <param name="key">- Message Key
+ /// </param>
+ private static void ValidateMessage<T>(System.String key)
+ {
+ // Test if the message is present in the resource bundle
+ var clazz = typeof (T);
+ try
+ {
+ System.Threading.Thread.CurrentThread.CurrentUICulture = System.Threading.Thread.CurrentThread.CurrentCulture;
+ System.Resources.ResourceManager resourceBundle = System.Resources.ResourceManager.CreateFileBasedResourceManager(clazz.FullName, "", null);
+ if (resourceBundle != null)
+ {
+ System.Object obj = resourceBundle.GetObject(key);
+ if (obj == null)
+ {
+ System.Console.Error.WriteLine("WARN: Message with key:" + key + " and locale: " + System.Threading.Thread.CurrentThread.CurrentCulture + " not found.");
+ }
+ }
+ }
+ catch (System.Resources.MissingManifestResourceException)
+ {
+ System.Console.Error.WriteLine("WARN: Message with key:" + key + " and locale: " + System.Threading.Thread.CurrentThread.CurrentCulture + " not found.");
+ }
+ catch (System.Exception)
+ {
+ // ignore all other errors and exceptions
+ // since this code is just a test to see if the message is present on the
+ // system
+ }
+ }
+
+ /*
+ * Make a class field accessible
+ */
+ //@SuppressWarnings("unchecked")
+ private static void MakeAccessible(System.Reflection.FieldInfo field)
+ {
+ if (System.Security.SecurityManager.SecurityEnabled)
+ {
+ //field.setAccessible(true); // {{Aroush-2.9}} java.lang.reflect.AccessibleObject.setAccessible
+ }
+ else
+ {
+ //AccessController.doPrivileged(new AnonymousClassPrivilegedAction(field)); // {{Aroush-2.9}} java.security.AccessController.doPrivileged
+ }
+ }
+ }
+} \ No newline at end of file
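NLS expects a bundle class to extend it, declare a public static string field per message key, and call InitializeMessages<T> from its static constructor; Load then assigns each field its own name so the key can later be resolved against a resource bundle, and MessageImpl carries such a key for lazy localization. The sketch below shows that shape; it is not part of this patch, and the DemoMessages class, bundle name, and keys are invented for illustration.

    using Lucene.Net.Messages;

    // Illustrative message bundle built on the NLS base class above.
    public class DemoMessages : NLS
    {
        private const string BUNDLE_NAME = "Lucene.Net.Demo.Messages"; // hypothetical resource name

        public static string INVALID_SYNTAX;        // set to "INVALID_SYNTAX" by NLS.Load
        public static string INVALID_SYNTAX_ESCAPE; // set to "INVALID_SYNTAX_ESCAPE" by NLS.Load

        static DemoMessages()
        {
            // Registers the bundle and assigns every public static string field its key.
            InitializeMessages<DemoMessages>(BUNDLE_NAME);
        }

        private DemoMessages()
        {
            // Do not instantiate.
        }
    }

    // Elsewhere, a lazily localized message can then be carried by an exception:
    //   Message m = new MessageImpl(DemoMessages.INVALID_SYNTAX, "some query");
    //   string text = m.GetLocalizedMessage();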
diff --git a/src/core/QueryParser/CharStream.cs b/src/core/QueryParser/CharStream.cs
new file mode 100644
index 0000000..cfcdb4f
--- /dev/null
+++ b/src/core/QueryParser/CharStream.cs
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 4.1 */
+/* JavaCCOptions:STATIC=false */
+
+using System;
+
+namespace Lucene.Net.QueryParsers
+{
+
+ /// <summary> This interface describes a character stream that maintains line and
+ /// column number positions of the characters. It also has the capability
+ /// to back up the stream to some extent. An implementation of this
+ /// interface is used in the TokenManager implementation generated by
+ /// JavaCCParser.
+ ///
+ /// All the methods except backup can be implemented in any fashion. backup
+ /// needs to be implemented correctly for the correct operation of the lexer.
+ /// The rest of the methods are used to get information such as the line number,
+ /// column number and the String that constitutes a token, and are not used
+ /// by the lexer. Hence their implementation won't affect the generated lexer's
+ /// operation.
+ /// </summary>
+
+ public interface ICharStream
+ {
+ /// <summary> Returns the next character from the selected input. The method
+ /// of selecting the input is the responsibility of the class
+ /// implementing this interface. Can throw any java.io.IOException.
+ /// </summary>
+ char ReadChar();
+
+ /// <summary> Returns the column position of the character last read.</summary>
+ /// <deprecated>
+ /// </deprecated>
+ /// <seealso cref="EndColumn">
+ /// </seealso>
+ [Obsolete]
+ int Column { get; }
+
+ /// <summary> Returns the line number of the character last read.</summary>
+ /// <deprecated>
+ /// </deprecated>
+ /// <seealso cref="EndLine">
+ /// </seealso>
+ [Obsolete]
+ int Line { get; }
+
+ /// <summary> Returns the column number of the last character for the current token (being
+ /// matched after the last call to BeginToken).
+ /// </summary>
+ int EndColumn { get; }
+
+ /// <summary> Returns the line number of the last character for the current token (being
+ /// matched after the last call to BeginToken).
+ /// </summary>
+ int EndLine { get; }
+
+ /// <summary> Returns the column number of the first character for the current token (being
+ /// matched after the last call to BeginToken).
+ /// </summary>
+ int BeginColumn { get; }
+
+ /// <summary> Returns the line number of the first character for the current token (being
+ /// matched after the last call to BeginToken).
+ /// </summary>
+ int BeginLine { get; }
+
+ /// <summary> Backs up the input stream by amount steps. Lexer calls this method if it
+ /// had already read some characters, but could not use them to match a
+ /// (longer) token. So, they will be used again as the prefix of the next
+ /// token and it is the implementation's responsibility to do this right.
+ /// </summary>
+ void Backup(int amount);
+
+ /// <summary> Returns the next character that marks the beginning of the next token.
+ /// All characters must remain in the buffer between two successive calls
+ /// to this method to implement backup correctly.
+ /// </summary>
+ char BeginToken();
+
+ /// <summary> Returns a string made up of characters from the marked token beginning
+ /// to the current buffer position. Implementations have the choice of returning
+ /// anything that they want to. For example, for efficiency, one might decide
+ /// to just return null, which is a valid implementation.
+ /// </summary>
+ string Image { get; }
+
+ /// <summary> Returns an array of characters that make up the suffix of length 'len' for
+ /// the currently matched token. This is used to build up the matched string
+ /// for use in actions in the case of MORE. A simple and inefficient
+ /// implementation of this is as follows :
+ ///
+ /// {
+ /// String t = GetImage();
+ /// return t.substring(t.length() - len, t.length()).toCharArray();
+ /// }
+ /// </summary>
+ char[] GetSuffix(int len);
+
+ /// <summary> The lexer calls this function to indicate that it is done with the stream
+ /// and hence implementations can free any resources held by this class.
+ /// Again, the body of this function can be just empty and it will not
+ /// affect the lexer's operation.
+ /// </summary>
+ void Done();
+ }
+ /* JavaCC - OriginalChecksum=32a89423891f765dde472f7ef0e3ef7b (do not edit this line) */
+} \ No newline at end of file
diff --git a/src/core/QueryParser/FastCharStream.cs b/src/core/QueryParser/FastCharStream.cs
new file mode 100644
index 0000000..62876f3
--- /dev/null
+++ b/src/core/QueryParser/FastCharStream.cs
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// FastCharStream.java
+
+using System;
+
+namespace Lucene.Net.QueryParsers
+{
+
+ /// <summary>An efficient implementation of JavaCC's CharStream interface. <p/>Note that
+ /// this does not do line-number counting, but instead keeps track of the
+ /// character position of the token in the input, as required by Lucene's <see cref="Lucene.Net.Analysis.Token" />
+ /// API.
+ ///
+ /// </summary>
+ public sealed class FastCharStream : ICharStream
+ {
+ internal char[] buffer = null;
+
+ internal int bufferLength = 0; // end of valid chars
+ internal int bufferPosition = 0; // next char to read
+
+ internal int tokenStart = 0; // offset in buffer
+ internal int bufferStart = 0; // position in file of buffer
+
+ internal System.IO.TextReader input; // source of chars
+
+ /// <summary>Constructs from a Reader. </summary>
+ public FastCharStream(System.IO.TextReader r)
+ {
+ input = r;
+ }
+
+ public char ReadChar()
+ {
+ if (bufferPosition >= bufferLength)
+ Refill();
+ return buffer[bufferPosition++];
+ }
+
+ private void Refill()
+ {
+ int newPosition = bufferLength - tokenStart;
+
+ if (tokenStart == 0)
+ {
+ // token won't fit in buffer
+ if (buffer == null)
+ {
+ // first time: alloc buffer
+ buffer = new char[2048];
+ }
+ else if (bufferLength == buffer.Length)
+ {
+ // grow buffer
+ char[] newBuffer = new char[buffer.Length * 2];
+ Array.Copy(buffer, 0, newBuffer, 0, bufferLength);
+ buffer = newBuffer;
+ }
+ }
+ else
+ {
+ // shift token to front
+ Array.Copy(buffer, tokenStart, buffer, 0, newPosition);
+ }
+
+ bufferLength = newPosition; // update state
+ bufferPosition = newPosition;
+ bufferStart += tokenStart;
+ tokenStart = 0;
+
+ int charsRead = input.Read(buffer, newPosition, buffer.Length - newPosition);
+ if (charsRead <= 0)
+ throw new System.IO.IOException("read past eof");
+ else
+ bufferLength += charsRead;
+ }
+
+ public char BeginToken()
+ {
+ tokenStart = bufferPosition;
+ return ReadChar();
+ }
+
+ public void Backup(int amount)
+ {
+ bufferPosition -= amount;
+ }
+
+ public string Image
+ {
+ get { return new System.String(buffer, tokenStart, bufferPosition - tokenStart); }
+ }
+
+ public char[] GetSuffix(int len)
+ {
+ char[] suffix = new char[len];
+ Array.Copy(buffer, bufferPosition - len, suffix, 0, len);
+ return suffix;
+ }
+
+ public void Done()
+ {
+ try
+ {
+ input.Close();
+ }
+ catch (System.IO.IOException e)
+ {
+ System.Console.Error.WriteLine("Caught: " + e + "; ignoring.");
+ }
+ }
+
+ public int Column
+ {
+ get { return bufferStart + bufferPosition; }
+ }
+
+ public int Line
+ {
+ get { return 1; }
+ }
+
+ public int EndColumn
+ {
+ get { return bufferStart + bufferPosition; }
+ }
+
+ public int EndLine
+ {
+ get { return 1; }
+ }
+
+ public int BeginColumn
+ {
+ get { return bufferStart + tokenStart; }
+ }
+
+ public int BeginLine
+ {
+ get { return 1; }
+ }
+ }
+} \ No newline at end of file
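FastCharStream reports absolute character offsets instead of line and column numbers, which is all the generated query-parser lexer needs. The following usage sketch is not part of this patch; it scans one whitespace-delimited token from an illustrative input using BeginToken, ReadChar, Backup, and Image.

    using System;
    using System.IO;
    using Lucene.Net.QueryParsers;

    class FastCharStreamDemo
    {
        static void Main()
        {
            var stream = new FastCharStream(new StringReader("title:lucene body:search"));

            char c = stream.BeginToken();   // marks the token start and reads the first char
            try
            {
                while (!char.IsWhiteSpace(c))
                    c = stream.ReadChar();
                stream.Backup(1);           // put the whitespace back for the next token
            }
            catch (IOException)
            {
                // ReadChar throws "read past eof" once the input is exhausted.
            }

            // Prints: token 'title:lucene' at columns 0-12
            Console.WriteLine("token '{0}' at columns {1}-{2}",
                stream.Image, stream.BeginColumn, stream.EndColumn);
            stream.Done();
        }
    }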
diff --git a/src/core/QueryParser/MultiFieldQueryParser.cs b/src/core/QueryParser/MultiFieldQueryParser.cs
new file mode 100644
index 0000000..f506f34
--- /dev/null
+++ b/src/core/QueryParser/MultiFieldQueryParser.cs
@@ -0,0 +1,370 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Search;
+using Analyzer = Lucene.Net.Analysis.Analyzer;
+using BooleanClause = Lucene.Net.Search.BooleanClause;
+using BooleanQuery = Lucene.Net.Search.BooleanQuery;
+using MultiPhraseQuery = Lucene.Net.Search.MultiPhraseQuery;
+using PhraseQuery = Lucene.Net.Search.PhraseQuery;
+using Query = Lucene.Net.Search.Query;
+using Version = Lucene.Net.Util.Version;
+
+namespace Lucene.Net.QueryParsers
+{
+
+ /// <summary> A QueryParser which constructs queries to search multiple fields.
+ ///
+ /// </summary>
+ /// <version> $Revision: 829231 $
+ /// </version>
+ public class MultiFieldQueryParser : QueryParser
+ {
+ protected internal string[] fields;
+ protected internal IDictionary<string, float> boosts;
+
+ /// <summary> Creates a MultiFieldQueryParser. Allows passing a map of field names to
+ /// boost values; the boost is applied to each term in that field.
+ ///
+ /// <p/>
+ /// It will, when parse(String query) is called, construct a query like this
+ /// (assuming the query consists of two terms and you specify the two fields
+ /// <c>title</c> and <c>body</c>):
+ /// <p/>
+ ///
+ /// <code>
+ /// (title:term1 body:term1) (title:term2 body:term2)
+ /// </code>
+ ///
+ /// <p/>
+ /// When setDefaultOperator(AND_OPERATOR) is set, the result will be:
+ /// <p/>
+ ///
+ /// <code>
+ /// +(title:term1 body:term1) +(title:term2 body:term2)
+ /// </code>
+ ///
+ /// <p/>
+ /// When you pass a boost (title=>5 body=>10) you can get
+ /// <p/>
+ ///
+ /// <code>
+ /// +(title:term1^5.0 body:term1^10.0) +(title:term2^5.0 body:term2^10.0)
+ /// </code>
+ ///
+ /// <p/>
+ /// In other words, all the query's terms must appear, but it doesn't matter
+ /// in what fields they appear.
+ /// <p/>
+ /// </summary>
+ public MultiFieldQueryParser(Version matchVersion, string[] fields, Analyzer analyzer, IDictionary<string, float> boosts)
+ : this(matchVersion, fields, analyzer)
+ {
+ this.boosts = boosts;
+ }
+
+ /// <summary> Creates a MultiFieldQueryParser.
+ ///
+ /// <p/>
+ /// It will, when parse(String query) is called, construct a query like this
+ /// (assuming the query consists of two terms and you specify the two fields
+ /// <c>title</c> and <c>body</c>):
+ /// <p/>
+ ///
+ /// <code>
+ /// (title:term1 body:term1) (title:term2 body:term2)
+ /// </code>
+ ///
+ /// <p/>
+ /// When setDefaultOperator(AND_OPERATOR) is set, the result will be:
+ /// <p/>
+ ///
+ /// <code>
+ /// +(title:term1 body:term1) +(title:term2 body:term2)
+ /// </code>
+ ///
+ /// <p/>
+ /// In other words, all the query's terms must appear, but it doesn't matter
+ /// in what fields they appear.
+ /// <p/>
+ /// </summary>
+ public MultiFieldQueryParser(Version matchVersion, System.String[] fields, Analyzer analyzer)
+ : base(matchVersion, null, analyzer)
+ {
+ this.fields = fields;
+ }
+
+ protected internal override Query GetFieldQuery(string field, string queryText, int slop)
+ {
+ if (field == null)
+ {
+ IList<BooleanClause> clauses = new List<BooleanClause>();
+ for (int i = 0; i < fields.Length; i++)
+ {
+ Query q = base.GetFieldQuery(fields[i], queryText);
+ if (q != null)
+ {
+ //If the user passes a map of boosts
+ if (boosts != null)
+ {
+ //Get the boost from the map and apply them
+ Single boost = boosts[fields[i]];
+ q.Boost = boost;
+ }
+ ApplySlop(q, slop);
+ clauses.Add(new BooleanClause(q, Occur.SHOULD));
+ }
+ }
+ if (clauses.Count == 0)
+ // happens for stopwords
+ return null;
+ return GetBooleanQuery(clauses, true);
+ }
+ Query q2 = base.GetFieldQuery(field, queryText);
+ ApplySlop(q2, slop);
+ return q2;
+ }
+
+ private void ApplySlop(Query q, int slop)
+ {
+ if (q is PhraseQuery)
+ {
+ ((PhraseQuery)q).Slop = slop;
+ }
+ else if (q is MultiPhraseQuery)
+ {
+ ((MultiPhraseQuery)q).Slop = slop;
+ }
+ }
+
+
+ protected internal override Query GetFieldQuery(System.String field, System.String queryText)
+ {
+ return GetFieldQuery(field, queryText, 0);
+ }
+
+
+ protected internal override Query GetFuzzyQuery(System.String field, System.String termStr, float minSimilarity)
+ {
+ if (field == null)
+ {
+ IList<BooleanClause> clauses = new List<BooleanClause>();
+ for (int i = 0; i < fields.Length; i++)
+ {
+ clauses.Add(new BooleanClause(GetFuzzyQuery(fields[i], termStr, minSimilarity), Occur.SHOULD));
+ }
+ return GetBooleanQuery(clauses, true);
+ }
+ return base.GetFuzzyQuery(field, termStr, minSimilarity);
+ }
+
+ protected internal override Query GetPrefixQuery(System.String field, System.String termStr)
+ {
+ if (field == null)
+ {
+ IList<BooleanClause> clauses = new List<BooleanClause>();
+ for (int i = 0; i < fields.Length; i++)
+ {
+ clauses.Add(new BooleanClause(GetPrefixQuery(fields[i], termStr), Occur.SHOULD));
+ }
+ return GetBooleanQuery(clauses, true);
+ }
+ return base.GetPrefixQuery(field, termStr);
+ }
+
+ protected internal override Query GetWildcardQuery(System.String field, System.String termStr)
+ {
+ if (field == null)
+ {
+ IList<BooleanClause> clauses = new List<BooleanClause>();
+ for (int i = 0; i < fields.Length; i++)
+ {
+ clauses.Add(new BooleanClause(GetWildcardQuery(fields[i], termStr), Occur.SHOULD));
+ }
+ return GetBooleanQuery(clauses, true);
+ }
+ return base.GetWildcardQuery(field, termStr);
+ }
+
+
+ protected internal override Query GetRangeQuery(System.String field, System.String part1, System.String part2, bool inclusive)
+ {
+ if (field == null)
+ {
+ IList<BooleanClause> clauses = new List<BooleanClause>();
+ for (int i = 0; i < fields.Length; i++)
+ {
+ clauses.Add(new BooleanClause(GetRangeQuery(fields[i], part1, part2, inclusive), Occur.SHOULD));
+ }
+ return GetBooleanQuery(clauses, true);
+ }
+ return base.GetRangeQuery(field, part1, part2, inclusive);
+ }
+
+ /// <summary> Parses a query which searches on the fields specified.
+ /// <p/>
+ /// If x fields are specified, this effectively constructs:
+ ///
+ /// <code>
+ /// (field1:query1) (field2:query2) (field3:query3)...(fieldx:queryx)
+ /// </code>
+ ///
+ /// </summary>
+ /// <param name="matchVersion">Lucene version to match; this is passed through to
+ /// QueryParser.
+ /// </param>
+ /// <param name="queries">Query strings to parse
+ /// </param>
+ /// <param name="fields">Fields to search on
+ /// </param>
+ /// <param name="analyzer">Analyzer to use
+ /// </param>
+ /// <throws> ParseException </throws>
+ /// <summary> if query parsing fails
+ /// </summary>
+ /// <throws> IllegalArgumentException </throws>
+ /// <summary> if the length of the queries array differs from the length of
+ /// the fields array
+ /// </summary>
+ public static Query Parse(Version matchVersion, System.String[] queries, System.String[] fields, Analyzer analyzer)
+ {
+ if (queries.Length != fields.Length)
+ throw new System.ArgumentException("queries.length != fields.length");
+ BooleanQuery bQuery = new BooleanQuery();
+ for (int i = 0; i < fields.Length; i++)
+ {
+ QueryParser qp = new QueryParser(matchVersion, fields[i], analyzer);
+ Query q = qp.Parse(queries[i]);
+ if (q != null && (!(q is BooleanQuery) || ((BooleanQuery)q).GetClauses().Length > 0))
+ {
+ bQuery.Add(q, Occur.SHOULD);
+ }
+ }
+ return bQuery;
+ }
+
+ /// <summary> Parses a query, searching on the fields specified. Use this if you need
+ /// to specify certain fields as required, and others as prohibited.
+ /// <p/>
+ /// Usage:
+ /// <code>
+ /// String[] fields = {&quot;filename&quot;, &quot;contents&quot;, &quot;description&quot;};
+ /// BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
+ /// BooleanClause.Occur.MUST,
+ /// BooleanClause.Occur.MUST_NOT};
+ /// MultiFieldQueryParser.parse(&quot;query&quot;, fields, flags, analyzer);
+ /// </code>
+ /// <p/>
+ /// The code above would construct a query:
+ ///
+ /// <code>
+ /// (filename:query) +(contents:query) -(description:query)
+ /// </code>
+ ///
+ /// </summary>
+ /// <param name="matchVersion">Lucene version to match; this is passed through to
+ /// QueryParser.
+ /// </param>
+ /// <param name="query">Query string to parse
+ /// </param>
+ /// <param name="fields">Fields to search on
+ /// </param>
+ /// <param name="flags">Flags describing the fields
+ /// </param>
+ /// <param name="analyzer">Analyzer to use
+ /// </param>
+ /// <throws> ParseException </throws>
+ /// <summary> if query parsing fails
+ /// </summary>
+ /// <throws> IllegalArgumentException </throws>
+ /// <summary> if the length of the fields array differs from the length of
+ /// the flags array
+ /// </summary>
+ public static Query Parse(Version matchVersion, System.String query, System.String[] fields, Occur[] flags, Analyzer analyzer)
+ {
+ if (fields.Length != flags.Length)
+ throw new System.ArgumentException("fields.length != flags.length");
+ BooleanQuery bQuery = new BooleanQuery();
+ for (int i = 0; i < fields.Length; i++)
+ {
+ QueryParser qp = new QueryParser(matchVersion, fields[i], analyzer);
+ Query q = qp.Parse(query);
+ if (q != null && (!(q is BooleanQuery) || ((BooleanQuery)q).GetClauses().Length > 0))
+ {
+ bQuery.Add(q, flags[i]);
+ }
+ }
+ return bQuery;
+ }
+
+ /// <summary> Parses a query, searching on the fields specified. Use this if you need
+ /// to specify certain fields as required, and others as prohibited.
+ /// <p/>
+ /// Usage:
+ /// <code>
+ /// String[] query = {&quot;query1&quot;, &quot;query2&quot;, &quot;query3&quot;};
+ /// String[] fields = {&quot;filename&quot;, &quot;contents&quot;, &quot;description&quot;};
+ /// BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
+ /// BooleanClause.Occur.MUST,
+ /// BooleanClause.Occur.MUST_NOT};
+ /// MultiFieldQueryParser.parse(query, fields, flags, analyzer);
+ /// </code>
+ /// <p/>
+ /// The code above would construct a query:
+ ///
+ /// <code>
+ /// (filename:query1) +(contents:query2) -(description:query3)
+ /// </code>
+ ///
+ /// </summary>
+ /// <param name="matchVersion">Lucene version to match; this is passed through to
+ /// QueryParser.
+ /// </param>
+ /// <param name="queries">Query strings to parse
+ /// </param>
+ /// <param name="fields">Fields to search on
+ /// </param>
+ /// <param name="flags">Flags describing the fields
+ /// </param>
+ /// <param name="analyzer">Analyzer to use
+ /// </param>
+ /// <throws> ParseException </throws>
+ /// <summary> if query parsing fails
+ /// </summary>
+ /// <throws> IllegalArgumentException </throws>
+ /// <summary> if the lengths of the queries, fields, and flags arrays differ
+ /// </summary>
+ public static Query Parse(Version matchVersion, System.String[] queries, System.String[] fields, Occur[] flags, Analyzer analyzer)
+ {
+ if (!(queries.Length == fields.Length && queries.Length == flags.Length))
+ throw new System.ArgumentException("queries, fields, and flags arrays have different lengths");
+ BooleanQuery bQuery = new BooleanQuery();
+ for (int i = 0; i < fields.Length; i++)
+ {
+ QueryParser qp = new QueryParser(matchVersion, fields[i], analyzer);
+ Query q = qp.Parse(queries[i]);
+ if (q != null && (!(q is BooleanQuery) || ((BooleanQuery)q).GetClauses().Length > 0))
+ {
+ bQuery.Add(q, flags[i]);
+ }
+ }
+ return bQuery;
+ }
+ }
+} \ No newline at end of file
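
A short usage sketch mirroring the boost example in the doc comments above; the analyzer choice, field names, and boost values are illustrative assumptions, not part of this commit:

    // Parse one query string against two fields, boosting body matches over title matches.
    var analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    var boosts = new System.Collections.Generic.Dictionary<string, float> { { "title", 5f }, { "body", 10f } };
    var parser = new Lucene.Net.QueryParsers.MultiFieldQueryParser(
        Lucene.Net.Util.Version.LUCENE_30, new[] { "title", "body" }, analyzer, boosts);
    Lucene.Net.Search.Query q = parser.Parse("term1 term2");
    // q is roughly (title:term1^5.0 body:term1^10.0) (title:term2^5.0 body:term2^10.0)

    // Static form: one Occur flag per field, as in the flags example above.
    var flags = new[] { Lucene.Net.Search.Occur.SHOULD, Lucene.Net.Search.Occur.MUST };
    Lucene.Net.Search.Query q2 = Lucene.Net.QueryParsers.MultiFieldQueryParser.Parse(
        Lucene.Net.Util.Version.LUCENE_30, "query", new[] { "title", "body" }, flags, analyzer);
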
diff --git a/src/core/QueryParser/ParseException.cs b/src/core/QueryParser/ParseException.cs
new file mode 100644
index 0000000..ab0fbca
--- /dev/null
+++ b/src/core/QueryParser/ParseException.cs
@@ -0,0 +1,244 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Generated By:JavaCC: Do not edit this line. ParseException.java Version 4.1 */
+/* JavaCCOptions:KEEP_LINE_COL=null */
+
+using System;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.QueryParsers
+{
+
+ /// <summary> This exception is thrown when parse errors are encountered.
+ /// You can explicitly create objects of this exception type by
+ /// calling the method generateParseException in the generated
+ /// parser.
+ ///
+ /// You can modify this class to customize your error reporting
+ /// mechanisms so long as you retain the public fields.
+ /// </summary>
+ [Serializable]
+ public class ParseException:System.Exception
+ {
+ /// <summary> This method has the standard behavior when this object has been
+ /// created using the standard constructors. Otherwise, it uses
+ /// "currentToken" and "expectedTokenSequences" to generate a parse
+ /// error message and returns it. If this object has been created
+ /// due to a parse error, and you do not catch it (it gets thrown
+ /// from the parser), then this method is called during the printing
+ /// of the final stack trace, and hence the correct error message
+ /// gets displayed.
+ /// </summary>
+ public override System.String Message
+ {
+ get
+ {
+ if (!specialConstructor)
+ {
+ return base.Message;
+ }
+ System.Text.StringBuilder expected = new System.Text.StringBuilder();
+ int maxSize = 0;
+ for (int i = 0; i < expectedTokenSequences.Length; i++)
+ {
+ if (maxSize < expectedTokenSequences[i].Length)
+ {
+ maxSize = expectedTokenSequences[i].Length;
+ }
+ for (int j = 0; j < expectedTokenSequences[i].Length; j++)
+ {
+ expected.Append(tokenImage[expectedTokenSequences[i][j]]).Append(' ');
+ }
+ if (expectedTokenSequences[i][expectedTokenSequences[i].Length - 1] != 0)
+ {
+ expected.Append("...");
+ }
+ expected.Append(eol).Append(" ");
+ }
+ System.String retval = "Encountered \"";
+ Token tok = currentToken.next;
+ for (int i = 0; i < maxSize; i++)
+ {
+ if (i != 0)
+ retval += " ";
+ if (tok.kind == 0)
+ {
+ retval += tokenImage[0];
+ break;
+ }
+ retval += (" " + tokenImage[tok.kind]);
+ retval += " \"";
+ retval += Add_escapes(tok.image);
+ retval += " \"";
+ tok = tok.next;
+ }
+ retval += ("\" at line " + currentToken.next.beginLine + ", column " + currentToken.next.beginColumn);
+ retval += ("." + eol);
+ if (expectedTokenSequences.Length == 1)
+ {
+ retval += ("Was expecting:" + eol + " ");
+ }
+ else
+ {
+ retval += ("Was expecting one of:" + eol + " ");
+ }
+ retval += expected.ToString();
+ return retval;
+ }
+
+ }
+
+ /// <summary> This constructor is used by the method "generateParseException"
+ /// in the generated parser. Calling this constructor generates
+ /// a new object of this type with the fields "currentToken",
+ /// "expectedTokenSequences", and "tokenImage" set. The boolean
+ /// flag "specialConstructor" is also set to true to indicate that
+ /// this constructor was used to create this object.
+ /// This constructor calls its super class with the empty string
+ /// to force the "toString" method of parent class "Throwable" to
+ /// print the error message in the form:
+ /// ParseException: &lt;result of getMessage&gt;
+ /// </summary>
+ public ParseException(Token currentTokenVal, int[][] expectedTokenSequencesVal, System.String[] tokenImageVal):base("")
+ {
+ specialConstructor = true;
+ currentToken = currentTokenVal;
+ expectedTokenSequences = expectedTokenSequencesVal;
+ tokenImage = tokenImageVal;
+ }
+
+ /// <summary> The following constructors are for use by you for whatever
+ /// purpose you can think of. Constructing the exception in this
+ /// manner makes the exception behave in the normal way - i.e., as
+ /// documented in the class "Throwable". The fields "errorToken",
+ /// "expectedTokenSequences", and "tokenImage" do not contain
+ /// relevant information. The JavaCC generated code does not use
+ /// these constructors.
+ /// </summary>
+
+ public ParseException():base()
+ {
+ specialConstructor = false;
+ }
+
+ /// <summary>Constructor with message. </summary>
+ public ParseException(System.String message):base(message)
+ {
+ specialConstructor = false;
+ }
+
+ /// <summary>Constructor with message. </summary>
+ public ParseException(System.String message, System.Exception ex) : base(message, ex)
+ {
+ specialConstructor = false;
+ }
+
+ /// <summary> This variable determines which constructor was used to create
+ /// this object and thereby affects the semantics of the
+ /// "getMessage" method (see below).
+ /// </summary>
+ protected internal bool specialConstructor;
+
+ /// <summary> This is the last token that has been consumed successfully. If
+ /// this object has been created due to a parse error, the token
+ /// following this token will (therefore) be the first error token.
+ /// </summary>
+ public Token currentToken;
+
+ /// <summary> Each entry in this array is an array of integers. Each array
+ /// of integers represents a sequence of tokens (by their ordinal
+ /// values) that is expected at this point of the parse.
+ /// </summary>
+ public int[][] expectedTokenSequences;
+
+ /// <summary> This is a reference to the "tokenImage" array of the generated
+ /// parser within which the parse error occurred. This array is
+ /// defined in the generated ...Constants interface.
+ /// </summary>
+ public System.String[] tokenImage;
+
+ /// <summary> The end of line string for this machine.</summary>
+ protected internal System.String eol = AppSettings.Get("line.separator", "\n");
+
+ /// <summary> Used to convert raw characters to their escaped versions
+ /// when the raw versions cannot be used as part of an ASCII
+ /// string literal.
+ /// </summary>
+ protected internal virtual System.String Add_escapes(System.String str)
+ {
+ System.Text.StringBuilder retval = new System.Text.StringBuilder();
+ char ch;
+ for (int i = 0; i < str.Length; i++)
+ {
+ switch (str[i])
+ {
+
+ case (char) (0):
+ continue;
+
+ case '\b':
+ retval.Append("\\b");
+ continue;
+
+ case '\t':
+ retval.Append("\\t");
+ continue;
+
+ case '\n':
+ retval.Append("\\n");
+ continue;
+
+ case '\f':
+ retval.Append("\\f");
+ continue;
+
+ case '\r':
+ retval.Append("\\r");
+ continue;
+
+ case '\"':
+ retval.Append("\\\"");
+ continue;
+
+ case '\'':
+ retval.Append("\\\'");
+ continue;
+
+ case '\\':
+ retval.Append("\\\\");
+ continue;
+
+ default:
+ if ((ch = str[i]) < 0x20 || ch > 0x7e)
+ {
+ System.String s = "0000" + System.Convert.ToString(ch, 16);
+ retval.Append("\\u" + s.Substring(s.Length - 4, 4));
+ }
+ else
+ {
+ retval.Append(ch);
+ }
+ continue;
+
+ }
+ }
+ return retval.ToString();
+ }
+ }
+ /* JavaCC - OriginalChecksum=c7631a240f7446940695eac31d9483ca (do not edit this line) */
+} \ No newline at end of file
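
A brief sketch of how callers typically surface this exception; the field name and the deliberately malformed query are placeholders, not part of this commit:

    // QueryParser.Parse (added below) rethrows parse failures as ParseException.
    var qp = new Lucene.Net.QueryParsers.QueryParser(
        Lucene.Net.Util.Version.LUCENE_30, "contents",
        new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30));
    try
    {
        Lucene.Net.Search.Query q = qp.Parse("title:(+return +\"pink panther\"");  // missing ')'
    }
    catch (Lucene.Net.QueryParsers.ParseException e)
    {
        // Message reads like: Cannot parse 'title:(+return +"pink panther"': Encountered ...
        System.Console.Error.WriteLine(e.Message);
    }
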
diff --git a/src/core/QueryParser/QueryParser.cs b/src/core/QueryParser/QueryParser.cs
new file mode 100644
index 0000000..e58ecea
--- /dev/null
+++ b/src/core/QueryParser/QueryParser.cs
@@ -0,0 +1,2095 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+*/
+
+/* Generated By:JavaCC: Do not edit this line. QueryParser.java */
+
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Globalization;
+using System.IO;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Search;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using Analyzer = Lucene.Net.Analysis.Analyzer;
+using CachingTokenFilter = Lucene.Net.Analysis.CachingTokenFilter;
+using TokenStream = Lucene.Net.Analysis.TokenStream;
+using DateField = Lucene.Net.Documents.DateField;
+using DateTools = Lucene.Net.Documents.DateTools;
+using Term = Lucene.Net.Index.Term;
+using BooleanClause = Lucene.Net.Search.BooleanClause;
+using BooleanQuery = Lucene.Net.Search.BooleanQuery;
+using FuzzyQuery = Lucene.Net.Search.FuzzyQuery;
+using MatchAllDocsQuery = Lucene.Net.Search.MatchAllDocsQuery;
+using MultiPhraseQuery = Lucene.Net.Search.MultiPhraseQuery;
+using MultiTermQuery = Lucene.Net.Search.MultiTermQuery;
+using PhraseQuery = Lucene.Net.Search.PhraseQuery;
+using PrefixQuery = Lucene.Net.Search.PrefixQuery;
+using Query = Lucene.Net.Search.Query;
+using Single = Lucene.Net.Support.Single;
+using TermQuery = Lucene.Net.Search.TermQuery;
+using TermRangeQuery = Lucene.Net.Search.TermRangeQuery;
+using WildcardQuery = Lucene.Net.Search.WildcardQuery;
+using Version = Lucene.Net.Util.Version;
+
+namespace Lucene.Net.QueryParsers
+{
+ /// <summary> This class is generated by JavaCC. The most important method is
+ /// <see cref="Parse(String)" />.
+ ///
+ /// The syntax for query strings is as follows:
+ /// A Query is a series of clauses.
+ /// A clause may be prefixed by:
+ /// <list type="bullet">
+ /// <item> a plus (<c>+</c>) or a minus (<c>-</c>) sign, indicating
+ /// that the clause is required or prohibited respectively; or</item>
+ /// <item> a term followed by a colon, indicating the field to be searched.
+ /// This enables one to construct queries which search multiple fields.</item>
+ /// </list>
+ ///
+ /// A clause may be either:
+ /// <list type="bullet">
+ /// <item> a term, indicating all the documents that contain this term; or</item>
+ /// <item> a nested query, enclosed in parentheses. Note that this may be used
+ /// with a <c>+</c>/<c>-</c> prefix to require any of a set of
+ /// terms.</item>
+ /// </list>
+ ///
+ /// Thus, in BNF, the query grammar is:
+ /// <code>
+ /// Query ::= ( Clause )*
+ /// Clause ::= ["+", "-"] [&lt;TERM&gt; ":"] ( &lt;TERM&gt; | "(" Query ")" )
+ /// </code>
+ ///
+ /// <p/>
+ /// Examples of appropriately formatted queries can be found in the <a
+ /// href="../../../../../../queryparsersyntax.html">query syntax
+ /// documentation</a>.
+ /// <p/>
+ ///
+ /// <p/>
+ /// In <see cref="TermRangeQuery" />s, QueryParser tries to detect date values, e.g.
+ /// <tt>date:[6/1/2005 TO 6/4/2005]</tt> produces a range query that searches
+ /// for "date" fields between 2005-06-01 and 2005-06-04. Note that the format
+ /// of the accepted input depends on the <see cref="Locale" />.
+ /// By default a date is converted into a search term using the deprecated
+ /// <see cref="DateField" /> for compatibility reasons.
+ /// To use the new <see cref="DateTools" /> to convert dates, a
+ /// <see cref="Lucene.Net.Documents.DateTools.Resolution" /> has to be set.
+ /// <p/>
+ /// <p/>
+ /// The date resolution that shall be used for RangeQueries can be set
+ /// using <see cref="SetDateResolution(DateTools.Resolution)" />
+ /// or <see cref="SetDateResolution(String, DateTools.Resolution)" />. The former
+ /// sets the default date resolution for all fields, whereas the latter can
+ /// be used to set field specific date resolutions. Field specific date
+ /// resolutions take, if set, precedence over the default date resolution.
+ /// <p/>
+ /// <p/>
+ /// If you use neither <see cref="DateField" /> nor <see cref="DateTools" /> in your
+ /// index, you can create your own
+ /// query parser that inherits QueryParser and overwrites
+ /// <see cref="GetRangeQuery(String, String, String, bool)" /> to
+ /// use a different method for date conversion.
+ /// <p/>
+ ///
+ /// <p/>Note that QueryParser is <em>not</em> thread-safe.<p/>
+ ///
+ /// <p/><b>NOTE</b>: there is a new QueryParser in contrib, which matches
+ /// the same syntax as this class, but is more modular,
+ /// enabling substantial customization to how a query is created.
+ ///
+ /// <p/><b>NOTE</b>: You must specify the required <see cref="Version" /> compatibility when
+ /// creating QueryParser:
+ /// <list type="bullet">
+ /// <item>As of 2.9, <see cref="EnablePositionIncrements" /> is true by default.</item>
+ /// </list>
+ /// </summary>
+ public class QueryParser : QueryParserConstants
+ {
+
+ private static int CONJ_NONE = 0;
+ private static int CONJ_AND = 1;
+ private static int CONJ_OR = 2;
+
+ private static int MOD_NONE = 0;
+ private static int MOD_NOT = 10;
+ private static int MOD_REQ = 11;
+
+ // make it possible to call setDefaultOperator() without accessing
+ // the nested class:
+ /// <summary>Alternative form of QueryParser.Operator.AND </summary>
+ public static Operator AND_OPERATOR = Operator.AND;
+
+ /// <summary>Alternative form of QueryParser.Operator.OR </summary>
+ public static Operator OR_OPERATOR = Operator.OR;
+
+ /// <summary>The actual operator that parser uses to combine query terms </summary>
+ private Operator operator_Renamed = OR_OPERATOR;
+
+ private bool lowercaseExpandedTerms = true;
+ private RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
+ private bool allowLeadingWildcard = false;
+ private bool enablePositionIncrements = true;
+
+ // LUCENENET-423 - DateRange differences with Java and .NET
+ private bool _useJavaStyleDateRangeParsing = false;
+
+ private Analyzer analyzer;
+ private String field;
+ private int phraseSlop = 0;
+ private float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity;
+ private int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength;
+ private System.Globalization.CultureInfo locale = System.Globalization.CultureInfo.CurrentCulture;
+
+ // the default date resolution
+ private DateTools.Resolution dateResolution = null;
+ // maps field names to date resolutions
+ private IDictionary<String, DateTools.Resolution> fieldToDateResolution = null;
+
+ // The collator to use when determining range inclusion,
+ // for use when constructing RangeQuerys.
+ private System.Globalization.CompareInfo rangeCollator = null;
+
+ /* The default operator for parsing queries.
+ * Use the DefaultOperator property to change it.
+ */
+
+ public enum Operator
+ {
+ OR,
+ AND
+ }
+
+ /* Constructs a query parser.
+ * @param matchVersion Lucene version to match. See <a href="#version">above</a>
+ * @param f the default field for query terms.
+ * @param a used to find terms in the query text.
+ */
+
+ public QueryParser(Version matchVersion, String f, Analyzer a)
+ : this(new FastCharStream(new StringReader("")))
+ {
+ analyzer = a;
+ field = f;
+ if (matchVersion.OnOrAfter(Version.LUCENE_29))
+ {
+ enablePositionIncrements = true;
+ }
+ else
+ {
+ enablePositionIncrements = false;
+ }
+
+ // LUCENENET-423 - DateRange differences with Java and .NET
+ if (matchVersion.OnOrAfter(Version.LUCENE_30))
+ {
+ _useJavaStyleDateRangeParsing = true;
+ }
+ }
+
+ /// <summary>Parses a query string, returning a {@link Lucene.Net.Search.Query}.</summary>
+ /// <param name="query"> the query string to be parsed.
+ /// </param>
+ /// <throws> ParseException if the parsing fails </throws>
+ public virtual Query Parse(String query)
+ {
+ ReInit(new FastCharStream(new StringReader(query)));
+ try
+ {
+ // TopLevelQuery is a Query followed by the end-of-input (EOF)
+ Query res = TopLevelQuery(field);
+ return res ?? NewBooleanQuery(false);
+ }
+ catch (ParseException tme)
+ {
+ // rethrow to include the original query:
+ throw new ParseException("Cannot parse '" + query + "': " + tme.Message, tme);
+ }
+ catch (TokenMgrError tme)
+ {
+ throw new ParseException("Cannot parse '" + query + "': " + tme.Message, tme);
+ }
+ catch (BooleanQuery.TooManyClauses tmc)
+ {
+ throw new ParseException("Cannot parse '" + query + "': too many bool clauses", tmc);
+ }
+ }
+
+ /// <value> Returns the analyzer. </value>
+ public virtual Analyzer Analyzer
+ {
+ get { return analyzer; }
+ }
+
+ /// <value> Returns the field. </value>
+ public virtual string Field
+ {
+ get { return field; }
+ }
+
+ /// <summary>
+ /// Gets or sets the minimal similarity for fuzzy queries.
+ /// Default is 0.5f.
+ /// </summary>
+ public virtual float FuzzyMinSim
+ {
+ get { return fuzzyMinSim; }
+ set { this.fuzzyMinSim = value; }
+ }
+
+ /// <summary> Gets or sets the prefix length for fuzzy queries. </summary>
+ /// <value> Returns the fuzzyPrefixLength. </value>
+ public virtual int FuzzyPrefixLength
+ {
+ get { return fuzzyPrefixLength; }
+ set { this.fuzzyPrefixLength = value; }
+ }
+
+ /// <summary> Gets or sets the default slop for phrases. If zero, then exact phrase matches
+ /// are required. Default value is zero.
+ /// </summary>
+ public virtual int PhraseSlop
+ {
+ set { this.phraseSlop = value; }
+ get { return phraseSlop; }
+ }
+
+ /// <summary> Set to <c>true</c> to allow leading wildcard characters.
+ /// <p/>
+ /// When set, <c>*</c> or <c>?</c> are allowed as
+ /// the first character of a PrefixQuery and WildcardQuery.
+ /// Note that this can produce very slow
+ /// queries on big indexes.
+ /// <p/>
+ /// Default: false.
+ /// </summary>
+ public virtual bool AllowLeadingWildcard
+ {
+ set { this.allowLeadingWildcard = value; }
+ get { return allowLeadingWildcard; }
+ }
+
+ /// <summary>Set to <c>true</c> to enable position increments in result query.
+ /// <p/>
+ /// When set, result phrase and multi-phrase queries will
+ /// be aware of position increments.
+ /// Useful when e.g. a StopFilter increases the position increment of
+ /// the token that follows an omitted token.
+ /// <p/>
+ /// Default: false (true when the parser is created with Version.LUCENE_29 or later).
+ /// </summary>
+ public virtual bool EnablePositionIncrements
+ {
+ set { this.enablePositionIncrements = value; }
+ get { return enablePositionIncrements; }
+ }
+
+ /// <summary> Gets or sets the boolean operator of the QueryParser.
+ /// In default mode (<c>OR_OPERATOR</c>) terms without any modifiers
+ /// are considered optional: for example <c>capital of Hungary</c> is equal to
+ /// <c>capital OR of OR Hungary</c>.<br/>
+ /// In <c>AND_OPERATOR</c> mode terms are considered to be in conjunction: the
+ /// above mentioned query is parsed as <c>capital AND of AND Hungary</c>
+ /// </summary>
+ public virtual Operator DefaultOperator
+ {
+ set { this.operator_Renamed = value; }
+ get { return operator_Renamed; }
+ }
+
+ /// <summary> Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically
+ /// lower-cased or not. Default is <c>true</c>.
+ /// </summary>
+ public virtual bool LowercaseExpandedTerms
+ {
+ set { this.lowercaseExpandedTerms = value; }
+ get { return lowercaseExpandedTerms; }
+ }
+
+
+ /// <summary> By default QueryParser uses <see cref="MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT" />
+ /// when creating a PrefixQuery, WildcardQuery or RangeQuery. This implementation is generally preferable because it
+ /// a) runs faster, b) does not let the scarcity of terms unduly influence the score, and
+ /// c) avoids any "TooManyBooleanClauses" exception.
+ /// However, if your application really needs to use the
+ /// old-fashioned BooleanQuery expansion rewriting and the above
+ /// points are not relevant then use this to change
+ /// the rewrite method.
+ /// </summary>
+ public virtual RewriteMethod MultiTermRewriteMethod
+ {
+ set { multiTermRewriteMethod = value; }
+ get { return multiTermRewriteMethod; }
+ }
+
+ /// <summary>Gets or sets locale used by date range parsing.</summary>
+ public virtual CultureInfo Locale
+ {
+ set { this.locale = value; }
+ get { return locale; }
+ }
+
+ /// <summary> Sets the default date resolution used by RangeQueries for fields for which no
+ /// specific date resolutions has been set. Field specific resolutions can be set
+ /// with {@link #SetDateResolution(String, DateTools.Resolution)}.
+ ///
+ /// </summary>
+ /// <param name="dateResolution">the default date resolution to set
+ /// </param>
+ public virtual void SetDateResolution(DateTools.Resolution dateResolution)
+ {
+ this.dateResolution = dateResolution;
+ }
+
+ /// <summary> Sets the date resolution used by RangeQueries for a specific field.
+ ///
+ /// </summary>
+ /// <param name="fieldName">field for which the date resolution is to be set
+ /// </param>
+ /// <param name="dateResolution">date resolution to set
+ /// </param>
+ public virtual void SetDateResolution(String fieldName, DateTools.Resolution dateResolution)
+ {
+ if (fieldName == null)
+ {
+ throw new ArgumentException("Field cannot be null.");
+ }
+
+ if (fieldToDateResolution == null)
+ {
+ // lazily initialize HashMap
+ fieldToDateResolution = new HashMap<String, DateTools.Resolution>();
+ }
+
+ fieldToDateResolution.Add(fieldName, dateResolution);
+ }
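+ // Illustrative only (not part of the generated parser): a parser configured with
+ //   qp.SetDateResolution(DateTools.Resolution.DAY);
+ //   qp.SetDateResolution("modified", DateTools.Resolution.HOUR);
+ // uses DAY for every field except "modified", whose HOUR resolution takes
+ // precedence in getDateResolution and GetRangeQuery below ("qp" and "modified"
+ // are placeholder names).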
+
+ /// <summary> Returns the date resolution that is used by RangeQueries for the given field.
+ /// Returns null, if no default or field specific date resolution has been set
+ /// for the given field.
+ /// </summary>
+ public virtual DateTools.Resolution getDateResolution(String fieldName)
+ {
+ if (fieldName == null)
+ {
+ throw new ArgumentException("Field cannot be null.");
+ }
+
+ if (fieldToDateResolution == null)
+ {
+ // no field specific date resolutions set; return default date resolution instead
+ return this.dateResolution;
+ }
+
+ DateTools.Resolution resolution = fieldToDateResolution[fieldName];
+ if (resolution == null)
+ {
+ // no date resolutions set for the given field; return default date resolution instead
+ resolution = this.dateResolution;
+ }
+
+ return resolution;
+ }
+
+ /// <summary> Gets or sets the collator used to determine index term inclusion in ranges
+ /// for RangeQuerys.
+ /// <p/>
+ /// <strong>WARNING:</strong> Setting the rangeCollator to a non-null
+ /// collator using this method will cause every single index Term in the
+ /// Field referenced by lowerTerm and/or upperTerm to be examined.
+ /// Depending on the number of index Terms in this Field, the operation could
+ /// be very slow.
+ ///
+ /// </summary>
+ /// <value> the collator to use when constructing RangeQuerys </value>
+ public virtual CompareInfo RangeCollator
+ {
+ set { rangeCollator = value; }
+ get { return rangeCollator; }
+ }
+
+ protected internal virtual void AddClause(List<BooleanClause> clauses, int conj, int mods, Query q)
+ {
+ bool required, prohibited;
+
+ // If this term is introduced by AND, make the preceding term required,
+ // unless it's already prohibited
+ if (clauses.Count > 0 && conj == CONJ_AND)
+ {
+ BooleanClause c = clauses[clauses.Count - 1];
+ if (!c.IsProhibited)
+ c.Occur = Occur.MUST;
+ }
+
+ if (clauses.Count > 0 && operator_Renamed == AND_OPERATOR && conj == CONJ_OR)
+ {
+ // If this term is introduced by OR, make the preceding term optional,
+ // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b)
+ // notice if the input is a OR b, the first term is parsed as required; without
+ // this modification a OR b would be parsed as +a OR b
+ BooleanClause c = clauses[clauses.Count - 1];
+ if (!c.IsProhibited)
+ c.Occur = Occur.SHOULD;
+ }
+
+ // We might have been passed a null query; the term might have been
+ // filtered away by the analyzer.
+ if (q == null)
+ return;
+
+ if (operator_Renamed == OR_OPERATOR)
+ {
+ // We set REQUIRED if we're introduced by AND or +; PROHIBITED if
+ // introduced by NOT or -; make sure not to set both.
+ prohibited = (mods == MOD_NOT);
+ required = (mods == MOD_REQ);
+ if (conj == CONJ_AND && !prohibited)
+ {
+ required = true;
+ }
+ }
+ else
+ {
+ // We set PROHIBITED if we're introduced by NOT or -; We set REQUIRED
+ // if not PROHIBITED and not introduced by OR
+ prohibited = (mods == MOD_NOT);
+ required = (!prohibited && conj != CONJ_OR);
+ }
+ if (required && !prohibited)
+ clauses.Add(NewBooleanClause(q, Occur.MUST));
+ else if (!required && !prohibited)
+ clauses.Add(NewBooleanClause(q, Occur.SHOULD));
+ else if (!required && prohibited)
+ clauses.Add(NewBooleanClause(q, Occur.MUST_NOT));
+ else
+ throw new SystemException("Clause cannot be both required and prohibited");
+ }
+
+
+ /// <exception cref="ParseException">throw in overridden method to disallow
+ /// </exception>
+ protected internal virtual Query GetFieldQuery(String field, String queryText)
+ {
+ // Use the analyzer to get all the tokens, and then build a TermQuery,
+ // PhraseQuery, or nothing based on the term count
+
+ TokenStream source;
+ try
+ {
+ source = analyzer.ReusableTokenStream(field, new StringReader(queryText));
+ source.Reset();
+ }
+ catch (IOException)
+ {
+ source = analyzer.TokenStream(field, new StringReader(queryText));
+ }
+ CachingTokenFilter buffer = new CachingTokenFilter(source);
+ ITermAttribute termAtt = null;
+ IPositionIncrementAttribute posIncrAtt = null;
+ int numTokens = 0;
+
+ bool success = false;
+ try
+ {
+ buffer.Reset();
+ success = true;
+ }
+ catch (IOException)
+ {
+ // success==false if we hit an exception
+ }
+ if (success)
+ {
+ if (buffer.HasAttribute<ITermAttribute>())
+ {
+ termAtt = buffer.GetAttribute<ITermAttribute>();
+ }
+ if (buffer.HasAttribute<IPositionIncrementAttribute>())
+ {
+ posIncrAtt = buffer.GetAttribute<IPositionIncrementAttribute>();
+ }
+ }
+
+ int positionCount = 0;
+ bool severalTokensAtSamePosition = false;
+
+ bool hasMoreTokens = false;
+ if (termAtt != null)
+ {
+ try
+ {
+ hasMoreTokens = buffer.IncrementToken();
+ while (hasMoreTokens)
+ {
+ numTokens++;
+ int positionIncrement = (posIncrAtt != null) ? posIncrAtt.PositionIncrement : 1;
+ if (positionIncrement != 0)
+ {
+ positionCount += positionIncrement;
+ }
+ else
+ {
+ severalTokensAtSamePosition = true;
+ }
+ hasMoreTokens = buffer.IncrementToken();
+ }
+ }
+ catch (IOException)
+ {
+ // ignore
+ }
+ }
+ try
+ {
+ // rewind the buffer stream
+ buffer.Reset();
+
+ // close original stream - all tokens buffered
+ source.Close();
+ }
+ catch (IOException)
+ {
+ // ignore
+ }
+
+ if (numTokens == 0)
+ return null;
+ else if (numTokens == 1)
+ {
+ String term = null;
+ try
+ {
+ bool hasNext = buffer.IncrementToken();
+ Debug.Assert(hasNext);
+ term = termAtt.Term;
+ }
+ catch (IOException)
+ {
+ // safe to ignore, because we know the number of tokens
+ }
+ return NewTermQuery(new Term(field, term));
+ }
+ else
+ {
+ if (severalTokensAtSamePosition)
+ {
+ if (positionCount == 1)
+ {
+ // no phrase query:
+ BooleanQuery q = NewBooleanQuery(true);
+ for (int i = 0; i < numTokens; i++)
+ {
+ String term = null;
+ try
+ {
+ bool hasNext = buffer.IncrementToken();
+ Debug.Assert(hasNext);
+ term = termAtt.Term;
+ }
+ catch (IOException)
+ {
+ // safe to ignore, because we know the number of tokens
+ }
+
+ Query currentQuery = NewTermQuery(
+ new Term(field, term));
+ q.Add(currentQuery, Occur.SHOULD);
+ }
+ return q;
+ }
+ else
+ {
+ // phrase query:
+ MultiPhraseQuery mpq = NewMultiPhraseQuery();
+ mpq.Slop = phraseSlop;
+ List<Term> multiTerms = new List<Term>();
+ int position = -1;
+ for (int i = 0; i < numTokens; i++)
+ {
+ String term = null;
+ int positionIncrement = 1;
+ try
+ {
+ bool hasNext = buffer.IncrementToken();
+ Debug.Assert(hasNext == true);
+ term = termAtt.Term;
+ if (posIncrAtt != null)
+ {
+ positionIncrement = posIncrAtt.PositionIncrement;
+ }
+ }
+ catch (IOException)
+ {
+ // safe to ignore, because we know the number of tokens
+ }
+
+ if (positionIncrement > 0 && multiTerms.Count > 0)
+ {
+ if (enablePositionIncrements)
+ {
+ mpq.Add(multiTerms.ToArray(), position);
+ }
+ else
+ {
+ mpq.Add(multiTerms.ToArray());
+ }
+ multiTerms.Clear();
+ }
+ position += positionIncrement;
+ multiTerms.Add(new Term(field, term));
+ }
+ if (enablePositionIncrements)
+ {
+ mpq.Add(multiTerms.ToArray(), position);
+ }
+ else
+ {
+ mpq.Add(multiTerms.ToArray());
+ }
+ return mpq;
+ }
+ }
+ else
+ {
+ PhraseQuery pq = NewPhraseQuery();
+ pq.Slop = phraseSlop;
+ int position = -1;
+
+
+ for (int i = 0; i < numTokens; i++)
+ {
+ String term = null;
+ int positionIncrement = 1;
+
+ try
+ {
+ bool hasNext = buffer.IncrementToken();
+ Debug.Assert(hasNext == true);
+ term = termAtt.Term;
+ if (posIncrAtt != null)
+ {
+ positionIncrement = posIncrAtt.PositionIncrement;
+ }
+ }
+ catch (IOException)
+ {
+ // safe to ignore, because we know the number of tokens
+ }
+
+ if (enablePositionIncrements)
+ {
+ position += positionIncrement;
+ pq.Add(new Term(field, term), position);
+ }
+ else
+ {
+ pq.Add(new Term(field, term));
+ }
+ }
+ return pq;
+ }
+ }
+ }
+
+
+ /// <summary> Base implementation delegates to {@link #GetFieldQuery(String,String)}.
+ /// This method may be overridden, for example, to return
+ /// a SpanNearQuery instead of a PhraseQuery.
+ ///
+ /// </summary>
+ /// <exception cref="ParseException">throw in overridden method to disallow
+ /// </exception>
+ protected internal virtual Query GetFieldQuery(String field, String queryText, int slop)
+ {
+ Query query = GetFieldQuery(field, queryText);
+
+ if (query is PhraseQuery)
+ {
+ ((PhraseQuery)query).Slop = slop;
+ }
+ if (query is MultiPhraseQuery)
+ {
+ ((MultiPhraseQuery)query).Slop = slop;
+ }
+
+ return query;
+ }
+
+ /// <exception cref="ParseException">throw in overridden method to disallow
+ /// </exception>
+ protected internal virtual Query GetRangeQuery(String field,
+ String part1,
+ String part2,
+ bool inclusive)
+ {
+ if (lowercaseExpandedTerms)
+ {
+ part1 = part1.ToLower();
+ part2 = part2.ToLower();
+ }
+
+ try
+ {
+ DateTime d1, d2;
+ if (_useJavaStyleDateRangeParsing)
+ {
+ // TODO: This doesn't emulate java perfectly.
+ // Java allows parsing of the string up to the end of the pattern
+ // and then ignores everything else. .NET will throw an exception,
+ // so this will fail in those cases, though the code below is clear
+ // that users can only specify the date, not the time.
+ var shortFormat = locale.DateTimeFormat.ShortDatePattern;
+ d1 = DateTime.ParseExact(part1, shortFormat, locale);
+ d2 = DateTime.ParseExact(part2, shortFormat, locale);
+ }
+ else
+ {
+ d1 = DateTime.Parse(part1, locale);
+ d2 = DateTime.Parse(part2, locale);
+ }
+
+ if (inclusive)
+ {
+ // The user can only specify the date, not the time, so make sure
+ // the time is set to the latest possible time of that date to really
+ // include all documents:
+ var cal = locale.Calendar;
+ d2 = cal.AddHours(d2, 23);
+ d2 = cal.AddMinutes(d2, 59);
+ d2 = cal.AddSeconds(d2, 59);
+ d2 = cal.AddMilliseconds(d2, 999);
+ }
+ DateTools.Resolution resolution = getDateResolution(field);
+ if (resolution == null)
+ {
+ // no default or field specific date resolution has been set,
+ // use deprecated DateField to maintain compatibility with
+ // pre-1.9 Lucene versions.
+ part1 = DateField.DateToString(d1);
+ part2 = DateField.DateToString(d2);
+ }
+ else
+ {
+ part1 = DateTools.DateToString(d1, resolution);
+ part2 = DateTools.DateToString(d2, resolution);
+ }
+ }
+ catch (Exception)
+ {
+ }
+
+ return NewRangeQuery(field, part1, part2, inclusive);
+ }
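+ // Illustrative only: with an en-US locale and a DAY date resolution, the clause
+ // date:[6/1/2005 TO 6/4/2005] arrives here as part1="6/1/2005", part2="6/4/2005"
+ // and is rewritten to the terms 20050601 and 20050604 before NewRangeQuery
+ // builds the TermRangeQuery.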
+
+ /// <summary> Builds a new BooleanQuery instance</summary>
+ /// <param name="disableCoord">disable coord
+ /// </param>
+ /// <returns> new BooleanQuery instance
+ /// </returns>
+ protected internal virtual BooleanQuery NewBooleanQuery(bool disableCoord)
+ {
+ return new BooleanQuery(disableCoord);
+ }
+
+ /// <summary> Builds a new BooleanClause instance</summary>
+ /// <param name="q">sub query
+ /// </param>
+ /// <param name="occur">how this clause should occur when matching documents
+ /// </param>
+ /// <returns> new BooleanClause instance
+ /// </returns>
+ protected internal virtual BooleanClause NewBooleanClause(Query q, Occur occur)
+ {
+ return new BooleanClause(q, occur);
+ }
+
+ /// <summary> Builds a new TermQuery instance</summary>
+ /// <param name="term">term
+ /// </param>
+ /// <returns> new TermQuery instance
+ /// </returns>
+ protected internal virtual Query NewTermQuery(Term term)
+ {
+ return new TermQuery(term);
+ }
+
+ /// <summary> Builds a new PhraseQuery instance</summary>
+ /// <returns> new PhraseQuery instance
+ /// </returns>
+ protected internal virtual PhraseQuery NewPhraseQuery()
+ {
+ return new PhraseQuery();
+ }
+
+ /// <summary> Builds a new MultiPhraseQuery instance</summary>
+ /// <returns> new MultiPhraseQuery instance
+ /// </returns>
+ protected internal virtual MultiPhraseQuery NewMultiPhraseQuery()
+ {
+ return new MultiPhraseQuery();
+ }
+
+ /// <summary> Builds a new PrefixQuery instance</summary>
+ /// <param name="prefix">Prefix term
+ /// </param>
+ /// <returns> new PrefixQuery instance
+ /// </returns>
+ protected internal virtual Query NewPrefixQuery(Term prefix)
+ {
+ return new PrefixQuery(prefix) { RewriteMethod = multiTermRewriteMethod };
+ }
+
+ /// <summary> Builds a new FuzzyQuery instance</summary>
+ /// <param name="term">Term
+ /// </param>
+ /// <param name="minimumSimilarity">minimum similarity
+ /// </param>
+ /// <param name="prefixLength">prefix length
+ /// </param>
+ /// <returns> new FuzzyQuery Instance
+ /// </returns>
+ protected internal virtual Query NewFuzzyQuery(Term term, float minimumSimilarity, int prefixLength)
+ {
+ // FuzzyQuery doesn't yet allow constant score rewrite
+ return new FuzzyQuery(term, minimumSimilarity, prefixLength);
+ }
+
+ /// <summary> Builds a new TermRangeQuery instance</summary>
+ /// <param name="field">Field
+ /// </param>
+ /// <param name="part1">min
+ /// </param>
+ /// <param name="part2">max
+ /// </param>
+ /// <param name="inclusive">true if range is inclusive
+ /// </param>
+ /// <returns> new TermRangeQuery instance
+ /// </returns>
+ protected internal virtual Query NewRangeQuery(String field, String part1, String part2, bool inclusive)
+ {
+ return new TermRangeQuery(field, part1, part2, inclusive, inclusive, rangeCollator) { RewriteMethod = multiTermRewriteMethod };
+ }
+
+ /// <summary> Builds a new MatchAllDocsQuery instance</summary>
+ /// <returns> new MatchAllDocsQuery instance
+ /// </returns>
+ protected internal virtual Query NewMatchAllDocsQuery()
+ {
+ return new MatchAllDocsQuery();
+ }
+
+ /// <summary> Builds a new WildcardQuery instance</summary>
+ /// <param name="t">wildcard term
+ /// </param>
+ /// <returns> new WildcardQuery instance
+ /// </returns>
+ protected internal virtual Query NewWildcardQuery(Term t)
+ {
+ return new WildcardQuery(t) { RewriteMethod = multiTermRewriteMethod };
+ }
+
+ /// <summary> Factory method for generating query, given a set of clauses.
+ /// By default creates a boolean query composed of clauses passed in.
+ ///
+ /// Can be overridden by extending classes, to modify query being
+ /// returned.
+ ///
+ /// </summary>
+ /// <param name="clauses">List that contains {@link BooleanClause} instances
+ /// to join.
+ ///
+ /// </param>
+ /// <returns> Resulting {@link Query} object.
+ /// </returns>
+ /// <exception cref="ParseException">throw in overridden method to disallow
+ /// </exception>
+ protected internal virtual Query GetBooleanQuery(IList<BooleanClause> clauses)
+ {
+ return GetBooleanQuery(clauses, false);
+ }
+
+ /// <summary> Factory method for generating query, given a set of clauses.
+ /// By default creates a boolean query composed of clauses passed in.
+ ///
+ /// Can be overridden by extending classes, to modify query being
+ /// returned.
+ ///
+ /// </summary>
+ /// <param name="clauses">List that contains {@link BooleanClause} instances
+ /// to join.
+ /// </param>
+ /// <param name="disableCoord">true if coord scoring should be disabled.
+ ///
+ /// </param>
+ /// <returns> Resulting {@link Query} object.
+ /// </returns>
+ /// <exception cref="ParseException">throw in overridden method to disallow
+ /// </exception>
+ protected internal virtual Query GetBooleanQuery(IList<BooleanClause> clauses, bool disableCoord)
+ {
+ if (clauses.Count == 0)
+ {
+ return null; // all clause words were filtered away by the analyzer.
+ }
+ BooleanQuery query = NewBooleanQuery(disableCoord);
+ foreach (var clause in clauses)
+ {
+ query.Add(clause);
+ }
+ return query;
+ }
+
+ /// <summary> Factory method for generating a query. Called when parser
+ /// parses an input term token that contains one or more wildcard
+ /// characters (? and *), but is not a prefix term token (one
+ /// that has just a single * character at the end)
+ /// <p/>
+ /// Depending on settings, prefix term may be lower-cased
+ /// automatically. It will not go through the default Analyzer,
+ /// however, since normal Analyzers are unlikely to work properly
+ /// with wildcard templates.
+ /// <p/>
+ /// Can be overridden by extending classes, to provide custom handling for
+ /// wildcard queries, which may be necessary due to missing analyzer calls.
+ ///
+ /// </summary>
+ /// <param name="field">Name of the field query will use.
+ /// </param>
+ /// <param name="termStr">Term token that contains one or more wild card
+ /// characters (? or *), but is not simple prefix term
+ ///
+ /// </param>
+ /// <returns> Resulting {@link Query} built for the term
+ /// </returns>
+ /// <exception cref="ParseException">throw in overridden method to disallow
+ /// </exception>
+ protected internal virtual Query GetWildcardQuery(String field, String termStr)
+ {
+ if ("*".Equals(field))
+ {
+ if ("*".Equals(termStr)) return NewMatchAllDocsQuery();
+ }
+ if (!allowLeadingWildcard && (termStr.StartsWith("*") || termStr.StartsWith("?")))
+ throw new ParseException("'*' or '?' not allowed as first character in WildcardQuery");
+ if (lowercaseExpandedTerms)
+ {
+ termStr = termStr.ToLower();
+ }
+ Term t = new Term(field, termStr);
+ return NewWildcardQuery(t);
+ }
+
+ /// <summary> Factory method for generating a query (similar to
+ /// {@link #getWildcardQuery}). Called when parser parses an input term
+ /// token that uses prefix notation; that is, contains a single '*' wildcard
+ /// character as its last character. Since this is a special case
+ /// of generic wildcard term, and such a query can be optimized easily,
+ /// this usually results in a different query object.
+ /// <p/>
+ /// Depending on settings, a prefix term may be lower-cased
+ /// automatically. It will not go through the default Analyzer,
+ /// however, since normal Analyzers are unlikely to work properly
+ /// with wildcard templates.
+ /// <p/>
+ /// Can be overridden by extending classes, to provide custom handling for
+ /// wild card queries, which may be necessary due to missing analyzer calls.
+ ///
+ /// </summary>
+ /// <param name="field">Name of the field query will use.
+ /// </param>
+ /// <param name="termStr">Term token to use for building term for the query
+ /// (<b>without</b> trailing '*' character!)
+ ///
+ /// </param>
+ /// <returns> Resulting {@link Query} built for the term
+ /// </returns>
+ /// <exception cref="ParseException">throw in overridden method to disallow
+ /// </exception>
+ protected internal virtual Query GetPrefixQuery(String field, String termStr)
+ {
+ if (!allowLeadingWildcard && termStr.StartsWith("*"))
+ throw new ParseException("'*' not allowed as first character in PrefixQuery");
+ if (lowercaseExpandedTerms)
+ {
+ termStr = termStr.ToLower();
+ }
+ Term t = new Term(field, termStr);
+ return NewPrefixQuery(t);
+ }
+
+ /// <summary> Factory method for generating a query (similar to
+ /// {@link #getWildcardQuery}). Called when parser parses
+ /// an input term token that has the fuzzy suffix (~) appended.
+ ///
+ /// </summary>
+ /// <param name="field">Name of the field query will use.
+ /// </param>
+ /// <param name="termStr">Term token to use for building term for the query
+ ///
+ /// </param>
+ /// <returns> Resulting {@link Query} built for the term
+ /// </returns>
+ /// <exception cref="ParseException">throw in overridden method to disallow
+ /// </exception>
+ protected internal virtual Query GetFuzzyQuery(String field, String termStr, float minSimilarity)
+ {
+ if (lowercaseExpandedTerms)
+ {
+ termStr = termStr.ToLower();
+ }
+ Term t = new Term(field, termStr);
+ return NewFuzzyQuery(t, minSimilarity, fuzzyPrefixLength);
+ }
+
+
+ /// <summary> Returns a String where the escape char has been
+ /// removed, or kept only once if there was a double escape.
+ ///
+ /// Supports escaped unicode characters, e.g. translates
+ /// <c>\\u0041</c> to <c>A</c>.
+ ///
+ /// </summary>
+ private String DiscardEscapeChar(String input)
+ {
+ // Create char array to hold unescaped char sequence
+ char[] output = new char[input.Length];
+
+ // The Length of the output can be less than the input
+ // due to discarded escape chars. This variable holds
+ // the actual Length of the output
+ int Length = 0;
+
+ // We remember whether the last processed character was
+ // an escape character
+ bool lastCharWasEscapeChar = false;
+
+ // The multiplier the current unicode digit must be multiplied with.
+ // E.g. the first digit must be multiplied by 16^3, the second by 16^2...
+ int codePointMultiplier = 0;
+
+ // Used to calculate the codepoint of the escaped unicode character
+ int codePoint = 0;
+
+ for (int i = 0; i < input.Length; i++)
+ {
+ char curChar = input[i];
+ if (codePointMultiplier > 0)
+ {
+ codePoint += HexToInt(curChar) * codePointMultiplier;
+ codePointMultiplier = Number.URShift(codePointMultiplier, 4);
+ if (codePointMultiplier == 0)
+ {
+ output[Length++] = (char)codePoint;
+ codePoint = 0;
+ }
+ }
+ else if (lastCharWasEscapeChar)
+ {
+ if (curChar == 'u')
+ {
+ // found an escaped unicode character
+ codePointMultiplier = 16 * 16 * 16;
+ }
+ else
+ {
+ // this character was escaped
+ output[Length] = curChar;
+ Length++;
+ }
+ lastCharWasEscapeChar = false;
+ }
+ else
+ {
+ if (curChar == '\\')
+ {
+ lastCharWasEscapeChar = true;
+ }
+ else
+ {
+ output[Length] = curChar;
+ Length++;
+ }
+ }
+ }
+
+ if (codePointMultiplier > 0)
+ {
+ throw new ParseException("Truncated unicode escape sequence.");
+ }
+
+ if (lastCharWasEscapeChar)
+ {
+ throw new ParseException("Term can not end with escape character.");
+ }
+
+ return new String(output, 0, Length);
+ }
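+ // Illustrative only: DiscardEscapeChar(@"foo\-bar") returns "foo-bar" and
+ // DiscardEscapeChar(@"\u0041BC") returns "ABC"; a trailing backslash or a
+ // truncated \uXXXX sequence throws ParseException, as above.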
+
+ /// <summary>Returns the numeric value of the hexadecimal character </summary>
+ private static int HexToInt(char c)
+ {
+ if ('0' <= c && c <= '9')
+ {
+ return c - '0';
+ }
+ else if ('a' <= c && c <= 'f')
+ {
+ return c - 'a' + 10;
+ }
+ else if ('A' <= c && c <= 'F')
+ {
+ return c - 'A' + 10;
+ }
+ else
+ {
+ throw new ParseException("Non-hex character in unicode escape sequence: " + c);
+ }
+ }
+
+ /// <summary> Returns a String where those characters that QueryParser
+ /// expects to be escaped are escaped by a preceding <c>\</c>.
+ /// </summary>
+ public static String Escape(String s)
+ {
+ StringBuilder sb = new StringBuilder();
+ for (int i = 0; i < s.Length; i++)
+ {
+ char c = s[i];
+ // These characters are part of the query syntax and must be escaped
+ if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':'
+ || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~'
+ || c == '*' || c == '?' || c == '|' || c == '&')
+ {
+ sb.Append('\\');
+ }
+ sb.Append(c);
+ }
+ return sb.ToString();
+ }
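+ // Illustrative only: every syntax character is prefixed with a backslash, so
+ // Escape("(1+1):2") returns the text \(1\+1\)\:2, which then parses as a
+ // literal term instead of query syntax.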
+
+ /// <summary> Command line tool to test QueryParser, using {@link Lucene.Net.Analysis.SimpleAnalyzer}.
+ /// Usage:<br/>
+ /// <c>java Lucene.Net.QueryParsers.QueryParser &lt;input&gt;</c>
+ /// </summary>
+ [STAThread]
+ public static void Main(String[] args)
+ {
+ if (args.Length == 0)
+ {
+ Console.WriteLine("Usage: java org.apache.lucene.queryParser.QueryParser <input>");
+ Environment.Exit(0);
+ }
+ QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field", new SimpleAnalyzer());
+ Query q = qp.Parse(args[0]);
+ Console.WriteLine(q.ToString("field"));
+ }
+
+ // * Query ::= ( Clause )*
+ // * Clause ::= ["+", "-"] [<TermToken> ":"] ( <TermToken> | "(" Query ")" )
+ public int Conjunction()
+ {
+ int ret = CONJ_NONE;
+ switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk)
+ {
+ case AndToken:
+ case OrToken:
+ switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk)
+ {
+ case AndToken:
+ Jj_consume_token(AndToken);
+ ret = CONJ_AND;
+ break;
+ case OrToken:
+ Jj_consume_token(OrToken);
+ ret = CONJ_OR;
+ break;
+ default:
+ jj_la1[0] = jj_gen;
+ Jj_consume_token(-1);
+ throw new ParseException();
+ }
+ break;
+ default:
+ jj_la1[1] = jj_gen;
+ break;
+ }
+ {
+ if (true) return ret;
+ }
+ throw new ApplicationException("Missing return statement in function");
+ }
+
+ public int Modifiers()
+ {
+ int ret = MOD_NONE;
+ switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk)
+ {
+ case NotToken:
+ case PlusToken:
+ case MinusToken:
+ switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk)
+ {
+ case PlusToken:
+ Jj_consume_token(PlusToken);
+ ret = MOD_REQ;
+ break;
+ case MinusToken:
+ Jj_consume_token(MinusToken);
+ ret = MOD_NOT;
+ break;
+ case NotToken:
+ Jj_consume_token(NotToken);
+ ret = MOD_NOT;
+ break;
+ default:
+ jj_la1[2] = jj_gen;
+ Jj_consume_token(-1);
+ throw new ParseException();
+ }
+ break;
+ default:
+ jj_la1[3] = jj_gen;
+ break;
+ }
+ {
+ if (true) return ret;
+ }
+ throw new Exception("Missing return statement in function");
+ }
+
+ // This makes sure that there is no garbage after the query string
+ public Query TopLevelQuery(String field)
+ {
+ Query q;
+ q = Query(field);
+ Jj_consume_token(0);
+ {
+ if (true) return q;
+ }
+ throw new Exception("Missing return statement in function");
+ }
+
+ public Query Query(String field)
+ {
+ List<BooleanClause> clauses = new List<BooleanClause>();
+ Query q, firstQuery = null;
+ int conj, mods;
+ mods = Modifiers();
+ q = Clause(field);
+ AddClause(clauses, CONJ_NONE, mods, q);
+ if (mods == MOD_NONE)
+ firstQuery = q;
+ while (true)
+ {
+ switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk)
+ {
+ case AndToken:
+ case OrToken:
+ case NotToken:
+ case PlusToken:
+ case MinusToken:
+ case LParanToken:
+ case StarToken:
+ case QuotedToken:
+ case TermToken:
+ case PrefixTermToken:
+ case WildTermToken:
+ case RangeInStartToken:
+ case RangeExStartToken:
+ case NumberToken:
+ break;
+ default:
+ jj_la1[4] = jj_gen;
+ goto label_1;
+ }
+
+ conj = Conjunction();
+ mods = Modifiers();
+ q = Clause(field);
+ AddClause(clauses, conj, mods, q);
+ }
+
+ label_1:
+
+ if (clauses.Count == 1 && firstQuery != null)
+ {
+ if (true) return firstQuery;
+ }
+
+ return GetBooleanQuery(clauses);
+ }
+
+ public Query Clause(String field)
+ {
+ Query q;
+ Token fieldToken = null, boost = null;
+ if (Jj_2_1(2))
+ {
+ switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk)
+ {
+ case TermToken:
+ fieldToken = Jj_consume_token(TermToken);
+ Jj_consume_token(ColonToken);
+ field = DiscardEscapeChar(fieldToken.image);
+ break;
+ case StarToken:
+ Jj_consume_token(StarToken);
+ Jj_consume_token(ColonToken);
+ field = "*";
+ break;
+ default:
+ jj_la1[5] = jj_gen;
+ Jj_consume_token(-1);
+ throw new ParseException();
+ }
+ }
+ else
+ {
+ ;
+ }
+ switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk)
+ {
+ case StarToken:
+ case QuotedToken:
+ case TermToken:
+ case PrefixTermToken:
+ case WildTermToken:
+ case RangeInStartToken:
+ case RangeExStartToken:
+ case NumberToken:
+ q = Term(field);
+ break;
+ case LParanToken:
+ Jj_consume_token(LParanToken);
+ q = Query(field);
+ Jj_consume_token(RParenToken);
+ switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk)
+ {
+ case CaratToken:
+ Jj_consume_token(CaratToken);
+ boost = Jj_consume_token(NumberToken);
+ break;
+ default:
+ jj_la1[6] = jj_gen;
+ break;
+ }
+ break;
+ default:
+ jj_la1[7] = jj_gen;
+ Jj_consume_token(-1);
+ throw new ParseException();
+ }
+ if (boost != null)
+ {
+ try
+ {
+ float f = Single.Parse(boost.image);
+ q.Boost = f;
+ }
+ catch (Exception)
+ {
+ }
+ }
+ {
+ if (true) return q;
+ }
+ throw new Exception("Missing return statement in function");
+ }
+
+ public Query Term(String field)
+ {
+ Token term, boost = null, fuzzySlop = null, goop1, goop2;
+ bool prefix = false;
+ bool wildcard = false;
+ bool fuzzy = false;
+ Query q;
+ switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk)
+ {
+ case StarToken:
+ case TermToken:
+ case PrefixTermToken:
+ case WildTermToken:
+ case NumberToken:
+ switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk)
+ {
+ case TermToken:
+ term = Jj_consume_token(TermToken);
+ break;
+ case StarToken:
+ term = Jj_consume_token(StarToken);
+ wildcard = true;
+ break;
+ case PrefixTermToken:
+ term = Jj_consume_token(PrefixTermToken);
+ prefix = true;
+ break;
+ case WildTermToken:
+ term = Jj_consume_token(WildTermToken);
+ wildcard = true;
+ break;
+ case NumberToken:
+ term = Jj_consume_token(NumberToken);
+ break;
+ default:
+ jj_la1[8] = jj_gen;
+ Jj_consume_token(-1);
+ throw new ParseException();
+ }
+ switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk)
+ {
+ case FuzzySlopToken:
+ fuzzySlop = Jj_consume_token(FuzzySlopToken);
+ fuzzy = true;
+ break;
+ default:
+ jj_la1[9] = jj_gen;
+ break;
+ }
+ switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk)
+ {
+ case CaratToken:
+ Jj_consume_token(CaratToken);
+ boost = Jj_consume_token(NumberToken);
+ switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk)
+ {
+ case FuzzySlopToken:
+ fuzzySlop = Jj_consume_token(FuzzySlopToken);
+ fuzzy = true;
+ break;
+ default:
+ jj_la1[10] = jj_gen;
+ break;
+ }
+ break;
+ default:
+ jj_la1[11] = jj_gen;
+ break;
+ }
+ String termImage = DiscardEscapeChar(term.image);
+ if (wildcard)
+ {
+ q = GetWildcardQuery(field, termImage);
+ }
+ else if (prefix)
+ {
+ q = GetPrefixQuery(field,
+ DiscardEscapeChar(term.image.Substring(0, (term.image.Length - 1) - (0))));
+ }
+ else if (fuzzy)
+ {
+ float fms = fuzzyMinSim;
+ try
+ {
+ fms = Single.Parse(fuzzySlop.image.Substring(1));
+ }
+ catch (Exception)
+ {
+ }
+ if (fms < 0.0f || fms > 1.0f)
+ {
+ {
+ if (true)
+ throw new ParseException(
+ "Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");
+ }
+ }
+ q = GetFuzzyQuery(field, termImage, fms);
+ }
+ else
+ {
+ q = GetFieldQuery(field, termImage);
+ }
+ break;
+ case RangeInStartToken:
+ Jj_consume_token(RangeInStartToken);
+ switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk)
+ {
+ case RangeInGoopToken:
+ goop1 = Jj_consume_token(RangeInGoopToken);
+ break;
+ case RangeInQuotedToken:
+ goop1 = Jj_consume_token(RangeInQuotedToken);
+ break;
+ default:
+ jj_la1[12] = jj_gen;
+ Jj_consume_token(-1);
+ throw new ParseException();
+ }
+ switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk)
+ {
+ case RangeInToToken:
+ Jj_consume_token(RangeInToToken);
+ break;
+ default:
+ jj_la1[13] = jj_gen;
+ break;
+ }
+ switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk)
+ {
+ case RangeInGoopToken:
+ goop2 = Jj_consume_token(RangeInGoopToken);
+ break;
+ case RangeInQuotedToken:
+ goop2 = Jj_consume_token(RangeInQuotedToken);
+ break;
+ default:
+ jj_la1[14] = jj_gen;
+ Jj_consume_token(-1);
+ throw new ParseException();
+ }
+ Jj_consume_token(RangeInEndToken);
+ switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk)
+ {
+ case CaratToken:
+ Jj_consume_token(CaratToken);
+ boost = Jj_consume_token(NumberToken);
+ break;
+ default:
+ jj_la1[15] = jj_gen;
+ break;
+ }
+ if (goop1.kind == RangeInQuotedToken)
+ {
+ goop1.image = goop1.image.Substring(1, (goop1.image.Length - 1) - (1));
+ }
+ if (goop2.kind == RangeInQuotedToken)
+ {
+ goop2.image = goop2.image.Substring(1, (goop2.image.Length - 1) - (1));
+ }
+ q = GetRangeQuery(field, DiscardEscapeChar(goop1.image), DiscardEscapeChar(goop2.image), true);
+ break;
+ case RangeExStartToken:
+ Jj_consume_token(RangeExStartToken);
+ switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk)
+ {
+ case RangeExGoopToken:
+ goop1 = Jj_consume_token(RangeExGoopToken);
+ break;
+ case RangeExQuotedToken:
+ goop1 = Jj_consume_token(RangeExQuotedToken);
+ break;
+ default:
+ jj_la1[16] = jj_gen;
+ Jj_consume_token(-1);
+ throw new ParseException();
+ }
+ switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk)
+ {
+ case RangeExToToken:
+ Jj_consume_token(RangeExToToken);
+ break;
+ default:
+ jj_la1[17] = jj_gen;
+ break;
+ }
+ switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk)
+ {
+ case RangeExGoopToken:
+ goop2 = Jj_consume_token(RangeExGoopToken);
+ break;
+ case RangeExQuotedToken:
+ goop2 = Jj_consume_token(RangeExQuotedToken);
+ break;
+ default:
+ jj_la1[18] = jj_gen;
+ Jj_consume_token(-1);
+ throw new ParseException();
+ }
+ Jj_consume_token(RangeExEndToken);
+ switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk)
+ {
+ case CaratToken:
+ Jj_consume_token(CaratToken);
+ boost = Jj_consume_token(NumberToken);
+ break;
+ default:
+ jj_la1[19] = jj_gen;
+ break;
+ }
+ if (goop1.kind == RangeExQuotedToken)
+ {
+ goop1.image = goop1.image.Substring(1, (goop1.image.Length - 1) - (1));
+ }
+ if (goop2.kind == RangeExQuotedToken)
+ {
+ goop2.image = goop2.image.Substring(1, (goop2.image.Length - 1) - (1));
+ }
+
+ q = GetRangeQuery(field, DiscardEscapeChar(goop1.image), DiscardEscapeChar(goop2.image), false);
+ break;
+ case QuotedToken:
+ term = Jj_consume_token(QuotedToken);
+ switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk)
+ {
+ case FuzzySlopToken:
+ fuzzySlop = Jj_consume_token(FuzzySlopToken);
+ break;
+ default:
+ jj_la1[20] = jj_gen;
+ break;
+ }
+ switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk)
+ {
+ case CaratToken:
+ Jj_consume_token(CaratToken);
+ boost = Jj_consume_token(NumberToken);
+ break;
+ default:
+ jj_la1[21] = jj_gen;
+ break;
+ }
+ int s = phraseSlop;
+
+ if (fuzzySlop != null)
+ {
+ try
+ {
+ s = (int)Single.Parse(fuzzySlop.image.Substring(1));
+ }
+ catch (Exception)
+ {
+ }
+ }
+ q = GetFieldQuery(field, DiscardEscapeChar(term.image.Substring(1, (term.image.Length - 1) - (1))),
+ s);
+ break;
+ default:
+ jj_la1[22] = jj_gen;
+ Jj_consume_token(-1);
+ throw new ParseException();
+ }
+ if (boost != null)
+ {
+ float f = (float)1.0;
+ try
+ {
+ f = Single.Parse(boost.image);
+ }
+ catch (Exception)
+ {
+ /* Should this be handled somehow? (defaults to "no boost", if
+ * boost number is invalid)
+ */
+ }
+
+ // avoid boosting null queries, such as those caused by stop words
+ if (q != null)
+ {
+ q.Boost = f;
+ }
+ }
+ {
+ if (true) return q;
+ }
+ throw new Exception("Missing return statement in function");
+ }
+
+ private bool Jj_2_1(int xla)
+ {
+ jj_la = xla;
+ jj_lastpos = jj_scanpos = token;
+ try
+ {
+ return !Jj_3_1();
+ }
+ catch (LookaheadSuccess)
+ {
+ return true;
+ }
+ finally
+ {
+ Jj_save(0, xla);
+ }
+ }
+
+ private bool Jj_3R_2()
+ {
+ if (jj_scan_token(TermToken)) return true;
+ if (jj_scan_token(ColonToken)) return true;
+ return false;
+ }
+
+ private bool Jj_3_1()
+ {
+ Token xsp;
+ xsp = jj_scanpos;
+ if (Jj_3R_2())
+ {
+ jj_scanpos = xsp;
+ if (Jj_3R_3()) return true;
+ }
+ return false;
+ }
+
+ private bool Jj_3R_3()
+ {
+ if (jj_scan_token(StarToken)) return true;
+ if (jj_scan_token(ColonToken)) return true;
+ return false;
+ }
+
+ /* Generated Token Manager. */
+ public QueryParserTokenManager token_source;
+ /* Current token. */
+ public Token token;
+ /* Next token. */
+ public Token jj_nt;
+ private int jj_ntk;
+ private Token jj_scanpos, jj_lastpos;
+ private int jj_la;
+ private int jj_gen;
+ private int[] jj_la1 = new int[23];
+ private static int[] jj_la1_0;
+ private static int[] jj_la1_1;
+
+ private static void Jj_la1_init_0()
+ {
+ jj_la1_0 = new int[]
+ {
+ 0x300, 0x300, 0x1c00, 0x1c00, 0x3ed3f00, 0x90000, 0x20000, 0x3ed2000, 0x2690000, 0x100000,
+ 0x100000, 0x20000, 0x30000000, 0x4000000, 0x30000000, 0x20000, 0x0, 0x40000000, 0x0, 0x20000,
+ 0x100000, 0x20000, 0x3ed0000,
+ };
+ }
+
+ private static void Jj_la1_init_1()
+ {
+ jj_la1_1 = new int[]
+ {
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x3, 0x0,
+ 0x3, 0x0, 0x0, 0x0, 0x0,
+ };
+ }
+
+ private JJCalls[] jj_2_rtns = new JJCalls[1];
+ private bool jj_rescan = false;
+ private int jj_gc = 0;
+
+ /// <summary>Constructor with user supplied CharStream. </summary>
+ protected internal QueryParser(ICharStream stream)
+ {
+ token_source = new QueryParserTokenManager(stream);
+ token = new Token();
+ jj_ntk = -1;
+ jj_gen = 0;
+ for (int i = 0; i < 23; i++) jj_la1[i] = -1;
+ for (int i = 0; i < jj_2_rtns.Length; i++) jj_2_rtns[i] = new JJCalls();
+ }
+
+ /// <summary>Reinitialise. </summary>
+ public void ReInit(ICharStream stream)
+ {
+ token_source.ReInit(stream);
+ token = new Token();
+ jj_ntk = -1;
+ jj_gen = 0;
+ for (int i = 0; i < 23; i++) jj_la1[i] = -1;
+ for (int i = 0; i < jj_2_rtns.Length; i++) jj_2_rtns[i] = new JJCalls();
+ }
+
+ /// <summary>Constructor with generated Token Manager. </summary>
+ protected QueryParser(QueryParserTokenManager tm)
+ {
+ token_source = tm;
+ token = new Token();
+ jj_ntk = -1;
+ jj_gen = 0;
+ for (int i = 0; i < 23; i++) jj_la1[i] = -1;
+ for (int i = 0; i < jj_2_rtns.Length; i++) jj_2_rtns[i] = new JJCalls();
+ }
+
+ /// <summary>Reinitialise. </summary>
+ public void ReInit(QueryParserTokenManager tm)
+ {
+ token_source = tm;
+ token = new Token();
+ jj_ntk = -1;
+ jj_gen = 0;
+ for (int i = 0; i < 23; i++) jj_la1[i] = -1;
+ for (int i = 0; i < jj_2_rtns.Length; i++) jj_2_rtns[i] = new JJCalls();
+ }
+
+ private Token Jj_consume_token(int kind)
+ {
+ Token oldToken;
+ if ((oldToken = token).next != null) token = token.next;
+ else token = token.next = token_source.GetNextToken();
+ jj_ntk = -1;
+ if (token.kind == kind)
+ {
+ jj_gen++;
+ if (++jj_gc > 100)
+ {
+ jj_gc = 0;
+ for (int i = 0; i < jj_2_rtns.Length; i++)
+ {
+ JJCalls c = jj_2_rtns[i];
+ while (c != null)
+ {
+ if (c.gen < jj_gen) c.first = null;
+ c = c.next;
+ }
+ }
+ }
+ return token;
+ }
+ token = oldToken;
+ jj_kind = kind;
+ throw GenerateParseException();
+ }
+
+ [Serializable]
+ private sealed class LookaheadSuccess : System.Exception
+ {
+ }
+
+ private LookaheadSuccess jj_ls = new LookaheadSuccess();
+ private bool jj_scan_token(int kind)
+ {
+ if (jj_scanpos == jj_lastpos)
+ {
+ jj_la--;
+ if (jj_scanpos.next == null)
+ {
+ jj_lastpos = jj_scanpos = jj_scanpos.next = token_source.GetNextToken();
+ }
+ else
+ {
+ jj_lastpos = jj_scanpos = jj_scanpos.next;
+ }
+ }
+ else
+ {
+ jj_scanpos = jj_scanpos.next;
+ }
+ if (jj_rescan)
+ {
+ int i = 0;
+ Token tok = token;
+ while (tok != null && tok != jj_scanpos)
+ {
+ i++;
+ tok = tok.next;
+ }
+ if (tok != null) Jj_add_error_token(kind, i);
+ }
+ if (jj_scanpos.kind != kind) return true;
+ if (jj_la == 0 && jj_scanpos == jj_lastpos) throw jj_ls;
+ return false;
+ }
+
+ /// <summary>Get the next Token. </summary>
+ public Token GetNextToken()
+ {
+ if (token.next != null) token = token.next;
+ else token = token.next = token_source.GetNextToken();
+ jj_ntk = -1;
+ jj_gen++;
+ return token;
+ }
+
+ /// <summary>Get the specific Token. </summary>
+ public Token getToken(int index)
+ {
+ Token t = token;
+ for (int i = 0; i < index; i++)
+ {
+ if (t.next != null) t = t.next;
+ else t = t.next = token_source.GetNextToken();
+ }
+ return t;
+ }
+
+ private int Jj_ntk()
+ {
+ if ((jj_nt = token.next) == null)
+ return (jj_ntk = (token.next = token_source.GetNextToken()).kind);
+ else
+ return (jj_ntk = jj_nt.kind);
+ }
+
+ private List<int[]> jj_expentries = new List<int[]>();
+ private int[] jj_expentry;
+ private int jj_kind = -1;
+ private int[] jj_lasttokens = new int[100];
+ private int jj_endpos;
+
+ private void Jj_add_error_token(int kind, int pos)
+ {
+ if (pos >= 100) return;
+ if (pos == jj_endpos + 1)
+ {
+ jj_lasttokens[jj_endpos++] = kind;
+ }
+ else if (jj_endpos != 0)
+ {
+ jj_expentry = new int[jj_endpos];
+ for (int i = 0; i < jj_endpos; i++)
+ {
+ jj_expentry[i] = jj_lasttokens[i];
+ }
+
+ foreach (var oldentry in jj_expentries)
+ {
+ if (oldentry.Length == jj_expentry.Length)
+ {
+ // The Java original skips to the next entry as soon as one element
+ // differs (a labelled continue); a bare 'continue' here only restarted
+ // the inner loop, so track the mismatch explicitly instead.
+ bool matches = true;
+ for (int i = 0; i < jj_expentry.Length; i++)
+ {
+ if (oldentry[i] != jj_expentry[i])
+ {
+ matches = false;
+ break;
+ }
+ }
+ if (matches)
+ {
+ jj_expentries.Add(jj_expentry);
+ break;
+ }
+ }
+ }
+ if (pos != 0) jj_lasttokens[(jj_endpos = pos) - 1] = kind;
+ }
+ }
+
+ /// <summary>Generate ParseException. </summary>
+ public virtual ParseException GenerateParseException()
+ {
+ jj_expentries.Clear();
+ bool[] la1tokens = new bool[34];
+ if (jj_kind >= 0)
+ {
+ la1tokens[jj_kind] = true;
+ jj_kind = -1;
+ }
+ for (int i = 0; i < 23; i++)
+ {
+ if (jj_la1[i] == jj_gen)
+ {
+ for (int j = 0; j < 32; j++)
+ {
+ if ((jj_la1_0[i] & (1 << j)) != 0)
+ {
+ la1tokens[j] = true;
+ }
+ if ((jj_la1_1[i] & (1 << j)) != 0)
+ {
+ la1tokens[32 + j] = true;
+ }
+ }
+ }
+ }
+ for (int i = 0; i < 34; i++)
+ {
+ if (la1tokens[i])
+ {
+ jj_expentry = new int[1];
+ jj_expentry[0] = i;
+ jj_expentries.Add(jj_expentry);
+ }
+ }
+ jj_endpos = 0;
+ Jj_rescan_token();
+ Jj_add_error_token(0, 0);
+ int[][] exptokseq = new int[jj_expentries.Count][];
+ for (int i = 0; i < jj_expentries.Count; i++)
+ {
+ exptokseq[i] = jj_expentries[i];
+ }
+ return new ParseException(token, exptokseq, tokenImage);
+ }
+
+ /// <summary>Enable tracing. </summary>
+ public void Enable_tracing()
+ {
+ }
+
+ /// <summary>Disable tracing. </summary>
+ public void Disable_tracing()
+ {
+ }
+
+ private void Jj_rescan_token()
+ {
+ jj_rescan = true;
+ for (int i = 0; i < 1; i++)
+ {
+ try
+ {
+ JJCalls p = jj_2_rtns[i];
+ do
+ {
+ if (p.gen > jj_gen)
+ {
+ jj_la = p.arg;
+ jj_lastpos = jj_scanpos = p.first;
+ switch (i)
+ {
+ case 0:
+ Jj_3_1();
+ break;
+ }
+ }
+ p = p.next;
+ } while (p != null);
+ }
+ catch (LookaheadSuccess)
+ {
+ }
+ }
+ jj_rescan = false;
+ }
+
+ private void Jj_save(int index, int xla)
+ {
+ JJCalls p = jj_2_rtns[index];
+ while (p.gen > jj_gen)
+ {
+ if (p.next == null)
+ {
+ p = p.next = new JJCalls();
+ break;
+ }
+ p = p.next;
+ }
+ p.gen = jj_gen + xla - jj_la;
+ p.first = token;
+ p.arg = xla;
+ }
+
+ internal sealed class JJCalls
+ {
+ internal int gen;
+ internal Token first;
+ internal int arg;
+ internal JJCalls next;
+ }
+
+ static QueryParser()
+ {
+ {
+ Jj_la1_init_0();
+ Jj_la1_init_1();
+ }
+ }
+ }
+} \ No newline at end of file
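The Main method above is the built-in smoke test; the sketch below is a slightly fuller usage example. It is only a sketch: Version, SimpleAnalyzer, Query and ParseException are assumed to come from the rest of the Lucene.Net tree, since this hunk defines QueryParser alone. Escape is paired with Parse so that raw user input cannot be read as query syntax.

using System;
using Lucene.Net.Analysis;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Version = Lucene.Net.Util.Version;

class ParseDemo
{
    static void Main()
    {
        var qp = new QueryParser(Version.LUCENE_CURRENT, "field", new SimpleAnalyzer());

        // Escape() prefixes every syntax character with a backslash, so the raw
        // string below becomes \(1\+1\)\:2\? and is parsed as a literal term.
        string safe = QueryParser.Escape("(1+1):2?");

        try
        {
            Query q = qp.Parse("title:" + safe + " AND status:open^2");
            Console.WriteLine(q.ToString("field"));
        }
        catch (ParseException e)
        {
            Console.WriteLine("Could not parse query: " + e.Message);
        }
    }
}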
diff --git a/src/core/QueryParser/QueryParserConstants.cs b/src/core/QueryParser/QueryParserConstants.cs
new file mode 100644
index 0000000..54e0c1f
--- /dev/null
+++ b/src/core/QueryParser/QueryParserConstants.cs
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Generated By:JavaCC: Do not edit this line. QueryParserConstants.java */
+
+using System;
+
+namespace Lucene.Net.QueryParsers
+{
+
+
+ /// <summary> Token literal values and constants.
+ /// Generated by org.javacc.parser.OtherFilesGen#start()
+ /// </summary>
+ public class QueryParserConstants
+ {
+ /// <summary>End of File. </summary>
+ protected internal const int EndOfFileToken = 0;
+ /// <summary>RegularExpression Id. </summary>
+ protected internal const int NumCharToken = 1;
+ /// <summary>RegularExpression Id. </summary>
+ protected internal const int EscapedCharToken = 2;
+ /// <summary>RegularExpression Id. </summary>
+ protected internal const int TermStartCharToken = 3;
+ /// <summary>RegularExpression Id. </summary>
+ protected internal const int TermCharToken = 4;
+ /// <summary>RegularExpression Id. </summary>
+ protected internal const int WhitespaceToken = 5;
+ /// <summary>RegularExpression Id. </summary>
+ protected internal const int QuotedCharToken = 6;
+ /// <summary>RegularExpression Id. </summary>
+ protected internal const int AndToken = 8;
+ /// <summary>RegularExpression Id. </summary>
+ protected internal const int OrToken = 9;
+ /// <summary>RegularExpression Id. </summary>
+ protected internal const int NotToken = 10;
+ /// <summary>RegularExpression Id. </summary>
+ protected internal const int PlusToken = 11;
+ /// <summary>RegularExpression Id. </summary>
+ protected internal const int MinusToken = 12;
+ /// <summary>RegularExpression Id. </summary>
+ protected internal const int LParanToken = 13;
+ /// <summary>RegularExpression Id. </summary>
+ protected internal const int RParenToken = 14;
+ /// <summary>RegularExpression Id. </summary>
+ protected internal const int ColonToken = 15;
+ /// <summary>RegularExpression Id. </summary>
+ protected internal const int StarToken = 16;
+ /// <summary>RegularExpression Id. </summary>
+ protected internal const int CaratToken = 17;
+ /// <summary>RegularExpression Id. </summary>
+ protected internal const int QuotedToken = 18;
+ /// <summary>RegularExpression Id. </summary>
+ protected internal const int TermToken = 19;
+ /// <summary>RegularExpression Id. </summary>
+ protected internal const int FuzzySlopToken = 20;
+ /// <summary>RegularExpression Id. </summary>
+ protected internal const int PrefixTermToken = 21;
+ /// <summary>RegularExpression Id. </summary>
+ protected internal const int WildTermToken = 22;
+ /// <summary>RegularExpression Id. </summary>
+ protected internal const int RangeInStartToken = 23;
+ /// <summary>RegularExpression Id. </summary>
+ protected internal const int RangeExStartToken = 24;
+ /// <summary>RegularExpression Id. </summary>
+ protected internal const int NumberToken = 25;
+ /// <summary>RegularExpression Id. </summary>
+ protected internal const int RangeInToToken = 26;
+ /// <summary>RegularExpression Id. </summary>
+ protected internal const int RangeInEndToken = 27;
+ /// <summary>RegularExpression Id. </summary>
+ protected internal const int RangeInQuotedToken = 28;
+ /// <summary>RegularExpression Id. </summary>
+ protected internal const int RangeInGoopToken = 29;
+ /// <summary>RegularExpression Id. </summary>
+ protected internal const int RangeExToToken = 30;
+ /// <summary>RegularExpression Id. </summary>
+ protected internal const int RangeExEndToken = 31;
+ /// <summary>RegularExpression Id. </summary>
+ protected internal const int RangeExQuotedToken = 32;
+ /// <summary>RegularExpression Id. </summary>
+ protected internal const int RangeExGoopToken = 33;
+ /// <summary>Lexical state. </summary>
+ protected internal const int BoostToken = 0;
+ /// <summary>Lexical state. </summary>
+ protected internal const int RangeExToken = 1;
+ /// <summary>Lexical state. </summary>
+ protected internal const int RangeInToken = 2;
+ /// <summary>Lexical state. </summary>
+ protected internal const int DefaultToken = 3;
+ /// <summary>Literal token values. </summary>
+ protected internal static System.String[] tokenImage = new System.String[] {
+ "<EOF>",
+ "<_NUM_CHAR>",
+ "<_ESCAPED_CHAR>",
+ "<_TERM_START_CHAR>",
+ "<_TERM_CHAR>",
+ "<_WHITESPACE>",
+ "<_QUOTED_CHAR>",
+ "<token of kind 7>",
+ "<AND>",
+ "<OR>",
+ "<NOT>",
+ "\"+\"",
+ "\"-\"",
+ "\"(\"",
+ "\")\"",
+ "\":\"",
+ "\"*\"",
+ "\"^\"",
+ "<QUOTED>",
+ "<TERM>",
+ "<FUZZY_SLOP>",
+ "<PREFIXTERM>",
+ "<WILDTERM>",
+ "\"[\"",
+ "\"{\"",
+ "<NUMBER>",
+ "\"TO\"",
+ "\"]\"",
+ "<RANGEIN_QUOTED>",
+ "<RANGEIN_GOOP>",
+ "\"TO\"",
+ "\"}\"",
+ "<RANGEEX_QUOTED>",
+ "<RANGEEX_GOOP>"
+ };
+ }
+} \ No newline at end of file
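Every constant above is protected internal rather than public, so code outside the assembly reaches the token ids and the tokenImage table only through a subclass. A hypothetical helper sketch (TokenKindPrinter is not part of this commit) that exposes a readable name for a token kind:

using Lucene.Net.QueryParsers;

// Hypothetical helper: derives from QueryParserConstants so that the
// protected internal tokenImage table becomes accessible for diagnostics.
public class TokenKindPrinter : QueryParserConstants
{
    public static string Describe(int kind)
    {
        return kind >= 0 && kind < tokenImage.Length
            ? tokenImage[kind]
            : "<unknown kind " + kind + ">";
    }
}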
diff --git a/src/core/QueryParser/QueryParserTokenManager.cs b/src/core/QueryParser/QueryParserTokenManager.cs
new file mode 100644
index 0000000..239d824
--- /dev/null
+++ b/src/core/QueryParser/QueryParserTokenManager.cs
@@ -0,0 +1,1462 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Generated By:JavaCC: Do not edit this line. QueryParserTokenManager.java */
+
+namespace Lucene.Net.QueryParsers
+{
+
+ /// <summary>Token Manager. </summary>
+ public class QueryParserTokenManager : QueryParserConstants
+ {
+ private void InitBlock()
+ {
+ System.IO.StreamWriter temp_writer;
+ temp_writer = new System.IO.StreamWriter(System.Console.OpenStandardOutput(), System.Console.Out.Encoding);
+ temp_writer.AutoFlush = true;
+ debugStream = temp_writer;
+ }
+
+ /// <summary>Debug output. </summary>
+ public System.IO.StreamWriter debugStream;
+ /// <summary>Set debug output. </summary>
+ public virtual void SetDebugStream(System.IO.StreamWriter ds)
+ {
+ debugStream = ds;
+ }
+ private int JjStopStringLiteralDfa_3(int pos, long active0)
+ {
+ switch (pos)
+ {
+
+ default:
+ return - 1;
+
+ }
+ }
+ private int JjStartNfa_3(int pos, long active0)
+ {
+ return JjMoveNfa_3(JjStopStringLiteralDfa_3(pos, active0), pos + 1);
+ }
+ private int JjStopAtPos(int pos, int kind)
+ {
+ jjmatchedKind = kind;
+ jjmatchedPos = pos;
+ return pos + 1;
+ }
+ private int JjMoveStringLiteralDfa0_3()
+ {
+ switch (curChar)
+ {
+
+ case (char) (40):
+ return JjStopAtPos(0, 13);
+
+ case (char) (41):
+ return JjStopAtPos(0, 14);
+
+ case (char) (42):
+ return JjStartNfaWithStates_3(0, 16, 36);
+
+ case (char) (43):
+ return JjStopAtPos(0, 11);
+
+ case (char) (45):
+ return JjStopAtPos(0, 12);
+
+ case (char) (58):
+ return JjStopAtPos(0, 15);
+
+ case (char) (91):
+ return JjStopAtPos(0, 23);
+
+ case (char) (94):
+ return JjStopAtPos(0, 17);
+
+ case (char) (123):
+ return JjStopAtPos(0, 24);
+
+ default:
+ return JjMoveNfa_3(0, 0);
+
+ }
+ }
+ private int JjStartNfaWithStates_3(int pos, int kind, int state)
+ {
+ jjmatchedKind = kind;
+ jjmatchedPos = pos;
+ try
+ {
+ curChar = input_stream.ReadChar();
+ }
+ catch (System.IO.IOException)
+ {
+ return pos + 1;
+ }
+ return JjMoveNfa_3(state, pos + 1);
+ }
+ internal static readonly ulong[] jjbitVec0 = new ulong[]{0x1L, 0x0L, 0x0L, 0x0L};
+ internal static readonly ulong[] jjbitVec1 = new ulong[]{0xfffffffffffffffeL, 0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffffffL};
+ internal static readonly ulong[] jjbitVec3 = new ulong[]{0x0L, 0x0L, 0xffffffffffffffffL, 0xffffffffffffffffL};
+ internal static readonly ulong[] jjbitVec4 = new ulong[]{0xfffefffffffffffeL, 0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffffffL};
+ private int JjMoveNfa_3(int startState, int curPos)
+ {
+ int startsAt = 0;
+ jjnewStateCnt = 36;
+ int i = 1;
+ jjstateSet[0] = startState;
+ int kind = 0x7fffffff;
+ for (; ; )
+ {
+ if (++jjround == 0x7fffffff)
+ ReInitRounds();
+ if (curChar < 64)
+ {
+ ulong l = (ulong) (1L << (int) curChar);
+ do
+ {
+ switch (jjstateSet[--i])
+ {
+
+ case 36:
+ case 25:
+ if ((0xfbfffcf8ffffd9ffL & l) == (ulong) 0L)
+ break;
+ if (kind > 22)
+ kind = 22;
+ JjCheckNAddTwoStates(25, 26);
+ break;
+
+ case 0:
+ if ((0xfbffd4f8ffffd9ffL & l) != (ulong) 0L)
+ {
+ if (kind > 22)
+ kind = 22;
+ JjCheckNAddTwoStates(25, 26);
+ }
+ else if ((0x100002600L & l) != 0L)
+ {
+ if (kind > 7)
+ kind = 7;
+ }
+ else if (curChar == 34)
+ JjCheckNAddStates(0, 2);
+ else if (curChar == 33)
+ {
+ if (kind > 10)
+ kind = 10;
+ }
+ if ((0x7bffd0f8ffffd9ffL & l) != 0L)
+ {
+ if (kind > 19)
+ kind = 19;
+ JjCheckNAddStates(3, 7);
+ }
+ else if (curChar == 42)
+ {
+ if (kind > 21)
+ kind = 21;
+ }
+ if (curChar == 38)
+ jjstateSet[jjnewStateCnt++] = 4;
+ break;
+
+ case 4:
+ if (curChar == 38 && kind > 8)
+ kind = 8;
+ break;
+
+ case 5:
+ if (curChar == 38)
+ jjstateSet[jjnewStateCnt++] = 4;
+ break;
+
+ case 13:
+ if (curChar == 33 && kind > 10)
+ kind = 10;
+ break;
+
+ case 14:
+ if (curChar == 34)
+ JjCheckNAddStates(0, 2);
+ break;
+
+ case 15:
+ if ((0xfffffffbffffffffL & l) != (ulong) 0L)
+ JjCheckNAddStates(0, 2);
+ break;
+
+ case 17:
+ JjCheckNAddStates(0, 2);
+ break;
+
+ case 18:
+ if (curChar == 34 && kind > 18)
+ kind = 18;
+ break;
+
+ case 20:
+ if ((0x3ff000000000000L & l) == 0L)
+ break;
+ if (kind > 20)
+ kind = 20;
+ JjAddStates(8, 9);
+ break;
+
+ case 21:
+ if (curChar == 46)
+ JjCheckNAdd(22);
+ break;
+
+ case 22:
+ if ((0x3ff000000000000L & l) == 0L)
+ break;
+ if (kind > 20)
+ kind = 20;
+ JjCheckNAdd(22);
+ break;
+
+ case 23:
+ if (curChar == 42 && kind > 21)
+ kind = 21;
+ break;
+
+ case 24:
+ if ((0xfbffd4f8ffffd9ffL & l) == (ulong) 0L)
+ break;
+ if (kind > 22)
+ kind = 22;
+ JjCheckNAddTwoStates(25, 26);
+ break;
+
+ case 27:
+ if (kind > 22)
+ kind = 22;
+ JjCheckNAddTwoStates(25, 26);
+ break;
+
+ case 28:
+ if ((0x7bffd0f8ffffd9ffL & l) == 0L)
+ break;
+ if (kind > 19)
+ kind = 19;
+ JjCheckNAddStates(3, 7);
+ break;
+
+ case 29:
+ if ((0x7bfff8f8ffffd9ffL & l) == 0L)
+ break;
+ if (kind > 19)
+ kind = 19;
+ JjCheckNAddTwoStates(29, 30);
+ break;
+
+ case 31:
+ if (kind > 19)
+ kind = 19;
+ JjCheckNAddTwoStates(29, 30);
+ break;
+
+ case 32:
+ if ((0x7bfff8f8ffffd9ffL & l) != 0L)
+ JjCheckNAddStates(10, 12);
+ break;
+
+ case 34:
+ JjCheckNAddStates(10, 12);
+ break;
+
+ default: break;
+
+ }
+ }
+ while (i != startsAt);
+ }
+ else if (curChar < 128)
+ {
+ ulong l = (ulong) (1L << (curChar & 63));
+ do
+ {
+ switch (jjstateSet[--i])
+ {
+
+ case 36:
+ if ((0x97ffffff87ffffffL & l) != (ulong) 0L)
+ {
+ if (kind > 22)
+ kind = 22;
+ JjCheckNAddTwoStates(25, 26);
+ }
+ else if (curChar == 92)
+ JjCheckNAddTwoStates(27, 27);
+ break;
+
+ case 0:
+ if ((0x97ffffff87ffffffL & l) != (ulong) 0L)
+ {
+ if (kind > 19)
+ kind = 19;
+ JjCheckNAddStates(3, 7);
+ }
+ else if (curChar == 92)
+ JjCheckNAddStates(13, 15);
+ else if (curChar == 126)
+ {
+ if (kind > 20)
+ kind = 20;
+ jjstateSet[jjnewStateCnt++] = 20;
+ }
+ if ((0x97ffffff87ffffffL & l) != (ulong) 0L)
+ {
+ if (kind > 22)
+ kind = 22;
+ JjCheckNAddTwoStates(25, 26);
+ }
+ if (curChar == 78)
+ jjstateSet[jjnewStateCnt++] = 11;
+ else if (curChar == 124)
+ jjstateSet[jjnewStateCnt++] = 8;
+ else if (curChar == 79)
+ jjstateSet[jjnewStateCnt++] = 6;
+ else if (curChar == 65)
+ jjstateSet[jjnewStateCnt++] = 2;
+ break;
+
+ case 1:
+ if (curChar == 68 && kind > 8)
+ kind = 8;
+ break;
+
+ case 2:
+ if (curChar == 78)
+ jjstateSet[jjnewStateCnt++] = 1;
+ break;
+
+ case 3:
+ if (curChar == 65)
+ jjstateSet[jjnewStateCnt++] = 2;
+ break;
+
+ case 6:
+ if (curChar == 82 && kind > 9)
+ kind = 9;
+ break;
+
+ case 7:
+ if (curChar == 79)
+ jjstateSet[jjnewStateCnt++] = 6;
+ break;
+
+ case 8:
+ if (curChar == 124 && kind > 9)
+ kind = 9;
+ break;
+
+ case 9:
+ if (curChar == 124)
+ jjstateSet[jjnewStateCnt++] = 8;
+ break;
+
+ case 10:
+ if (curChar == 84 && kind > 10)
+ kind = 10;
+ break;
+
+ case 11:
+ if (curChar == 79)
+ jjstateSet[jjnewStateCnt++] = 10;
+ break;
+
+ case 12:
+ if (curChar == 78)
+ jjstateSet[jjnewStateCnt++] = 11;
+ break;
+
+ case 15:
+ if ((0xffffffffefffffffL & l) != (ulong) 0L)
+ JjCheckNAddStates(0, 2);
+ break;
+
+ case 16:
+ if (curChar == 92)
+ jjstateSet[jjnewStateCnt++] = 17;
+ break;
+
+ case 17:
+ JjCheckNAddStates(0, 2);
+ break;
+
+ case 19:
+ if (curChar != 126)
+ break;
+ if (kind > 20)
+ kind = 20;
+ jjstateSet[jjnewStateCnt++] = 20;
+ break;
+
+ case 24:
+ if ((0x97ffffff87ffffffL & l) == (ulong) 0L)
+ break;
+ if (kind > 22)
+ kind = 22;
+ JjCheckNAddTwoStates(25, 26);
+ break;
+
+ case 25:
+ if ((0x97ffffff87ffffffL & l) == (ulong) 0L)
+ break;
+ if (kind > 22)
+ kind = 22;
+ JjCheckNAddTwoStates(25, 26);
+ break;
+
+ case 26:
+ if (curChar == 92)
+ JjCheckNAddTwoStates(27, 27);
+ break;
+
+ case 27:
+ if (kind > 22)
+ kind = 22;
+ JjCheckNAddTwoStates(25, 26);
+ break;
+
+ case 28:
+ if ((0x97ffffff87ffffffL & l) == (ulong) 0L)
+ break;
+ if (kind > 19)
+ kind = 19;
+ JjCheckNAddStates(3, 7);
+ break;
+
+ case 29:
+ if ((0x97ffffff87ffffffL & l) == (ulong) 0L)
+ break;
+ if (kind > 19)
+ kind = 19;
+ JjCheckNAddTwoStates(29, 30);
+ break;
+
+ case 30:
+ if (curChar == 92)
+ JjCheckNAddTwoStates(31, 31);
+ break;
+
+ case 31:
+ if (kind > 19)
+ kind = 19;
+ JjCheckNAddTwoStates(29, 30);
+ break;
+
+ case 32:
+ if ((0x97ffffff87ffffffL & l) != (ulong) 0L)
+ JjCheckNAddStates(10, 12);
+ break;
+
+ case 33:
+ if (curChar == 92)
+ JjCheckNAddTwoStates(34, 34);
+ break;
+
+ case 34:
+ JjCheckNAddStates(10, 12);
+ break;
+
+ case 35:
+ if (curChar == 92)
+ JjCheckNAddStates(13, 15);
+ break;
+
+ default: break;
+
+ }
+ }
+ while (i != startsAt);
+ }
+ else
+ {
+ int hiByte = (int) (curChar >> 8);
+ int i1 = hiByte >> 6;
+ ulong l1 = (ulong) (1L << (hiByte & 63));
+ int i2 = (curChar & 0xff) >> 6;
+ ulong l2 = (ulong) (1L << (curChar & 63));
+ do
+ {
+ switch (jjstateSet[--i])
+ {
+
+ case 36:
+ case 25:
+ if (!JjCanMove_2(hiByte, i1, i2, l1, l2))
+ break;
+ if (kind > 22)
+ kind = 22;
+ JjCheckNAddTwoStates(25, 26);
+ break;
+
+ case 0:
+ if (JjCanMove_0(hiByte, i1, i2, l1, l2))
+ {
+ if (kind > 7)
+ kind = 7;
+ }
+ if (JjCanMove_2(hiByte, i1, i2, l1, l2))
+ {
+ if (kind > 22)
+ kind = 22;
+ JjCheckNAddTwoStates(25, 26);
+ }
+ if (JjCanMove_2(hiByte, i1, i2, l1, l2))
+ {
+ if (kind > 19)
+ kind = 19;
+ JjCheckNAddStates(3, 7);
+ }
+ break;
+
+ case 15:
+ case 17:
+ if (JjCanMove_1(hiByte, i1, i2, l1, l2))
+ JjCheckNAddStates(0, 2);
+ break;
+
+ case 24:
+ if (!JjCanMove_2(hiByte, i1, i2, l1, l2))
+ break;
+ if (kind > 22)
+ kind = 22;
+ JjCheckNAddTwoStates(25, 26);
+ break;
+
+ case 27:
+ if (!JjCanMove_1(hiByte, i1, i2, l1, l2))
+ break;
+ if (kind > 22)
+ kind = 22;
+ JjCheckNAddTwoStates(25, 26);
+ break;
+
+ case 28:
+ if (!JjCanMove_2(hiByte, i1, i2, l1, l2))
+ break;
+ if (kind > 19)
+ kind = 19;
+ JjCheckNAddStates(3, 7);
+ break;
+
+ case 29:
+ if (!JjCanMove_2(hiByte, i1, i2, l1, l2))
+ break;
+ if (kind > 19)
+ kind = 19;
+ JjCheckNAddTwoStates(29, 30);
+ break;
+
+ case 31:
+ if (!JjCanMove_1(hiByte, i1, i2, l1, l2))
+ break;
+ if (kind > 19)
+ kind = 19;
+ JjCheckNAddTwoStates(29, 30);
+ break;
+
+ case 32:
+ if (JjCanMove_2(hiByte, i1, i2, l1, l2))
+ JjCheckNAddStates(10, 12);
+ break;
+
+ case 34:
+ if (JjCanMove_1(hiByte, i1, i2, l1, l2))
+ JjCheckNAddStates(10, 12);
+ break;
+
+ default: break;
+
+ }
+ }
+ while (i != startsAt);
+ }
+ if (kind != 0x7fffffff)
+ {
+ jjmatchedKind = kind;
+ jjmatchedPos = curPos;
+ kind = 0x7fffffff;
+ }
+ ++curPos;
+ if ((i = jjnewStateCnt) == (startsAt = 36 - (jjnewStateCnt = startsAt)))
+ return curPos;
+ try
+ {
+ curChar = input_stream.ReadChar();
+ }
+ catch (System.IO.IOException)
+ {
+ return curPos;
+ }
+ }
+ }
+ private int JjStopStringLiteralDfa_1(int pos, long active0)
+ {
+ switch (pos)
+ {
+
+ case 0:
+ if ((active0 & 0x40000000L) != 0L)
+ {
+ jjmatchedKind = 33;
+ return 6;
+ }
+ return - 1;
+
+ default:
+ return - 1;
+
+ }
+ }
+ private int JjStartNfa_1(int pos, long active0)
+ {
+ return JjMoveNfa_1(JjStopStringLiteralDfa_1(pos, active0), pos + 1);
+ }
+ private int JjMoveStringLiteralDfa0_1()
+ {
+ switch (curChar)
+ {
+
+ case (char) (84):
+ return JjMoveStringLiteralDfa1_1(0x40000000L);
+
+ case (char) (125):
+ return JjStopAtPos(0, 31);
+
+ default:
+ return JjMoveNfa_1(0, 0);
+
+ }
+ }
+ private int JjMoveStringLiteralDfa1_1(long active0)
+ {
+ try
+ {
+ curChar = input_stream.ReadChar();
+ }
+ catch (System.IO.IOException)
+ {
+ JjStopStringLiteralDfa_1(0, active0);
+ return 1;
+ }
+ switch (curChar)
+ {
+
+ case (char) (79):
+ if ((active0 & 0x40000000L) != 0L)
+ return JjStartNfaWithStates_1(1, 30, 6);
+ break;
+
+ default:
+ break;
+
+ }
+ return JjStartNfa_1(0, active0);
+ }
+ private int JjStartNfaWithStates_1(int pos, int kind, int state)
+ {
+ jjmatchedKind = kind;
+ jjmatchedPos = pos;
+ try
+ {
+ curChar = input_stream.ReadChar();
+ }
+ catch (System.IO.IOException)
+ {
+ return pos + 1;
+ }
+ return JjMoveNfa_1(state, pos + 1);
+ }
+ private int JjMoveNfa_1(int startState, int curPos)
+ {
+ int startsAt = 0;
+ jjnewStateCnt = 7;
+ int i = 1;
+ jjstateSet[0] = startState;
+ int kind = 0x7fffffff;
+ for (; ; )
+ {
+ if (++jjround == 0x7fffffff)
+ ReInitRounds();
+ if (curChar < 64)
+ {
+ ulong l = (ulong) (1L << (int) curChar);
+ do
+ {
+ switch (jjstateSet[--i])
+ {
+
+ case 0:
+ if ((0xfffffffeffffffffL & l) != (ulong) 0L)
+ {
+ if (kind > 33)
+ kind = 33;
+ JjCheckNAdd(6);
+ }
+ if ((0x100002600L & l) != 0L)
+ {
+ if (kind > 7)
+ kind = 7;
+ }
+ else if (curChar == 34)
+ JjCheckNAddTwoStates(2, 4);
+ break;
+
+ case 1:
+ if (curChar == 34)
+ JjCheckNAddTwoStates(2, 4);
+ break;
+
+ case 2:
+ if ((0xfffffffbffffffffL & l) != (ulong) 0L)
+ JjCheckNAddStates(16, 18);
+ break;
+
+ case 3:
+ if (curChar == 34)
+ JjCheckNAddStates(16, 18);
+ break;
+
+ case 5:
+ if (curChar == 34 && kind > 32)
+ kind = 32;
+ break;
+
+ case 6:
+ if ((0xfffffffeffffffffL & l) == (ulong) 0L)
+ break;
+ if (kind > 33)
+ kind = 33;
+ JjCheckNAdd(6);
+ break;
+
+ default: break;
+
+ }
+ }
+ while (i != startsAt);
+ }
+ else if (curChar < 128)
+ {
+ ulong l = (ulong) (1L << (curChar & 63));
+ do
+ {
+ switch (jjstateSet[--i])
+ {
+
+ case 0:
+ case 6:
+ if ((0xdfffffffffffffffL & l) == (ulong) 0L)
+ break;
+ if (kind > 33)
+ kind = 33;
+ JjCheckNAdd(6);
+ break;
+
+ case 2:
+ JjAddStates(16, 18);
+ break;
+
+ case 4:
+ if (curChar == 92)
+ jjstateSet[jjnewStateCnt++] = 3;
+ break;
+
+ default: break;
+
+ }
+ }
+ while (i != startsAt);
+ }
+ else
+ {
+ int hiByte = (int) (curChar >> 8);
+ int i1 = hiByte >> 6;
+ ulong l1 = (ulong) (1L << (hiByte & 63));
+ int i2 = (curChar & 0xff) >> 6;
+ ulong l2 = (ulong) (1L << (curChar & 63));
+ do
+ {
+ switch (jjstateSet[--i])
+ {
+
+ case 0:
+ if (JjCanMove_0(hiByte, i1, i2, l1, l2))
+ {
+ if (kind > 7)
+ kind = 7;
+ }
+ if (JjCanMove_1(hiByte, i1, i2, l1, l2))
+ {
+ if (kind > 33)
+ kind = 33;
+ JjCheckNAdd(6);
+ }
+ break;
+
+ case 2:
+ if (JjCanMove_1(hiByte, i1, i2, l1, l2))
+ JjAddStates(16, 18);
+ break;
+
+ case 6:
+ if (!JjCanMove_1(hiByte, i1, i2, l1, l2))
+ break;
+ if (kind > 33)
+ kind = 33;
+ JjCheckNAdd(6);
+ break;
+
+ default: break;
+
+ }
+ }
+ while (i != startsAt);
+ }
+ if (kind != 0x7fffffff)
+ {
+ jjmatchedKind = kind;
+ jjmatchedPos = curPos;
+ kind = 0x7fffffff;
+ }
+ ++curPos;
+ if ((i = jjnewStateCnt) == (startsAt = 7 - (jjnewStateCnt = startsAt)))
+ return curPos;
+ try
+ {
+ curChar = input_stream.ReadChar();
+ }
+ catch (System.IO.IOException)
+ {
+ return curPos;
+ }
+ }
+ }
+ private int JjMoveStringLiteralDfa0_0()
+ {
+ return JjMoveNfa_0(0, 0);
+ }
+ private int JjMoveNfa_0(int startState, int curPos)
+ {
+ int startsAt = 0;
+ jjnewStateCnt = 3;
+ int i = 1;
+ jjstateSet[0] = startState;
+ int kind = 0x7fffffff;
+ for (; ; )
+ {
+ if (++jjround == 0x7fffffff)
+ ReInitRounds();
+ if (curChar < 64)
+ {
+ ulong l = (ulong) (1L << (int) curChar);
+ do
+ {
+ switch (jjstateSet[--i])
+ {
+
+ case 0:
+ if ((0x3ff000000000000L & l) == 0L)
+ break;
+ if (kind > 25)
+ kind = 25;
+ JjAddStates(19, 20);
+ break;
+
+ case 1:
+ if (curChar == 46)
+ JjCheckNAdd(2);
+ break;
+
+ case 2:
+ if ((0x3ff000000000000L & l) == 0L)
+ break;
+ if (kind > 25)
+ kind = 25;
+ JjCheckNAdd(2);
+ break;
+
+ default: break;
+
+ }
+ }
+ while (i != startsAt);
+ }
+ else if (curChar < 128)
+ {
+ ulong l = (ulong) (1L << (curChar & 63));
+ do
+ {
+ switch (jjstateSet[--i])
+ {
+
+ default: break;
+
+ }
+ }
+ while (i != startsAt);
+ }
+ else
+ {
+ int hiByte = (int) (curChar >> 8);
+ int i1 = hiByte >> 6;
+ long l1 = 1L << (hiByte & 63);
+ int i2 = (curChar & 0xff) >> 6;
+ long l2 = 1L << (curChar & 63);
+ do
+ {
+ switch (jjstateSet[--i])
+ {
+
+ default: break;
+
+ }
+ }
+ while (i != startsAt);
+ }
+ if (kind != 0x7fffffff)
+ {
+ jjmatchedKind = kind;
+ jjmatchedPos = curPos;
+ kind = 0x7fffffff;
+ }
+ ++curPos;
+ if ((i = jjnewStateCnt) == (startsAt = 3 - (jjnewStateCnt = startsAt)))
+ return curPos;
+ try
+ {
+ curChar = input_stream.ReadChar();
+ }
+ catch (System.IO.IOException)
+ {
+ return curPos;
+ }
+ }
+ }
+ private int JjStopStringLiteralDfa_2(int pos, long active0)
+ {
+ switch (pos)
+ {
+
+ case 0:
+ if ((active0 & 0x4000000L) != 0L)
+ {
+ jjmatchedKind = 29;
+ return 6;
+ }
+ return - 1;
+
+ default:
+ return - 1;
+
+ }
+ }
+ private int JjStartNfa_2(int pos, long active0)
+ {
+ return JjMoveNfa_2(JjStopStringLiteralDfa_2(pos, active0), pos + 1);
+ }
+ private int JjMoveStringLiteralDfa0_2()
+ {
+ switch (curChar)
+ {
+
+ case (char) (84):
+ return JjMoveStringLiteralDfa1_2(0x4000000L);
+
+ case (char) (93):
+ return JjStopAtPos(0, 27);
+
+ default:
+ return JjMoveNfa_2(0, 0);
+
+ }
+ }
+ private int JjMoveStringLiteralDfa1_2(long active0)
+ {
+ try
+ {
+ curChar = input_stream.ReadChar();
+ }
+ catch (System.IO.IOException)
+ {
+ JjStopStringLiteralDfa_2(0, active0);
+ return 1;
+ }
+ switch (curChar)
+ {
+
+ case (char) (79):
+ if ((active0 & 0x4000000L) != 0L)
+ return JjStartNfaWithStates_2(1, 26, 6);
+ break;
+
+ default:
+ break;
+
+ }
+ return JjStartNfa_2(0, active0);
+ }
+ private int JjStartNfaWithStates_2(int pos, int kind, int state)
+ {
+ jjmatchedKind = kind;
+ jjmatchedPos = pos;
+ try
+ {
+ curChar = input_stream.ReadChar();
+ }
+ catch (System.IO.IOException)
+ {
+ return pos + 1;
+ }
+ return JjMoveNfa_2(state, pos + 1);
+ }
+ private int JjMoveNfa_2(int startState, int curPos)
+ {
+ int startsAt = 0;
+ jjnewStateCnt = 7;
+ int i = 1;
+ jjstateSet[0] = startState;
+ int kind = 0x7fffffff;
+ for (; ; )
+ {
+ if (++jjround == 0x7fffffff)
+ ReInitRounds();
+ if (curChar < 64)
+ {
+ ulong l = (ulong) (1L << (int) curChar);
+ do
+ {
+ switch (jjstateSet[--i])
+ {
+
+ case 0:
+ if ((0xfffffffeffffffffL & l) != (ulong) 0L)
+ {
+ if (kind > 29)
+ kind = 29;
+ JjCheckNAdd(6);
+ }
+ if ((0x100002600L & l) != 0L)
+ {
+ if (kind > 7)
+ kind = 7;
+ }
+ else if (curChar == 34)
+ JjCheckNAddTwoStates(2, 4);
+ break;
+
+ case 1:
+ if (curChar == 34)
+ JjCheckNAddTwoStates(2, 4);
+ break;
+
+ case 2:
+ if ((0xfffffffbffffffffL & l) != (ulong) 0L)
+ JjCheckNAddStates(16, 18);
+ break;
+
+ case 3:
+ if (curChar == 34)
+ JjCheckNAddStates(16, 18);
+ break;
+
+ case 5:
+ if (curChar == 34 && kind > 28)
+ kind = 28;
+ break;
+
+ case 6:
+ if ((0xfffffffeffffffffL & l) == (ulong) 0L)
+ break;
+ if (kind > 29)
+ kind = 29;
+ JjCheckNAdd(6);
+ break;
+
+ default: break;
+
+ }
+ }
+ while (i != startsAt);
+ }
+ else if (curChar < 128)
+ {
+ ulong l = (ulong) (1L << (curChar & 63));
+ do
+ {
+ switch (jjstateSet[--i])
+ {
+
+ case 0:
+ case 6:
+ if ((0xffffffffdfffffffL & l) == (ulong) 0L)
+ break;
+ if (kind > 29)
+ kind = 29;
+ JjCheckNAdd(6);
+ break;
+
+ case 2:
+ JjAddStates(16, 18);
+ break;
+
+ case 4:
+ if (curChar == 92)
+ jjstateSet[jjnewStateCnt++] = 3;
+ break;
+
+ default: break;
+
+ }
+ }
+ while (i != startsAt);
+ }
+ else
+ {
+ int hiByte = (int) (curChar >> 8);
+ int i1 = hiByte >> 6;
+ ulong l1 = (ulong) (1L << (hiByte & 63));
+ int i2 = (curChar & 0xff) >> 6;
+ ulong l2 = (ulong) (1L << (curChar & 63));
+ do
+ {
+ switch (jjstateSet[--i])
+ {
+
+ case 0:
+ if (JjCanMove_0(hiByte, i1, i2, l1, l2))
+ {
+ if (kind > 7)
+ kind = 7;
+ }
+ if (JjCanMove_1(hiByte, i1, i2, l1, l2))
+ {
+ if (kind > 29)
+ kind = 29;
+ JjCheckNAdd(6);
+ }
+ break;
+
+ case 2:
+ if (JjCanMove_1(hiByte, i1, i2, l1, l2))
+ JjAddStates(16, 18);
+ break;
+
+ case 6:
+ if (!JjCanMove_1(hiByte, i1, i2, l1, l2))
+ break;
+ if (kind > 29)
+ kind = 29;
+ JjCheckNAdd(6);
+ break;
+
+ default: break;
+
+ }
+ }
+ while (i != startsAt);
+ }
+ if (kind != 0x7fffffff)
+ {
+ jjmatchedKind = kind;
+ jjmatchedPos = curPos;
+ kind = 0x7fffffff;
+ }
+ ++curPos;
+ if ((i = jjnewStateCnt) == (startsAt = 7 - (jjnewStateCnt = startsAt)))
+ return curPos;
+ try
+ {
+ curChar = input_stream.ReadChar();
+ }
+ catch (System.IO.IOException)
+ {
+ return curPos;
+ }
+ }
+ }
+ internal static readonly int[] jjnextStates = new int[]{15, 16, 18, 29, 32, 23, 33, 30, 20, 21, 32, 23, 33, 31, 34, 27, 2, 4, 5, 0, 1};
+ private static bool JjCanMove_0(int hiByte, int i1, int i2, ulong l1, ulong l2)
+ {
+ switch (hiByte)
+ {
+
+ case 48:
+ return ((jjbitVec0[i2] & l2) != (ulong) 0L);
+
+ default:
+ return false;
+
+ }
+ }
+ private static bool JjCanMove_1(int hiByte, int i1, int i2, ulong l1, ulong l2)
+ {
+ switch (hiByte)
+ {
+
+ case 0:
+ return ((jjbitVec3[i2] & l2) != (ulong) 0L);
+
+ default:
+ if ((jjbitVec1[i1] & l1) != (ulong) 0L)
+ return true;
+ return false;
+
+ }
+ }
+ private static bool JjCanMove_2(int hiByte, int i1, int i2, ulong l1, ulong l2)
+ {
+ switch (hiByte)
+ {
+
+ case 0:
+ return ((jjbitVec3[i2] & l2) != (ulong) 0L);
+
+ case 48:
+ return ((jjbitVec1[i2] & l2) != (ulong) 0L);
+
+ default:
+ if ((jjbitVec4[i1] & l1) != (ulong) 0L)
+ return true;
+ return false;
+
+ }
+ }
+
+ /// <summary>Token literal values. </summary>
+ public static readonly System.String[] jjstrLiteralImages = new System.String[]{"", null, null, null, null, null, null, null, null, null, null, "\x002B", "\x002D", "\x0028", "\x0029", "\x003A", "\x002A", "\x005E", null, null, null, null, null, "\x005B", "\x007B", null, "\x0054\x004F", "\x005D", null, null, "\x0054\x004F", "\x007D", null, null};
+
+ /// <summary>Lexer state names. </summary>
+ public static readonly System.String[] lexStateNames = new System.String[]{"Boost", "RangeEx", "RangeIn", "DEFAULT"};
+
+ /// <summary>Lex State array. </summary>
+ public static readonly int[] jjnewLexState = new int[]{- 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, 0, - 1, - 1, - 1, - 1, - 1, 2, 1, 3, - 1, 3, - 1, - 1, - 1, 3, - 1, - 1};
+ internal static readonly ulong[] jjtoToken = new ulong[]{0x3ffffff01L};
+ internal static readonly long[] jjtoSkip = new long[]{0x80L};
+ protected internal ICharStream input_stream;
+ private uint[] jjrounds = new uint[36];
+ private int[] jjstateSet = new int[72];
+ protected internal char curChar;
+ /// <summary>Constructor. </summary>
+ public QueryParserTokenManager(ICharStream stream)
+ {
+ InitBlock();
+ input_stream = stream;
+ }
+
+ /// <summary>Constructor. </summary>
+ public QueryParserTokenManager(ICharStream stream, int lexState):this(stream)
+ {
+ SwitchTo(lexState);
+ }
+
+ /// <summary>Reinitialise parser. </summary>
+ public virtual void ReInit(ICharStream stream)
+ {
+ jjmatchedPos = jjnewStateCnt = 0;
+ curLexState = defaultLexState;
+ input_stream = stream;
+ ReInitRounds();
+ }
+ private void ReInitRounds()
+ {
+ int i;
+ jjround = 0x80000001;
+ for (i = 36; i-- > 0; )
+ jjrounds[i] = 0x80000000;
+ }
+
+ /// <summary>Reinitialise parser. </summary>
+ public virtual void ReInit(ICharStream stream, int lexState)
+ {
+ ReInit(stream);
+ SwitchTo(lexState);
+ }
+
+ /// <summary>Switch to specified lex state. </summary>
+ public virtual void SwitchTo(int lexState)
+ {
+ if (lexState >= 4 || lexState < 0)
+ throw new TokenMgrError("Error: Ignoring invalid lexical state : " + lexState + ". State unchanged.", TokenMgrError.INVALID_LEXICAL_STATE);
+ else
+ curLexState = lexState;
+ }
+
+ protected internal virtual Token JjFillToken()
+ {
+ Token t;
+ System.String curTokenImage;
+ int beginLine;
+ int endLine;
+ int beginColumn;
+ int endColumn;
+ System.String im = jjstrLiteralImages[jjmatchedKind];
+ curTokenImage = (im == null)?input_stream.Image:im;
+ beginLine = input_stream.BeginLine;
+ beginColumn = input_stream.BeginColumn;
+ endLine = input_stream.EndLine;
+ endColumn = input_stream.EndColumn;
+ t = Token.NewToken(jjmatchedKind, curTokenImage);
+
+ t.beginLine = beginLine;
+ t.endLine = endLine;
+ t.beginColumn = beginColumn;
+ t.endColumn = endColumn;
+
+ return t;
+ }
+
+ internal int curLexState = 3;
+ internal int defaultLexState = 3;
+ internal int jjnewStateCnt;
+ internal uint jjround;
+ internal int jjmatchedPos;
+ internal int jjmatchedKind;
+
+ /// <summary>Get the next Token. </summary>
+ [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
+ public virtual Token GetNextToken()
+ {
+ Token matchedToken;
+ int curPos = 0;
+
+ for (; ; )
+ {
+ try
+ {
+ curChar = input_stream.BeginToken();
+ }
+ catch (System.IO.IOException)
+ {
+ jjmatchedKind = 0;
+ matchedToken = JjFillToken();
+ return matchedToken;
+ }
+
+ switch (curLexState)
+ {
+
+ case 0:
+ jjmatchedKind = 0x7fffffff;
+ jjmatchedPos = 0;
+ curPos = JjMoveStringLiteralDfa0_0();
+ break;
+
+ case 1:
+ jjmatchedKind = 0x7fffffff;
+ jjmatchedPos = 0;
+ curPos = JjMoveStringLiteralDfa0_1();
+ break;
+
+ case 2:
+ jjmatchedKind = 0x7fffffff;
+ jjmatchedPos = 0;
+ curPos = JjMoveStringLiteralDfa0_2();
+ break;
+
+ case 3:
+ jjmatchedKind = 0x7fffffff;
+ jjmatchedPos = 0;
+ curPos = JjMoveStringLiteralDfa0_3();
+ break;
+ }
+ if (jjmatchedKind != 0x7fffffff)
+ {
+ if (jjmatchedPos + 1 < curPos)
+ input_stream.Backup(curPos - jjmatchedPos - 1);
+ if ((jjtoToken[jjmatchedKind >> 6] & ((ulong) 1L << (jjmatchedKind & 63))) != (ulong) 0L)
+ {
+ matchedToken = JjFillToken();
+ if (jjnewLexState[jjmatchedKind] != - 1)
+ curLexState = jjnewLexState[jjmatchedKind];
+ return matchedToken;
+ }
+ else
+ {
+ if (jjnewLexState[jjmatchedKind] != - 1)
+ curLexState = jjnewLexState[jjmatchedKind];
+ goto EOFLoop;
+ }
+ }
+ int error_line = input_stream.EndLine;
+ int error_column = input_stream.EndColumn;
+ System.String error_after = null;
+ bool EOFSeen = false;
+ try
+ {
+ input_stream.ReadChar(); input_stream.Backup(1);
+ }
+ catch (System.IO.IOException)
+ {
+ EOFSeen = true;
+ error_after = curPos <= 1?"":input_stream.Image;
+ if (curChar == '\n' || curChar == '\r')
+ {
+ error_line++;
+ error_column = 0;
+ }
+ else
+ error_column++;
+ }
+ if (!EOFSeen)
+ {
+ input_stream.Backup(1);
+ error_after = curPos <= 1?"":input_stream.Image;
+ }
+ throw new TokenMgrError(EOFSeen, curLexState, error_line, error_column, error_after, curChar, TokenMgrError.LEXICAL_ERROR);
+
+EOFLoop: ;
+ }
+ }
+
+ private void JjCheckNAdd(int state)
+ {
+ if (jjrounds[state] != jjround)
+ {
+ jjstateSet[jjnewStateCnt++] = state;
+ jjrounds[state] = jjround;
+ }
+ }
+ private void JjAddStates(int start, int end)
+ {
+ do
+ {
+ jjstateSet[jjnewStateCnt++] = jjnextStates[start];
+ }
+ while (start++ != end);
+ }
+ private void JjCheckNAddTwoStates(int state1, int state2)
+ {
+ JjCheckNAdd(state1);
+ JjCheckNAdd(state2);
+ }
+
+ private void JjCheckNAddStates(int start, int end)
+ {
+ do
+ {
+ JjCheckNAdd(jjnextStates[start]);
+ }
+ while (start++ != end);
+ }
+ }
+} \ No newline at end of file
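The token manager is normally driven indirectly through QueryParser.Parse, but it can also be exercised on its own, which is handy when debugging the lexical states above. A minimal sketch, assuming the companion FastCharStream class (an ICharStream over a TextReader) is available alongside these generated files, as it is in stock Lucene.Net; kind 0 is the <EOF> token.

using System;
using System.IO;
using Lucene.Net.QueryParsers;

class TokenDumpDemo
{
    static void Main()
    {
        // Wrap the query text in the char stream the token manager expects.
        var chars = new FastCharStream(new StringReader("title:lucene^2 AND [alpha TO omega]"));
        var lexer = new QueryParserTokenManager(chars);
        try
        {
            // Kind 0 is <EOF>; every other kind is one of the generated token ids.
            for (Token t = lexer.GetNextToken(); t.kind != 0; t = lexer.GetNextToken())
            {
                Console.WriteLine("kind={0,2}  image={1}", t.kind, t.image);
            }
        }
        catch (TokenMgrError e)
        {
            // Raised when the input cannot be tokenized in the current lexical state.
            Console.WriteLine("Lexical error: " + e.Message);
        }
    }
}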
diff --git a/src/core/QueryParser/Token.cs b/src/core/QueryParser/Token.cs
new file mode 100644
index 0000000..e3c51f8
--- /dev/null
+++ b/src/core/QueryParser/Token.cs
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Generated By:JavaCC: Do not edit this line. Token.java Version 4.1 */
+/* JavaCCOptions:TOKEN_EXTENDS=,KEEP_LINE_COL=null */
+
+using System;
+
+namespace Lucene.Net.QueryParsers
+{
+
+ /// <summary> Describes the input token stream.</summary>
+
+ public class Token
+ {
+
+ /// <summary> An integer that describes the kind of this token. This numbering
+ /// system is determined by JavaCCParser, and a table of these numbers is
+ /// stored in the file ...Constants.java.
+ /// </summary>
+ public int kind;
+
+ /// <summary>The line number of the first character of this Token. </summary>
+ public int beginLine;
+ /// <summary>The column number of the first character of this Token. </summary>
+ public int beginColumn;
+ /// <summary>The line number of the last character of this Token. </summary>
+ public int endLine;
+ /// <summary>The column number of the last character of this Token. </summary>
+ public int endColumn;
+
+ /// <summary> The string image of the token.</summary>
+ public System.String image;
+
+ /// <summary> A reference to the next regular (non-special) token from the input
+ /// stream. If this is the last token from the input stream, or if the
+ /// token manager has not read tokens beyond this one, this field is
+ /// set to null. This is true only if this token is also a regular
+ /// token. Otherwise, see below for a description of the contents of
+ /// this field.
+ /// </summary>
+ public Token next;
+
+ /// <summary> This field is used to access special tokens that occur prior to this
+ /// token, but after the immediately preceding regular (non-special) token.
+ /// If there are no such special tokens, this field is set to null.
+ /// When there are more than one such special token, this field refers
+ /// to the last of these special tokens, which in turn refers to the next
+ /// previous special token through its specialToken field, and so on
+ /// until the first special token (whose specialToken field is null).
+ /// The next fields of special tokens refer to other special tokens that
+ /// immediately follow it (without an intervening regular token). If there
+ /// is no such token, this field is null.
+ /// </summary>
+ public Token specialToken;
+
+ /// <summary> An optional attribute value of the Token.
+ /// Tokens which are not used as syntactic sugar will often contain
+ /// meaningful values that will be used later on by the compiler or
+ /// interpreter. This attribute value is often different from the image.
+ /// Any subclass of Token that actually wants to return a non-null value can
+ /// override this method as appropriate.
+ /// </summary>
+ public virtual object Value
+ {
+ get { return null; }
+ }
+
+ /// <summary> No-argument constructor</summary>
+ public Token()
+ {
+ }
+
+ /// <summary> Constructs a new token for the specified Image.</summary>
+ public Token(int kind):this(kind, null)
+ {
+ }
+
+ /// <summary> Constructs a new token for the specified Image and Kind.</summary>
+ public Token(int kind, System.String image)
+ {
+ this.kind = kind;
+ this.image = image;
+ }
+
+ /// <summary> Returns the image.</summary>
+ public override System.String ToString()
+ {
+ return image;
+ }
+
+ /// <summary> Returns a new Token object, by default. However, if you want, you
+ /// can create and return subclass objects based on the value of ofKind.
+ /// Simply add the cases to the switch for all those special cases.
+ /// For example, if you have a subclass of Token called IDToken that
+ /// you want to create if ofKind is ID, simply add something like :
+ ///
+ /// case MyParserConstants.ID : return new IDToken(ofKind, image);
+ ///
+ /// to the following switch statement. Then you can cast matchedToken
+ /// variable to the appropriate type and use it in your lexical actions.
+ /// </summary>
+ public static Token NewToken(int ofKind, System.String image)
+ {
+ switch (ofKind)
+ {
+
+ default: return new Token(ofKind, image);
+
+ }
+ }
+
+ public static Token NewToken(int ofKind)
+ {
+ return NewToken(ofKind, null);
+ }
+ }
+ /* JavaCC - OriginalChecksum=c147cc166a7cf8812c7c39bc8c5eb868 (do not edit this line) */
+} \ No newline at end of file
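As the NewToken remarks above describe, a subclass can carry a richer Value than the raw image. A hypothetical sketch (NumberValueToken is not part of this commit); to wire it in, the generated switch in NewToken would gain a case for the NUMBER kind (25).

using System.Globalization;
using Lucene.Net.QueryParsers;

// Hypothetical subclass: parses the numeric image once so later stages
// can read Value instead of re-parsing the string.
public class NumberValueToken : Token
{
    private readonly float parsedValue;

    public NumberValueToken(int kind, string image) : base(kind, image)
    {
        float value;
        float.TryParse(image, NumberStyles.Float, CultureInfo.InvariantCulture, out value);
        parsedValue = value;
    }

    public override object Value
    {
        get { return parsedValue; }
    }
}

// Inside Token.NewToken, the generated switch would then gain:
//     case 25: return new NumberValueToken(ofKind, image);   // 25 == NumberToken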
diff --git a/src/core/QueryParser/TokenMgrError.cs b/src/core/QueryParser/TokenMgrError.cs
new file mode 100644
index 0000000..d69f88c
--- /dev/null
+++ b/src/core/QueryParser/TokenMgrError.cs
@@ -0,0 +1,169 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Generated By:JavaCC: Do not edit this line. TokenMgrError.java Version 4.1 */
+/* JavaCCOptions: */
+
+using System;
+
+namespace Lucene.Net.QueryParsers
+{
+
+ /// <summary>Token Manager Error. </summary>
+ [Serializable]
+ public class TokenMgrError:System.ApplicationException
+ {
+ /// <summary> You can also modify the body of this method to customize your error messages.
+ /// For example, cases like LOOP_DETECTED and INVALID_LEXICAL_STATE are not
+ /// of end-users concern, so you can return something like :
+ ///
+ /// "Internal Error : Please file a bug report .... "
+ ///
+ /// from this method for such cases in the release version of your parser.
+ /// </summary>
+ public override System.String Message
+ {
+ get
+ {
+ return base.Message;
+ }
+
+ }
+
+ /*
+ * Ordinals for various reasons why an Error of this type can be thrown.
+ */
+
+ /// <summary> Lexical error occurred.</summary>
+ internal const int LEXICAL_ERROR = 0;
+
+ /// <summary> An attempt was made to create a second instance of a static token manager.</summary>
+ internal const int STATIC_LEXER_ERROR = 1;
+
+ /// <summary> Tried to change to an invalid lexical state.</summary>
+ internal const int INVALID_LEXICAL_STATE = 2;
+
+ /// <summary> Detected (and bailed out of) an infinite loop in the token manager.</summary>
+ internal const int LOOP_DETECTED = 3;
+
+ /// <summary> Indicates the reason why the exception is thrown. It will have
+ /// one of the above 4 values.
+ /// </summary>
+ internal int errorCode;
+
+ /// <summary> Replaces unprintable characters by their escaped (or unicode escaped)
+ /// equivalents in the given string
+ /// </summary>
+ protected internal static System.String addEscapes(System.String str)
+ {
+ System.Text.StringBuilder retval = new System.Text.StringBuilder();
+ char ch;
+ for (int i = 0; i < str.Length; i++)
+ {
+ switch (str[i])
+ {
+
+ case (char) (0):
+ continue;
+
+ case '\b':
+ retval.Append("\\b");
+ continue;
+
+ case '\t':
+ retval.Append("\\t");
+ continue;
+
+ case '\n':
+ retval.Append("\\n");
+ continue;
+
+ case '\f':
+ retval.Append("\\f");
+ continue;
+
+ case '\r':
+ retval.Append("\\r");
+ continue;
+
+ case '\"':
+ retval.Append("\\\"");
+ continue;
+
+ case '\'':
+ retval.Append("\\\'");
+ continue;
+
+ case '\\':
+ retval.Append("\\\\");
+ continue;
+
+ default:
+ if ((ch = str[i]) < 0x20 || ch > 0x7e)
+ {
+ System.String s = "0000" + System.Convert.ToString(ch, 16);
+ retval.Append("\\u" + s.Substring(s.Length - 4, (s.Length) - (s.Length - 4)));
+ }
+ else
+ {
+ retval.Append(ch);
+ }
+ continue;
+
+ }
+ }
+ return retval.ToString();
+ }
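+
+		// For example (illustrative): addEscapes("a\tb\u0001") yields the literal text
+		// a\tb\u0001 - the tab becomes \t and the unprintable char becomes a \uXXXX escape.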
+
+ /// <summary> Returns a detailed message for the Error when it is thrown by the
+ /// token manager to indicate a lexical error.
+ /// Parameters :
+ /// EOFSeen : indicates if EOF caused the lexical error
+ /// curLexState : lexical state in which this error occurred
+ /// errorLine : line number when the error occurred
+ /// errorColumn : column number when the error occurred
+ /// errorAfter : prefix that was seen before this error occurred
+ /// curchar : the offending character
+ /// Note: You can customize the lexical error message by modifying this method.
+ /// </summary>
+ protected internal static System.String LexicalError(bool EOFSeen, int lexState, int errorLine, int errorColumn, System.String errorAfter, char curChar)
+ {
+ return ("Lexical error at line " + errorLine + ", column " + errorColumn + ". Encountered: " + (EOFSeen?"<EOF> ":("\"" + addEscapes(System.Convert.ToString(curChar)) + "\"") + " (" + (int) curChar + "), ") + "after : \"" + addEscapes(errorAfter) + "\"");
+ }
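+
+		// Illustrative output (derived from the format above): an unexpected '~' at
+		// line 1, column 5 after the prefix "abc" produces:
+		//   Lexical error at line 1, column 5. Encountered: "~" (126), after : "abc"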
+
+ /*
+ * Constructors of various flavors follow.
+ */
+
+ /// <summary>No arg constructor. </summary>
+ public TokenMgrError()
+ {
+ }
+
+ /// <summary>Constructor with message and reason. </summary>
+ public TokenMgrError(System.String message, int reason):base(message)
+ {
+ errorCode = reason;
+ }
+
+ /// <summary>Full Constructor. </summary>
+ public TokenMgrError(bool EOFSeen, int lexState, int errorLine, int errorColumn, System.String errorAfter, char curChar, int reason):this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason)
+ {
+ }
+ }
+ /* JavaCC - OriginalChecksum=1c94e13236c7e0121e49427992341ee3 (do not edit this line) */
+} \ No newline at end of file
diff --git a/src/core/Search/BooleanClause.cs b/src/core/Search/BooleanClause.cs
new file mode 100644
index 0000000..ac37b80
--- /dev/null
+++ b/src/core/Search/BooleanClause.cs
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary>A clause in a BooleanQuery. </summary>
+ [Serializable]
+ public class BooleanClause
+ {
+ private Occur occur;
+
+ /// <summary>Constructs a BooleanClause.</summary>
+ public BooleanClause(Query query, Occur occur)
+ {
+ this._query = query;
+ this.occur = occur;
+ }
+
+ public virtual Occur Occur
+ {
+ get { return occur; }
+ set { this.occur = value; }
+ }
+
+ private Query _query;
+
+ /// <summary>The query whose matching documents are combined by the boolean query.</summary>
+ public virtual Query Query
+ {
+ get { return _query; }
+ set { this._query = value; }
+ }
+
+ public virtual bool IsProhibited
+ {
+ get { return Occur.MUST_NOT.Equals(occur); }
+ }
+
+ public virtual bool IsRequired
+ {
+ get { return Occur.MUST.Equals(occur); }
+ }
+
+
+ /// <summary>Returns true if <c>o</c> is equal to this. </summary>
+ public override bool Equals(System.Object o)
+ {
+ if (o == null || !(o is BooleanClause))
+ return false;
+ BooleanClause other = (BooleanClause) o;
+ return this.Query.Equals(other.Query) && this.occur.Equals(other.occur);
+ }
+
+ /// <summary>Returns a hash code value for this object.</summary>
+ public override int GetHashCode()
+ {
+ return Query.GetHashCode() ^ (Occur.MUST.Equals(occur)?1:0) ^ (Occur.MUST_NOT.Equals(occur)?2:0);
+ }
+
+
+ public override System.String ToString()
+ {
+ return OccurExtensions.ToString(occur) + Query;
+ }
+ }
+
+ public enum Occur
+ {
+ MUST,
+ SHOULD,
+ MUST_NOT
+ }
+
+ public static class OccurExtensions
+ {
+ public static System.String ToString(this Occur occur)
+ {
+ if (occur == Occur.MUST)
+ return "+";
+ if (occur == Occur.MUST_NOT)
+ return "-";
+ return "";
+ }
+ }
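+
+	// For example (illustrative): OccurExtensions.ToString(Occur.MUST) yields "+",
+	// Occur.MUST_NOT yields "-", and Occur.SHOULD yields an empty string, which is
+	// how BooleanClause.ToString() above prefixes its query.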
+} \ No newline at end of file
diff --git a/src/core/Search/BooleanQuery.cs b/src/core/Search/BooleanQuery.cs
new file mode 100644
index 0000000..22b6371
--- /dev/null
+++ b/src/core/Search/BooleanQuery.cs
@@ -0,0 +1,599 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections;
+using Lucene.Net.Index;
+using Lucene.Net.Support;
+using IndexReader = Lucene.Net.Index.IndexReader;
+using ToStringUtils = Lucene.Net.Util.ToStringUtils;
+using Occur = Lucene.Net.Search.Occur;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary>A Query that matches documents matching boolean combinations of other
+ /// queries, e.g. <see cref="TermQuery" />s, <see cref="PhraseQuery" />s or other
+ /// BooleanQuerys.
+ /// </summary>
+ [Serializable]
+ public class BooleanQuery : Query, System.Collections.Generic.IEnumerable<BooleanClause>, System.ICloneable
+ {
+ [Serializable]
+ private class AnonymousClassSimilarityDelegator:SimilarityDelegator
+ {
+ private void InitBlock(BooleanQuery enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private BooleanQuery enclosingInstance;
+ public BooleanQuery Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ internal AnonymousClassSimilarityDelegator(BooleanQuery enclosingInstance, Lucene.Net.Search.Similarity Param1):base(Param1)
+ {
+ InitBlock(enclosingInstance);
+ }
+ public override float Coord(int overlap, int maxOverlap)
+ {
+ return 1.0f;
+ }
+ }
+
+ private static int _maxClauses = 1024;
+
+ /// <summary>Thrown when an attempt is made to add more than <see cref="MaxClauseCount" />
+ /// clauses. This typically happens if
+ /// a PrefixQuery, FuzzyQuery, WildcardQuery, or TermRangeQuery
+ /// is expanded to many terms during search.
+ /// </summary>
+ [Serializable]
+ public class TooManyClauses:System.SystemException
+ {
+ public override System.String Message
+ {
+ get
+ {
+ return "maxClauseCount is set to " + Lucene.Net.Search.BooleanQuery._maxClauses;
+ }
+
+ }
+ }
+
+ /// <summary>Gets or sets the maximum number of clauses permitted, 1024 by default.
+ /// Attempts to add more than the permitted number of clauses cause <see cref="TooManyClauses" />
+ /// to be thrown.
+ /// </summary>
+ public static int MaxClauseCount
+ {
+ get { return _maxClauses; }
+ set
+ {
+ if (value < 1)
+ throw new ArgumentException("maxClauseCount must be >= 1");
+ _maxClauses = value;
+ }
+ }
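+
+		// Illustrative sketch only (not part of this file): raising the process-wide
+		// clause limit before running a query that expands to many terms. The setter
+		// above rejects values below 1 with an ArgumentException.
+		//
+		//   BooleanQuery.MaxClauseCount = 4096;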
+
+ private EquatableList<BooleanClause> clauses = new EquatableList<BooleanClause>();
+ private bool disableCoord;
+
+ /// <summary>Constructs an empty boolean query. </summary>
+ public BooleanQuery()
+ {
+ }
+
+ /// <summary>Constructs an empty boolean query.
+ ///
+ /// <see cref="Similarity.Coord(int,int)" /> may be disabled in scoring, as
+ /// appropriate. For example, this score factor does not make sense for most
+ /// automatically generated queries, like <see cref="WildcardQuery" /> and <see cref="FuzzyQuery" />
+ ///.
+ ///
+ /// </summary>
+ /// <param name="disableCoord">disables <see cref="Similarity.Coord(int,int)" /> in scoring.
+ /// </param>
+ public BooleanQuery(bool disableCoord)
+ {
+ this.disableCoord = disableCoord;
+ }
+
+ /// <summary>Returns true iff <see cref="Similarity.Coord(int,int)" /> is disabled in
+ /// scoring for this query instance.
+ /// </summary>
+ /// <seealso cref="BooleanQuery(bool)">
+ /// </seealso>
+ public virtual bool IsCoordDisabled()
+ {
+ return disableCoord;
+ }
+
+ // Implement coord disabling.
+ // Inherit javadoc.
+ public override Similarity GetSimilarity(Searcher searcher)
+ {
+ Similarity result = base.GetSimilarity(searcher);
+ if (disableCoord)
+ {
+ // disable coord as requested
+ result = new AnonymousClassSimilarityDelegator(this, result);
+ }
+ return result;
+ }
+
+ protected internal int minNrShouldMatch = 0;
+
+ /// <summary>
+ /// Specifies a minimum number of the optional BooleanClauses
+ /// which must be satisfied.
+ /// <para>
+ /// By default no optional clauses are necessary for a match
+ /// (unless there are no required clauses). If this method is used,
+ /// then the specified number of clauses is required.
+ /// </para>
+ /// <para>
+ /// Use of this method is totally independent of specifying that
+ /// any specific clauses are required (or prohibited). This number will
+ /// only be compared against the number of matching optional clauses.
+ /// </para>
+ /// </summary>
+ public virtual int MinimumNumberShouldMatch
+ {
+ set { this.minNrShouldMatch = value; }
+ get { return minNrShouldMatch; }
+ }
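+
+		// Illustrative sketch only (not part of this file): require at least two of
+		// three optional clauses to match. TermQuery and Term are assumed to be
+		// available elsewhere in the library.
+		//
+		//   var q = new BooleanQuery();
+		//   q.Add(new TermQuery(new Term("body", "lucene")), Occur.SHOULD);
+		//   q.Add(new TermQuery(new Term("body", "search")), Occur.SHOULD);
+		//   q.Add(new TermQuery(new Term("body", "index")), Occur.SHOULD);
+		//   q.MinimumNumberShouldMatch = 2;  // a doc must match at least 2 SHOULD clauses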
+
+ /// <summary>Adds a clause to a boolean query.
+ ///
+ /// </summary>
+ /// <throws> TooManyClauses if the new number of clauses exceeds the maximum clause number </throws>
+ /// <seealso cref="MaxClauseCount">
+ /// </seealso>
+ public virtual void Add(Query query, Occur occur)
+ {
+ Add(new BooleanClause(query, occur));
+ }
+
+ /// <summary>Adds a clause to a boolean query.</summary>
+ /// <throws> TooManyClauses if the new number of clauses exceeds the maximum clause number </throws>
+ /// <seealso cref="MaxClauseCount">
+ /// </seealso>
+ public virtual void Add(BooleanClause clause)
+ {
+ if (clauses.Count >= _maxClauses)
+ throw new TooManyClauses();
+
+ clauses.Add(clause);
+ }
+
+ /// <summary>Returns the set of clauses in this query. </summary>
+ public virtual BooleanClause[] GetClauses()
+ {
+ return clauses.ToArray();
+ }
+
+ /// <summary>Returns the list of clauses in this query. </summary>
+ public virtual System.Collections.Generic.List<BooleanClause> Clauses
+ {
+ get { return clauses; }
+ }
+
+		/// <summary>
+		/// Returns an enumerator over the clauses in this query.
+		/// </summary>
+		/// <returns>An enumerator over this query's <see cref="BooleanClause"/>s.</returns>
+ public System.Collections.Generic.IEnumerator<BooleanClause> GetEnumerator()
+ {
+ return clauses.GetEnumerator();
+ }
+ /// <summary> Expert: the Weight for BooleanQuery, used to
+ /// normalize, score and explain these queries.
+ ///
+		/// <p/>NOTE: this API and implementation are subject to
+		/// change suddenly in the next release.<p/>
+ /// </summary>
+ [Serializable]
+ protected internal class BooleanWeight:Weight
+ {
+ private void InitBlock(BooleanQuery enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private BooleanQuery enclosingInstance;
+ public BooleanQuery Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ /// <summary>The Similarity implementation. </summary>
+ protected internal Similarity similarity;
+ protected internal System.Collections.Generic.List<Weight> weights;
+
+ public BooleanWeight(BooleanQuery enclosingInstance, Searcher searcher)
+ {
+ InitBlock(enclosingInstance);
+ this.similarity = Enclosing_Instance.GetSimilarity(searcher);
+ weights = new System.Collections.Generic.List<Weight>(Enclosing_Instance.clauses.Count);
+ for (int i = 0; i < Enclosing_Instance.clauses.Count; i++)
+ {
+ weights.Add(Enclosing_Instance.clauses[i].Query.CreateWeight(searcher));
+ }
+ }
+
+ public override Query Query
+ {
+ get { return Enclosing_Instance; }
+ }
+
+ public override float Value
+ {
+ get { return Enclosing_Instance.Boost; }
+ }
+
+ public override float GetSumOfSquaredWeights()
+ {
+ float sum = 0.0f;
+ for (int i = 0; i < weights.Count; i++)
+ {
+ // call sumOfSquaredWeights for all clauses in case of side effects
+ float s = weights[i].GetSumOfSquaredWeights(); // sum sub weights
+ if (!Enclosing_Instance.clauses[i].IsProhibited)
+ // only add to sum for non-prohibited clauses
+ sum += s;
+ }
+
+ sum *= Enclosing_Instance.Boost*Enclosing_Instance.Boost; // boost each sub-weight
+
+ return sum;
+ }
+
+
+ public override void Normalize(float norm)
+ {
+ norm *= Enclosing_Instance.Boost; // incorporate boost
+ foreach (Weight w in weights)
+ {
+					// normalize all clauses (even if prohibited, in case of side effects)
+ w.Normalize(norm);
+ }
+ }
+
+ public override Explanation Explain(IndexReader reader, int doc)
+ {
+ int minShouldMatch = Enclosing_Instance.MinimumNumberShouldMatch;
+ ComplexExplanation sumExpl = new ComplexExplanation();
+ sumExpl.Description = "sum of:";
+ int coord = 0;
+ int maxCoord = 0;
+ float sum = 0.0f;
+ bool fail = false;
+ int shouldMatchCount = 0;
+ System.Collections.Generic.IEnumerator<BooleanClause> cIter = Enclosing_Instance.clauses.GetEnumerator();
+ for (System.Collections.Generic.IEnumerator<Weight> wIter = weights.GetEnumerator(); wIter.MoveNext(); )
+ {
+ cIter.MoveNext();
+ Weight w = wIter.Current;
+ BooleanClause c = cIter.Current;
+ if (w.Scorer(reader, true, true) == null)
+ {
+ continue;
+ }
+ Explanation e = w.Explain(reader, doc);
+ if (!c.IsProhibited)
+ maxCoord++;
+ if (e.IsMatch)
+ {
+ if (!c.IsProhibited)
+ {
+ sumExpl.AddDetail(e);
+ sum += e.Value;
+ coord++;
+ }
+ else
+ {
+ Explanation r = new Explanation(0.0f, "match on prohibited clause (" + c.Query.ToString() + ")");
+ r.AddDetail(e);
+ sumExpl.AddDetail(r);
+ fail = true;
+ }
+ if (c.Occur == Occur.SHOULD)
+ shouldMatchCount++;
+ }
+ else if (c.IsRequired)
+ {
+ Explanation r = new Explanation(0.0f, "no match on required clause (" + c.Query.ToString() + ")");
+ r.AddDetail(e);
+ sumExpl.AddDetail(r);
+ fail = true;
+ }
+ }
+ if (fail)
+ {
+					sumExpl.Match = false;
+ sumExpl.Value = 0.0f;
+ sumExpl.Description = "Failure to meet condition(s) of required/prohibited clause(s)";
+ return sumExpl;
+ }
+ else if (shouldMatchCount < minShouldMatch)
+ {
+					sumExpl.Match = false;
+ sumExpl.Value = 0.0f;
+ sumExpl.Description = "Failure to match minimum number " + "of optional clauses: " + minShouldMatch;
+ return sumExpl;
+ }
+
+				sumExpl.Match = coord > 0;
+ sumExpl.Value = sum;
+
+ float coordFactor = similarity.Coord(coord, maxCoord);
+ if (coordFactor == 1.0f)
+ // coord is no-op
+ return sumExpl;
+ // eliminate wrapper
+ else
+ {
+ ComplexExplanation result = new ComplexExplanation(sumExpl.IsMatch, sum * coordFactor, "product of:");
+ result.AddDetail(sumExpl);
+ result.AddDetail(new Explanation(coordFactor, "coord(" + coord + "/" + maxCoord + ")"));
+ return result;
+ }
+ }
+
+ public override Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer)
+ {
+ var required = new System.Collections.Generic.List<Scorer>();
+ var prohibited = new System.Collections.Generic.List<Scorer>();
+ var optional = new System.Collections.Generic.List<Scorer>();
+
+ System.Collections.Generic.IEnumerator<BooleanClause> cIter = Enclosing_Instance.clauses.GetEnumerator();
+ foreach (Weight w in weights)
+ {
+ cIter.MoveNext();
+ BooleanClause c = (BooleanClause) cIter.Current;
+ Scorer subScorer = w.Scorer(reader, true, false);
+ if (subScorer == null)
+ {
+ if (c.IsRequired)
+ {
+ return null;
+ }
+ }
+ else if (c.IsRequired)
+ {
+ required.Add(subScorer);
+ }
+ else if (c.IsProhibited)
+ {
+ prohibited.Add(subScorer);
+ }
+ else
+ {
+ optional.Add(subScorer);
+ }
+ }
+
+ // Check if we can return a BooleanScorer
+ if (!scoreDocsInOrder && topScorer && required.Count == 0 && prohibited.Count < 32)
+ {
+ return new BooleanScorer(similarity, Enclosing_Instance.minNrShouldMatch, optional, prohibited);
+ }
+
+ if (required.Count == 0 && optional.Count == 0)
+ {
+ // no required and optional clauses.
+ return null;
+ }
+ else if (optional.Count < Enclosing_Instance.minNrShouldMatch)
+ {
+ // either >1 req scorer, or there are 0 req scorers and at least 1
+ // optional scorer. Therefore if there are not enough optional scorers
+ // no documents will be matched by the query
+ return null;
+ }
+
+ // Return a BooleanScorer2
+ return new BooleanScorer2(similarity, Enclosing_Instance.minNrShouldMatch, required, prohibited, optional);
+ }
+
+ public override bool GetScoresDocsOutOfOrder()
+ {
+ int numProhibited = 0;
+ foreach (BooleanClause c in Enclosing_Instance.clauses)
+ {
+ if (c.IsRequired)
+ {
+ return false; // BS2 (in-order) will be used by scorer()
+ }
+ else if (c.IsProhibited)
+ {
+ ++numProhibited;
+ }
+ }
+
+ if (numProhibited > 32)
+ {
+ // cannot use BS
+ return false;
+ }
+
+ // scorer() will return an out-of-order scorer if requested.
+ return true;
+ }
+ }
+
+ public override Weight CreateWeight(Searcher searcher)
+ {
+ return new BooleanWeight(this, searcher);
+ }
+
+ public override Query Rewrite(IndexReader reader)
+ {
+ if (minNrShouldMatch == 0 && clauses.Count == 1)
+ {
+ // optimize 1-clause queries
+ BooleanClause c = clauses[0];
+ if (!c.IsProhibited)
+ {
+ // just return clause
+
+ Query query = c.Query.Rewrite(reader); // rewrite first
+
+ if (Boost != 1.0f)
+ {
+ // incorporate boost
+ if (query == c.Query)
+ // if rewrite was no-op
+ query = (Query) query.Clone(); // then clone before boost
+ query.Boost = Boost * query.Boost;
+ }
+
+ return query;
+ }
+ }
+
+ BooleanQuery clone = null; // recursively rewrite
+ for (int i = 0; i < clauses.Count; i++)
+ {
+ BooleanClause c = clauses[i];
+ Query query = c.Query.Rewrite(reader);
+ if (query != c.Query)
+ {
+ // clause rewrote: must clone
+ if (clone == null)
+ clone = (BooleanQuery) this.Clone();
+ clone.clauses[i] = new BooleanClause(query, c.Occur);
+ }
+ }
+ if (clone != null)
+ {
+ return clone; // some clauses rewrote
+ }
+ else
+ return this; // no clauses rewrote
+ }
+
+ // inherit javadoc
+ public override void ExtractTerms(System.Collections.Generic.ISet<Term> terms)
+ {
+ foreach(BooleanClause clause in clauses)
+ {
+ clause.Query.ExtractTerms(terms);
+ }
+ }
+
+ public override System.Object Clone()
+ {
+ BooleanQuery clone = (BooleanQuery) base.Clone();
+ clone.clauses = (EquatableList<BooleanClause>) this.clauses.Clone();
+ return clone;
+ }
+
+ /// <summary>Prints a user-readable version of this query. </summary>
+ public override System.String ToString(System.String field)
+ {
+ System.Text.StringBuilder buffer = new System.Text.StringBuilder();
+ bool needParens = (Boost != 1.0) || (MinimumNumberShouldMatch > 0);
+ if (needParens)
+ {
+ buffer.Append("(");
+ }
+
+ for (int i = 0; i < clauses.Count; i++)
+ {
+ BooleanClause c = clauses[i];
+ if (c.IsProhibited)
+ buffer.Append("-");
+ else if (c.IsRequired)
+ buffer.Append("+");
+
+ Query subQuery = c.Query;
+ if (subQuery != null)
+ {
+ if (subQuery is BooleanQuery)
+ {
+ // wrap sub-bools in parens
+ buffer.Append("(");
+ buffer.Append(subQuery.ToString(field));
+ buffer.Append(")");
+ }
+ else
+ {
+ buffer.Append(subQuery.ToString(field));
+ }
+ }
+ else
+ {
+ buffer.Append("null");
+ }
+
+ if (i != clauses.Count - 1)
+ buffer.Append(" ");
+ }
+
+ if (needParens)
+ {
+ buffer.Append(")");
+ }
+
+ if (MinimumNumberShouldMatch > 0)
+ {
+ buffer.Append('~');
+ buffer.Append(MinimumNumberShouldMatch);
+ }
+
+ if (Boost != 1.0f)
+ {
+ buffer.Append(ToStringUtils.Boost(Boost));
+ }
+
+ return buffer.ToString();
+ }
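+
+		// For example (illustrative): a query with a required "apache" clause, a
+		// prohibited "jakarta" clause and an optional "lucene" clause renders as
+		//   +apache -jakarta lucene
+		// with "~N" appended when MinimumNumberShouldMatch > 0 and a boost suffix
+		// when Boost != 1.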
+
+ /// <summary>Returns true iff <c>o</c> is equal to this. </summary>
+ public override bool Equals(System.Object o)
+ {
+ if (!(o is BooleanQuery))
+ return false;
+ BooleanQuery other = (BooleanQuery)o;
+ return (this.Boost == other.Boost)
+ && this.clauses.Equals(other.clauses)
+ && this.MinimumNumberShouldMatch == other.MinimumNumberShouldMatch
+ && this.disableCoord == other.disableCoord;
+ }
+
+ /// <summary>Returns a hash code value for this object.</summary>
+ public override int GetHashCode()
+ {
+ return BitConverter.ToInt32(BitConverter.GetBytes(Boost), 0) ^ clauses.GetHashCode() + MinimumNumberShouldMatch + (disableCoord ? 17 : 0);
+ }
+
+ IEnumerator IEnumerable.GetEnumerator()
+ {
+ return GetEnumerator();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/BooleanScorer.cs b/src/core/Search/BooleanScorer.cs
new file mode 100644
index 0000000..1a4be8d
--- /dev/null
+++ b/src/core/Search/BooleanScorer.cs
@@ -0,0 +1,405 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using IndexReader = Lucene.Net.Index.IndexReader;
+
+namespace Lucene.Net.Search
+{
+
+ /* Description from Doug Cutting (excerpted from
+ * LUCENE-1483):
+ *
+ * BooleanScorer uses a ~16k array to score windows of
+	* docs. So it scores docs 0-16k first, then docs 16k-32k,
+ * etc. For each window it iterates through all query terms
+ * and accumulates a score in table[doc%16k]. It also stores
+ * in the table a bitmask representing which terms
+ * contributed to the score. Non-zero scores are chained in
+ * a linked list. At the end of scoring each window it then
+ * iterates through the linked list and, if the bitmask
+ * matches the boolean constraints, collects a hit. For
+ * boolean queries with lots of frequent terms this can be
+ * much faster, since it does not need to update a priority
+ * queue for each posting, instead performing constant-time
+ * operations per posting. The only downside is that it
+ * results in hits being delivered out-of-order within the
+ * window, which means it cannot be nested within other
+ * scorers. But it works well as a top-level scorer.
+ *
+ * The new BooleanScorer2 implementation instead works by
+ * merging priority queues of postings, albeit with some
+ * clever tricks. For example, a pure conjunction (all terms
+ * required) does not require a priority queue. Instead it
+ * sorts the posting streams at the start, then repeatedly
+	* skips the first to the last. If the first ever equals
+ * the last, then there's a hit. When some terms are
+ * required and some terms are optional, the conjunction can
+ * be evaluated first, then the optional terms can all skip
+ * to the match and be added to the score. Thus the
+ * conjunction can reduce the number of priority queue
+ * updates for the optional terms. */
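+
+	/* Note for this port (derived from BucketTable below): the window here is
+	 * SIZE = 1 << 11 = 2048 buckets rather than ~16k, and a doc falls into bucket
+	 * doc & MASK, e.g. doc 5000 -> 5000 & 2047 = 904. */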
+
+ public sealed class BooleanScorer:Scorer
+ {
+ private void InitBlock()
+ {
+ bucketTable = new BucketTable();
+ }
+
+ private sealed class BooleanScorerCollector:Collector
+ {
+ private BucketTable bucketTable;
+ private int mask;
+ private Scorer scorer;
+
+ public BooleanScorerCollector(int mask, BucketTable bucketTable)
+ {
+ this.mask = mask;
+ this.bucketTable = bucketTable;
+ }
+ public override void Collect(int doc)
+ {
+ BucketTable table = bucketTable;
+ int i = doc & Lucene.Net.Search.BooleanScorer.BucketTable.MASK;
+ Bucket bucket = table.buckets[i];
+ if (bucket == null)
+ table.buckets[i] = bucket = new Bucket();
+
+ if (bucket.doc != doc)
+ {
+ // invalid bucket
+ bucket.doc = doc; // set doc
+ bucket.score = scorer.Score(); // initialize score
+ bucket.bits = mask; // initialize mask
+ bucket.coord = 1; // initialize coord
+
+ bucket.next = table.first; // push onto valid list
+ table.first = bucket;
+ }
+ else
+ {
+ // valid bucket
+ bucket.score += scorer.Score(); // increment score
+ bucket.bits |= mask; // add bits in mask
+ bucket.coord++; // increment coord
+ }
+ }
+
+ public override void SetNextReader(IndexReader reader, int docBase)
+ {
+ // not needed by this implementation
+ }
+
+ public override void SetScorer(Scorer scorer)
+ {
+ this.scorer = scorer;
+ }
+
+ public override bool AcceptsDocsOutOfOrder
+ {
+ get { return true; }
+ }
+ }
+
+ // An internal class which is used in score(Collector, int) for setting the
+ // current score. This is required since Collector exposes a setScorer method
+ // and implementations that need the score will call scorer.score().
+ // Therefore the only methods that are implemented are score() and doc().
+ private sealed class BucketScorer:Scorer
+ {
+
+ internal float score;
+ internal int doc = NO_MORE_DOCS;
+
+ public BucketScorer():base(null)
+ {
+ }
+
+ public override int Advance(int target)
+ {
+ return NO_MORE_DOCS;
+ }
+
+ public override int DocID()
+ {
+ return doc;
+ }
+
+ public override int NextDoc()
+ {
+ return NO_MORE_DOCS;
+ }
+
+ public override float Score()
+ {
+ return score;
+ }
+ }
+
+ internal sealed class Bucket
+ {
+ internal int doc = - 1; // tells if bucket is valid
+ internal float score; // incremental score
+ internal int bits; // used for bool constraints
+ internal int coord; // count of terms in score
+ internal Bucket next; // next valid bucket
+ }
+
+ /// <summary>A simple hash table of document scores within a range. </summary>
+ internal sealed class BucketTable
+ {
+ private void InitBlock()
+ {
+ buckets = new Bucket[SIZE];
+ }
+ public const int SIZE = 1 << 11;
+ public static readonly int MASK;
+
+ internal Bucket[] buckets;
+ internal Bucket first = null; // head of valid list
+
+ public BucketTable()
+ {
+ InitBlock();
+ }
+
+ public Collector NewCollector(int mask)
+ {
+ return new BooleanScorerCollector(mask, this);
+ }
+
+ public int Size()
+ {
+ return SIZE;
+ }
+ static BucketTable()
+ {
+ MASK = SIZE - 1;
+ }
+ }
+
+ internal sealed class SubScorer
+ {
+ public Scorer scorer;
+ public bool required = false;
+ public bool prohibited = false;
+ public Collector collector;
+ public SubScorer next;
+
+ public SubScorer(Scorer scorer, bool required, bool prohibited, Collector collector, SubScorer next)
+ {
+ this.scorer = scorer;
+ this.required = required;
+ this.prohibited = prohibited;
+ this.collector = collector;
+ this.next = next;
+ }
+ }
+
+ private SubScorer scorers = null;
+ private BucketTable bucketTable;
+ private int maxCoord = 1;
+ private float[] coordFactors;
+ private int requiredMask = 0;
+ private int prohibitedMask = 0;
+ private int nextMask = 1;
+ private int minNrShouldMatch;
+ private int end;
+ private Bucket current;
+ private int doc = - 1;
+
+ public /*internal*/ BooleanScorer(Similarity similarity, int minNrShouldMatch,
+ System.Collections.Generic.List<Scorer> optionalScorers, System.Collections.Generic.List<Scorer> prohibitedScorers)
+ : base(similarity)
+ {
+ InitBlock();
+ this.minNrShouldMatch = minNrShouldMatch;
+
+ if (optionalScorers != null && optionalScorers.Count > 0)
+ {
+ foreach (Scorer scorer in optionalScorers)
+ {
+ maxCoord++;
+ if (scorer.NextDoc() != NO_MORE_DOCS)
+ {
+ scorers = new SubScorer(scorer, false, false, bucketTable.NewCollector(0), scorers);
+ }
+ }
+ }
+
+ if (prohibitedScorers != null && prohibitedScorers.Count > 0)
+ {
+ foreach(Scorer scorer in prohibitedScorers)
+ {
+ int mask = nextMask;
+ nextMask = nextMask << 1;
+ prohibitedMask |= mask; // update prohibited mask
+ if (scorer.NextDoc() != NO_MORE_DOCS)
+ {
+ scorers = new SubScorer(scorer, false, true, bucketTable.NewCollector(mask), scorers);
+ }
+ }
+ }
+
+ coordFactors = new float[maxCoord];
+ Similarity sim = Similarity;
+ for (int i = 0; i < maxCoord; i++)
+ {
+ coordFactors[i] = sim.Coord(i, maxCoord - 1);
+ }
+ }
+
+ // firstDocID is ignored since nextDoc() initializes 'current'
+ public /*protected internal*/ override bool Score(Collector collector, int max, int firstDocID)
+ {
+ bool more;
+ Bucket tmp;
+ BucketScorer bs = new BucketScorer();
+ // The internal loop will set the score and doc before calling collect.
+ collector.SetScorer(bs);
+ do
+ {
+ bucketTable.first = null;
+
+ while (current != null)
+ {
+ // more queued
+
+ // check prohibited & required
+ if ((current.bits & prohibitedMask) == 0 && (current.bits & requiredMask) == requiredMask)
+ {
+
+ if (current.doc >= max)
+ {
+ tmp = current;
+ current = current.next;
+ tmp.next = bucketTable.first;
+ bucketTable.first = tmp;
+ continue;
+ }
+
+ if (current.coord >= minNrShouldMatch)
+ {
+ bs.score = current.score * coordFactors[current.coord];
+ bs.doc = current.doc;
+ collector.Collect(current.doc);
+ }
+ }
+
+ current = current.next; // pop the queue
+ }
+
+ if (bucketTable.first != null)
+ {
+ current = bucketTable.first;
+ bucketTable.first = current.next;
+ return true;
+ }
+
+ // refill the queue
+ more = false;
+ end += BucketTable.SIZE;
+ for (SubScorer sub = scorers; sub != null; sub = sub.next)
+ {
+ int subScorerDocID = sub.scorer.DocID();
+ if (subScorerDocID != NO_MORE_DOCS)
+ {
+ more |= sub.scorer.Score(sub.collector, end, subScorerDocID);
+ }
+ }
+ current = bucketTable.first;
+ }
+ while (current != null || more);
+
+ return false;
+ }
+
+ public override int Advance(int target)
+ {
+ throw new System.NotSupportedException();
+ }
+
+ public override int DocID()
+ {
+ return doc;
+ }
+
+ public override int NextDoc()
+ {
+ bool more;
+ do
+ {
+ while (bucketTable.first != null)
+ {
+ // more queued
+ current = bucketTable.first;
+ bucketTable.first = current.next; // pop the queue
+
+ // check prohibited & required, and minNrShouldMatch
+ if ((current.bits & prohibitedMask) == 0 && (current.bits & requiredMask) == requiredMask && current.coord >= minNrShouldMatch)
+ {
+ return doc = current.doc;
+ }
+ }
+
+ // refill the queue
+ more = false;
+ end += BucketTable.SIZE;
+ for (SubScorer sub = scorers; sub != null; sub = sub.next)
+ {
+ Scorer scorer = sub.scorer;
+ sub.collector.SetScorer(scorer);
+ int doc = scorer.DocID();
+ while (doc < end)
+ {
+ sub.collector.Collect(doc);
+ doc = scorer.NextDoc();
+ }
+ more |= (doc != NO_MORE_DOCS);
+ }
+ }
+ while (bucketTable.first != null || more);
+
+ return this.doc = NO_MORE_DOCS;
+ }
+
+ public override float Score()
+ {
+ return current.score * coordFactors[current.coord];
+ }
+
+ public override void Score(Collector collector)
+ {
+ Score(collector, System.Int32.MaxValue, NextDoc());
+ }
+
+ public override System.String ToString()
+ {
+ System.Text.StringBuilder buffer = new System.Text.StringBuilder();
+ buffer.Append("boolean(");
+ for (SubScorer sub = scorers; sub != null; sub = sub.next)
+ {
+ buffer.Append(sub.scorer.ToString());
+ buffer.Append(" ");
+ }
+ buffer.Append(")");
+ return buffer.ToString();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/BooleanScorer2.cs b/src/core/Search/BooleanScorer2.cs
new file mode 100644
index 0000000..3c8c611
--- /dev/null
+++ b/src/core/Search/BooleanScorer2.cs
@@ -0,0 +1,417 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Search
+{
+
+ /* See the description in BooleanScorer.java, comparing
+ * BooleanScorer & BooleanScorer2 */
+
+ /// <summary>An alternative to BooleanScorer that also allows a minimum number
+ /// of optional scorers that should match.
+	/// <br/>Implements skipTo(), and has no limitations on the number of added scorers.
+ /// <br/>Uses ConjunctionScorer, DisjunctionScorer, ReqOptScorer and ReqExclScorer.
+ /// </summary>
+ class BooleanScorer2 : Scorer
+ {
+ private class AnonymousClassDisjunctionSumScorer:DisjunctionSumScorer
+ {
+ private void InitBlock(BooleanScorer2 enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private BooleanScorer2 enclosingInstance;
+ public BooleanScorer2 Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ internal AnonymousClassDisjunctionSumScorer(BooleanScorer2 enclosingInstance, System.Collections.Generic.IList<Scorer> scorers, int minNrShouldMatch)
+ : base(scorers, minNrShouldMatch)
+ {
+ InitBlock(enclosingInstance);
+ }
+ private int lastScoredDoc = - 1;
+ // Save the score of lastScoredDoc, so that we don't compute it more than
+ // once in score().
+ private float lastDocScore = System.Single.NaN;
+ public override float Score()
+ {
+ int doc = DocID();
+ if (doc >= lastScoredDoc)
+ {
+ if (doc > lastScoredDoc)
+ {
+ lastDocScore = base.Score();
+ lastScoredDoc = doc;
+ }
+ Enclosing_Instance.coordinator.nrMatchers += base.nrMatchers;
+ }
+ return lastDocScore;
+ }
+ }
+ private class AnonymousClassConjunctionScorer:ConjunctionScorer
+ {
+ private void InitBlock(int requiredNrMatchers, BooleanScorer2 enclosingInstance)
+ {
+ this.requiredNrMatchers = requiredNrMatchers;
+ this.enclosingInstance = enclosingInstance;
+ }
+ private int requiredNrMatchers;
+ private BooleanScorer2 enclosingInstance;
+ public BooleanScorer2 Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ internal AnonymousClassConjunctionScorer(int requiredNrMatchers, BooleanScorer2 enclosingInstance, Lucene.Net.Search.Similarity defaultSimilarity, System.Collections.Generic.IList<Scorer> requiredScorers)
+ : base(defaultSimilarity, requiredScorers)
+ {
+ InitBlock(requiredNrMatchers, enclosingInstance);
+ }
+ private int lastScoredDoc = - 1;
+ // Save the score of lastScoredDoc, so that we don't compute it more than
+ // once in score().
+ private float lastDocScore = System.Single.NaN;
+ public override float Score()
+ {
+ int doc = DocID();
+ if (doc >= lastScoredDoc)
+ {
+ if (doc > lastScoredDoc)
+ {
+ lastDocScore = base.Score();
+ lastScoredDoc = doc;
+ }
+ Enclosing_Instance.coordinator.nrMatchers += requiredNrMatchers;
+ }
+				// All scorers match, so with defaultSimilarity the base.Score() call
+				// always has 1 as the coordination factor.
+ // Therefore the sum of the scores of the requiredScorers
+ // is used as score.
+ return lastDocScore;
+ }
+ }
+
+ private System.Collections.Generic.List<Scorer> requiredScorers;
+ private System.Collections.Generic.List<Scorer> optionalScorers;
+ private System.Collections.Generic.List<Scorer> prohibitedScorers;
+
+ private class Coordinator
+ {
+ public Coordinator(BooleanScorer2 enclosingInstance)
+ {
+ InitBlock(enclosingInstance);
+ }
+ private void InitBlock(BooleanScorer2 enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private BooleanScorer2 enclosingInstance;
+ public BooleanScorer2 Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ internal float[] coordFactors = null;
+ internal int maxCoord = 0; // to be increased for each non prohibited scorer
+ internal int nrMatchers; // to be increased by score() of match counting scorers.
+
+ internal virtual void Init()
+ {
+ // use after all scorers have been added.
+ coordFactors = new float[maxCoord + 1];
+ Similarity sim = Enclosing_Instance.Similarity;
+ for (int i = 0; i <= maxCoord; i++)
+ {
+ coordFactors[i] = sim.Coord(i, maxCoord);
+ }
+ }
+ }
+
+ private Coordinator coordinator;
+
+ /// <summary>The scorer to which all scoring will be delegated,
+ /// except for computing and using the coordination factor.
+ /// </summary>
+ private Scorer countingSumScorer;
+
+ /// <summary>The number of optionalScorers that need to match (if there are any) </summary>
+ private int minNrShouldMatch;
+
+ private int doc = - 1;
+
+ /// <summary> Creates a <see cref="Scorer" /> with the given similarity and lists of required,
+		/// prohibited and optional scorers. If no required scorers are added, at least
+ /// one of the optional scorers will have to match during the search.
+ ///
+ /// </summary>
+ /// <param name="similarity">The similarity to be used.
+ /// </param>
+ /// <param name="minNrShouldMatch">The minimum number of optional added scorers that should match
+ /// during the search. In case no required scorers are added, at least
+ /// one of the optional scorers will have to match during the search.
+ /// </param>
+ /// <param name="required">the list of required scorers.
+ /// </param>
+ /// <param name="prohibited">the list of prohibited scorers.
+ /// </param>
+ /// <param name="optional">the list of optional scorers.
+ /// </param>
+ public BooleanScorer2(Similarity similarity, int minNrShouldMatch,
+ System.Collections.Generic.List<Scorer> required,
+ System.Collections.Generic.List<Scorer> prohibited,
+ System.Collections.Generic.List<Scorer> optional)
+ : base(similarity)
+ {
+ if (minNrShouldMatch < 0)
+ {
+ throw new System.ArgumentException("Minimum number of optional scorers should not be negative");
+ }
+ coordinator = new Coordinator(this);
+ this.minNrShouldMatch = minNrShouldMatch;
+
+ optionalScorers = optional;
+ coordinator.maxCoord += optional.Count;
+
+ requiredScorers = required;
+ coordinator.maxCoord += required.Count;
+
+ prohibitedScorers = prohibited;
+
+ coordinator.Init();
+ countingSumScorer = MakeCountingSumScorer();
+ }
+
+ /// <summary>Count a scorer as a single match. </summary>
+ private class SingleMatchScorer:Scorer
+ {
+ private void InitBlock(BooleanScorer2 enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private BooleanScorer2 enclosingInstance;
+ public BooleanScorer2 Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ private Scorer scorer;
+ private int lastScoredDoc = - 1;
+ // Save the score of lastScoredDoc, so that we don't compute it more than
+ // once in score().
+ private float lastDocScore = System.Single.NaN;
+
+ internal SingleMatchScorer(BooleanScorer2 enclosingInstance, Scorer scorer):base(scorer.Similarity)
+ {
+ InitBlock(enclosingInstance);
+ this.scorer = scorer;
+ }
+ public override float Score()
+ {
+ int doc = DocID();
+ if (doc >= lastScoredDoc)
+ {
+ if (doc > lastScoredDoc)
+ {
+ lastDocScore = scorer.Score();
+ lastScoredDoc = doc;
+ }
+ Enclosing_Instance.coordinator.nrMatchers++;
+ }
+ return lastDocScore;
+ }
+
+ public override int DocID()
+ {
+ return scorer.DocID();
+ }
+
+ public override int NextDoc()
+ {
+ return scorer.NextDoc();
+ }
+
+ public override int Advance(int target)
+ {
+ return scorer.Advance(target);
+ }
+ }
+
+ private Scorer CountingDisjunctionSumScorer(System.Collections.Generic.List<Scorer> scorers, int minNrShouldMatch)
+ {
+ // each scorer from the list counted as a single matcher
+ return new AnonymousClassDisjunctionSumScorer(this, scorers, minNrShouldMatch);
+ }
+
+ private static readonly Similarity defaultSimilarity;
+
+ private Scorer CountingConjunctionSumScorer(System.Collections.Generic.List<Scorer> requiredScorers)
+ {
+ // each scorer from the list counted as a single matcher
+ int requiredNrMatchers = requiredScorers.Count;
+ return new AnonymousClassConjunctionScorer(requiredNrMatchers, this, defaultSimilarity, requiredScorers);
+ }
+
+ private Scorer DualConjunctionSumScorer(Scorer req1, Scorer req2)
+ {
+ // non counting.
+ return new ConjunctionScorer(defaultSimilarity, new Scorer[]{req1, req2});
+ // All scorers match, so defaultSimilarity always has 1 as
+ // the coordination factor.
+ // Therefore the sum of the scores of two scorers
+ // is used as score.
+ }
+
+ /// <summary>Returns the scorer to be used for match counting and score summing.
+ /// Uses requiredScorers, optionalScorers and prohibitedScorers.
+ /// </summary>
+ private Scorer MakeCountingSumScorer()
+ {
+ // each scorer counted as a single matcher
+ return (requiredScorers.Count == 0)?MakeCountingSumScorerNoReq():MakeCountingSumScorerSomeReq();
+ }
+
+ private Scorer MakeCountingSumScorerNoReq()
+ {
+ // No required scorers
+ // minNrShouldMatch optional scorers are required, but at least 1
+ int nrOptRequired = (minNrShouldMatch < 1)?1:minNrShouldMatch;
+ Scorer requiredCountingSumScorer;
+ if (optionalScorers.Count > nrOptRequired)
+ requiredCountingSumScorer = CountingDisjunctionSumScorer(optionalScorers, nrOptRequired);
+ else if (optionalScorers.Count == 1)
+ requiredCountingSumScorer = new SingleMatchScorer(this, optionalScorers[0]);
+ else
+ requiredCountingSumScorer = CountingConjunctionSumScorer(optionalScorers);
+ return AddProhibitedScorers(requiredCountingSumScorer);
+ }
+
+ private Scorer MakeCountingSumScorerSomeReq()
+ {
+ // At least one required scorer.
+ if (optionalScorers.Count == minNrShouldMatch)
+ {
+ // all optional scorers also required.
+ var allReq = new System.Collections.Generic.List<Scorer>(requiredScorers);
+ allReq.AddRange(optionalScorers);
+ return AddProhibitedScorers(CountingConjunctionSumScorer(allReq));
+ }
+ else
+ {
+				// optionalScorers.Count > minNrShouldMatch, and at least one required scorer
+ Scorer requiredCountingSumScorer =
+ requiredScorers.Count == 1
+ ? new SingleMatchScorer(this, requiredScorers[0])
+ : CountingConjunctionSumScorer(requiredScorers);
+ if (minNrShouldMatch > 0)
+ {
+ // use a required disjunction scorer over the optional scorers
+ return AddProhibitedScorers(DualConjunctionSumScorer(requiredCountingSumScorer, CountingDisjunctionSumScorer(optionalScorers, minNrShouldMatch)));
+ }
+ else
+ {
+ // minNrShouldMatch == 0
+ return new ReqOptSumScorer(AddProhibitedScorers(requiredCountingSumScorer),
+ optionalScorers.Count == 1
+ ? new SingleMatchScorer(this, optionalScorers[0])
+ : CountingDisjunctionSumScorer(optionalScorers, 1));
+ }
+ }
+ }
+
+ /// <summary>Returns the scorer to be used for match counting and score summing.
+ /// Uses the given required scorer and the prohibitedScorers.
+ /// </summary>
+ /// <param name="requiredCountingSumScorer">A required scorer already built.
+ /// </param>
+ private Scorer AddProhibitedScorers(Scorer requiredCountingSumScorer)
+ {
+ return (prohibitedScorers.Count == 0)
+ ? requiredCountingSumScorer
+ : new ReqExclScorer(requiredCountingSumScorer,
+ ((prohibitedScorers.Count == 1)
+ ? prohibitedScorers[0]
+ : new DisjunctionSumScorer(prohibitedScorers)));
+ }
+
+ /// <summary>Scores and collects all matching documents.</summary>
+ /// <param name="collector">The collector to which all matching documents are passed through.
+ /// </param>
+ public override void Score(Collector collector)
+ {
+ collector.SetScorer(this);
+ while ((doc = countingSumScorer.NextDoc()) != NO_MORE_DOCS)
+ {
+ collector.Collect(doc);
+ }
+ }
+
+ public /*protected internal*/ override bool Score(Collector collector, int max, int firstDocID)
+ {
+ doc = firstDocID;
+ collector.SetScorer(this);
+ while (doc < max)
+ {
+ collector.Collect(doc);
+ doc = countingSumScorer.NextDoc();
+ }
+ return doc != NO_MORE_DOCS;
+ }
+
+ public override int DocID()
+ {
+ return doc;
+ }
+
+ public override int NextDoc()
+ {
+ return doc = countingSumScorer.NextDoc();
+ }
+
+ public override float Score()
+ {
+ coordinator.nrMatchers = 0;
+ float sum = countingSumScorer.Score();
+ return sum * coordinator.coordFactors[coordinator.nrMatchers];
+ }
+
+ public override int Advance(int target)
+ {
+ return doc = countingSumScorer.Advance(target);
+ }
+
+ static BooleanScorer2()
+ {
+ defaultSimilarity = Search.Similarity.Default;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/CachingSpanFilter.cs b/src/core/Search/CachingSpanFilter.cs
new file mode 100644
index 0000000..89a6203
--- /dev/null
+++ b/src/core/Search/CachingSpanFilter.cs
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using System.Runtime.InteropServices;
+using IndexReader = Lucene.Net.Index.IndexReader;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> Wraps another SpanFilter's result and caches it. The purpose is to allow
+ /// filters to simply filter, and then wrap with this class to add caching.
+ /// </summary>
+ [Serializable]
+ public class CachingSpanFilter:SpanFilter
+ {
+ private SpanFilter filter;
+
+ /// <summary> A transient Filter cache (internal because of test)</summary>
+ [NonSerialized]
+ internal CachingWrapperFilter.FilterCache<SpanFilterResult> cache;
+
+ /// <summary>
+ /// New deletions always result in a cache miss, by default
+ /// (<see cref="CachingWrapperFilter.DeletesMode.RECACHE" />.
+ /// <param name="filter">Filter to cache results of
+ /// </param>
+ /// </summary>
+ public CachingSpanFilter(SpanFilter filter): this(filter, CachingWrapperFilter.DeletesMode.RECACHE)
+ {
+
+ }
+
+ /// <summary>New deletions always result in a cache miss, specify the <paramref name="deletesMode"/></summary>
+ /// <param name="filter">Filter to cache results of</param>
+ /// <param name="deletesMode">See <see cref="CachingWrapperFilter.DeletesMode" /></param>
+ public CachingSpanFilter(SpanFilter filter, CachingWrapperFilter.DeletesMode deletesMode)
+ {
+ this.filter = filter;
+ if (deletesMode == CachingWrapperFilter.DeletesMode.DYNAMIC)
+ {
+ throw new System.ArgumentException("DeletesMode.DYNAMIC is not supported");
+ }
+ this.cache = new AnonymousFilterCache(deletesMode);
+ }
+
+ class AnonymousFilterCache : CachingWrapperFilter.FilterCache<SpanFilterResult>
+ {
+ public AnonymousFilterCache(CachingWrapperFilter.DeletesMode deletesMode) : base(deletesMode)
+ {
+ }
+
+ protected override SpanFilterResult MergeDeletes(IndexReader reader, SpanFilterResult docIdSet)
+ {
+ throw new System.ArgumentException("DeletesMode.DYNAMIC is not supported");
+ }
+ }
+
+ public override DocIdSet GetDocIdSet(IndexReader reader)
+ {
+ SpanFilterResult result = GetCachedResult(reader);
+ return result != null?result.DocIdSet:null;
+ }
+
+ // for testing
+ public int hitCount, missCount;
+
+ private SpanFilterResult GetCachedResult(IndexReader reader)
+ {
+ object coreKey = reader.FieldCacheKey;
+ object delCoreKey = reader.HasDeletions ? reader.DeletesCacheKey : coreKey;
+
+ SpanFilterResult result = cache.Get(reader, coreKey, delCoreKey);
+ if (result != null) {
+ hitCount++;
+ return result;
+ }
+
+ missCount++;
+ result = filter.BitSpans(reader);
+
+ cache.Put(coreKey, delCoreKey, result);
+ return result;
+ }
+
+
+ public override SpanFilterResult BitSpans(IndexReader reader)
+ {
+ return GetCachedResult(reader);
+ }
+
+ public override System.String ToString()
+ {
+ return "CachingSpanFilter(" + filter + ")";
+ }
+
+ public override bool Equals(System.Object o)
+ {
+ if (!(o is CachingSpanFilter))
+ return false;
+ return this.filter.Equals(((CachingSpanFilter) o).filter);
+ }
+
+ public override int GetHashCode()
+ {
+ return filter.GetHashCode() ^ 0x1117BF25;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/CachingWrapperFilter.cs b/src/core/Search/CachingWrapperFilter.cs
new file mode 100644
index 0000000..4e8023a
--- /dev/null
+++ b/src/core/Search/CachingWrapperFilter.cs
@@ -0,0 +1,279 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using Lucene.Net.Support;
+using IndexReader = Lucene.Net.Index.IndexReader;
+using OpenBitSetDISI = Lucene.Net.Util.OpenBitSetDISI;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> Wraps another filter's result and caches it. The purpose is to allow
+ /// filters to simply filter, and then wrap with this class to add caching.
+ /// </summary>
+ [Serializable]
+ public class CachingWrapperFilter:Filter
+ {
+ protected internal Filter filter;
+
+ ///
+ /// Expert: Specifies how new deletions against a reopened
+ /// reader should be handled.
+ ///
+ /// <para>The default is IGNORE, which means the cache entry
+ /// will be re-used for a given segment, even when that
+ /// segment has been reopened due to changes in deletions.
+ /// This is a big performance gain, especially with
+		/// near-real-time readers, since you don't hit a cache
+ /// miss on every reopened reader for prior segments.</para>
+ ///
+ /// <para>However, in some cases this can cause invalid query
+ /// results, allowing deleted documents to be returned.
+ /// This only happens if the main query does not rule out
+ /// deleted documents on its own, such as a toplevel
+ /// ConstantScoreQuery. To fix this, use RECACHE to
+ /// re-create the cached filter (at a higher per-reopen
+ /// cost, but at faster subsequent search performance), or
+ /// use DYNAMIC to dynamically intersect deleted docs (fast
+ /// reopen time but some hit to search performance).</para>
+ ///
+ public enum DeletesMode { IGNORE, RECACHE, DYNAMIC }
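+
+		// Illustrative sketch only (not part of this file): force re-caching whenever a
+		// segment's deletions change, so the cached filter never returns deleted docs.
+		// QueryWrapperFilter, TermQuery and Term are assumed to be available elsewhere
+		// in the library.
+		//
+		//   Filter raw = new QueryWrapperFilter(new TermQuery(new Term("type", "article")));
+		//   Filter cached = new CachingWrapperFilter(raw, CachingWrapperFilter.DeletesMode.RECACHE);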
+
+ internal FilterCache<DocIdSet> cache;
+
+ [Serializable]
+ abstract internal class FilterCache<T> where T : class
+ {
+ /*
+ * A transient Filter cache (package private because of test)
+ */
+ // NOTE: not final so that we can dynamically re-init
+ // after de-serialize
+ volatile IDictionary<Object, T> cache;
+
+ private DeletesMode deletesMode;
+
+ public FilterCache(DeletesMode deletesMode)
+ {
+ this.deletesMode = deletesMode;
+ }
+
+ public T Get(IndexReader reader, object coreKey, object delCoreKey)
+ {
+ lock (this)
+ {
+ T value;
+
+ if (cache == null)
+ {
+ cache = new WeakDictionary<object, T>();
+ }
+
+ if (deletesMode == DeletesMode.IGNORE)
+ {
+ // key on core
+ value = cache[coreKey];
+ }
+ else if (deletesMode == DeletesMode.RECACHE)
+ {
+ // key on deletes, if any, else core
+ value = cache[delCoreKey];
+ }
+ else
+ {
+
+ System.Diagnostics.Debug.Assert(deletesMode == DeletesMode.DYNAMIC);
+
+ // first try for exact match
+ value = cache[delCoreKey];
+
+ if (value == null)
+ {
+ // now for core match, but dynamically AND NOT
+ // deletions
+ value = cache[coreKey];
+ if (value != null && reader.HasDeletions)
+ {
+ value = MergeDeletes(reader, value);
+ }
+ }
+ }
+ return value;
+ }
+
+ }
+
+ protected abstract T MergeDeletes(IndexReader reader, T value);
+
+ public void Put(object coreKey, object delCoreKey, T value)
+ {
+ lock (this)
+ {
+ if (deletesMode == DeletesMode.IGNORE)
+ {
+ cache[coreKey] = value;
+ }
+ else if (deletesMode == DeletesMode.RECACHE)
+ {
+ cache[delCoreKey] = value;
+ }
+ else
+ {
+ cache[coreKey] = value;
+ cache[delCoreKey] = value;
+ }
+ }
+ }
+ }
+
+ /// <summary>
+ /// New deletes are ignored by default, which gives higher
+ /// cache hit rate on reopened readers. Most of the time
+ /// this is safe, because the filter will be AND'd with a
+ /// Query that fully enforces deletions. If instead you
+ /// need this filter to always enforce deletions, pass
+ /// either <see cref="DeletesMode.RECACHE" /> or
+ /// <see cref="DeletesMode.DYNAMIC"/>.
+ /// </summary>
+ /// <param name="filter">Filter to cache results of</param>
+ ///
+ public CachingWrapperFilter(Filter filter) : this(filter, DeletesMode.IGNORE)
+ {
+ }
+
+ /// <summary>
+ /// Expert: by default, the cached filter will be shared
+ /// across reopened segments that only had changes to their
+ /// deletions.
+ /// </summary>
+ /// <param name="filter">Filter to cache results of</param>
+ /// <param name="deletesMode">See <see cref="DeletesMode" /></param>
+ ///
+ public CachingWrapperFilter(Filter filter, DeletesMode deletesMode)
+ {
+ this.filter = filter;
+ cache = new AnonymousFilterCache(deletesMode);
+
+ //cache = new FilterCache(deletesMode)
+ // {
+ // public Object mergeDeletes(final IndexReader r, final Object docIdSet) {
+ // return new FilteredDocIdSet((DocIdSet) docIdSet) {
+ // protected boolean match(int docID) {
+ // return !r.isDeleted(docID);
+ // }
+ // };
+ // }
+ //};
+ }
+
+ class AnonymousFilterCache : FilterCache<DocIdSet>
+ {
+ class AnonymousFilteredDocIdSet : FilteredDocIdSet
+ {
+ IndexReader r;
+ public AnonymousFilteredDocIdSet(DocIdSet innerSet, IndexReader r) : base(innerSet)
+ {
+ this.r = r;
+ }
+ public override bool Match(int docid)
+ {
+ return !r.IsDeleted(docid);
+ }
+ }
+
+ public AnonymousFilterCache(DeletesMode deletesMode) : base(deletesMode)
+ { }
+
+ protected override DocIdSet MergeDeletes(IndexReader reader, DocIdSet docIdSet)
+ {
+ return new AnonymousFilteredDocIdSet(docIdSet, reader);
+ }
+ }
+
+ /// <summary>Provide the DocIdSet to be cached, using the DocIdSet provided
+ /// by the wrapped Filter.
+ /// This implementation returns the given DocIdSet.
+ /// </summary>
+ protected internal virtual DocIdSet DocIdSetToCache(DocIdSet docIdSet, IndexReader reader)
+ {
+ if (docIdSet == null)
+ {
+ // this is better than returning null, as the nonnull result can be cached
+ return DocIdSet.EMPTY_DOCIDSET;
+ }
+ else if (docIdSet.IsCacheable) {
+ return docIdSet;
+ }
+ else
+ {
+ DocIdSetIterator it = docIdSet.Iterator();
+ // null is allowed to be returned by iterator(),
+ // in this case we wrap with the empty set,
+ // which is cacheable.
+ return (it == null) ? DocIdSet.EMPTY_DOCIDSET : new OpenBitSetDISI(it, reader.MaxDoc);
+ }
+ }
+
+ // for testing
+ public int hitCount, missCount;
+
+ public override DocIdSet GetDocIdSet(IndexReader reader)
+ {
+ object coreKey = reader.FieldCacheKey;
+ object delCoreKey = reader.HasDeletions ? reader.DeletesCacheKey : coreKey;
+
+ DocIdSet docIdSet = cache.Get(reader, coreKey, delCoreKey);
+
+ if (docIdSet != null)
+ {
+ hitCount++;
+ return docIdSet;
+ }
+ missCount++;
+ // cache miss
+ docIdSet = DocIdSetToCache(filter.GetDocIdSet(reader), reader);
+
+ if (docIdSet != null)
+ {
+ cache.Put(coreKey, delCoreKey, docIdSet);
+ }
+
+ return docIdSet;
+ }
+
+ public override System.String ToString()
+ {
+ return "CachingWrapperFilter(" + filter + ")";
+ }
+
+ public override bool Equals(System.Object o)
+ {
+ if (!(o is CachingWrapperFilter))
+ return false;
+ return this.filter.Equals(((CachingWrapperFilter) o).filter);
+ }
+
+ public override int GetHashCode()
+ {
+ return filter.GetHashCode() ^ 0x1117BF25;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/Collector.cs b/src/core/Search/Collector.cs
new file mode 100644
index 0000000..e1b02fe
--- /dev/null
+++ b/src/core/Search/Collector.cs
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using IndexReader = Lucene.Net.Index.IndexReader;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> <p/>Expert: Collectors are primarily meant to be used to
+ /// gather raw results from a search, and implement sorting
+ /// or custom result filtering, collation, etc. <p/>
+ ///
+ /// <p/>Lucene's core collectors are derived from Collector.
+ /// Likely your application can use one of these classes, or
+ /// subclass <see cref="TopDocsCollector{T}" />, instead of
+ /// implementing Collector directly:
+ ///
+ /// <list type="bullet">
+ ///
+ /// <item><see cref="TopDocsCollector{T}" /> is an abstract base class
+ /// that assumes you will retrieve the top N docs,
+ /// according to some criteria, after collection is
+ /// done. </item>
+ ///
+ /// <item><see cref="TopScoreDocCollector" /> is a concrete subclass of
+ /// <see cref="TopDocsCollector{T}" /> and sorts according to score +
+ /// docID. This is used internally by the <see cref="IndexSearcher" />
+ /// search methods that do not take an
+ /// explicit <see cref="Sort" />. It is likely the most frequently
+ /// used collector.</item>
+ ///
+ /// <item><see cref="TopFieldCollector" /> subclasses <see cref="TopDocsCollector{T}" />
+ /// and sorts according to a specified
+ /// <see cref="Sort" /> object (sort by field). This is used
+ /// internally by the <see cref="IndexSearcher" /> search methods
+ /// that take an explicit <see cref="Sort" />.</item>
+ ///
+ /// <item><see cref="TimeLimitingCollector" />, which wraps any other
+ /// Collector and aborts the search if it's taken too much
+ /// time.</item>
+ ///
+ /// <item><see cref="PositiveScoresOnlyCollector" /> wraps any other
+ /// Collector and prevents collection of hits whose score
+ /// is &lt;= 0.0</item>
+ ///
+ /// </list>
+ ///
+ /// <p/>Collector decouples the score from the collected doc:
+ /// the score computation is skipped entirely if it's not
+ /// needed. Collectors that do need the score should
+ /// implement the <see cref="SetScorer" /> method, to hold onto the
+ /// passed <see cref="Scorer" /> instance, and call <see cref="Scorer.Score()" />
+ /// within the collect method to compute the
+ /// current hit's score. If your collector may request the
+ /// score for a single hit multiple times, you should use
+ /// <see cref="ScoreCachingWrappingScorer" />. <p/>
+ ///
+ /// <p/><b>NOTE:</b> The doc that is passed to the collect
+ /// method is relative to the current reader. If your
+ /// collector needs to resolve this to the docID space of the
+ /// Multi*Reader, you must re-base it by recording the
+ /// docBase from the most recent <see cref="SetNextReader" /> call. Here's
+ /// a simple example showing how to collect docIDs into a
+ /// BitArray:<p/>
+ ///
+ /// <code>
+ /// public class BitArrayCollector : Collector
+ /// {
+ ///     private readonly System.Collections.BitArray bits;
+ ///     private int docBase;
+ ///
+ ///     public BitArrayCollector(System.Collections.BitArray bits) { this.bits = bits; }
+ ///
+ ///     <em>// ignore scorer</em>
+ ///     public override void SetScorer(Scorer scorer) { }
+ ///
+ ///     <em>// accept docs out of order (for a BitArray it doesn't matter)</em>
+ ///     public override bool AcceptsDocsOutOfOrder { get { return true; } }
+ ///
+ ///     public override void Collect(int doc) { bits.Set(doc + docBase, true); }
+ ///
+ ///     public override void SetNextReader(IndexReader reader, int docBase) { this.docBase = docBase; }
+ /// }
+ ///
+ /// Searcher searcher = new IndexSearcher(indexReader);
+ /// var bits = new System.Collections.BitArray(indexReader.MaxDoc);
+ /// searcher.Search(query, new BitArrayCollector(bits));
+ /// </code>
+ ///
+ /// <p/>Not all collectors will need to rebase the docID. For
+ /// example, a collector that simply counts the total number
+ /// of hits would skip it.<p/>
+ ///
+ /// <p/><b>NOTE:</b> Prior to 2.9, Lucene silently filtered
+ /// out hits with score &lt;= 0. As of 2.9, the core Collectors
+ /// no longer do that. It's very unusual to have such hits
+ /// (a negative query boost, or function query returning
+ /// negative custom scores, could cause it to happen). If
+ /// you need that behavior, use <see cref="PositiveScoresOnlyCollector" />.<p/>
+ ///
+ /// <p/><b>NOTE:</b> This API is experimental and might change
+ /// in incompatible ways in the next release.<p/>
+ ///
+ /// </summary>
+ /// <since> 2.9
+ /// </since>
+ public abstract class Collector
+ {
+
+ /// <summary> Called before successive calls to <see cref="Collect(int)" />. Implementations
+ /// that need the score of the current document (passed-in to
+ /// <see cref="Collect(int)" />), should save the passed-in Scorer and call
+ /// scorer.score() when needed.
+ /// </summary>
+ public abstract void SetScorer(Scorer scorer);
+
+ /// <summary> Called once for every document matching a query, with the unbased document
+ /// number.
+ ///
+ /// <p/>
+ /// Note: This is called in an inner search loop. For good search performance,
+ /// implementations of this method should not call <see cref="Searcher.Doc(int)" /> or
+ /// <see cref="Lucene.Net.Index.IndexReader.Document(int)" /> on every hit.
+ /// Doing so can slow searches by an order of magnitude or more.
+ /// </summary>
+ public abstract void Collect(int doc);
+
+ /// <summary> Called before collecting from each IndexReader. All doc ids in
+ /// <see cref="Collect(int)" /> will correspond to <c>reader</c>.
+ ///
+ /// Add docBase to the current IndexReader's internal document id to re-base ids
+ /// in <see cref="Collect(int)" />.
+ ///
+ /// </summary>
+ /// <param name="reader">next IndexReader
+ /// </param>
+ /// <param name="docBase">value to add to doc ids from <c>reader</c> when re-basing them
+ /// </param>
+ public abstract void SetNextReader(IndexReader reader, int docBase);
+
+ /// <summary>
+ /// Return <c>true</c> if this collector does not
+ /// require the matching docIDs to be delivered in int sort
+ /// order (smallest to largest) to <see cref="Collect" />.
+ /// <p/> Most Lucene Query implementations will visit
+ /// matching docIDs in order. However, some queries
+ /// (currently limited to certain cases of <see cref="BooleanQuery" />)
+ /// can achieve faster searching if the
+ /// <c>Collector</c> allows them to deliver the
+ /// docIDs out of order.
+ /// <p/> Many collectors don't mind getting docIDs out of
+ /// order, so it's important to return <c>true</c>
+ /// here.
+ /// </summary>
+ /// <value> </value>
+ public abstract bool AcceptsDocsOutOfOrder { get; }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/ComplexExplanation.cs b/src/core/Search/ComplexExplanation.cs
new file mode 100644
index 0000000..c794f18
--- /dev/null
+++ b/src/core/Search/ComplexExplanation.cs
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary>Expert: Describes the score computation for document and query, and
+ /// can distinguish a match independent of a positive value.
+ /// </summary>
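+ /// <example>
+ /// A hedged sketch, not part of the original sources; the values are illustrative only:
+ /// <code>
+ /// var expl = new ComplexExplanation(true, 0.0f, "filter matched, constant score");
+ /// bool matched = expl.IsMatch;   // true, even though the value is not positive
+ /// </code>
+ /// </example>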
+ [Serializable]
+ public class ComplexExplanation:Explanation
+ {
+ private System.Boolean? match;
+
+ public ComplexExplanation():base()
+ {
+ }
+
+ public ComplexExplanation(bool match, float value_Renamed, System.String description):base(value_Renamed, description)
+ {
+ this.match = match;
+ }
+
+ /// <summary> The match status of this explanation node.</summary>
+ /// <value> May be null if match status is unknown
+ /// </value>
+ public virtual bool? Match
+ {
+ get { return match; }
+ set { match = value; }
+ }
+
+ /// <summary> Indicates whether or not this Explanation models a good match.
+ ///
+ /// <p/>
+ /// If the match status is explicitly set (i.e.: not null) this method
+ /// uses it; otherwise it defers to the superclass.
+ /// <p/>
+ /// </summary>
+ public override bool IsMatch
+ {
+ get
+ {
+ System.Boolean? m = Match;
+ return m ?? base.IsMatch;
+ }
+ }
+
+ protected internal override string Summary
+ {
+ get
+ {
+ if (!match.HasValue)
+ return base.Summary;
+
+ return Value + " = " + (IsMatch ? "(MATCH) " : "(NON-MATCH) ") + Description;
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/ConjunctionScorer.cs b/src/core/Search/ConjunctionScorer.cs
new file mode 100644
index 0000000..6befe06
--- /dev/null
+++ b/src/core/Search/ConjunctionScorer.cs
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Linq;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary>Scorer for conjunctions, sets of queries, all of which are required. </summary>
+ class ConjunctionScorer:Scorer
+ {
+ private Scorer[] scorers;
+ private float coord;
+ private int lastDoc = - 1;
+
+ public ConjunctionScorer(Similarity similarity, System.Collections.Generic.ICollection<Scorer> scorers)
+ : this(similarity, scorers.ToArray())
+ {
+ }
+
+ public ConjunctionScorer(Similarity similarity, params Scorer[] scorers):base(similarity)
+ {
+ this.scorers = scorers;
+ coord = similarity.Coord(scorers.Length, scorers.Length);
+
+ for (int i = 0; i < scorers.Length; i++)
+ {
+ if (scorers[i].NextDoc() == NO_MORE_DOCS)
+ {
+ // If even one of the sub-scorers does not have any documents, this
+ // scorer should not attempt to do any more work.
+ lastDoc = NO_MORE_DOCS;
+ return ;
+ }
+ }
+
+ // Sort the array the first time...
+ // We don't need to sort the array in any future calls because we know
+ // it will already start off sorted (all scorers on same doc).
+
+ // note that this comparator is not consistent with equals!
+ System.Array.Sort(scorers, (a, b) => a.DocID() - b.DocID());
+
+ // NOTE: doNext() must be called before the re-sorting of the array later on.
+ // The reason is this: assume there are 5 scorers, whose first docs are 1,
+ // 2, 3, 5, 5 respectively. Sorting (above) leaves the array as is. Calling
+ // doNext() here advances all the first scorers to 5 (or a larger doc ID
+ // they all agree on).
+ // However, if we re-sort before doNext() is called, the order will be 5, 3,
+ // 2, 1, 5 and then doNext() will stop immediately, since the first scorer's
+ // docs equals the last one. So the invariant that after calling doNext()
+ // all scorers are on the same doc ID is broken.
+ if (DoNext() == NO_MORE_DOCS)
+ {
+ // The scorers did not agree on any document.
+ lastDoc = NO_MORE_DOCS;
+ return ;
+ }
+
+ // If first-time skip distance is any predictor of
+ // scorer sparseness, then we should always try to skip first on
+ // those scorers.
+ // Keep the last scorer in its last place (it will be the first
+ // to be skipped on), but reverse all of the others so that
+ // they will be skipped on in order of original high skip.
+ int end = scorers.Length - 1;
+ int max = end >> 1;
+ for (int i = 0; i < max; i++)
+ {
+ Scorer tmp = scorers[i];
+ int idx = end - i - 1;
+ scorers[i] = scorers[idx];
+ scorers[idx] = tmp;
+ }
+ }
+
+ private int DoNext()
+ {
+ int first = 0;
+ int doc = scorers[scorers.Length - 1].DocID();
+ Scorer firstScorer;
+ while ((firstScorer = scorers[first]).DocID() < doc)
+ {
+ doc = firstScorer.Advance(doc);
+ first = first == scorers.Length - 1?0:first + 1;
+ }
+ return doc;
+ }
+
+ public override int Advance(int target)
+ {
+ if (lastDoc == NO_MORE_DOCS)
+ {
+ return lastDoc;
+ }
+ else if (scorers[(scorers.Length - 1)].DocID() < target)
+ {
+ scorers[(scorers.Length - 1)].Advance(target);
+ }
+ return lastDoc = DoNext();
+ }
+
+ public override int DocID()
+ {
+ return lastDoc;
+ }
+
+ public override int NextDoc()
+ {
+ if (lastDoc == NO_MORE_DOCS)
+ {
+ return lastDoc;
+ }
+ else if (lastDoc == - 1)
+ {
+ return lastDoc = scorers[scorers.Length - 1].DocID();
+ }
+ scorers[(scorers.Length - 1)].NextDoc();
+ return lastDoc = DoNext();
+ }
+
+ public override float Score()
+ {
+ float sum = 0.0f;
+ for (int i = 0; i < scorers.Length; i++)
+ {
+ sum += scorers[i].Score();
+ }
+ return sum * coord;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/ConstantScoreQuery.cs b/src/core/Search/ConstantScoreQuery.cs
new file mode 100644
index 0000000..ff29023
--- /dev/null
+++ b/src/core/Search/ConstantScoreQuery.cs
@@ -0,0 +1,236 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Index;
+using IndexReader = Lucene.Net.Index.IndexReader;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> A query that wraps a filter and simply returns a constant score equal to the
+ /// query boost for every document in the filter.
+ /// </summary>
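+ /// <example>
+ /// A hedged usage sketch, not part of the original sources; the filter, field name and boost
+ /// are illustrative only:
+ /// <code>
+ /// Filter filter = new QueryWrapperFilter(new TermQuery(new Term("category", "fiction")));
+ /// Query q = new ConstantScoreQuery(filter);
+ /// q.Boost = 2.0f;                              // every matching doc scores exactly 2.0
+ /// TopDocs hits = searcher.Search(q, 10);
+ /// </code>
+ /// </example>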
+ [Serializable]
+ public class ConstantScoreQuery:Query
+ {
+ protected internal Filter internalFilter;
+
+ public ConstantScoreQuery(Filter filter)
+ {
+ this.internalFilter = filter;
+ }
+
+ /// <summary>Returns the encapsulated filter </summary>
+ public virtual Filter Filter
+ {
+ get { return internalFilter; }
+ }
+
+ public override Query Rewrite(IndexReader reader)
+ {
+ return this;
+ }
+
+ public override void ExtractTerms(System.Collections.Generic.ISet<Term> terms)
+ {
+ // OK to not add any terms when used for MultiSearcher,
+ // but may not be OK for highlighting
+ }
+
+ [Serializable]
+ protected internal class ConstantWeight:Weight
+ {
+ private void InitBlock(ConstantScoreQuery enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private ConstantScoreQuery enclosingInstance;
+ public ConstantScoreQuery Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ private readonly Similarity similarity;
+ private float queryNorm;
+ private float queryWeight;
+
+ public ConstantWeight(ConstantScoreQuery enclosingInstance, Searcher searcher)
+ {
+ InitBlock(enclosingInstance);
+ this.similarity = Enclosing_Instance.GetSimilarity(searcher);
+ }
+
+ public override Query Query
+ {
+ get { return Enclosing_Instance; }
+ }
+
+ public override float Value
+ {
+ get { return queryWeight; }
+ }
+
+ public override float GetSumOfSquaredWeights()
+ {
+ queryWeight = Enclosing_Instance.Boost;
+ return queryWeight*queryWeight;
+ }
+
+ public override void Normalize(float norm)
+ {
+ this.queryNorm = norm;
+ queryWeight *= this.queryNorm;
+ }
+
+ public override Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer)
+ {
+ return new ConstantScorer(enclosingInstance, similarity, reader, this);
+ }
+
+ public override Explanation Explain(IndexReader reader, int doc)
+ {
+
+ var cs = new ConstantScorer(enclosingInstance, similarity, reader, this);
+ bool exists = cs.docIdSetIterator.Advance(doc) == doc;
+
+ var result = new ComplexExplanation();
+
+ if (exists)
+ {
+ result.Description = "ConstantScoreQuery(" + Enclosing_Instance.internalFilter + "), product of:";
+ result.Value = queryWeight;
+ System.Boolean tempAux = true;
+ result.Match = tempAux;
+ result.AddDetail(new Explanation(Enclosing_Instance.Boost, "boost"));
+ result.AddDetail(new Explanation(queryNorm, "queryNorm"));
+ }
+ else
+ {
+ result.Description = "ConstantScoreQuery(" + Enclosing_Instance.internalFilter + ") doesn't match id " + doc;
+ result.Value = 0;
+ System.Boolean tempAux2 = false;
+ result.Match = tempAux2;
+ }
+ return result;
+ }
+ }
+
+ protected internal class ConstantScorer : Scorer
+ {
+ private void InitBlock(ConstantScoreQuery enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private ConstantScoreQuery enclosingInstance;
+ public ConstantScoreQuery Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ internal DocIdSetIterator docIdSetIterator;
+ internal float theScore;
+ internal int doc = - 1;
+
+ public ConstantScorer(ConstantScoreQuery enclosingInstance, Similarity similarity, IndexReader reader, Weight w):base(similarity)
+ {
+ InitBlock(enclosingInstance);
+ theScore = w.Value;
+ DocIdSet docIdSet = Enclosing_Instance.internalFilter.GetDocIdSet(reader);
+ if (docIdSet == null)
+ {
+ docIdSetIterator = DocIdSet.EMPTY_DOCIDSET.Iterator();
+ }
+ else
+ {
+ DocIdSetIterator iter = docIdSet.Iterator();
+ if (iter == null)
+ {
+ docIdSetIterator = DocIdSet.EMPTY_DOCIDSET.Iterator();
+ }
+ else
+ {
+ docIdSetIterator = iter;
+ }
+ }
+ }
+
+ public override int NextDoc()
+ {
+ return docIdSetIterator.NextDoc();
+ }
+
+ public override int DocID()
+ {
+ return docIdSetIterator.DocID();
+ }
+
+ public override float Score()
+ {
+ return theScore;
+ }
+
+ public override int Advance(int target)
+ {
+ return docIdSetIterator.Advance(target);
+ }
+ }
+
+ public override Weight CreateWeight(Searcher searcher)
+ {
+ return new ConstantScoreQuery.ConstantWeight(this, searcher);
+ }
+
+ /// <summary>Prints a user-readable version of this query. </summary>
+ public override System.String ToString(string field)
+ {
+ return "ConstantScore(" + internalFilter + (Boost == 1.0?")":"^" + Boost);
+ }
+
+ /// <summary>Returns true if <c>o</c> is equal to this. </summary>
+ public override bool Equals(System.Object o)
+ {
+ if (this == o)
+ return true;
+ if (!(o is ConstantScoreQuery))
+ return false;
+ ConstantScoreQuery other = (ConstantScoreQuery) o;
+ return this.Boost == other.Boost && internalFilter.Equals(other.internalFilter);
+ }
+
+ /// <summary>Returns a hash code value for this object. </summary>
+ public override int GetHashCode()
+ {
+ // Simple add is OK since no existing filter hashcode has a float component.
+ return internalFilter.GetHashCode() + BitConverter.ToInt32(BitConverter.GetBytes(Boost), 0);
+ }
+
+ override public System.Object Clone()
+ {
+ // {{Aroush-1.9}} is this all that we need to clone?!
+ ConstantScoreQuery clone = (ConstantScoreQuery)base.Clone();
+ clone.internalFilter = (Filter)this.internalFilter;
+ return clone;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/DefaultSimilarity.cs b/src/core/Search/DefaultSimilarity.cs
new file mode 100644
index 0000000..6acbbeb
--- /dev/null
+++ b/src/core/Search/DefaultSimilarity.cs
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using FieldInvertState = Lucene.Net.Index.FieldInvertState;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary>Expert: Default scoring implementation. </summary>
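+ /// <example>
+ /// A hedged sketch of overriding a single scoring factor, not part of the original sources;
+ /// it assumes the searcher exposes a settable Similarity property:
+ /// <code>
+ /// // disable length normalization so long and short fields are scored alike
+ /// class FlatLengthSimilarity : DefaultSimilarity
+ /// {
+ ///     public override float LengthNorm(string fieldName, int numTerms)
+ ///     {
+ ///         return 1.0f;
+ ///     }
+ /// }
+ /// searcher.Similarity = new FlatLengthSimilarity();
+ /// </code>
+ /// </example>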
+ [Serializable]
+ public class DefaultSimilarity:Similarity
+ {
+
+ /// <summary>Implemented as
+ /// <c>state.Boost * LengthNorm(field, numTerms)</c>, where
+ /// <c>numTerms</c> is <see cref="FieldInvertState.Length" /> if <see cref="DiscountOverlaps" />
+ /// is false, else it's <see cref="FieldInvertState.Length" />
+ /// - <see cref="FieldInvertState.NumOverlap" />.
+ ///
+ /// <p/><b>WARNING</b>: This API is new and experimental, and may suddenly
+ /// change.<p/>
+ /// </summary>
+ public override float ComputeNorm(System.String field, FieldInvertState state)
+ {
+ int numTerms;
+ if (internalDiscountOverlaps)
+ numTerms = state.Length - state.NumOverlap;
+ else
+ numTerms = state.Length;
+ return (state.Boost * LengthNorm(field, numTerms));
+ }
+
+ /// <summary>Implemented as <c>1/sqrt(numTerms)</c>. </summary>
+ public override float LengthNorm(System.String fieldName, int numTerms)
+ {
+ return (float) (1.0 / System.Math.Sqrt(numTerms));
+ }
+
+ /// <summary>Implemented as <c>1/sqrt(sumOfSquaredWeights)</c>. </summary>
+ public override float QueryNorm(float sumOfSquaredWeights)
+ {
+ return (float) (1.0 / System.Math.Sqrt(sumOfSquaredWeights));
+ }
+
+ /// <summary>Implemented as <c>sqrt(freq)</c>. </summary>
+ public override float Tf(float freq)
+ {
+ return (float) System.Math.Sqrt(freq);
+ }
+
+ /// <summary>Implemented as <c>1 / (distance + 1)</c>. </summary>
+ public override float SloppyFreq(int distance)
+ {
+ return 1.0f / (distance + 1);
+ }
+
+ /// <summary>Implemented as <c>log(numDocs/(docFreq+1)) + 1</c>. </summary>
+ public override float Idf(int docFreq, int numDocs)
+ {
+ return (float) (System.Math.Log(numDocs / (double) (docFreq + 1)) + 1.0);
+ }
+
+ /// <summary>Implemented as <c>overlap / maxOverlap</c>. </summary>
+ public override float Coord(int overlap, int maxOverlap)
+ {
+ return overlap / (float) maxOverlap;
+ }
+
+ /// <seealso cref="DiscountOverlaps">
+ /// </seealso>
+ // Default false
+ protected internal bool internalDiscountOverlaps;
+
+ /// <summary>Determines whether overlap tokens (Tokens with
+ /// 0 position increment) are ignored when computing
+ /// norm. By default this is false, meaning overlap
+ /// tokens are counted just like non-overlap tokens.
+ ///
+ /// <p/><b>WARNING</b>: This API is new and experimental, and may suddenly
+ /// change.<p/>
+ ///
+ /// </summary>
+ /// <seealso cref="ComputeNorm">
+ /// </seealso>
+ public virtual bool DiscountOverlaps
+ {
+ get { return internalDiscountOverlaps; }
+ set { internalDiscountOverlaps = value; }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/DisjunctionMaxQuery.cs b/src/core/Search/DisjunctionMaxQuery.cs
new file mode 100644
index 0000000..f59e3dc
--- /dev/null
+++ b/src/core/Search/DisjunctionMaxQuery.cs
@@ -0,0 +1,344 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Index;
+using Lucene.Net.Support;
+using IndexReader = Lucene.Net.Index.IndexReader;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> A query that generates the union of documents produced by its subqueries, and that scores each document with the maximum
+ /// score for that document as produced by any subquery, plus a tie breaking increment for any additional matching subqueries.
+ /// This is useful when searching for a word in multiple fields with different boost factors (so that the fields cannot be
+ /// combined equivalently into a single search field). We want the primary score to be the one associated with the highest boost,
+ /// not the sum of the field scores (as BooleanQuery would give).
+ /// If the query is "albino elephant" this ensures that "albino" matching one field and "elephant" matching
+ /// another gets a higher score than "albino" matching both fields.
+ /// To get this result, use both BooleanQuery and DisjunctionMaxQuery: for each term a DisjunctionMaxQuery searches for it in
+ /// each field, while the set of these DisjunctionMaxQuery's is combined into a BooleanQuery.
+ /// The tie breaker capability allows results that include the same term in multiple fields to be judged better than results that
+ /// include this term in only the best of those multiple fields, without confusing this with the better case of two different terms
+ /// in the multiple fields.
+ /// </summary>
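+ /// <example>
+ /// A hedged sketch of the "albino elephant" pattern described above, not part of the original
+ /// sources; the field names are illustrative and <c>Occur</c> is assumed to be the
+ /// clause-occurrence enum taken by BooleanQuery.Add in this port:
+ /// <code>
+ /// var albino = new DisjunctionMaxQuery(0.1f);
+ /// albino.Add(new TermQuery(new Term("title", "albino")));
+ /// albino.Add(new TermQuery(new Term("body", "albino")));
+ /// var elephant = new DisjunctionMaxQuery(0.1f);
+ /// elephant.Add(new TermQuery(new Term("title", "elephant")));
+ /// elephant.Add(new TermQuery(new Term("body", "elephant")));
+ /// var query = new BooleanQuery();
+ /// query.Add(albino, Occur.SHOULD);
+ /// query.Add(elephant, Occur.SHOULD);
+ /// </code>
+ /// </example>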
+ [Serializable]
+ public class DisjunctionMaxQuery : Query, System.Collections.Generic.IEnumerable<Query>, System.ICloneable
+ {
+
+ /* The subqueries */
+ private EquatableList<Query> disjuncts = new EquatableList<Query>();
+
+ /* Multiple of the non-max disjunct scores added into our final score. Non-zero values support tie-breaking. */
+ private float tieBreakerMultiplier = 0.0f;
+
+ /// <summary>Creates a new empty DisjunctionMaxQuery. Use add() to add the subqueries.</summary>
+ /// <param name="tieBreakerMultiplier">the score of each non-maximum disjunct for a document is multiplied by this weight
+ /// and added into the final score. If non-zero, the value should be small, on the order of 0.1, which says that
+ /// 10 occurrences of a word in a lower-scored field that is also in a higher scored field is just as good as a unique
+ /// word in the lower scored field (i.e., one that is not in any higher scored field).
+ /// </param>
+ public DisjunctionMaxQuery(float tieBreakerMultiplier)
+ {
+ this.tieBreakerMultiplier = tieBreakerMultiplier;
+ }
+
+ /// <summary> Creates a new DisjunctionMaxQuery</summary>
+ /// <param name="disjuncts">a Collection&lt;Query&gt; of all the disjuncts to add
+ /// </param>
+ /// <param name="tieBreakerMultiplier"> the weight to give to each matching non-maximum disjunct
+ /// </param>
+ public DisjunctionMaxQuery(System.Collections.Generic.ICollection<Query> disjuncts, float tieBreakerMultiplier)
+ {
+ this.tieBreakerMultiplier = tieBreakerMultiplier;
+ Add(disjuncts);
+ }
+
+ /// <summary>Add a subquery to this disjunction</summary>
+ /// <param name="query">the disjunct added
+ /// </param>
+ public virtual void Add(Query query)
+ {
+ disjuncts.Add(query);
+ }
+
+ /// <summary>Add a collection of disjuncts to this disjunction
+ /// via an ICollection&lt;Query&gt;
+ /// </summary>
+ public virtual void Add(System.Collections.Generic.ICollection<Query> disjuncts)
+ {
+ this.disjuncts.AddRange(disjuncts);
+ }
+
+ /// <summary>An Iterator&lt;Query&gt; over the disjuncts </summary>
+ public virtual System.Collections.Generic.IEnumerator<Query> GetEnumerator()
+ {
+ return disjuncts.GetEnumerator();
+ }
+
+ System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
+ {
+ return GetEnumerator();
+ }
+
+ /// <summary> Expert: the Weight for DisjunctionMaxQuery, used to
+ /// normalize, score and explain these queries.
+ ///
+ /// <p/>NOTE: this API and implementation is subject to
+ /// change suddenly in the next release.<p/>
+ /// </summary>
+ [Serializable]
+ protected internal class DisjunctionMaxWeight:Weight
+ {
+ private void InitBlock(DisjunctionMaxQuery enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private DisjunctionMaxQuery enclosingInstance;
+ public DisjunctionMaxQuery Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ /// <summary>The Similarity implementation. </summary>
+ protected internal Similarity similarity;
+
+ /// <summary>The Weights for our subqueries, in 1-1 correspondence with disjuncts </summary>
+ protected internal System.Collections.Generic.List<Weight> weights = new System.Collections.Generic.List<Weight>(); // The Weight's for our subqueries, in 1-1 correspondence with disjuncts
+
+ /* Construct the Weight for this Query searched by searcher. Recursively construct subquery weights. */
+ public DisjunctionMaxWeight(DisjunctionMaxQuery enclosingInstance, Searcher searcher)
+ {
+ InitBlock(enclosingInstance);
+ this.similarity = searcher.Similarity;
+ foreach(Query disjunctQuery in enclosingInstance.disjuncts)
+ {
+ weights.Add(disjunctQuery.CreateWeight(searcher));
+ }
+ }
+
+ /* Return our associated DisjunctionMaxQuery */
+
+ public override Query Query
+ {
+ get { return Enclosing_Instance; }
+ }
+
+ /* Return our boost */
+
+ public override float Value
+ {
+ get { return Enclosing_Instance.Boost; }
+ }
+
+ /* Compute the sum of squared weights of us applied to our subqueries. Used for normalization. */
+
+ public override float GetSumOfSquaredWeights()
+ {
+ float max = 0.0f, sum = 0.0f;
+ foreach (Weight currentWeight in weights)
+ {
+ float sub = currentWeight.GetSumOfSquaredWeights();
+ sum += sub;
+ max = System.Math.Max(max, sub);
+ }
+ float boost = Enclosing_Instance.Boost;
+ return (((sum - max) * Enclosing_Instance.tieBreakerMultiplier * Enclosing_Instance.tieBreakerMultiplier) + max) *
+ boost * boost;
+ }
+
+ /* Apply the computed normalization factor to our subqueries */
+ public override void Normalize(float norm)
+ {
+ norm *= Enclosing_Instance.Boost; // Incorporate our boost
+ foreach(Weight wt in weights)
+ {
+ wt.Normalize(norm);
+ }
+ }
+
+ /* Create the scorer used to score our associated DisjunctionMaxQuery */
+ public override Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer)
+ {
+ Scorer[] scorers = new Scorer[weights.Count];
+ int idx = 0;
+ foreach(Weight w in weights)
+ {
+ Scorer subScorer = w.Scorer(reader, true, false);
+ if (subScorer != null && subScorer.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
+ {
+ scorers[idx++] = subScorer;
+ }
+ }
+ if (idx == 0)
+ return null; // all scorers did not have documents
+ DisjunctionMaxScorer result = new DisjunctionMaxScorer(Enclosing_Instance.tieBreakerMultiplier, similarity, scorers, idx);
+ return result;
+ }
+
+ /* Explain the score we computed for doc */
+ public override Explanation Explain(IndexReader reader, int doc)
+ {
+ if (Enclosing_Instance.disjuncts.Count == 1)
+ return weights[0].Explain(reader, doc);
+ ComplexExplanation result = new ComplexExplanation();
+ float max = 0.0f, sum = 0.0f;
+ result.Description = Enclosing_Instance.tieBreakerMultiplier == 0.0f?"max of:":"max plus " + Enclosing_Instance.tieBreakerMultiplier + " times others of:";
+ foreach(Weight wt in weights)
+ {
+ Explanation e = wt.Explain(reader, doc);
+ if (e.IsMatch)
+ {
+ System.Boolean tempAux = true;
+ result.Match = tempAux;
+ result.AddDetail(e);
+ sum += e.Value;
+ max = System.Math.Max(max, e.Value);
+ }
+ }
+ result.Value = max + (sum - max) * Enclosing_Instance.tieBreakerMultiplier;
+ return result;
+ }
+ } // end of DisjunctionMaxWeight inner class
+
+ /* Create the Weight used to score us */
+ public override Weight CreateWeight(Searcher searcher)
+ {
+ return new DisjunctionMaxWeight(this, searcher);
+ }
+
+ /// <summary>Optimize our representation and our subqueries representations</summary>
+ /// <param name="reader">the IndexReader we query
+ /// </param>
+ /// <returns> an optimized copy of us (which may not be a copy if there is nothing to optimize)
+ /// </returns>
+ public override Query Rewrite(IndexReader reader)
+ {
+ int numDisjunctions = disjuncts.Count;
+ if (numDisjunctions == 1)
+ {
+ Query singleton = disjuncts[0];
+ Query result = singleton.Rewrite(reader);
+ if (Boost != 1.0f)
+ {
+ if (result == singleton)
+ result = (Query) result.Clone();
+ result.Boost = Boost * result.Boost;
+ }
+ return result;
+ }
+ DisjunctionMaxQuery clone = null;
+ for (int i = 0; i < numDisjunctions; i++)
+ {
+ Query clause = disjuncts[i];
+ Query rewrite = clause.Rewrite(reader);
+ if (rewrite != clause)
+ {
+ if (clone == null)
+ clone = (DisjunctionMaxQuery) this.Clone();
+ clone.disjuncts[i] = rewrite;
+ }
+ }
+ if (clone != null)
+ return clone;
+ else
+ return this;
+ }
+
+ /// <summary>Create a shallow copy of us -- used in rewriting if necessary</summary>
+ /// <returns> a copy of us (but reuse, don't copy, our subqueries)
+ /// </returns>
+ public override System.Object Clone()
+ {
+ DisjunctionMaxQuery clone = (DisjunctionMaxQuery) base.Clone();
+ clone.disjuncts = (EquatableList<Query>) this.disjuncts.Clone();
+ return clone;
+ }
+
+ // inherit javadoc
+ public override void ExtractTerms(System.Collections.Generic.ISet<Term> terms)
+ {
+ foreach(Query query in disjuncts)
+ {
+ query.ExtractTerms(terms);
+ }
+ }
+
+ /// <summary>Prettyprint us.</summary>
+ /// <param name="field">the field to which we are applied
+ /// </param>
+ /// <returns> a string that shows what we do, of the form "(disjunct1 | disjunct2 | ... | disjunctn)^boost"
+ /// </returns>
+ public override System.String ToString(System.String field)
+ {
+ System.Text.StringBuilder buffer = new System.Text.StringBuilder();
+ buffer.Append("(");
+ int numDisjunctions = disjuncts.Count;
+ for (int i = 0; i < numDisjunctions; i++)
+ {
+ Query subquery = disjuncts[i];
+ if (subquery is BooleanQuery)
+ {
+ // wrap sub-bools in parens
+ buffer.Append("(");
+ buffer.Append(subquery.ToString(field));
+ buffer.Append(")");
+ }
+ else
+ buffer.Append(subquery.ToString(field));
+ if (i != numDisjunctions - 1)
+ buffer.Append(" | ");
+ }
+ buffer.Append(")");
+ if (tieBreakerMultiplier != 0.0f)
+ {
+ buffer.Append("~");
+ buffer.Append(tieBreakerMultiplier);
+ }
+ if (Boost != 1.0)
+ {
+ buffer.Append("^");
+ buffer.Append(Boost);
+ }
+ return buffer.ToString();
+ }
+
+ /// <summary>Return true iff we represent the same query as o</summary>
+ /// <param name="o">another object
+ /// </param>
+ /// <returns> true iff o is a DisjunctionMaxQuery with the same boost and the same subqueries, in the same order, as us
+ /// </returns>
+ public override bool Equals(System.Object o)
+ {
+ if (!(o is DisjunctionMaxQuery))
+ return false;
+ DisjunctionMaxQuery other = (DisjunctionMaxQuery) o;
+ return this.Boost == other.Boost && this.tieBreakerMultiplier == other.tieBreakerMultiplier && this.disjuncts.Equals(other.disjuncts);
+ }
+
+ /// <summary>Compute a hash code for hashing us</summary>
+ /// <returns> the hash code
+ /// </returns>
+ public override int GetHashCode()
+ {
+ return BitConverter.ToInt32(BitConverter.GetBytes(Boost), 0) + BitConverter.ToInt32(BitConverter.GetBytes(tieBreakerMultiplier), 0) + disjuncts.GetHashCode();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/DisjunctionMaxScorer.cs b/src/core/Search/DisjunctionMaxScorer.cs
new file mode 100644
index 0000000..a476b6b
--- /dev/null
+++ b/src/core/Search/DisjunctionMaxScorer.cs
@@ -0,0 +1,215 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> The Scorer for DisjunctionMaxQuery. The union of all documents generated by the subquery scorers
+ /// is generated in document number order. The score for each document is the maximum of the scores computed
+ /// by the subquery scorers that generate that document, plus tieBreakerMultiplier times the sum of the scores
+ /// for the other subqueries that generate the document.
+ /// </summary>
+ class DisjunctionMaxScorer:Scorer
+ {
+
+ /* The scorers for subqueries that have remaining docs, kept as a min heap by number of next doc. */
+ private Scorer[] subScorers;
+ private int numScorers;
+ /* Multiplier applied to non-maximum-scoring subqueries for a document as they are summed into the result. */
+ private float tieBreakerMultiplier;
+ private int doc = - 1;
+
+ /// <summary> Creates a new instance of DisjunctionMaxScorer
+ ///
+ /// </summary>
+ /// <param name="tieBreakerMultiplier">Multiplier applied to non-maximum-scoring subqueries for a
+ /// document as they are summed into the result.
+ /// </param>
+ /// <param name="similarity">-- not used since our definition involves neither coord nor terms
+ /// directly
+ /// </param>
+ /// <param name="subScorers">The sub scorers this Scorer should iterate on
+ /// </param>
+ /// <param name="numScorers">The actual number of scorers to iterate on. Note that the array's
+ /// length may be larger than the actual number of scorers.
+ /// </param>
+ public DisjunctionMaxScorer(float tieBreakerMultiplier, Similarity similarity, Scorer[] subScorers, int numScorers):base(similarity)
+ {
+
+ this.tieBreakerMultiplier = tieBreakerMultiplier;
+ // The passed subScorers array includes only scorers which have documents
+ // (DisjunctionMaxQuery takes care of that), and their nextDoc() was already
+ // called.
+ this.subScorers = subScorers;
+ this.numScorers = numScorers;
+
+ Heapify();
+ }
+
+ public override int NextDoc()
+ {
+ if (numScorers == 0)
+ return doc = NO_MORE_DOCS;
+ while (subScorers[0].DocID() == doc)
+ {
+ if (subScorers[0].NextDoc() != NO_MORE_DOCS)
+ {
+ HeapAdjust(0);
+ }
+ else
+ {
+ HeapRemoveRoot();
+ if (numScorers == 0)
+ {
+ return doc = NO_MORE_DOCS;
+ }
+ }
+ }
+
+ return doc = subScorers[0].DocID();
+ }
+
+ public override int DocID()
+ {
+ return doc;
+ }
+
+ /// <summary>Determine the current document score. Initially invalid, until <see cref="NextDoc()" /> is called the first time.</summary>
+ /// <returns> the score of the current generated document
+ /// </returns>
+ public override float Score()
+ {
+ int doc = subScorers[0].DocID();
+ float[] sum = new float[]{subScorers[0].Score()}, max = new float[]{sum[0]};
+ int size = numScorers;
+ ScoreAll(1, size, doc, sum, max);
+ ScoreAll(2, size, doc, sum, max);
+ return max[0] + (sum[0] - max[0]) * tieBreakerMultiplier;
+ }
+
+ // Recursively iterate all subScorers that generated last doc computing sum and max
+ private void ScoreAll(int root, int size, int doc, float[] sum, float[] max)
+ {
+ if (root < size && subScorers[root].DocID() == doc)
+ {
+ float sub = subScorers[root].Score();
+ sum[0] += sub;
+ max[0] = System.Math.Max(max[0], sub);
+ ScoreAll((root << 1) + 1, size, doc, sum, max);
+ ScoreAll((root << 1) + 2, size, doc, sum, max);
+ }
+ }
+
+ public override int Advance(int target)
+ {
+ if (numScorers == 0)
+ return doc = NO_MORE_DOCS;
+ while (subScorers[0].DocID() < target)
+ {
+ if (subScorers[0].Advance(target) != NO_MORE_DOCS)
+ {
+ HeapAdjust(0);
+ }
+ else
+ {
+ HeapRemoveRoot();
+ if (numScorers == 0)
+ {
+ return doc = NO_MORE_DOCS;
+ }
+ }
+ }
+ return doc = subScorers[0].DocID();
+ }
+
+ // Organize subScorers into a min heap with scorers generating the earliest document on top.
+ private void Heapify()
+ {
+ for (int i = (numScorers >> 1) - 1; i >= 0; i--)
+ {
+ HeapAdjust(i);
+ }
+ }
+
+ /* The subtree of subScorers at root is a min heap except possibly for its root element.
+ * Bubble the root down as required to make the subtree a heap.
+ */
+ private void HeapAdjust(int root)
+ {
+ Scorer scorer = subScorers[root];
+ int doc = scorer.DocID();
+ int i = root;
+ while (i <= (numScorers >> 1) - 1)
+ {
+ int lchild = (i << 1) + 1;
+ Scorer lscorer = subScorers[lchild];
+ int ldoc = lscorer.DocID();
+ int rdoc = System.Int32.MaxValue, rchild = (i << 1) + 2;
+ Scorer rscorer = null;
+ if (rchild < numScorers)
+ {
+ rscorer = subScorers[rchild];
+ rdoc = rscorer.DocID();
+ }
+ if (ldoc < doc)
+ {
+ if (rdoc < ldoc)
+ {
+ subScorers[i] = rscorer;
+ subScorers[rchild] = scorer;
+ i = rchild;
+ }
+ else
+ {
+ subScorers[i] = lscorer;
+ subScorers[lchild] = scorer;
+ i = lchild;
+ }
+ }
+ else if (rdoc < doc)
+ {
+ subScorers[i] = rscorer;
+ subScorers[rchild] = scorer;
+ i = rchild;
+ }
+ else
+ {
+ return ;
+ }
+ }
+ }
+
+ // Remove the root Scorer from subScorers and re-establish it as a heap
+ private void HeapRemoveRoot()
+ {
+ if (numScorers == 1)
+ {
+ subScorers[0] = null;
+ numScorers = 0;
+ }
+ else
+ {
+ subScorers[0] = subScorers[numScorers - 1];
+ subScorers[numScorers - 1] = null;
+ --numScorers;
+ HeapAdjust(0);
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/DisjunctionSumScorer.cs b/src/core/Search/DisjunctionSumScorer.cs
new file mode 100644
index 0000000..8d65ab8
--- /dev/null
+++ b/src/core/Search/DisjunctionSumScorer.cs
@@ -0,0 +1,278 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using ScorerDocQueue = Lucene.Net.Util.ScorerDocQueue;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary>A Scorer for OR like queries, counterpart of <c>ConjunctionScorer</c>.
+ /// This Scorer implements <see cref="DocIdSetIterator.Advance(int)" /> and uses Advance() on the given Scorers.
+ /// </summary>
+ class DisjunctionSumScorer:Scorer
+ {
+ /// <summary>The number of subscorers. </summary>
+ private int nrScorers;
+
+ /// <summary>The subscorers. </summary>
+ protected internal System.Collections.Generic.IList<Scorer> subScorers;
+
+ /// <summary>The minimum number of scorers that should match. </summary>
+ private int minimumNrMatchers;
+
+ /// <summary>The scorerDocQueue contains all subscorers ordered by their current doc(),
+ /// with the minimum at the top.
+ /// <br/>The scorerDocQueue is initialized the first time next() or skipTo() is called.
+ /// <br/>An exhausted scorer is immediately removed from the scorerDocQueue.
+ /// <br/>If less than the minimumNrMatchers scorers
+ /// remain in the scorerDocQueue next() and skipTo() return false.
+ /// <p/>
+ /// After each call to next() or skipTo()
+ /// <c>currentSumScore</c> is the total score of the current matching doc,
+ /// <c>nrMatchers</c> is the number of matching scorers,
+ /// and all scorers are after the matching doc, or are exhausted.
+ /// </summary>
+ private ScorerDocQueue scorerDocQueue;
+
+ /// <summary>The document number of the current match. </summary>
+ private int currentDoc = - 1;
+
+ /// <summary>The number of subscorers that provide the current match. </summary>
+ protected internal int nrMatchers = - 1;
+
+ private float currentScore = System.Single.NaN;
+
+ /// <summary>Construct a <c>DisjunctionScorer</c>.</summary>
+ /// <param name="subScorers">A collection of at least two subscorers.
+ /// </param>
+ /// <param name="minimumNrMatchers">The positive minimum number of subscorers that should
+ /// match to match this query.
+ /// <br/>When <c>minimumNrMatchers</c> is bigger than
+ /// the number of <c>subScorers</c>,
+ /// no matches will be produced.
+ /// <br/>When minimumNrMatchers equals the number of subScorers,
+ /// it is more efficient to use <c>ConjunctionScorer</c>.
+ /// </param>
+ public DisjunctionSumScorer(System.Collections.Generic.IList<Scorer> subScorers, int minimumNrMatchers):base(null)
+ {
+
+ nrScorers = subScorers.Count;
+
+ if (minimumNrMatchers <= 0)
+ {
+ throw new System.ArgumentException("Minimum nr of matchers must be positive");
+ }
+ if (nrScorers <= 1)
+ {
+ throw new System.ArgumentException("There must be at least 2 subScorers");
+ }
+
+ this.minimumNrMatchers = minimumNrMatchers;
+ this.subScorers = subScorers;
+
+ InitScorerDocQueue();
+ }
+
+ /// <summary>Construct a <c>DisjunctionScorer</c>, using one as the minimum number
+ /// of matching subscorers.
+ /// </summary>
+ public DisjunctionSumScorer(System.Collections.Generic.IList<Scorer> subScorers)
+ : this(subScorers, 1)
+ {
+ }
+
+ /// <summary>Called from the constructor to initialize <c>scorerDocQueue</c>.
+ /// </summary>
+ private void InitScorerDocQueue()
+ {
+ scorerDocQueue = new ScorerDocQueue(nrScorers);
+ foreach(Scorer se in subScorers)
+ {
+ if (se.NextDoc() != NO_MORE_DOCS)
+ {
+ // doc() method will be used in scorerDocQueue.
+ scorerDocQueue.Insert(se);
+ }
+ }
+ }
+
+ /// <summary>Scores and collects all matching documents.</summary>
+ /// <param name="collector">The collector to which all matching documents are passed through.</param>
+ public override void Score(Collector collector)
+ {
+ collector.SetScorer(this);
+ while (NextDoc() != NO_MORE_DOCS)
+ {
+ collector.Collect(currentDoc);
+ }
+ }
+
+ /// <summary>Expert: Collects matching documents in a range. Hook for optimization.
+ /// Note that <see cref="NextDoc()" /> must be called once before this method is called
+ /// for the first time.
+ /// </summary>
+ /// <param name="collector">The collector to which all matching documents are passed through.
+ /// </param>
+ /// <param name="max">Do not score documents past this.
+ /// </param>
+ /// <param name="firstDocID">ignored by this implementation; nextDoc() determines the current doc</param>
+ /// <returns> true if more matching documents may remain.
+ /// </returns>
+ public /*protected internal*/ override bool Score(Collector collector, int max, int firstDocID)
+ {
+ // firstDocID is ignored since nextDoc() sets 'currentDoc'
+ collector.SetScorer(this);
+ while (currentDoc < max)
+ {
+ collector.Collect(currentDoc);
+ if (NextDoc() == NO_MORE_DOCS)
+ {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ public override int NextDoc()
+ {
+ if (scorerDocQueue.Size() < minimumNrMatchers || !AdvanceAfterCurrent())
+ {
+ currentDoc = NO_MORE_DOCS;
+ }
+ return currentDoc;
+ }
+
+ /// <summary>Advance all subscorers after the current document determined by the
+ /// top of the <c>scorerDocQueue</c>.
+ /// Repeat until at least the minimum number of subscorers match on the same
+ /// document and all subscorers are after that document or are exhausted.
+ /// <br/>On entry the <c>scorerDocQueue</c> has at least <c>minimumNrMatchers</c>
+ /// available. At least the scorer with the minimum document number will be advanced.
+ /// </summary>
+ /// <returns> true iff there is a match.
+ /// <br/>In case there is a match, <c>currentDoc</c>, <c>currentSumScore</c>,
+ /// and <c>nrMatchers</c> describe the match.
+ ///
+ /// TODO: Investigate whether it is possible to use skipTo() when
+ /// the minimum number of matchers is bigger than one, ie. try and use the
+ /// character of ConjunctionScorer for the minimum number of matchers.
+ /// Also delay calling score() on the sub scorers until the minimum number of
+ /// matchers is reached.
+ /// <br/>For this, a Scorer array with minimumNrMatchers elements might
+ /// hold Scorers at currentDoc that are temporarily popped from scorerQueue.
+ /// </returns>
+ protected internal virtual bool AdvanceAfterCurrent()
+ {
+ do
+ {
+ // repeat until minimum nr of matchers
+ currentDoc = scorerDocQueue.TopDoc();
+ currentScore = scorerDocQueue.TopScore();
+ nrMatchers = 1;
+ do
+ {
+ // Until all subscorers are after currentDoc
+ if (!scorerDocQueue.TopNextAndAdjustElsePop())
+ {
+ if (scorerDocQueue.Size() == 0)
+ {
+ break; // nothing more to advance, check for last match.
+ }
+ }
+ if (scorerDocQueue.TopDoc() != currentDoc)
+ {
+ break; // All remaining subscorers are after currentDoc.
+ }
+ currentScore += scorerDocQueue.TopScore();
+ nrMatchers++;
+ }
+ while (true);
+
+ if (nrMatchers >= minimumNrMatchers)
+ {
+ return true;
+ }
+ else if (scorerDocQueue.Size() < minimumNrMatchers)
+ {
+ return false;
+ }
+ }
+ while (true);
+ }
+
+ /// <summary>Returns the score of the current document matching the query.
+ /// Initially invalid, until <see cref="NextDoc()" /> is called the first time.
+ /// </summary>
+ public override float Score()
+ {
+ return currentScore;
+ }
+
+ public override int DocID()
+ {
+ return currentDoc;
+ }
+
+ /// <summary>Returns the number of subscorers matching the current document.
+ /// Initially invalid, until <see cref="NextDoc()" /> is called the first time.
+ /// </summary>
+ public virtual int NrMatchers()
+ {
+ return nrMatchers;
+ }
+
+ /// <summary> Advances to the first match beyond the current whose document number is
+ /// greater than or equal to a given target. <br/>
+ /// The implementation uses the skipTo() method on the subscorers.
+ ///
+ /// </summary>
+ /// <param name="target">The target document number.
+ /// </param>
+ /// <returns> the document whose number is greater than or equal to the given
+ /// target, or <c>NO_MORE_DOCS</c> if there is no such document.
+ /// </returns>
+ public override int Advance(int target)
+ {
+ if (scorerDocQueue.Size() < minimumNrMatchers)
+ {
+ return currentDoc = NO_MORE_DOCS;
+ }
+ if (target <= currentDoc)
+ {
+ return currentDoc;
+ }
+ do
+ {
+ if (scorerDocQueue.TopDoc() >= target)
+ {
+ return AdvanceAfterCurrent()?currentDoc:(currentDoc = NO_MORE_DOCS);
+ }
+ else if (!scorerDocQueue.TopSkipToAndAdjustElsePop(target))
+ {
+ if (scorerDocQueue.Size() < minimumNrMatchers)
+ {
+ return currentDoc = NO_MORE_DOCS;
+ }
+ }
+ }
+ while (true);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/DocIdSet.cs b/src/core/Search/DocIdSet.cs
new file mode 100644
index 0000000..6c49dcd
--- /dev/null
+++ b/src/core/Search/DocIdSet.cs
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> A DocIdSet contains a set of doc ids. Implementing classes must
+ /// only implement <see cref="Iterator" /> to provide access to the set.
+ /// </summary>
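+ /// <example>
+ /// A hedged sketch of consuming a DocIdSet, not part of the original sources; the
+ /// <c>filter</c> and <c>reader</c> variables are illustrative only:
+ /// <code>
+ /// DocIdSet set = filter.GetDocIdSet(reader);
+ /// DocIdSetIterator it = set == null ? null : set.Iterator();
+ /// if (it != null)
+ /// {
+ ///     int doc;
+ ///     while ((doc = it.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
+ ///     {
+ ///         // doc is a matching document id, relative to the given reader
+ ///     }
+ /// }
+ /// </code>
+ /// </example>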
+ [Serializable]
+ public abstract class DocIdSet
+ {
+ public class AnonymousClassDocIdSet:DocIdSet
+ {
+ public AnonymousClassDocIdSet()
+ {
+ InitBlock();
+ }
+ public class AnonymousClassDocIdSetIterator:DocIdSetIterator
+ {
+ public AnonymousClassDocIdSetIterator(AnonymousClassDocIdSet enclosingInstance)
+ {
+ InitBlock(enclosingInstance);
+ }
+ private void InitBlock(AnonymousClassDocIdSet enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private AnonymousClassDocIdSet enclosingInstance;
+ public AnonymousClassDocIdSet Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ public override int Advance(int target)
+ {
+ return NO_MORE_DOCS;
+ }
+ public override int DocID()
+ {
+ return NO_MORE_DOCS;
+ }
+ public override int NextDoc()
+ {
+ return NO_MORE_DOCS;
+ }
+ }
+ private void InitBlock()
+ {
+ iterator = new AnonymousClassDocIdSetIterator(this);
+ }
+
+ private DocIdSetIterator iterator;
+
+ public override DocIdSetIterator Iterator()
+ {
+ return iterator;
+ }
+
+ public override bool IsCacheable
+ {
+ get { return true; }
+ }
+ }
+
+ /// <summary>An empty <see cref="DocIdSet"/> instance for easy use, e.g. in Filters that hit no documents. </summary>
+ [NonSerialized]
+ public static readonly DocIdSet EMPTY_DOCIDSET;
+
+ /// <summary>Provides a <see cref="DocIdSetIterator" /> to access the set.
+ /// This implementation can return <c>null</c> or
+ /// <c>EMPTY_DOCIDSET.Iterator()</c> if there
+ /// are no docs that match.
+ /// </summary>
+ public abstract DocIdSetIterator Iterator();
+
+ /// <summary>This property is a hint for <see cref="CachingWrapperFilter" /> as to whether this <c>DocIdSet</c>
+ /// should be cached without copying it into a BitSet. The default is to return
+ /// <c>false</c>. If you have your own <c>DocIdSet</c> implementation
+ /// that iterates very efficiently without doing disk I/O,
+ /// override this property and return <c>true</c>.
+ /// </summary>
+ public virtual bool IsCacheable
+ {
+ get { return false; }
+ }
+
+ static DocIdSet()
+ {
+ EMPTY_DOCIDSET = new AnonymousClassDocIdSet();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/DocIdSetIterator.cs b/src/core/Search/DocIdSetIterator.cs
new file mode 100644
index 0000000..1c7be51
--- /dev/null
+++ b/src/core/Search/DocIdSetIterator.cs
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> This abstract class defines methods to iterate over a set of non-decreasing
+ /// doc ids. Note that this class assumes it iterates on doc Ids, and therefore
+ /// <see cref="NO_MORE_DOCS" /> is set to Int32.MaxValue in order to be used as
+ /// a sentinel object. Implementations of this class are expected to consider
+ /// <see cref="int.MaxValue" /> as an invalid value.
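+	/// <p/>
+	/// The canonical consumption loop (an illustrative sketch; <c>it</c> stands for any
+	/// concrete iterator instance):
+	/// <code>
+	/// for (int doc = it.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.NextDoc())
+	/// {
+	///     // doc is the id of the current matching document
+	/// }
+	/// </code>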
+ /// </summary>
+ public abstract class DocIdSetIterator
+ {
+ private int doc = - 1;
+
+ /// <summary> When returned by <see cref="NextDoc()" />, <see cref="Advance(int)" /> and
+ /// <see cref="DocID()" /> it means there are no more docs in the iterator.
+ /// </summary>
+ public static readonly int NO_MORE_DOCS = System.Int32.MaxValue;
+
+ /// <summary> Returns the following:
+ /// <list type="bullet">
+	/// <item>-1 or <see cref="NO_MORE_DOCS" /> if <see cref="NextDoc()" /> or
+	/// <see cref="Advance(int)" /> has not been called yet.</item>
+	/// <item><see cref="NO_MORE_DOCS" /> if the iterator has been exhausted.</item>
+ /// <item>Otherwise it should return the doc ID it is currently on.</item>
+ /// </list>
+ /// <p/>
+ /// </summary>
+ public abstract int DocID();
+
+ /// <summary> Advances to the next document in the set and returns the doc it is
+ /// currently on, or <see cref="NO_MORE_DOCS" /> if there are no more docs in the
+ /// set.<br/>
+ ///
+	/// <b>NOTE:</b> after the iterator has been exhausted you should not call this
+	/// method, as it may result in unpredictable behavior.
+ ///
+ /// </summary>
+ public abstract int NextDoc();
+
+	/// <summary> Advances to the first document beyond the current one whose document number is greater
+ /// than or equal to <i>target</i>. Returns the current document number or
+ /// <see cref="NO_MORE_DOCS" /> if there are no more docs in the set.
+ /// <p/>
+ /// Behaves as if written:
+ ///
+ /// <code>
+ /// int advance(int target) {
+ /// int doc;
+ /// while ((doc = nextDoc()) &lt; target) {
+ /// }
+ /// return doc;
+ /// }
+ /// </code>
+ ///
+ /// Some implementations are considerably more efficient than that.
+ /// <p/>
+	/// <b>NOTE:</b> certain implementations may return a different value (each
+ /// time) if called several times in a row with the same target.
+ /// <p/>
+ /// <b>NOTE:</b> this method may be called with <see cref="NO_MORE_DOCS"/> for
+ /// efficiency by some Scorers. If your implementation cannot efficiently
+ /// determine that it should exhaust, it is recommended that you check for that
+ /// value in each call to this method.
+ /// <p/>
+	/// <b>NOTE:</b> after the iterator has been exhausted you should not call this
+	/// method, as it may result in unpredictable behavior.
+ /// <p/>
+ ///
+ /// </summary>
+ /// <since>2.9</since>
+ public abstract int Advance(int target);
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/ExactPhraseScorer.cs b/src/core/Search/ExactPhraseScorer.cs
new file mode 100644
index 0000000..481ee54
--- /dev/null
+++ b/src/core/Search/ExactPhraseScorer.cs
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using Lucene.Net.Index;
+
+namespace Lucene.Net.Search
+{
+
+ sealed class ExactPhraseScorer:PhraseScorer
+ {
+
+ internal ExactPhraseScorer(Weight weight, TermPositions[] tps, int[] offsets, Similarity similarity, byte[] norms):base(weight, tps, offsets, similarity, norms)
+ {
+ }
+
+ protected internal override float PhraseFreq()
+ {
+ // sort list with pq
+ pq.Clear();
+ for (PhrasePositions pp = first; pp != null; pp = pp.next)
+ {
+ pp.FirstPosition();
+ pq.Add(pp); // build pq from list
+ }
+ PqToList(); // rebuild list from pq
+
+			// for counting how many times the exact phrase is found in the current document,
+			// just count how many times all PhrasePositions have exactly the same position.
+ int freq = 0;
+ do
+ {
+ // find position w/ all terms
+ while (first.position < last.position)
+ {
+ // scan forward in first
+ do
+ {
+ if (!first.NextPosition())
+ return freq;
+ }
+ while (first.position < last.position);
+ FirstToLast();
+ }
+ freq++; // all equal: a match
+ }
+ while (last.NextPosition());
+
+ return freq;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/Explanation.cs b/src/core/Search/Explanation.cs
new file mode 100644
index 0000000..0136db2
--- /dev/null
+++ b/src/core/Search/Explanation.cs
@@ -0,0 +1,168 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary>Expert: Describes the score computation for document and query. </summary>
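+	/// <remarks> An illustrative sketch of building and rendering a small explanation
+	/// tree by hand (the values shown are invented):
+	/// <code>
+	/// Explanation root = new Explanation(2.0f, "sum of:");
+	/// root.AddDetail(new Explanation(1.5f, "weight(field:term)"));
+	/// root.AddDetail(new Explanation(0.5f, "fieldNorm"));
+	/// System.Console.WriteLine(root.ToString()); // one node per line, indented by depth
+	/// </code>
+	/// </remarks>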
+ [Serializable]
+ public class Explanation
+ {
+ private float value; // the value of this node
+ private System.String description; // what it represents
+ private List<Explanation> details; // sub-explanations
+
+ public Explanation()
+ {
+ }
+
+ public Explanation(float value, System.String description)
+ {
+ this.value = value;
+ this.description = description;
+ }
+
+ /// <summary> Indicates whether or not this Explanation models a good match.
+ ///
+ /// <p/>
+ /// By default, an Explanation represents a "match" if the value is positive.
+ /// <p/>
+ /// </summary>
+ /// <seealso cref="Value">
+ /// </seealso>
+ public virtual bool IsMatch
+ {
+ get { return (0.0f < Value); }
+ }
+
+
+ /// <summary>The value assigned to this explanation node. </summary>
+ public virtual float Value
+ {
+ get { return value; }
+ set { this.value = value; }
+ }
+
+ /// <summary>A description of this explanation node. </summary>
+ public virtual string Description
+ {
+ get { return description; }
+ set { this.description = value; }
+ }
+
+		/// <summary> A short one-line summary which should contain all high-level
+		/// information about this Explanation, without the "Details".
+ /// </summary>
+ protected internal virtual string Summary
+ {
+ get { return Value + " = " + Description; }
+ }
+
+ /// <summary>The sub-nodes of this explanation node. </summary>
+ public virtual Explanation[] GetDetails()
+ {
+ if (details == null)
+ return null;
+ return details.ToArray();
+ }
+
+ /// <summary>Adds a sub-node to this explanation node. </summary>
+ public virtual void AddDetail(Explanation detail)
+ {
+ if (details == null)
+ details = new List<Explanation>();
+ details.Add(detail);
+ }
+
+ /// <summary>Render an explanation as text. </summary>
+ public override System.String ToString()
+ {
+ return ToString(0);
+ }
+
+ protected internal virtual System.String ToString(int depth)
+ {
+ System.Text.StringBuilder buffer = new System.Text.StringBuilder();
+ for (int i = 0; i < depth; i++)
+ {
+ buffer.Append(" ");
+ }
+ buffer.Append(Summary);
+ buffer.Append("\n");
+
+ Explanation[] details = GetDetails();
+ if (details != null)
+ {
+ for (int i = 0; i < details.Length; i++)
+ {
+ buffer.Append(details[i].ToString(depth + 1));
+ }
+ }
+
+ return buffer.ToString();
+ }
+
+ /// <summary>Render an explanation as HTML. </summary>
+ public virtual System.String ToHtml()
+ {
+ System.Text.StringBuilder buffer = new System.Text.StringBuilder();
+ buffer.Append("<list>\n");
+
+ buffer.Append("<item>");
+ buffer.Append(Summary);
+ buffer.Append("<br />\n");
+
+ Explanation[] details = GetDetails();
+ if (details != null)
+ {
+ for (int i = 0; i < details.Length; i++)
+ {
+ buffer.Append(details[i].ToHtml());
+ }
+ }
+
+ buffer.Append("</item>\n");
+ buffer.Append("</list>\n");
+
+ return buffer.ToString();
+ }
+
+		/// <summary> Small utility class used to pass both an idf factor and an
+		/// explanation for that factor.
+		///
+		/// This class will likely be held on a <see cref="Weight" />, so be careful
+		/// before storing any large or non-serializable fields.
+ ///
+ /// </summary>
+ [Serializable]
+ public abstract class IDFExplanation
+ {
+ /// <value> the idf factor </value>
+ public abstract float Idf { get; }
+
+ /// <summary> This should be calculated lazily if possible.
+ ///
+ /// </summary>
+ /// <returns> the explanation for the idf factor.
+ /// </returns>
+ public abstract System.String Explain();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/FieldCache.cs b/src/core/Search/FieldCache.cs
new file mode 100644
index 0000000..e0ac588
--- /dev/null
+++ b/src/core/Search/FieldCache.cs
@@ -0,0 +1,708 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.IO;
+using Lucene.Net.Support;
+using Double = Lucene.Net.Support.Double;
+using NumericTokenStream = Lucene.Net.Analysis.NumericTokenStream;
+using NumericField = Lucene.Net.Documents.NumericField;
+using IndexReader = Lucene.Net.Index.IndexReader;
+using NumericUtils = Lucene.Net.Util.NumericUtils;
+using RamUsageEstimator = Lucene.Net.Util.RamUsageEstimator;
+using Single = Lucene.Net.Support.Single;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> Expert: Maintains caches of term values.
+ ///
+ /// <p/>Created: May 19, 2004 11:13:14 AM
+ ///
+ /// </summary>
+ /// <since> lucene 1.4
+ /// </since>
+ /// <version> $Id: FieldCache.java 807841 2009-08-25 22:27:31Z markrmiller $
+ /// </version>
+ /// <seealso cref="Lucene.Net.Util.FieldCacheSanityChecker">
+ /// </seealso>
+ public sealed class CreationPlaceholder
+ {
+ internal System.Object value_Renamed;
+ }
+ /// <summary>Expert: Stores term text values and document ordering data. </summary>
+ public class StringIndex
+ {
+
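+		/// <summary>Binary-searches <c>key</c> in the sorted <see cref="lookup" /> array over indexes
+		/// 1..lookup.Length-1 (index 0 is reserved for the null term). Returns the index of the key
+		/// if found; otherwise <c>-(insertionPoint + 1)</c>, mirroring the Java
+		/// <c>Arrays.binarySearch</c> convention. A <c>null</c> key returns 0.</summary>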
+ public virtual int BinarySearchLookup(System.String key)
+ {
+ // this special case is the reason that Arrays.binarySearch() isn't useful.
+ if (key == null)
+ return 0;
+
+ int low = 1;
+ int high = lookup.Length - 1;
+
+ while (low <= high)
+ {
+ int mid = Number.URShift((low + high), 1);
+ int cmp = String.CompareOrdinal(lookup[mid], key);
+
+ if (cmp < 0)
+ low = mid + 1;
+ else if (cmp > 0)
+ high = mid - 1;
+ else
+ return mid; // key found
+ }
+ return -(low + 1); // key not found.
+ }
+
+ /// <summary>All the term values, in natural order. </summary>
+ public System.String[] lookup;
+
+ /// <summary>For each document, an index into the lookup array. </summary>
+ public int[] order;
+
+ /// <summary>Creates one of these objects </summary>
+ public StringIndex(int[] values, System.String[] lookup)
+ {
+ this.order = values;
+ this.lookup = lookup;
+ }
+ }
+ /// <summary> EXPERT: A unique Identifier/Description for each item in the FieldCache.
+ /// Can be useful for logging/debugging.
+ /// <p/>
+ /// <b>EXPERIMENTAL API:</b> This API is considered extremely advanced
+ /// and experimental. It may be removed or altered w/o warning in future
+ /// releases
+ /// of Lucene.
+ /// <p/>
+ /// </summary>
+ public abstract class CacheEntry
+ {
+ public abstract object ReaderKey { get; }
+ public abstract string FieldName { get; }
+ public abstract Type CacheType { get; }
+ public abstract object Custom { get; }
+ public abstract object Value { get; }
+
+ /// <seealso cref="EstimateSize(RamUsageEstimator)">
+ /// </seealso>
+ public virtual void EstimateSize()
+ {
+ EstimateSize(new RamUsageEstimator(false)); // doesn't check for interned
+ }
+ /// <summary> Computes (and stores) the estimated size of the cache Value </summary>
+ /// <seealso cref="EstimatedSize">
+ /// </seealso>
+ public virtual void EstimateSize(RamUsageEstimator ramCalc)
+ {
+ long size = ramCalc.EstimateRamUsage(Value);
+			EstimatedSize = RamUsageEstimator.HumanReadableUnits(size, new System.Globalization.NumberFormatInfo()); // {{Aroush-2.9}} in Java, the formatter is set to "0.#", so we need to do the same in C#
+ }
+
+ /// <summary> The most recently estimated size of the value, null unless
+ /// estimateSize has been called.
+ /// </summary>
+ public string EstimatedSize { get; protected internal set; }
+
+
+ public override System.String ToString()
+ {
+ var b = new System.Text.StringBuilder();
+ b.Append("'").Append(ReaderKey).Append("'=>");
+ b.Append("'").Append(FieldName).Append("',");
+ b.Append(CacheType).Append(",").Append(Custom);
+ b.Append("=>").Append(Value.GetType().FullName).Append("#");
+ b.Append(Value.GetHashCode());
+
+ System.String s = EstimatedSize;
+ if (null != s)
+ {
+ b.Append(" (size =~ ").Append(s).Append(')');
+ }
+
+ return b.ToString();
+ }
+ }
+ public struct FieldCache_Fields
+ {
+ /// <summary>Indicator for StringIndex values in the cache. </summary>
+ // NOTE: the value assigned to this constant must not be
+ // the same as any of those in SortField!!
+ public readonly static int STRING_INDEX = -1;
+ /// <summary>Expert: The cache used internally by sorting and range query classes. </summary>
+ public readonly static FieldCache DEFAULT;
+ /// <summary>The default parser for byte values, which are encoded by <see cref="byte.ToString()" /> </summary>
+ public readonly static ByteParser DEFAULT_BYTE_PARSER;
+ /// <summary>The default parser for short values, which are encoded by <see cref="short.ToString()" /> </summary>
+ public readonly static ShortParser DEFAULT_SHORT_PARSER;
+ /// <summary>The default parser for int values, which are encoded by <see cref="int.ToString()" /> </summary>
+ public readonly static IntParser DEFAULT_INT_PARSER;
+ /// <summary>The default parser for float values, which are encoded by <see cref="float.ToString()" /> </summary>
+ public readonly static FloatParser DEFAULT_FLOAT_PARSER;
+ /// <summary>The default parser for long values, which are encoded by <see cref="long.ToString()" /> </summary>
+ public readonly static LongParser DEFAULT_LONG_PARSER;
+ /// <summary>The default parser for double values, which are encoded by <see cref="double.ToString()" /> </summary>
+ public readonly static DoubleParser DEFAULT_DOUBLE_PARSER;
+ /// <summary> A parser instance for int values encoded by <see cref="NumericUtils.IntToPrefixCoded(int)" />, e.g. when indexed
+ /// via <see cref="NumericField" />/<see cref="NumericTokenStream" />.
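+		/// <p/>
+		/// Illustrative use (assumes a hypothetical "price" field indexed as a <see cref="NumericField" />):
+		/// <code>
+		/// int[] prices = FieldCache_Fields.DEFAULT.GetInts(reader, "price", FieldCache_Fields.NUMERIC_UTILS_INT_PARSER);
+		/// </code>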
+ /// </summary>
+ public readonly static IntParser NUMERIC_UTILS_INT_PARSER;
+ /// <summary> A parser instance for float values encoded with <see cref="NumericUtils" />, e.g. when indexed
+ /// via <see cref="NumericField" />/<see cref="NumericTokenStream" />.
+ /// </summary>
+ public readonly static FloatParser NUMERIC_UTILS_FLOAT_PARSER;
+ /// <summary> A parser instance for long values encoded by <see cref="NumericUtils.LongToPrefixCoded(long)" />, e.g. when indexed
+ /// via <see cref="NumericField" />/<see cref="NumericTokenStream" />.
+ /// </summary>
+ public readonly static LongParser NUMERIC_UTILS_LONG_PARSER;
+ /// <summary> A parser instance for double values encoded with <see cref="NumericUtils" />, e.g. when indexed
+ /// via <see cref="NumericField" />/<see cref="NumericTokenStream" />.
+ /// </summary>
+ public readonly static DoubleParser NUMERIC_UTILS_DOUBLE_PARSER;
+ static FieldCache_Fields()
+ {
+ DEFAULT = new FieldCacheImpl();
+ DEFAULT_BYTE_PARSER = new AnonymousClassByteParser();
+ DEFAULT_SHORT_PARSER = new AnonymousClassShortParser();
+ DEFAULT_INT_PARSER = new AnonymousClassIntParser();
+ DEFAULT_FLOAT_PARSER = new AnonymousClassFloatParser();
+ DEFAULT_LONG_PARSER = new AnonymousClassLongParser();
+ DEFAULT_DOUBLE_PARSER = new AnonymousClassDoubleParser();
+ NUMERIC_UTILS_INT_PARSER = new AnonymousClassIntParser1();
+ NUMERIC_UTILS_FLOAT_PARSER = new AnonymousClassFloatParser1();
+ NUMERIC_UTILS_LONG_PARSER = new AnonymousClassLongParser1();
+ NUMERIC_UTILS_DOUBLE_PARSER = new AnonymousClassDoubleParser1();
+ }
+ }
+
+ [Serializable]
+ class AnonymousClassByteParser : ByteParser
+ {
+ public virtual sbyte ParseByte(System.String value_Renamed)
+ {
+ return System.SByte.Parse(value_Renamed);
+ }
+ protected internal virtual System.Object ReadResolve()
+ {
+ return Lucene.Net.Search.FieldCache_Fields.DEFAULT_BYTE_PARSER;
+ }
+ public override System.String ToString()
+ {
+ return typeof(FieldCache).FullName + ".DEFAULT_BYTE_PARSER";
+ }
+ }
+ [Serializable]
+ class AnonymousClassShortParser : ShortParser
+ {
+ public virtual short ParseShort(System.String value_Renamed)
+ {
+ return System.Int16.Parse(value_Renamed);
+ }
+ protected internal virtual System.Object ReadResolve()
+ {
+ return Lucene.Net.Search.FieldCache_Fields.DEFAULT_SHORT_PARSER;
+ }
+ public override System.String ToString()
+ {
+ return typeof(FieldCache).FullName + ".DEFAULT_SHORT_PARSER";
+ }
+ }
+ [Serializable]
+ class AnonymousClassIntParser : IntParser
+ {
+ public virtual int ParseInt(System.String value_Renamed)
+ {
+ return System.Int32.Parse(value_Renamed);
+ }
+ protected internal virtual System.Object ReadResolve()
+ {
+ return Lucene.Net.Search.FieldCache_Fields.DEFAULT_INT_PARSER;
+ }
+ public override System.String ToString()
+ {
+ return typeof(FieldCache).FullName + ".DEFAULT_INT_PARSER";
+ }
+ }
+ [Serializable]
+ class AnonymousClassFloatParser : FloatParser
+ {
+ public virtual float ParseFloat(System.String value_Renamed)
+ {
+ try
+ {
+ return Single.Parse(value_Renamed);
+ }
+ catch (System.OverflowException)
+ {
+				return value_Renamed.StartsWith("-") ? float.NegativeInfinity : float.PositiveInfinity; // a negative overflowing value maps to negative infinity
+ }
+ }
+ protected internal virtual System.Object ReadResolve()
+ {
+ return Lucene.Net.Search.FieldCache_Fields.DEFAULT_FLOAT_PARSER;
+ }
+ public override System.String ToString()
+ {
+ return typeof(FieldCache).FullName + ".DEFAULT_FLOAT_PARSER";
+ }
+ }
+ [Serializable]
+ class AnonymousClassLongParser : LongParser
+ {
+ public virtual long ParseLong(System.String value_Renamed)
+ {
+ return System.Int64.Parse(value_Renamed);
+ }
+ protected internal virtual System.Object ReadResolve()
+ {
+ return Lucene.Net.Search.FieldCache_Fields.DEFAULT_LONG_PARSER;
+ }
+ public override System.String ToString()
+ {
+ return typeof(FieldCache).FullName + ".DEFAULT_LONG_PARSER";
+ }
+ }
+ [Serializable]
+ class AnonymousClassDoubleParser : DoubleParser
+ {
+ public virtual double ParseDouble(System.String value_Renamed)
+ {
+ return Double.Parse(value_Renamed);
+ }
+ protected internal virtual System.Object ReadResolve()
+ {
+ return Lucene.Net.Search.FieldCache_Fields.DEFAULT_DOUBLE_PARSER;
+ }
+ public override System.String ToString()
+ {
+ return typeof(FieldCache).FullName + ".DEFAULT_DOUBLE_PARSER";
+ }
+ }
+ [Serializable]
+ class AnonymousClassIntParser1 : IntParser
+ {
+ public virtual int ParseInt(System.String val)
+ {
+ int shift = val[0] - NumericUtils.SHIFT_START_INT;
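+			// a non-zero shift marks a lower-precision trie term produced by NumericUtils;
+			// such terms sort after all full-precision values, so once one is seen the cache
+			// is complete and filling can stop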
+ if (shift > 0 && shift <= 31)
+ throw new FieldCacheImpl.StopFillCacheException();
+ return NumericUtils.PrefixCodedToInt(val);
+ }
+ protected internal virtual System.Object ReadResolve()
+ {
+ return Lucene.Net.Search.FieldCache_Fields.NUMERIC_UTILS_INT_PARSER;
+ }
+ public override System.String ToString()
+ {
+ return typeof(FieldCache).FullName + ".NUMERIC_UTILS_INT_PARSER";
+ }
+ }
+ [Serializable]
+ class AnonymousClassFloatParser1 : FloatParser
+ {
+ public virtual float ParseFloat(System.String val)
+ {
+ int shift = val[0] - NumericUtils.SHIFT_START_INT;
+ if (shift > 0 && shift <= 31)
+ throw new FieldCacheImpl.StopFillCacheException();
+ return NumericUtils.SortableIntToFloat(NumericUtils.PrefixCodedToInt(val));
+ }
+ protected internal virtual System.Object ReadResolve()
+ {
+ return Lucene.Net.Search.FieldCache_Fields.NUMERIC_UTILS_FLOAT_PARSER;
+ }
+ public override System.String ToString()
+ {
+ return typeof(FieldCache).FullName + ".NUMERIC_UTILS_FLOAT_PARSER";
+ }
+ }
+ [Serializable]
+ class AnonymousClassLongParser1 : LongParser
+ {
+ public virtual long ParseLong(System.String val)
+ {
+ int shift = val[0] - NumericUtils.SHIFT_START_LONG;
+ if (shift > 0 && shift <= 63)
+ throw new FieldCacheImpl.StopFillCacheException();
+ return NumericUtils.PrefixCodedToLong(val);
+ }
+ protected internal virtual System.Object ReadResolve()
+ {
+ return Lucene.Net.Search.FieldCache_Fields.NUMERIC_UTILS_LONG_PARSER;
+ }
+ public override System.String ToString()
+ {
+ return typeof(FieldCache).FullName + ".NUMERIC_UTILS_LONG_PARSER";
+ }
+ }
+ [Serializable]
+ class AnonymousClassDoubleParser1 : DoubleParser
+ {
+ public virtual double ParseDouble(System.String val)
+ {
+ int shift = val[0] - NumericUtils.SHIFT_START_LONG;
+ if (shift > 0 && shift <= 63)
+ throw new FieldCacheImpl.StopFillCacheException();
+ return NumericUtils.SortableLongToDouble(NumericUtils.PrefixCodedToLong(val));
+ }
+ protected internal virtual System.Object ReadResolve()
+ {
+ return Lucene.Net.Search.FieldCache_Fields.NUMERIC_UTILS_DOUBLE_PARSER;
+ }
+ public override System.String ToString()
+ {
+ return typeof(FieldCache).FullName + ".NUMERIC_UTILS_DOUBLE_PARSER";
+ }
+ }
+
+ public interface FieldCache
+ {
+
+ /// <summary>Checks the internal cache for an appropriate entry, and if none is
+ /// found, reads the terms in <c>field</c> as a single byte and returns an array
+ /// of size <c>reader.MaxDoc</c> of the value each document
+ /// has in the given field.
+ /// </summary>
+ /// <param name="reader"> Used to get field values.
+ /// </param>
+ /// <param name="field"> Which field contains the single byte values.
+ /// </param>
+ /// <returns> The values in the given field for each document.
+ /// </returns>
+ /// <throws> IOException If any error occurs. </throws>
+ sbyte[] GetBytes(IndexReader reader, System.String field);
+
+ /// <summary>Checks the internal cache for an appropriate entry, and if none is found,
+ /// reads the terms in <c>field</c> as bytes and returns an array of
+ /// size <c>reader.MaxDoc</c> of the value each document has in the
+ /// given field.
+ /// </summary>
+ /// <param name="reader"> Used to get field values.
+ /// </param>
+ /// <param name="field"> Which field contains the bytes.
+ /// </param>
+ /// <param name="parser"> Computes byte for string values.
+ /// </param>
+ /// <returns> The values in the given field for each document.
+ /// </returns>
+ /// <throws> IOException If any error occurs. </throws>
+ sbyte[] GetBytes(IndexReader reader, System.String field, ByteParser parser);
+
+ /// <summary>Checks the internal cache for an appropriate entry, and if none is
+ /// found, reads the terms in <c>field</c> as shorts and returns an array
+ /// of size <c>reader.MaxDoc</c> of the value each document
+ /// has in the given field.
+ /// </summary>
+ /// <param name="reader"> Used to get field values.
+ /// </param>
+ /// <param name="field"> Which field contains the shorts.
+ /// </param>
+ /// <returns> The values in the given field for each document.
+ /// </returns>
+ /// <throws> IOException If any error occurs. </throws>
+ short[] GetShorts(IndexReader reader, System.String field);
+
+ /// <summary>Checks the internal cache for an appropriate entry, and if none is found,
+ /// reads the terms in <c>field</c> as shorts and returns an array of
+ /// size <c>reader.MaxDoc</c> of the value each document has in the
+ /// given field.
+ /// </summary>
+ /// <param name="reader"> Used to get field values.
+ /// </param>
+ /// <param name="field"> Which field contains the shorts.
+ /// </param>
+ /// <param name="parser"> Computes short for string values.
+ /// </param>
+ /// <returns> The values in the given field for each document.
+ /// </returns>
+ /// <throws> IOException If any error occurs. </throws>
+ short[] GetShorts(IndexReader reader, System.String field, ShortParser parser);
+
+ /// <summary>Checks the internal cache for an appropriate entry, and if none is
+ /// found, reads the terms in <c>field</c> as integers and returns an array
+ /// of size <c>reader.MaxDoc</c> of the value each document
+ /// has in the given field.
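+		/// <p/>
+		/// Typical use through the default cache (an illustrative sketch; "price" is a
+		/// hypothetical field whose terms are plain integer strings):
+		/// <code>
+		/// int[] prices = FieldCache_Fields.DEFAULT.GetInts(reader, "price");
+		/// int priceOfDoc7 = prices[7]; // indexed by document number; length == reader.MaxDoc
+		/// </code>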
+ /// </summary>
+ /// <param name="reader"> Used to get field values.
+ /// </param>
+ /// <param name="field"> Which field contains the integers.
+ /// </param>
+ /// <returns> The values in the given field for each document.
+ /// </returns>
+ /// <throws> IOException If any error occurs. </throws>
+ int[] GetInts(IndexReader reader, System.String field);
+
+ /// <summary>Checks the internal cache for an appropriate entry, and if none is found,
+ /// reads the terms in <c>field</c> as integers and returns an array of
+ /// size <c>reader.MaxDoc</c> of the value each document has in the
+ /// given field.
+ /// </summary>
+ /// <param name="reader"> Used to get field values.
+ /// </param>
+ /// <param name="field"> Which field contains the integers.
+ /// </param>
+ /// <param name="parser"> Computes integer for string values.
+ /// </param>
+ /// <returns> The values in the given field for each document.
+ /// </returns>
+ /// <throws> IOException If any error occurs. </throws>
+ int[] GetInts(IndexReader reader, System.String field, IntParser parser);
+
+ /// <summary>Checks the internal cache for an appropriate entry, and if
+ /// none is found, reads the terms in <c>field</c> as floats and returns an array
+ /// of size <c>reader.MaxDoc</c> of the value each document
+ /// has in the given field.
+ /// </summary>
+ /// <param name="reader"> Used to get field values.
+ /// </param>
+ /// <param name="field"> Which field contains the floats.
+ /// </param>
+ /// <returns> The values in the given field for each document.
+ /// </returns>
+ /// <throws> IOException If any error occurs. </throws>
+ float[] GetFloats(IndexReader reader, System.String field);
+
+ /// <summary>Checks the internal cache for an appropriate entry, and if
+ /// none is found, reads the terms in <c>field</c> as floats and returns an array
+ /// of size <c>reader.MaxDoc</c> of the value each document
+ /// has in the given field.
+ /// </summary>
+ /// <param name="reader"> Used to get field values.
+ /// </param>
+ /// <param name="field"> Which field contains the floats.
+ /// </param>
+ /// <param name="parser"> Computes float for string values.
+ /// </param>
+ /// <returns> The values in the given field for each document.
+ /// </returns>
+ /// <throws> IOException If any error occurs. </throws>
+ float[] GetFloats(IndexReader reader, System.String field, FloatParser parser);
+
+ /// <summary> Checks the internal cache for an appropriate entry, and if none is
+ /// found, reads the terms in <c>field</c> as longs and returns an array
+ /// of size <c>reader.MaxDoc</c> of the value each document
+ /// has in the given field.
+ ///
+ /// </summary>
+ /// <param name="reader">Used to get field values.
+ /// </param>
+ /// <param name="field"> Which field contains the longs.
+ /// </param>
+ /// <returns> The values in the given field for each document.
+ /// </returns>
+		/// <throws> IOException If any error occurs. </throws>
+ long[] GetLongs(IndexReader reader, System.String field);
+
+ /// <summary> Checks the internal cache for an appropriate entry, and if none is found,
+ /// reads the terms in <c>field</c> as longs and returns an array of
+ /// size <c>reader.MaxDoc</c> of the value each document has in the
+ /// given field.
+ ///
+ /// </summary>
+ /// <param name="reader">Used to get field values.
+ /// </param>
+ /// <param name="field"> Which field contains the longs.
+ /// </param>
+		/// <param name="parser">Computes long for string values.
+ /// </param>
+ /// <returns> The values in the given field for each document.
+ /// </returns>
+ /// <throws> IOException If any error occurs. </throws>
+ long[] GetLongs(IndexReader reader, System.String field, LongParser parser);
+
+
+ /// <summary> Checks the internal cache for an appropriate entry, and if none is
+		/// found, reads the terms in <c>field</c> as doubles and returns an array
+ /// of size <c>reader.MaxDoc</c> of the value each document
+ /// has in the given field.
+ ///
+ /// </summary>
+ /// <param name="reader">Used to get field values.
+ /// </param>
+ /// <param name="field"> Which field contains the doubles.
+ /// </param>
+ /// <returns> The values in the given field for each document.
+ /// </returns>
+ /// <throws> IOException If any error occurs. </throws>
+ double[] GetDoubles(IndexReader reader, System.String field);
+
+ /// <summary> Checks the internal cache for an appropriate entry, and if none is found,
+ /// reads the terms in <c>field</c> as doubles and returns an array of
+ /// size <c>reader.MaxDoc</c> of the value each document has in the
+ /// given field.
+ ///
+ /// </summary>
+ /// <param name="reader">Used to get field values.
+ /// </param>
+ /// <param name="field"> Which field contains the doubles.
+ /// </param>
+		/// <param name="parser">Computes double for string values.
+ /// </param>
+ /// <returns> The values in the given field for each document.
+ /// </returns>
+ /// <throws> IOException If any error occurs. </throws>
+ double[] GetDoubles(IndexReader reader, System.String field, DoubleParser parser);
+
+ /// <summary>Checks the internal cache for an appropriate entry, and if none
+ /// is found, reads the term values in <c>field</c> and returns an array
+ /// of size <c>reader.MaxDoc</c> containing the value each document
+ /// has in the given field.
+ /// </summary>
+ /// <param name="reader"> Used to get field values.
+ /// </param>
+ /// <param name="field"> Which field contains the strings.
+ /// </param>
+ /// <returns> The values in the given field for each document.
+ /// </returns>
+ /// <throws> IOException If any error occurs. </throws>
+ System.String[] GetStrings(IndexReader reader, System.String field);
+
+ /// <summary>Checks the internal cache for an appropriate entry, and if none
+ /// is found reads the term values in <c>field</c> and returns
+ /// an array of them in natural order, along with an array telling
+ /// which element in the term array each document uses.
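+		/// <p/>
+		/// A sketch of reading back the per-document term (illustrative; "category" is a
+		/// hypothetical field with at most one term per document):
+		/// <code>
+		/// StringIndex si = FieldCache_Fields.DEFAULT.GetStringIndex(reader, "category");
+		/// System.String termOfDoc3 = si.lookup[si.order[3]]; // null if document 3 has no term
+		/// </code>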
+ /// </summary>
+ /// <param name="reader"> Used to get field values.
+ /// </param>
+ /// <param name="field"> Which field contains the strings.
+ /// </param>
+ /// <returns> Array of terms and index into the array for each document.
+ /// </returns>
+ /// <throws> IOException If any error occurs. </throws>
+ StringIndex GetStringIndex(IndexReader reader, System.String field);
+
+ /// <summary> EXPERT: Generates an array of CacheEntry objects representing all items
+ /// currently in the FieldCache.
+ /// <p/>
+		/// NOTE: These CacheEntry objects maintain a strong reference to the
+		/// cached values. Holding references to a CacheEntry after the IndexReader
+		/// associated with it has been garbage collected will prevent the value itself
+		/// from being garbage collected when the cache drops its WeakReference.
+ /// <p/>
+ /// <p/>
+ /// <b>EXPERIMENTAL API:</b> This API is considered extremely advanced
+ /// and experimental. It may be removed or altered w/o warning in future
+ /// releases
+ /// of Lucene.
+ /// <p/>
+ /// </summary>
+ CacheEntry[] GetCacheEntries();
+
+ /// <summary> <p/>
+ /// EXPERT: Instructs the FieldCache to forcibly expunge all entries
+ /// from the underlying caches. This is intended only to be used for
+ /// test methods as a way to ensure a known base state of the Cache
+		/// (without needing to rely on GC to free WeakReferences).
+ /// It should not be relied on for "Cache maintenance" in general
+ /// application code.
+ /// <p/>
+ /// <p/>
+ /// <b>EXPERIMENTAL API:</b> This API is considered extremely advanced
+ /// and experimental. It may be removed or altered w/o warning in future
+ /// releases
+ /// of Lucene.
+ /// <p/>
+ /// </summary>
+ void PurgeAllCaches();
+
+ /// <summary>
+ /// Expert: drops all cache entries associated with this
+ /// reader. NOTE: this reader must precisely match the
+ /// reader that the cache entry is keyed on. If you pass a
+ /// top-level reader, it usually will have no effect as
+ /// Lucene now caches at the segment reader level.
+ /// </summary>
+ void Purge(IndexReader r);
+
+ /// <summary> Gets or sets the InfoStream for this FieldCache.
+ /// <para>If non-null, FieldCacheImpl will warn whenever
+ /// entries are created that are not sane according to
+ /// <see cref="Lucene.Net.Util.FieldCacheSanityChecker" />.
+ /// </para>
+ /// </summary>
+ StreamWriter InfoStream { get; set; }
+ }
+
+ /// <summary> Marker interface as super-interface to all parsers. It
+ /// is used to specify a custom parser to <see cref="SortField(String, Parser)" />.
+ /// </summary>
+ public interface Parser
+ {
+ }
+
+ /// <summary>Interface to parse bytes from document fields.</summary>
+ /// <seealso cref="FieldCache.GetBytes(IndexReader, String, ByteParser)">
+ /// </seealso>
+ public interface ByteParser : Parser
+ {
+ /// <summary>Return a single Byte representation of this field's value. </summary>
+ sbyte ParseByte(System.String string_Renamed);
+ }
+
+ /// <summary>Interface to parse shorts from document fields.</summary>
+ /// <seealso cref="FieldCache.GetShorts(IndexReader, String, ShortParser)">
+ /// </seealso>
+ public interface ShortParser : Parser
+ {
+ /// <summary>Return a short representation of this field's value. </summary>
+ short ParseShort(System.String string_Renamed);
+ }
+
+ /// <summary>Interface to parse ints from document fields.</summary>
+ /// <seealso cref="FieldCache.GetInts(IndexReader, String, IntParser)">
+ /// </seealso>
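+	/// <remarks> A minimal custom parser sketch (illustrative only; assumes the field's
+	/// terms are hexadecimal strings such as "ff") that can be passed to
+	/// <see cref="FieldCache.GetInts(IndexReader, String, IntParser)" />:
+	/// <code>
+	/// class HexIntParser : IntParser
+	/// {
+	///     public int ParseInt(System.String value)
+	///     {
+	///         return System.Convert.ToInt32(value, 16); // parse base-16 text
+	///     }
+	/// }
+	/// </code>
+	/// </remarks>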
+ public interface IntParser : Parser
+ {
+ /// <summary>Return an integer representation of this field's value. </summary>
+ int ParseInt(System.String string_Renamed);
+ }
+
+ /// <summary>Interface to parse floats from document fields.</summary>
+ /// <seealso cref="FieldCache.GetFloats(IndexReader, String, FloatParser)">
+ /// </seealso>
+ public interface FloatParser : Parser
+ {
+		/// <summary>Return a float representation of this field's value. </summary>
+ float ParseFloat(System.String string_Renamed);
+ }
+
+ /// <summary>Interface to parse long from document fields.</summary>
+ /// <seealso cref="FieldCache.GetLongs(IndexReader, String, LongParser)">
+ /// </seealso>
+ public interface LongParser : Parser
+ {
+		/// <summary>Return a long representation of this field's value. </summary>
+ long ParseLong(System.String string_Renamed);
+ }
+
+ /// <summary>Interface to parse doubles from document fields.</summary>
+ /// <seealso cref="FieldCache.GetDoubles(IndexReader, String, DoubleParser)">
+ /// </seealso>
+ public interface DoubleParser : Parser
+ {
+		/// <summary>Return a double representation of this field's value. </summary>
+ double ParseDouble(System.String string_Renamed);
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/FieldCacheImpl.cs b/src/core/Search/FieldCacheImpl.cs
new file mode 100644
index 0000000..6c24dcc
--- /dev/null
+++ b/src/core/Search/FieldCacheImpl.cs
@@ -0,0 +1,858 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using Lucene.Net.Support;
+using NumericField = Lucene.Net.Documents.NumericField;
+using IndexReader = Lucene.Net.Index.IndexReader;
+using Term = Lucene.Net.Index.Term;
+using TermDocs = Lucene.Net.Index.TermDocs;
+using TermEnum = Lucene.Net.Index.TermEnum;
+using FieldCacheSanityChecker = Lucene.Net.Util.FieldCacheSanityChecker;
+using Single = Lucene.Net.Support.Single;
+using StringHelper = Lucene.Net.Util.StringHelper;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> Expert: The default cache implementation, storing all values in memory.
+ /// A WeakDictionary is used for storage.
+ ///
+ /// <p/>Created: May 19, 2004 4:40:36 PM
+ ///
+ /// </summary>
+ /// <since> lucene 1.4
+ /// </since>
+ class FieldCacheImpl : FieldCache
+ {
+ private IDictionary<Type, Cache> caches;
+
+ internal FieldCacheImpl()
+ {
+ Init();
+ }
+ private void Init()
+ {
+ lock (this)
+ {
+ caches = new HashMap<Type, Cache>(7);
+ caches[typeof(sbyte)] = new ByteCache(this);
+ caches[typeof(short)] = new ShortCache(this);
+ caches[typeof(int)] = new IntCache(this);
+ caches[typeof(float)] = new FloatCache(this);
+ caches[typeof(long)] = new LongCache(this);
+ caches[typeof(double)] = new DoubleCache(this);
+ caches[typeof(string)] = new StringCache(this);
+ caches[typeof(StringIndex)] = new StringIndexCache(this);
+ }
+ }
+
+ // lucene.net: java version 3.0.3 with patch in rev. 912330 applied:
+ // uschindler 21/02/2010 12:16:42 LUCENE-2273: Fixed bug in FieldCacheImpl.getCacheEntries() that used
+ // WeakHashMap incorrectly and lead to ConcurrentModificationException
+ public virtual void PurgeAllCaches()
+ {
+ lock (this)
+ {
+ Init();
+ }
+ }
+
+ // lucene.net: java version 3.0.3 with patch in rev. 912330 applied:
+ // uschindler 21/02/2010 12:16:42 LUCENE-2273: Fixed bug in FieldCacheImpl.getCacheEntries() that used
+ // WeakHashMap incorrectly and lead to ConcurrentModificationException
+ public void Purge(IndexReader r)
+ {
+ lock (this)
+ {
+ foreach (Cache c in caches.Values)
+ {
+ c.Purge(r);
+ }
+ }
+ }
+
+ // lucene.net: java version 3.0.3 with patch in rev. 912330 applied:
+ // uschindler 21/02/2010 12:16:42 LUCENE-2273: Fixed bug in FieldCacheImpl.getCacheEntries() that used
+ // WeakHashMap incorrectly and lead to ConcurrentModificationException
+ public virtual CacheEntry[] GetCacheEntries()
+ {
+ lock (this)
+ {
+ IList<CacheEntry> result = new List<CacheEntry>(17);
+ foreach (var cacheEntry in caches)
+ {
+ var cache = cacheEntry.Value;
+ var cacheType = cacheEntry.Key;
+ lock (cache.readerCache)
+ {
+ foreach (var readerCacheEntry in cache.readerCache)
+ {
+ var readerKey = readerCacheEntry.Key;
+ var innerCache = readerCacheEntry.Value;
+ foreach (var mapEntry in innerCache)
+ {
+ Entry entry = mapEntry.Key;
+ result.Add(new CacheEntryImpl(readerKey, entry.field, cacheType, entry.custom, mapEntry.Value));
+ }
+ }
+ }
+ }
+ return result.ToArray();
+ }
+ }
+
+ private sealed class CacheEntryImpl : CacheEntry
+ {
+ private System.Object readerKey;
+ private System.String fieldName;
+ private System.Type cacheType;
+ private System.Object custom;
+ private System.Object value;
+ internal CacheEntryImpl(System.Object readerKey, System.String fieldName, System.Type cacheType, System.Object custom, System.Object value)
+ {
+ this.readerKey = readerKey;
+ this.fieldName = fieldName;
+ this.cacheType = cacheType;
+ this.custom = custom;
+ this.value = value;
+
+ // :HACK: for testing.
+ // if (null != locale || SortField.CUSTOM != sortFieldType) {
+ // throw new RuntimeException("Locale/sortFieldType: " + this);
+ // }
+ }
+
+ public override object ReaderKey
+ {
+ get { return readerKey; }
+ }
+
+ public override string FieldName
+ {
+ get { return fieldName; }
+ }
+
+ public override Type CacheType
+ {
+ get { return cacheType; }
+ }
+
+ public override object Custom
+ {
+ get { return custom; }
+ }
+
+ public override object Value
+ {
+ get { return value; }
+ }
+ }
+
+ /// <summary> Hack: When thrown from a Parser (NUMERIC_UTILS_* ones), this stops
+ /// processing terms and returns the current FieldCache
+ /// array.
+ /// </summary>
+ [Serializable]
+ internal sealed class StopFillCacheException:System.SystemException
+ {
+ }
+
+ /// <summary>Expert: Internal cache. </summary>
+ internal abstract class Cache
+ {
+ internal Cache()
+ {
+ this.wrapper = null;
+ }
+
+ internal Cache(FieldCache wrapper)
+ {
+ this.wrapper = wrapper;
+ }
+
+ internal FieldCache wrapper;
+
+ internal IDictionary<object, IDictionary<Entry, object>> readerCache = new WeakDictionary<object, IDictionary<Entry, object>>();
+
+ protected internal abstract System.Object CreateValue(IndexReader reader, Entry key);
+
+ /* Remove this reader from the cache, if present. */
+ public void Purge(IndexReader r)
+ {
+ object readerKey = r.FieldCacheKey;
+ lock (readerCache)
+ {
+ readerCache.Remove(readerKey);
+ }
+ }
+
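+			/// <summary>Returns the cached value for <c>key</c> under <c>reader</c>, creating it on first
+			/// use. A <see cref="CreationPlaceholder" /> is installed while holding the cache lock, and the
+			/// potentially slow <see cref="CreateValue" /> call is then made while holding only the
+			/// placeholder's lock, so lookups of other keys are not blocked during creation.</summary>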
+ public virtual System.Object Get(IndexReader reader, Entry key)
+ {
+ IDictionary<Entry, object> innerCache;
+ System.Object value;
+ System.Object readerKey = reader.FieldCacheKey;
+ lock (readerCache)
+ {
+ innerCache = readerCache[readerKey];
+ if (innerCache == null)
+ {
+ innerCache = new HashMap<Entry, object>();
+ readerCache[readerKey] = innerCache;
+ value = null;
+ }
+ else
+ {
+ value = innerCache[key];
+ }
+ if (value == null)
+ {
+ value = new CreationPlaceholder();
+ innerCache[key] = value;
+ }
+ }
+ if (value is CreationPlaceholder)
+ {
+ lock (value)
+ {
+ CreationPlaceholder progress = (CreationPlaceholder) value;
+ if (progress.value_Renamed == null)
+ {
+ progress.value_Renamed = CreateValue(reader, key);
+ lock (readerCache)
+ {
+ innerCache[key] = progress.value_Renamed;
+ }
+
+ // Only check if key.custom (the parser) is
+ // non-null; else, we check twice for a single
+ // call to FieldCache.getXXX
+ if (key.custom != null && wrapper != null)
+ {
+ System.IO.StreamWriter infoStream = wrapper.InfoStream;
+ if (infoStream != null)
+ {
+ PrintNewInsanity(infoStream, progress.value_Renamed);
+ }
+ }
+ }
+ return progress.value_Renamed;
+ }
+ }
+ return value;
+ }
+
+ private void PrintNewInsanity(System.IO.StreamWriter infoStream, System.Object value_Renamed)
+ {
+ FieldCacheSanityChecker.Insanity[] insanities = FieldCacheSanityChecker.CheckSanity(wrapper);
+ for (int i = 0; i < insanities.Length; i++)
+ {
+ FieldCacheSanityChecker.Insanity insanity = insanities[i];
+ CacheEntry[] entries = insanity.GetCacheEntries();
+ for (int j = 0; j < entries.Length; j++)
+ {
+ if (entries[j].Value == value_Renamed)
+ {
+ // OK this insanity involves our entry
+ infoStream.WriteLine("WARNING: new FieldCache insanity created\nDetails: " + insanity.ToString());
+ infoStream.WriteLine("\nStack:\n");
+ infoStream.WriteLine(new System.Exception());
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ /// <summary>Expert: Every composite-key in the internal cache is of this type. </summary>
+ protected internal class Entry
+ {
+ internal System.String field; // which Fieldable
+ internal System.Object custom; // which custom comparator or parser
+
+ /// <summary>Creates one of these objects for a custom comparator/parser. </summary>
+ internal Entry(System.String field, System.Object custom)
+ {
+ this.field = StringHelper.Intern(field);
+ this.custom = custom;
+ }
+
+ /// <summary>Two of these are equal iff they reference the same field and type. </summary>
+ public override bool Equals(System.Object o)
+ {
+ if (o is Entry)
+ {
+ Entry other = (Entry) o;
+ if (other.field == field)
+ {
+ if (other.custom == null)
+ {
+ if (custom == null)
+ return true;
+ }
+ else if (other.custom.Equals(custom))
+ {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
+ /// <summary>Composes a hashcode based on the field and type. </summary>
+ public override int GetHashCode()
+ {
+ return field.GetHashCode() ^ (custom == null?0:custom.GetHashCode());
+ }
+ }
+
+ // inherit javadocs
+ public virtual sbyte[] GetBytes(IndexReader reader, System.String field)
+ {
+ return GetBytes(reader, field, null);
+ }
+
+ // inherit javadocs
+ public virtual sbyte[] GetBytes(IndexReader reader, System.String field, ByteParser parser)
+ {
+ return (sbyte[]) caches[typeof(sbyte)].Get(reader, new Entry(field, parser));
+ }
+
+ internal sealed class ByteCache:Cache
+ {
+ internal ByteCache(FieldCache wrapper):base(wrapper)
+ {
+ }
+ protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey)
+ {
+ Entry entry = entryKey;
+ System.String field = entry.field;
+ ByteParser parser = (ByteParser) entry.custom;
+ if (parser == null)
+ {
+ return wrapper.GetBytes(reader, field, Lucene.Net.Search.FieldCache_Fields.DEFAULT_BYTE_PARSER);
+ }
+ sbyte[] retArray = new sbyte[reader.MaxDoc];
+ TermDocs termDocs = reader.TermDocs();
+ TermEnum termEnum = reader.Terms(new Term(field));
+ try
+ {
+ do
+ {
+ Term term = termEnum.Term;
+ if (term == null || (System.Object) term.Field != (System.Object) field)
+ break;
+ sbyte termval = parser.ParseByte(term.Text);
+ termDocs.Seek(termEnum);
+ while (termDocs.Next())
+ {
+ retArray[termDocs.Doc] = termval;
+ }
+ }
+ while (termEnum.Next());
+ }
+ catch (StopFillCacheException)
+ {
+ }
+ finally
+ {
+ termDocs.Close();
+ termEnum.Close();
+ }
+ return retArray;
+ }
+ }
+
+
+ // inherit javadocs
+ public virtual short[] GetShorts(IndexReader reader, System.String field)
+ {
+ return GetShorts(reader, field, null);
+ }
+
+ // inherit javadocs
+ public virtual short[] GetShorts(IndexReader reader, System.String field, ShortParser parser)
+ {
+ return (short[]) caches[typeof(short)].Get(reader, new Entry(field, parser));
+ }
+
+ internal sealed class ShortCache:Cache
+ {
+ internal ShortCache(FieldCache wrapper):base(wrapper)
+ {
+ }
+
+ protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey)
+ {
+ Entry entry = entryKey;
+ System.String field = entry.field;
+ ShortParser parser = (ShortParser) entry.custom;
+ if (parser == null)
+ {
+ return wrapper.GetShorts(reader, field, Lucene.Net.Search.FieldCache_Fields.DEFAULT_SHORT_PARSER);
+ }
+ short[] retArray = new short[reader.MaxDoc];
+ TermDocs termDocs = reader.TermDocs();
+ TermEnum termEnum = reader.Terms(new Term(field));
+ try
+ {
+ do
+ {
+ Term term = termEnum.Term;
+ if (term == null || (System.Object) term.Field != (System.Object) field)
+ break;
+ short termval = parser.ParseShort(term.Text);
+ termDocs.Seek(termEnum);
+ while (termDocs.Next())
+ {
+ retArray[termDocs.Doc] = termval;
+ }
+ }
+ while (termEnum.Next());
+ }
+ catch (StopFillCacheException)
+ {
+ }
+ finally
+ {
+ termDocs.Close();
+ termEnum.Close();
+ }
+ return retArray;
+ }
+ }
+
+
+ // inherit javadocs
+ public virtual int[] GetInts(IndexReader reader, System.String field)
+ {
+ return GetInts(reader, field, null);
+ }
+
+ // inherit javadocs
+ public virtual int[] GetInts(IndexReader reader, System.String field, IntParser parser)
+ {
+ return (int[]) caches[typeof(int)].Get(reader, new Entry(field, parser));
+ }
+
+ internal sealed class IntCache:Cache
+ {
+ internal IntCache(FieldCache wrapper):base(wrapper)
+ {
+ }
+
+ protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey)
+ {
+ Entry entry = entryKey;
+ System.String field = entry.field;
+ IntParser parser = (IntParser) entry.custom;
+ if (parser == null)
+ {
+ try
+ {
+ return wrapper.GetInts(reader, field, Lucene.Net.Search.FieldCache_Fields.DEFAULT_INT_PARSER);
+ }
+ catch (System.FormatException)
+ {
+ return wrapper.GetInts(reader, field, Lucene.Net.Search.FieldCache_Fields.NUMERIC_UTILS_INT_PARSER);
+ }
+ }
+ int[] retArray = null;
+ TermDocs termDocs = reader.TermDocs();
+ TermEnum termEnum = reader.Terms(new Term(field));
+ try
+ {
+ do
+ {
+ Term term = termEnum.Term;
+ if (term == null || (System.Object) term.Field != (System.Object) field)
+ break;
+ int termval = parser.ParseInt(term.Text);
+ if (retArray == null)
+ // late init
+ retArray = new int[reader.MaxDoc];
+ termDocs.Seek(termEnum);
+ while (termDocs.Next())
+ {
+ retArray[termDocs.Doc] = termval;
+ }
+ }
+ while (termEnum.Next());
+ }
+ catch (StopFillCacheException)
+ {
+ }
+ finally
+ {
+ termDocs.Close();
+ termEnum.Close();
+ }
+ if (retArray == null)
+ // no values
+ retArray = new int[reader.MaxDoc];
+ return retArray;
+ }
+ }
+
+
+
+ // inherit javadocs
+ public virtual float[] GetFloats(IndexReader reader, System.String field)
+ {
+ return GetFloats(reader, field, null);
+ }
+
+ // inherit javadocs
+ public virtual float[] GetFloats(IndexReader reader, System.String field, FloatParser parser)
+ {
+
+ return (float[]) caches[typeof(float)].Get(reader, new Entry(field, parser));
+ }
+
+ internal sealed class FloatCache:Cache
+ {
+ internal FloatCache(FieldCache wrapper):base(wrapper)
+ {
+ }
+
+ protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey)
+ {
+ Entry entry = entryKey;
+ System.String field = entry.field;
+ FloatParser parser = (FloatParser) entry.custom;
+ if (parser == null)
+ {
+ try
+ {
+ return wrapper.GetFloats(reader, field, Lucene.Net.Search.FieldCache_Fields.DEFAULT_FLOAT_PARSER);
+ }
+ catch (System.FormatException)
+ {
+ return wrapper.GetFloats(reader, field, Lucene.Net.Search.FieldCache_Fields.NUMERIC_UTILS_FLOAT_PARSER);
+ }
+ }
+ float[] retArray = null;
+ TermDocs termDocs = reader.TermDocs();
+ TermEnum termEnum = reader.Terms(new Term(field));
+ try
+ {
+ do
+ {
+ Term term = termEnum.Term;
+ if (term == null || (System.Object) term.Field != (System.Object) field)
+ break;
+ float termval = parser.ParseFloat(term.Text);
+ if (retArray == null)
+ // late init
+ retArray = new float[reader.MaxDoc];
+ termDocs.Seek(termEnum);
+ while (termDocs.Next())
+ {
+ retArray[termDocs.Doc] = termval;
+ }
+ }
+ while (termEnum.Next());
+ }
+ catch (StopFillCacheException)
+ {
+ }
+ finally
+ {
+ termDocs.Close();
+ termEnum.Close();
+ }
+ if (retArray == null)
+ // no values
+ retArray = new float[reader.MaxDoc];
+ return retArray;
+ }
+ }
+
+
+
+ public virtual long[] GetLongs(IndexReader reader, System.String field)
+ {
+ return GetLongs(reader, field, null);
+ }
+
+ // inherit javadocs
+ public virtual long[] GetLongs(IndexReader reader, System.String field, Lucene.Net.Search.LongParser parser)
+ {
+ return (long[]) caches[typeof(long)].Get(reader, new Entry(field, parser));
+ }
+
+ internal sealed class LongCache:Cache
+ {
+ internal LongCache(FieldCache wrapper):base(wrapper)
+ {
+ }
+
+ protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey)
+ {
+ Entry entry = entryKey;
+ System.String field = entry.field;
+ Lucene.Net.Search.LongParser parser = (Lucene.Net.Search.LongParser) entry.custom;
+ if (parser == null)
+ {
+ try
+ {
+ return wrapper.GetLongs(reader, field, Lucene.Net.Search.FieldCache_Fields.DEFAULT_LONG_PARSER);
+ }
+ catch (System.FormatException)
+ {
+ return wrapper.GetLongs(reader, field, Lucene.Net.Search.FieldCache_Fields.NUMERIC_UTILS_LONG_PARSER);
+ }
+ }
+ long[] retArray = null;
+ TermDocs termDocs = reader.TermDocs();
+ TermEnum termEnum = reader.Terms(new Term(field));
+ try
+ {
+ do
+ {
+ Term term = termEnum.Term;
+ if (term == null || (System.Object) term.Field != (System.Object) field)
+ break;
+ long termval = parser.ParseLong(term.Text);
+ if (retArray == null)
+ // late init
+ retArray = new long[reader.MaxDoc];
+ termDocs.Seek(termEnum);
+ while (termDocs.Next())
+ {
+ retArray[termDocs.Doc] = termval;
+ }
+ }
+ while (termEnum.Next());
+ }
+ catch (StopFillCacheException)
+ {
+ }
+ finally
+ {
+ termDocs.Close();
+ termEnum.Close();
+ }
+ if (retArray == null)
+ // no values
+ retArray = new long[reader.MaxDoc];
+ return retArray;
+ }
+ }
+
+
+ // inherit javadocs
+ public virtual double[] GetDoubles(IndexReader reader, System.String field)
+ {
+ return GetDoubles(reader, field, null);
+ }
+
+ // inherit javadocs
+ public virtual double[] GetDoubles(IndexReader reader, System.String field, Lucene.Net.Search.DoubleParser parser)
+ {
+ return (double[]) caches[typeof(double)].Get(reader, new Entry(field, parser));
+ }
+
+ internal sealed class DoubleCache:Cache
+ {
+ internal DoubleCache(FieldCache wrapper):base(wrapper)
+ {
+ }
+
+ protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey)
+ {
+ Entry entry = entryKey;
+ System.String field = entry.field;
+ Lucene.Net.Search.DoubleParser parser = (Lucene.Net.Search.DoubleParser) entry.custom;
+ if (parser == null)
+ {
+ try
+ {
+ return wrapper.GetDoubles(reader, field, Lucene.Net.Search.FieldCache_Fields.DEFAULT_DOUBLE_PARSER);
+ }
+ catch (System.FormatException)
+ {
+ return wrapper.GetDoubles(reader, field, Lucene.Net.Search.FieldCache_Fields.NUMERIC_UTILS_DOUBLE_PARSER);
+ }
+ }
+ double[] retArray = null;
+ TermDocs termDocs = reader.TermDocs();
+ TermEnum termEnum = reader.Terms(new Term(field));
+ try
+ {
+ do
+ {
+ Term term = termEnum.Term;
+ if (term == null || (System.Object) term.Field != (System.Object) field)
+ break;
+ double termval = parser.ParseDouble(term.Text);
+ if (retArray == null)
+ // late init
+ retArray = new double[reader.MaxDoc];
+ termDocs.Seek(termEnum);
+ while (termDocs.Next())
+ {
+ retArray[termDocs.Doc] = termval;
+ }
+ }
+ while (termEnum.Next());
+ }
+ catch (StopFillCacheException)
+ {
+ }
+ finally
+ {
+ termDocs.Close();
+ termEnum.Close();
+ }
+ if (retArray == null)
+ // no values
+ retArray = new double[reader.MaxDoc];
+ return retArray;
+ }
+ }
+
+
+ // inherit javadocs
+ public virtual System.String[] GetStrings(IndexReader reader, System.String field)
+ {
+ return (System.String[]) caches[typeof(string)].Get(reader, new Entry(field, (Parser) null));
+ }
+
+ internal sealed class StringCache:Cache
+ {
+ internal StringCache(FieldCache wrapper):base(wrapper)
+ {
+ }
+
+ protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey)
+ {
+ System.String field = StringHelper.Intern(entryKey.field);
+ System.String[] retArray = new System.String[reader.MaxDoc];
+ TermDocs termDocs = reader.TermDocs();
+ TermEnum termEnum = reader.Terms(new Term(field));
+ try
+ {
+ do
+ {
+ Term term = termEnum.Term;
+ if (term == null || (System.Object) term.Field != (System.Object) field)
+ break;
+ System.String termval = term.Text;
+ termDocs.Seek(termEnum);
+ while (termDocs.Next())
+ {
+ retArray[termDocs.Doc] = termval;
+ }
+ }
+ while (termEnum.Next());
+ }
+ finally
+ {
+ termDocs.Close();
+ termEnum.Close();
+ }
+ return retArray;
+ }
+ }
+
+
+ // inherit javadocs
+ public virtual StringIndex GetStringIndex(IndexReader reader, System.String field)
+ {
+ return (StringIndex) caches[typeof(StringIndex)].Get(reader, new Entry(field, (Parser) null));
+ }
+
+ internal sealed class StringIndexCache:Cache
+ {
+ internal StringIndexCache(FieldCache wrapper):base(wrapper)
+ {
+ }
+
+ protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey)
+ {
+ System.String field = StringHelper.Intern(entryKey.field);
+ int[] retArray = new int[reader.MaxDoc];
+ System.String[] mterms = new System.String[reader.MaxDoc + 1];
+ TermDocs termDocs = reader.TermDocs();
+ TermEnum termEnum = reader.Terms(new Term(field));
+ int t = 0; // current term number
+
+ // an entry for documents that have no terms in this field
+ // should a document with no terms be at top or bottom?
+ // this puts them at the top - if it is changed, FieldDocSortedHitQueue
+ // needs to change as well.
+ mterms[t++] = null;
+
+ try
+ {
+ do
+ {
+ Term term = termEnum.Term;
+ if (term == null || term.Field != field || t >= mterms.Length) break;
+
+ // store term text
+ mterms[t] = term.Text;
+
+ termDocs.Seek(termEnum);
+ while (termDocs.Next())
+ {
+ retArray[termDocs.Doc] = t;
+ }
+
+ t++;
+ }
+ while (termEnum.Next());
+ }
+ finally
+ {
+ termDocs.Close();
+ termEnum.Close();
+ }
+
+ if (t == 0)
+ {
+ // if there are no terms, make the term array
+ // have a single null entry
+ mterms = new System.String[1];
+ }
+ else if (t < mterms.Length)
+ {
+ // if there are less terms than documents,
+ // trim off the dead array space
+ System.String[] terms = new System.String[t];
+ Array.Copy(mterms, 0, terms, 0, t);
+ mterms = terms;
+ }
+
+ StringIndex value_Renamed = new StringIndex(retArray, mterms);
+ return value_Renamed;
+ }
+ }
+
+ private volatile System.IO.StreamWriter infoStream;
+
+ public virtual StreamWriter InfoStream
+ {
+ get { return infoStream; }
+ set { infoStream = value; }
+ }
+ }
+}
\ No newline at end of file
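A minimal usage sketch of the caches defined above: each Get* call fills one flat array per IndexReader on first use and hands the same cached array back on later calls. The reader and the field names "price" and "title" below are illustrative assumptions, not part of this commit.

using Lucene.Net.Index;
using Lucene.Net.Search;

static class FieldCacheSketch
{
    public static void Demo(IndexReader reader)
    {
        // One int per document, parsed from the single term of the "price" field;
        // documents without a term get 0. The array is cached per IndexReader.
        int[] prices = FieldCache_Fields.DEFAULT.GetInts(reader, "price");

        // Ordinal view of a single-term string field: order[doc] indexes into lookup,
        // and lookup[0] is the null entry for documents without a term.
        StringIndex titles = FieldCache_Fields.DEFAULT.GetStringIndex(reader, "title");

        for (int doc = 0; doc < reader.MaxDoc; doc++)
        {
            System.Console.WriteLine("{0}: {1} ({2})", doc, titles.lookup[titles.order[doc]], prices[doc]);
        }
    }
}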
diff --git a/src/core/Search/FieldCacheRangeFilter.cs b/src/core/Search/FieldCacheRangeFilter.cs
new file mode 100644
index 0000000..7fc1175
--- /dev/null
+++ b/src/core/Search/FieldCacheRangeFilter.cs
@@ -0,0 +1,964 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Index;
+using Lucene.Net.Support;
+using NumericField = Lucene.Net.Documents.NumericField;
+using IndexReader = Lucene.Net.Index.IndexReader;
+using TermDocs = Lucene.Net.Index.TermDocs;
+using NumericUtils = Lucene.Net.Util.NumericUtils;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> A range filter built on top of a cached single term field (in <see cref="FieldCache" />).
+ ///
+ /// <p/><see cref="FieldCacheRangeFilter" /> builds a single cache for the field the first time it is used.
+ /// Each subsequent <see cref="FieldCacheRangeFilter" /> on the same field then reuses this cache,
+ /// even if the range itself changes.
+ ///
+ /// <p/>This means that <see cref="FieldCacheRangeFilter" /> is much faster (sometimes more than 100x as fast)
+	/// than building a <see cref="TermRangeFilter" /> if using a <see cref="NewStringRange" />. However, if the range never changes it
+ /// is slower (around 2x as slow) than building a CachingWrapperFilter on top of a single <see cref="TermRangeFilter" />.
+ ///
+ /// For numeric data types, this filter may be significantly faster than <see cref="NumericRangeFilter{T}" />.
+ /// Furthermore, it does not need the numeric values encoded by <see cref="NumericField" />. But
+	/// it has the limitation that it only works with exactly one value per document (see below).
+ ///
+ /// <p/>As with all <see cref="FieldCache" /> based functionality, <see cref="FieldCacheRangeFilter" /> is only valid for
+	/// fields which contain exactly one term per document (except for <see cref="NewStringRange" />
+	/// where 0 terms are also allowed). Due to a restriction of <see cref="FieldCache" />, for numeric ranges
+	/// a value of 0 is assumed for all documents that do not have a numeric term in the field.
+ ///
+ /// <p/>Thus it works on dates, prices and other single value fields but will not work on
+ /// regular text fields. It is preferable to use a <c>NOT_ANALYZED</c> field to ensure that
+ /// there is only a single term.
+ ///
+	/// <p/>This class does not have a constructor; use one of the static factory methods available,
+	/// which create a correct instance for the different data types supported by <see cref="FieldCache" />.
+ /// </summary>
+
+ public static class FieldCacheRangeFilter
+ {
+ [Serializable]
+ private class AnonymousClassFieldCacheRangeFilter : FieldCacheRangeFilter<string>
+ {
+ private class AnonymousClassFieldCacheDocIdSet : FieldCacheDocIdSet
+ {
+ private void InitBlock(Lucene.Net.Search.StringIndex fcsi, int inclusiveLowerPoint, int inclusiveUpperPoint, FieldCacheRangeFilter<string> enclosingInstance)
+ {
+ this.fcsi = fcsi;
+ this.inclusiveLowerPoint = inclusiveLowerPoint;
+ this.inclusiveUpperPoint = inclusiveUpperPoint;
+ this.enclosingInstance = enclosingInstance;
+ }
+ private Lucene.Net.Search.StringIndex fcsi;
+ private int inclusiveLowerPoint;
+ private int inclusiveUpperPoint;
+ private FieldCacheRangeFilter<string> enclosingInstance;
+ public FieldCacheRangeFilter<string> Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ internal AnonymousClassFieldCacheDocIdSet(Lucene.Net.Search.StringIndex fcsi, int inclusiveLowerPoint, int inclusiveUpperPoint, FieldCacheRangeFilter<string> enclosingInstance, Lucene.Net.Index.IndexReader Param1, bool Param2)
+ : base(Param1, Param2)
+ {
+ InitBlock(fcsi, inclusiveLowerPoint, inclusiveUpperPoint, enclosingInstance);
+ }
+ internal override bool MatchDoc(int doc)
+ {
+ return fcsi.order[doc] >= inclusiveLowerPoint && fcsi.order[doc] <= inclusiveUpperPoint;
+ }
+ }
+ internal AnonymousClassFieldCacheRangeFilter(string field, Lucene.Net.Search.Parser parser, string lowerVal, string upperVal, bool includeLower, bool includeUpper)
+ : base(field, parser, lowerVal, upperVal, includeLower, includeUpper)
+ {
+ }
+ public override DocIdSet GetDocIdSet(IndexReader reader)
+ {
+ Lucene.Net.Search.StringIndex fcsi = Lucene.Net.Search.FieldCache_Fields.DEFAULT.GetStringIndex(reader, field);
+ int lowerPoint = fcsi.BinarySearchLookup(lowerVal);
+ int upperPoint = fcsi.BinarySearchLookup(upperVal);
+
+ int inclusiveLowerPoint;
+ int inclusiveUpperPoint;
+
+ // Hints:
+ // * binarySearchLookup returns 0, if value was null.
+ // * the value is <0 if no exact hit was found, the returned value
+ // is (-(insertion point) - 1)
+ if (lowerPoint == 0)
+ {
+ System.Diagnostics.Debug.Assert(lowerVal == null);
+ inclusiveLowerPoint = 1;
+ }
+ else if (includeLower && lowerPoint > 0)
+ {
+ inclusiveLowerPoint = lowerPoint;
+ }
+ else if (lowerPoint > 0)
+ {
+ inclusiveLowerPoint = lowerPoint + 1;
+ }
+ else
+ {
+ inclusiveLowerPoint = System.Math.Max(1, -lowerPoint - 1);
+ }
+
+ if (upperPoint == 0)
+ {
+ System.Diagnostics.Debug.Assert(upperVal == null);
+ inclusiveUpperPoint = System.Int32.MaxValue;
+ }
+ else if (includeUpper && upperPoint > 0)
+ {
+ inclusiveUpperPoint = upperPoint;
+ }
+ else if (upperPoint > 0)
+ {
+ inclusiveUpperPoint = upperPoint - 1;
+ }
+ else
+ {
+ inclusiveUpperPoint = -upperPoint - 2;
+ }
+
+ if (inclusiveUpperPoint <= 0 || inclusiveLowerPoint > inclusiveUpperPoint)
+ return DocIdSet.EMPTY_DOCIDSET;
+
+ System.Diagnostics.Debug.Assert(inclusiveLowerPoint > 0 && inclusiveUpperPoint > 0);
+
+ // for this DocIdSet, we never need to use TermDocs,
+ // because deleted docs have an order of 0 (null entry in StringIndex)
+ return new AnonymousClassFieldCacheDocIdSet(fcsi, inclusiveLowerPoint, inclusiveUpperPoint, this, reader, false);
+ }
+ }
+ [Serializable]
+ private class AnonymousClassFieldCacheRangeFilter1 : FieldCacheRangeFilter<sbyte?>
+ {
+ private class AnonymousClassFieldCacheDocIdSet : FieldCacheDocIdSet
+ {
+ private void InitBlock(sbyte[] values, sbyte inclusiveLowerPoint, sbyte inclusiveUpperPoint, FieldCacheRangeFilter<sbyte?> enclosingInstance)
+ {
+ this.values = values;
+ this.inclusiveLowerPoint = inclusiveLowerPoint;
+ this.inclusiveUpperPoint = inclusiveUpperPoint;
+ this.enclosingInstance = enclosingInstance;
+ }
+ private sbyte[] values;
+ private sbyte inclusiveLowerPoint;
+ private sbyte inclusiveUpperPoint;
+ private FieldCacheRangeFilter<sbyte?> enclosingInstance;
+ public FieldCacheRangeFilter<sbyte?> Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ internal AnonymousClassFieldCacheDocIdSet(sbyte[] values, sbyte inclusiveLowerPoint, sbyte inclusiveUpperPoint, FieldCacheRangeFilter<sbyte?> enclosingInstance, Lucene.Net.Index.IndexReader Param1, bool Param2)
+ : base(Param1, Param2)
+ {
+ InitBlock(values, inclusiveLowerPoint, inclusiveUpperPoint, enclosingInstance);
+ }
+ internal override bool MatchDoc(int doc)
+ {
+ return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
+ }
+ }
+ internal AnonymousClassFieldCacheRangeFilter1(string field, Parser parser, sbyte? lowerVal, sbyte? upperVal, bool includeLower, bool includeUpper)
+ : base(field, parser, lowerVal, upperVal, includeLower, includeUpper)
+ {
+ }
+ public override DocIdSet GetDocIdSet(IndexReader reader)
+ {
+ sbyte inclusiveLowerPoint;
+ sbyte inclusiveUpperPoint;
+ if (lowerVal != null)
+ {
+ sbyte i = (sbyte)lowerVal;
+ if (!includeLower && i == sbyte.MaxValue)
+ return DocIdSet.EMPTY_DOCIDSET;
+ inclusiveLowerPoint = (sbyte)(includeLower ? i : (i + 1));
+ }
+ else
+ {
+ inclusiveLowerPoint = sbyte.MinValue;
+ }
+ if (upperVal != null)
+ {
+ sbyte i = (sbyte)upperVal;
+ if (!includeUpper && i == sbyte.MinValue)
+ return DocIdSet.EMPTY_DOCIDSET;
+ inclusiveUpperPoint = (sbyte)(includeUpper ? i : (i - 1));
+ }
+ else
+ {
+ inclusiveUpperPoint = sbyte.MaxValue;
+ }
+
+ if (inclusiveLowerPoint > inclusiveUpperPoint)
+ return DocIdSet.EMPTY_DOCIDSET;
+
+ sbyte[] values = Lucene.Net.Search.FieldCache_Fields.DEFAULT.GetBytes(reader, field, (Lucene.Net.Search.ByteParser)parser);
+ // we only request the usage of termDocs, if the range contains 0
+ return new AnonymousClassFieldCacheDocIdSet(values, inclusiveLowerPoint, inclusiveUpperPoint, this, reader, (inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0));
+ }
+ }
+ [Serializable]
+ private class AnonymousClassFieldCacheRangeFilter2 : FieldCacheRangeFilter<short?>
+ {
+ private class AnonymousClassFieldCacheDocIdSet : FieldCacheDocIdSet
+ {
+ private void InitBlock(short[] values, short inclusiveLowerPoint, short inclusiveUpperPoint, FieldCacheRangeFilter<short?> enclosingInstance)
+ {
+ this.values = values;
+ this.inclusiveLowerPoint = inclusiveLowerPoint;
+ this.inclusiveUpperPoint = inclusiveUpperPoint;
+ this.enclosingInstance = enclosingInstance;
+ }
+ private short[] values;
+ private short inclusiveLowerPoint;
+ private short inclusiveUpperPoint;
+ private FieldCacheRangeFilter<short?> enclosingInstance;
+ public FieldCacheRangeFilter<short?> Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ internal AnonymousClassFieldCacheDocIdSet(short[] values, short inclusiveLowerPoint, short inclusiveUpperPoint, FieldCacheRangeFilter<short?> enclosingInstance, Lucene.Net.Index.IndexReader Param1, bool Param2)
+ : base(Param1, Param2)
+ {
+ InitBlock(values, inclusiveLowerPoint, inclusiveUpperPoint, enclosingInstance);
+ }
+ internal override bool MatchDoc(int doc)
+ {
+ return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
+ }
+ }
+ internal AnonymousClassFieldCacheRangeFilter2(string field, Parser parser, short? lowerVal, short? upperVal, bool includeLower, bool includeUpper)
+ : base(field, parser, lowerVal, upperVal, includeLower, includeUpper)
+ {
+ }
+ public override DocIdSet GetDocIdSet(IndexReader reader)
+ {
+ short inclusiveLowerPoint;
+ short inclusiveUpperPoint;
+ if (lowerVal != null)
+ {
+ short i = (short)lowerVal;
+ if (!includeLower && i == short.MaxValue)
+ return DocIdSet.EMPTY_DOCIDSET;
+ inclusiveLowerPoint = (short)(includeLower ? i : (i + 1));
+ }
+ else
+ {
+ inclusiveLowerPoint = short.MinValue;
+ }
+ if (upperVal != null)
+ {
+ short i = (short)upperVal;
+ if (!includeUpper && i == short.MinValue)
+ return DocIdSet.EMPTY_DOCIDSET;
+ inclusiveUpperPoint = (short)(includeUpper ? i : (i - 1));
+ }
+ else
+ {
+ inclusiveUpperPoint = short.MaxValue;
+ }
+
+ if (inclusiveLowerPoint > inclusiveUpperPoint)
+ return DocIdSet.EMPTY_DOCIDSET;
+
+ short[] values = Lucene.Net.Search.FieldCache_Fields.DEFAULT.GetShorts(reader, field, (Lucene.Net.Search.ShortParser)parser);
+ // we only request the usage of termDocs, if the range contains 0
+ return new AnonymousClassFieldCacheDocIdSet(values, inclusiveLowerPoint, inclusiveUpperPoint, this, reader, (inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0));
+ }
+ }
+ [Serializable]
+ private class AnonymousClassFieldCacheRangeFilter3 : FieldCacheRangeFilter<int?>
+ {
+ private class AnonymousClassFieldCacheDocIdSet : FieldCacheDocIdSet
+ {
+ private void InitBlock(int[] values, int inclusiveLowerPoint, int inclusiveUpperPoint, FieldCacheRangeFilter<int?> enclosingInstance)
+ {
+ this.values = values;
+ this.inclusiveLowerPoint = inclusiveLowerPoint;
+ this.inclusiveUpperPoint = inclusiveUpperPoint;
+ this.enclosingInstance = enclosingInstance;
+ }
+ private int[] values;
+ private int inclusiveLowerPoint;
+ private int inclusiveUpperPoint;
+ private FieldCacheRangeFilter<int?> enclosingInstance;
+ public FieldCacheRangeFilter<int?> Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ internal AnonymousClassFieldCacheDocIdSet(int[] values, int inclusiveLowerPoint, int inclusiveUpperPoint, FieldCacheRangeFilter<int?> enclosingInstance, Lucene.Net.Index.IndexReader Param1, bool Param2)
+ : base(Param1, Param2)
+ {
+ InitBlock(values, inclusiveLowerPoint, inclusiveUpperPoint, enclosingInstance);
+ }
+ internal override bool MatchDoc(int doc)
+ {
+ return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
+ }
+ }
+ internal AnonymousClassFieldCacheRangeFilter3(string field, Lucene.Net.Search.Parser parser, int? lowerVal, int? upperVal, bool includeLower, bool includeUpper)
+ : base(field, parser, lowerVal, upperVal, includeLower, includeUpper)
+ {
+ }
+ public override DocIdSet GetDocIdSet(IndexReader reader)
+ {
+ int inclusiveLowerPoint;
+ int inclusiveUpperPoint;
+ if (lowerVal != null)
+ {
+ int i = (int)lowerVal;
+ if (!includeLower && i == int.MaxValue)
+ return DocIdSet.EMPTY_DOCIDSET;
+ inclusiveLowerPoint = includeLower ? i : (i + 1);
+ }
+ else
+ {
+ inclusiveLowerPoint = int.MinValue;
+ }
+ if (upperVal != null)
+ {
+ int i = (int)upperVal;
+ if (!includeUpper && i == int.MinValue)
+ return DocIdSet.EMPTY_DOCIDSET;
+ inclusiveUpperPoint = includeUpper ? i : (i - 1);
+ }
+ else
+ {
+ inclusiveUpperPoint = int.MaxValue;
+ }
+
+ if (inclusiveLowerPoint > inclusiveUpperPoint)
+ return DocIdSet.EMPTY_DOCIDSET;
+
+ int[] values = Lucene.Net.Search.FieldCache_Fields.DEFAULT.GetInts(reader, field, (Lucene.Net.Search.IntParser)parser);
+ // we only request the usage of termDocs, if the range contains 0
+ return new AnonymousClassFieldCacheDocIdSet(values, inclusiveLowerPoint, inclusiveUpperPoint, this, reader, (inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0));
+ }
+ }
+ [Serializable]
+ private class AnonymousClassFieldCacheRangeFilter4 : FieldCacheRangeFilter<long?>
+ {
+ private class AnonymousClassFieldCacheDocIdSet : FieldCacheDocIdSet
+ {
+ private void InitBlock(long[] values, long inclusiveLowerPoint, long inclusiveUpperPoint, FieldCacheRangeFilter<long?> enclosingInstance)
+ {
+ this.values = values;
+ this.inclusiveLowerPoint = inclusiveLowerPoint;
+ this.inclusiveUpperPoint = inclusiveUpperPoint;
+ this.enclosingInstance = enclosingInstance;
+ }
+ private long[] values;
+ private long inclusiveLowerPoint;
+ private long inclusiveUpperPoint;
+ private FieldCacheRangeFilter<long?> enclosingInstance;
+ public FieldCacheRangeFilter<long?> Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ internal AnonymousClassFieldCacheDocIdSet(long[] values, long inclusiveLowerPoint, long inclusiveUpperPoint, FieldCacheRangeFilter<long?> enclosingInstance, Lucene.Net.Index.IndexReader Param1, bool Param2)
+ : base(Param1, Param2)
+ {
+ InitBlock(values, inclusiveLowerPoint, inclusiveUpperPoint, enclosingInstance);
+ }
+ internal override bool MatchDoc(int doc)
+ {
+ return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
+ }
+ }
+ internal AnonymousClassFieldCacheRangeFilter4(string field, Lucene.Net.Search.Parser parser, long? lowerVal, long? upperVal, bool includeLower, bool includeUpper)
+ : base(field, parser, lowerVal, upperVal, includeLower, includeUpper)
+ {
+ }
+ public override DocIdSet GetDocIdSet(IndexReader reader)
+ {
+ long inclusiveLowerPoint;
+ long inclusiveUpperPoint;
+ if (lowerVal != null)
+ {
+ long i = (long)lowerVal;
+ if (!includeLower && i == long.MaxValue)
+ return DocIdSet.EMPTY_DOCIDSET;
+ inclusiveLowerPoint = includeLower ? i : (i + 1L);
+ }
+ else
+ {
+ inclusiveLowerPoint = long.MinValue;
+ }
+ if (upperVal != null)
+ {
+ long i = (long)upperVal;
+ if (!includeUpper && i == long.MinValue)
+ return DocIdSet.EMPTY_DOCIDSET;
+ inclusiveUpperPoint = includeUpper ? i : (i - 1L);
+ }
+ else
+ {
+ inclusiveUpperPoint = long.MaxValue;
+ }
+
+ if (inclusiveLowerPoint > inclusiveUpperPoint)
+ return DocIdSet.EMPTY_DOCIDSET;
+
+ long[] values = Lucene.Net.Search.FieldCache_Fields.DEFAULT.GetLongs(reader, field, (Lucene.Net.Search.LongParser)parser);
+ // we only request the usage of termDocs, if the range contains 0
+ return new AnonymousClassFieldCacheDocIdSet(values, inclusiveLowerPoint, inclusiveUpperPoint, this, reader, (inclusiveLowerPoint <= 0L && inclusiveUpperPoint >= 0L));
+ }
+ }
+ [Serializable]
+ private class AnonymousClassFieldCacheRangeFilter5 : FieldCacheRangeFilter<float?>
+ {
+ private class AnonymousClassFieldCacheDocIdSet : FieldCacheDocIdSet
+ {
+ private void InitBlock(float[] values, float inclusiveLowerPoint, float inclusiveUpperPoint, FieldCacheRangeFilter<float?> enclosingInstance)
+ {
+ this.values = values;
+ this.inclusiveLowerPoint = inclusiveLowerPoint;
+ this.inclusiveUpperPoint = inclusiveUpperPoint;
+ this.enclosingInstance = enclosingInstance;
+ }
+ private float[] values;
+ private float inclusiveLowerPoint;
+ private float inclusiveUpperPoint;
+ private FieldCacheRangeFilter<float?> enclosingInstance;
+ public FieldCacheRangeFilter<float?> Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ internal AnonymousClassFieldCacheDocIdSet(float[] values, float inclusiveLowerPoint, float inclusiveUpperPoint, FieldCacheRangeFilter<float?> enclosingInstance, Lucene.Net.Index.IndexReader Param1, bool Param2)
+ : base(Param1, Param2)
+ {
+ InitBlock(values, inclusiveLowerPoint, inclusiveUpperPoint, enclosingInstance);
+ }
+ internal override bool MatchDoc(int doc)
+ {
+ return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
+ }
+ }
+ internal AnonymousClassFieldCacheRangeFilter5(string field, Lucene.Net.Search.Parser parser, float? lowerVal, float? upperVal, bool includeLower, bool includeUpper)
+ : base(field, parser, lowerVal, upperVal, includeLower, includeUpper)
+ {
+ }
+ public override DocIdSet GetDocIdSet(IndexReader reader)
+ {
+ // we transform the floating point numbers to sortable integers
+				// using NumericUtils to make it easier to find the next bigger/lower value
+ float inclusiveLowerPoint;
+ float inclusiveUpperPoint;
+ if (lowerVal != null)
+ {
+ float f = (float)lowerVal;
+ if (!includeUpper && f > 0.0f && float.IsInfinity(f))
+ return DocIdSet.EMPTY_DOCIDSET;
+ int i = NumericUtils.FloatToSortableInt(f);
+ inclusiveLowerPoint = NumericUtils.SortableIntToFloat(includeLower ? i : (i + 1));
+ }
+ else
+ {
+ inclusiveLowerPoint = float.NegativeInfinity;
+ }
+ if (upperVal != null)
+ {
+ float f = (float)upperVal;
+ if (!includeUpper && f < 0.0f && float.IsInfinity(f))
+ return DocIdSet.EMPTY_DOCIDSET;
+ int i = NumericUtils.FloatToSortableInt(f);
+ inclusiveUpperPoint = NumericUtils.SortableIntToFloat(includeUpper ? i : (i - 1));
+ }
+ else
+ {
+ inclusiveUpperPoint = float.PositiveInfinity;
+ }
+
+ if (inclusiveLowerPoint > inclusiveUpperPoint)
+ return DocIdSet.EMPTY_DOCIDSET;
+
+ float[] values = Lucene.Net.Search.FieldCache_Fields.DEFAULT.GetFloats(reader, field, (Lucene.Net.Search.FloatParser)parser);
+ // we only request the usage of termDocs, if the range contains 0
+ return new AnonymousClassFieldCacheDocIdSet(values, inclusiveLowerPoint, inclusiveUpperPoint, this, reader, (inclusiveLowerPoint <= 0.0f && inclusiveUpperPoint >= 0.0f));
+ }
+ }
+ [Serializable]
+ private class AnonymousClassFieldCacheRangeFilter6 : FieldCacheRangeFilter<double?>
+ {
+ private class AnonymousClassFieldCacheDocIdSet : FieldCacheDocIdSet
+ {
+ private void InitBlock(double[] values, double inclusiveLowerPoint, double inclusiveUpperPoint, FieldCacheRangeFilter<double?> enclosingInstance)
+ {
+ this.values = values;
+ this.inclusiveLowerPoint = inclusiveLowerPoint;
+ this.inclusiveUpperPoint = inclusiveUpperPoint;
+ this.enclosingInstance = enclosingInstance;
+ }
+ private double[] values;
+ private double inclusiveLowerPoint;
+ private double inclusiveUpperPoint;
+ private FieldCacheRangeFilter<double?> enclosingInstance;
+ public FieldCacheRangeFilter<double?> Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ internal AnonymousClassFieldCacheDocIdSet(double[] values, double inclusiveLowerPoint, double inclusiveUpperPoint, FieldCacheRangeFilter<double?> enclosingInstance, Lucene.Net.Index.IndexReader Param1, bool Param2)
+ : base(Param1, Param2)
+ {
+ InitBlock(values, inclusiveLowerPoint, inclusiveUpperPoint, enclosingInstance);
+ }
+ internal override bool MatchDoc(int doc)
+ {
+ return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
+ }
+ }
+ internal AnonymousClassFieldCacheRangeFilter6(string field, Lucene.Net.Search.Parser parser, double? lowerVal, double? upperVal, bool includeLower, bool includeUpper)
+ : base(field, parser, lowerVal, upperVal, includeLower, includeUpper)
+ {
+ }
+ public override DocIdSet GetDocIdSet(IndexReader reader)
+ {
+ // we transform the floating point numbers to sortable integers
+				// using NumericUtils to make it easier to find the next bigger/lower value
+ double inclusiveLowerPoint;
+ double inclusiveUpperPoint;
+ if (lowerVal != null)
+ {
+ double f = (double)lowerVal;
+ if (!includeUpper && f > 0.0 && double.IsInfinity(f))
+ return DocIdSet.EMPTY_DOCIDSET;
+ long i = NumericUtils.DoubleToSortableLong(f);
+ inclusiveLowerPoint = NumericUtils.SortableLongToDouble(includeLower ? i : (i + 1L));
+ }
+ else
+ {
+ inclusiveLowerPoint = double.NegativeInfinity;
+ }
+ if (upperVal != null)
+ {
+ double f = (double)upperVal;
+ if (!includeUpper && f < 0.0 && double.IsInfinity(f))
+ return DocIdSet.EMPTY_DOCIDSET;
+ long i = NumericUtils.DoubleToSortableLong(f);
+ inclusiveUpperPoint = NumericUtils.SortableLongToDouble(includeUpper ? i : (i - 1L));
+ }
+ else
+ {
+ inclusiveUpperPoint = double.PositiveInfinity;
+ }
+
+ if (inclusiveLowerPoint > inclusiveUpperPoint)
+ return DocIdSet.EMPTY_DOCIDSET;
+
+ double[] values = Lucene.Net.Search.FieldCache_Fields.DEFAULT.GetDoubles(reader, field, (Lucene.Net.Search.DoubleParser)parser);
+ // we only request the usage of termDocs, if the range contains 0
+ return new AnonymousClassFieldCacheDocIdSet(values, inclusiveLowerPoint, inclusiveUpperPoint, this, reader, (inclusiveLowerPoint <= 0.0 && inclusiveUpperPoint >= 0.0));
+ }
+ }
+
+ /// <summary> Creates a string range filter using <see cref="FieldCache.GetStringIndex(IndexReader,string)" />. This works with all
+ /// fields containing zero or one term in the field. The range can be half-open by setting one
+ /// of the values to <c>null</c>.
+ /// </summary>
+ public static FieldCacheRangeFilter<string> NewStringRange(string field, string lowerVal, string upperVal, bool includeLower, bool includeUpper)
+ {
+ return new AnonymousClassFieldCacheRangeFilter(field, null, lowerVal, upperVal, includeLower, includeUpper);
+ }
+
+ /// <summary> Creates a numeric range filter using <see cref="FieldCache.GetBytes(IndexReader,String)" />. This works with all
+ /// byte fields containing exactly one numeric term in the field. The range can be half-open by setting one
+ /// of the values to <c>null</c>.
+ /// </summary>
+ public static FieldCacheRangeFilter<sbyte?> NewByteRange(string field, sbyte? lowerVal, sbyte? upperVal, bool includeLower, bool includeUpper)
+ {
+ return NewByteRange(field, null, lowerVal, upperVal, includeLower, includeUpper);
+ }
+
+ /// <summary> Creates a numeric range filter using <see cref="FieldCache.GetBytes(IndexReader,String,ByteParser)" />. This works with all
+ /// byte fields containing exactly one numeric term in the field. The range can be half-open by setting one
+ /// of the values to <c>null</c>.
+ /// </summary>
+ public static FieldCacheRangeFilter<sbyte?> NewByteRange(string field, Lucene.Net.Search.ByteParser parser, sbyte? lowerVal, sbyte? upperVal, bool includeLower, bool includeUpper)
+ {
+ return new AnonymousClassFieldCacheRangeFilter1(field, parser, lowerVal, upperVal, includeLower, includeUpper);
+ }
+
+		/// <summary> Creates a numeric range filter using <see cref="FieldCache.GetShorts(IndexReader,String)" />. This works with all
+ /// short fields containing exactly one numeric term in the field. The range can be half-open by setting one
+ /// of the values to <c>null</c>.
+ /// </summary>
+ public static FieldCacheRangeFilter<short?> NewShortRange(string field, short? lowerVal, short? upperVal, bool includeLower, bool includeUpper)
+ {
+ return NewShortRange(field, null, lowerVal, upperVal, includeLower, includeUpper);
+ }
+
+		/// <summary> Creates a numeric range filter using <see cref="FieldCache.GetShorts(IndexReader,String,ShortParser)" />. This works with all
+ /// short fields containing exactly one numeric term in the field. The range can be half-open by setting one
+ /// of the values to <c>null</c>.
+ /// </summary>
+ public static FieldCacheRangeFilter<short?> NewShortRange(string field, Lucene.Net.Search.ShortParser parser, short? lowerVal, short? upperVal, bool includeLower, bool includeUpper)
+ {
+ return new AnonymousClassFieldCacheRangeFilter2(field, parser, lowerVal, upperVal, includeLower, includeUpper);
+ }
+
+		/// <summary> Creates a numeric range filter using <see cref="FieldCache.GetInts(IndexReader,String)" />. This works with all
+ /// int fields containing exactly one numeric term in the field. The range can be half-open by setting one
+ /// of the values to <c>null</c>.
+ /// </summary>
+ public static FieldCacheRangeFilter<int?> NewIntRange(string field, int? lowerVal, int? upperVal, bool includeLower, bool includeUpper)
+ {
+ return NewIntRange(field, null, lowerVal, upperVal, includeLower, includeUpper);
+ }
+
+		/// <summary> Creates a numeric range filter using <see cref="FieldCache.GetInts(IndexReader,String,IntParser)" />. This works with all
+ /// int fields containing exactly one numeric term in the field. The range can be half-open by setting one
+ /// of the values to <c>null</c>.
+ /// </summary>
+ public static FieldCacheRangeFilter<int?> NewIntRange(string field, Lucene.Net.Search.IntParser parser, int? lowerVal, int? upperVal, bool includeLower, bool includeUpper)
+ {
+ return new AnonymousClassFieldCacheRangeFilter3(field, parser, lowerVal, upperVal, includeLower, includeUpper);
+ }
+
+		/// <summary> Creates a numeric range filter using <see cref="FieldCache.GetLongs(IndexReader,String)" />. This works with all
+ /// long fields containing exactly one numeric term in the field. The range can be half-open by setting one
+ /// of the values to <c>null</c>.
+ /// </summary>
+ public static FieldCacheRangeFilter<long?> NewLongRange(string field, long? lowerVal, long? upperVal, bool includeLower, bool includeUpper)
+ {
+ return NewLongRange(field, null, lowerVal, upperVal, includeLower, includeUpper);
+ }
+
+		/// <summary> Creates a numeric range filter using <see cref="FieldCache.GetLongs(IndexReader,String,LongParser)" />. This works with all
+ /// long fields containing exactly one numeric term in the field. The range can be half-open by setting one
+ /// of the values to <c>null</c>.
+ /// </summary>
+ public static FieldCacheRangeFilter<long?> NewLongRange(string field, Lucene.Net.Search.LongParser parser, long? lowerVal, long? upperVal, bool includeLower, bool includeUpper)
+ {
+ return new AnonymousClassFieldCacheRangeFilter4(field, parser, lowerVal, upperVal, includeLower, includeUpper);
+ }
+
+		/// <summary> Creates a numeric range filter using <see cref="FieldCache.GetFloats(IndexReader,String)" />. This works with all
+ /// float fields containing exactly one numeric term in the field. The range can be half-open by setting one
+ /// of the values to <c>null</c>.
+ /// </summary>
+ public static FieldCacheRangeFilter<float?> NewFloatRange(string field, float? lowerVal, float? upperVal, bool includeLower, bool includeUpper)
+ {
+ return NewFloatRange(field, null, lowerVal, upperVal, includeLower, includeUpper);
+ }
+
+		/// <summary> Creates a numeric range filter using <see cref="FieldCache.GetFloats(IndexReader,String,FloatParser)" />. This works with all
+ /// float fields containing exactly one numeric term in the field. The range can be half-open by setting one
+ /// of the values to <c>null</c>.
+ /// </summary>
+ public static FieldCacheRangeFilter<float?> NewFloatRange(string field, Lucene.Net.Search.FloatParser parser, float? lowerVal, float? upperVal, bool includeLower, bool includeUpper)
+ {
+ return new AnonymousClassFieldCacheRangeFilter5(field, parser, lowerVal, upperVal, includeLower, includeUpper);
+ }
+
+		/// <summary> Creates a numeric range filter using <see cref="FieldCache.GetDoubles(IndexReader,String)" />. This works with all
+ /// double fields containing exactly one numeric term in the field. The range can be half-open by setting one
+ /// of the values to <c>null</c>.
+ /// </summary>
+ public static FieldCacheRangeFilter<double?> NewDoubleRange(string field, double? lowerVal, double? upperVal, bool includeLower, bool includeUpper)
+ {
+ return NewDoubleRange(field, null, lowerVal, upperVal, includeLower, includeUpper);
+ }
+
+		/// <summary> Creates a numeric range filter using <see cref="FieldCache.GetDoubles(IndexReader,String,DoubleParser)" />. This works with all
+ /// double fields containing exactly one numeric term in the field. The range can be half-open by setting one
+ /// of the values to <c>null</c>.
+ /// </summary>
+ public static FieldCacheRangeFilter<double?> NewDoubleRange(string field, Lucene.Net.Search.DoubleParser parser, double? lowerVal, double? upperVal, bool includeLower, bool includeUpper)
+ {
+ return new AnonymousClassFieldCacheRangeFilter6(field, parser, lowerVal, upperVal, includeLower, includeUpper);
+ }
+ }
+
+ [Serializable]
+ public abstract class FieldCacheRangeFilter<T> : Filter
+ {
+ internal System.String field;
+ internal Lucene.Net.Search.Parser parser;
+ internal T lowerVal;
+ internal T upperVal;
+ internal bool includeLower;
+ internal bool includeUpper;
+
+ protected internal FieldCacheRangeFilter(System.String field, Lucene.Net.Search.Parser parser, T lowerVal, T upperVal, bool includeLower, bool includeUpper)
+ {
+ this.field = field;
+ this.parser = parser;
+ this.lowerVal = lowerVal;
+ this.upperVal = upperVal;
+ this.includeLower = includeLower;
+ this.includeUpper = includeUpper;
+ }
+
+ /// <summary>This method is implemented for each data type </summary>
+ public abstract override DocIdSet GetDocIdSet(IndexReader reader);
+
+ public override System.String ToString()
+ {
+ System.Text.StringBuilder sb = new System.Text.StringBuilder(field).Append(":");
+ return sb.Append(includeLower?'[':'{').Append((lowerVal == null)?"*":lowerVal.ToString()).Append(" TO ").Append((upperVal == null)?"*":upperVal.ToString()).Append(includeUpper?']':'}').ToString();
+ }
+
+ public override bool Equals(System.Object o)
+ {
+ if (this == o)
+ return true;
+ if (!(o is FieldCacheRangeFilter<T>))
+ return false;
+ FieldCacheRangeFilter<T> other = (FieldCacheRangeFilter<T>) o;
+
+ if (!this.field.Equals(other.field) || this.includeLower != other.includeLower || this.includeUpper != other.includeUpper)
+ {
+ return false;
+ }
+			if (this.lowerVal != null ? !this.lowerVal.Equals(other.lowerVal) : other.lowerVal != null)
+				return false;
+			if (this.upperVal != null ? !this.upperVal.Equals(other.upperVal) : other.upperVal != null)
+				return false;
+			if (this.parser != null ? !this.parser.Equals(other.parser) : other.parser != null)
+ return false;
+ return true;
+ }
+
+ public override int GetHashCode()
+ {
+ int h = field.GetHashCode();
+ h ^= ((lowerVal != null)?lowerVal.GetHashCode():550356204);
+ h = (h << 1) | (Number.URShift(h, 31)); // rotate to distinguish lower from upper
+ h ^= ((upperVal != null)?upperVal.GetHashCode():- 1674416163);
+ h ^= ((parser != null)?parser.GetHashCode():- 1572457324);
+ h ^= (includeLower?1549299360:- 365038026) ^ (includeUpper?1721088258:1948649653);
+ return h;
+ }
+
+ /// <summary>
+ /// Returns the field name for this filter
+ /// </summary>
+ public string GetField { get { return field; } }
+
+ /// <summary>
+ /// Returns <c>true</c> if the lower endpoint is inclusive
+ /// </summary>
+ public bool IncludesLower { get { return includeLower; } }
+
+ /// <summary>
+ /// Returns <c>true</c> if the upper endpoint is inclusive
+ /// </summary>
+ public bool IncludesUpper { get { return includeUpper; } }
+
+ /// <summary>
+ /// Returns the lower value of the range filter
+ /// </summary>
+ public T LowerValue { get { return lowerVal; } }
+
+ /// <summary>
+ /// Returns the upper value of this range filter
+ /// </summary>
+ public T UpperValue { get { return upperVal; } }
+
+ public Parser Parser { get { return parser; } }
+
+ internal abstract class FieldCacheDocIdSet:DocIdSet
+ {
+ private class AnonymousClassDocIdSetIterator : DocIdSetIterator
+ {
+ public AnonymousClassDocIdSetIterator(Lucene.Net.Index.TermDocs termDocs, FieldCacheDocIdSet enclosingInstance)
+ {
+ InitBlock(termDocs, enclosingInstance);
+ }
+ private void InitBlock(Lucene.Net.Index.TermDocs termDocs, FieldCacheDocIdSet enclosingInstance)
+ {
+ this.termDocs = termDocs;
+ this.enclosingInstance = enclosingInstance;
+ }
+ private Lucene.Net.Index.TermDocs termDocs;
+ private FieldCacheDocIdSet enclosingInstance;
+ public FieldCacheDocIdSet Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ private int doc = - 1;
+
+ public override int DocID()
+ {
+ return doc;
+ }
+
+ public override int NextDoc()
+ {
+ do
+ {
+ if (!termDocs.Next())
+ return doc = NO_MORE_DOCS;
+ }
+ while (!Enclosing_Instance.MatchDoc(doc = termDocs.Doc));
+ return doc;
+ }
+
+ public override int Advance(int target)
+ {
+ if (!termDocs.SkipTo(target))
+ return doc = NO_MORE_DOCS;
+ while (!Enclosing_Instance.MatchDoc(doc = termDocs.Doc))
+ {
+ if (!termDocs.Next())
+ return doc = NO_MORE_DOCS;
+ }
+ return doc;
+ }
+ }
+ private class AnonymousClassDocIdSetIterator1:DocIdSetIterator
+ {
+ public AnonymousClassDocIdSetIterator1(FieldCacheDocIdSet enclosingInstance)
+ {
+ InitBlock(enclosingInstance);
+ }
+ private void InitBlock(FieldCacheDocIdSet enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private FieldCacheDocIdSet enclosingInstance;
+ public FieldCacheDocIdSet Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ private int doc = - 1;
+
+ public override int DocID()
+ {
+ return doc;
+ }
+
+ public override int NextDoc()
+ {
+ try
+ {
+ do
+ {
+ doc++;
+ }
+ while (!Enclosing_Instance.MatchDoc(doc));
+ return doc;
+ }
+ catch (System.IndexOutOfRangeException)
+ {
+ return doc = NO_MORE_DOCS;
+ }
+ }
+
+ public override int Advance(int target)
+ {
+ try
+ {
+ doc = target;
+ while (!Enclosing_Instance.MatchDoc(doc))
+ {
+ doc++;
+ }
+ return doc;
+ }
+ catch (System.IndexOutOfRangeException)
+ {
+ return doc = NO_MORE_DOCS;
+ }
+ }
+ }
+ private IndexReader reader;
+ private bool mayUseTermDocs;
+
+ internal FieldCacheDocIdSet(IndexReader reader, bool mayUseTermDocs)
+ {
+ this.reader = reader;
+ this.mayUseTermDocs = mayUseTermDocs;
+ }
+
+			/// <summary>this method checks whether a doc is a hit; it should throw an IndexOutOfRangeException when the position is invalid </summary>
+ internal abstract bool MatchDoc(int doc);
+
+ /// <summary>this DocIdSet is cacheable, if it works solely with FieldCache and no TermDocs </summary>
+ public override bool IsCacheable
+ {
+ get { return !(mayUseTermDocs && reader.HasDeletions); }
+ }
+
+ public override DocIdSetIterator Iterator()
+ {
+ // Synchronization needed because deleted docs BitVector
+ // can change after call to hasDeletions until TermDocs creation.
+ // We only use an iterator with termDocs, when this was requested (e.g. range contains 0)
+ // and the index has deletions
+ TermDocs termDocs;
+ lock (reader)
+ {
+ termDocs = IsCacheable ? null : reader.TermDocs(null);
+ }
+ if (termDocs != null)
+ {
+ // a DocIdSetIterator using TermDocs to iterate valid docIds
+ return new AnonymousClassDocIdSetIterator(termDocs, this);
+ }
+ else
+ {
+ // a DocIdSetIterator generating docIds by incrementing a variable -
+					// this one can be used if there are no deletions on the index
+ return new AnonymousClassDocIdSetIterator1(this);
+ }
+ }
+ }
+ }
+}
\ No newline at end of file
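A short usage sketch for the factory methods above. The Searcher, the field names "year" and "title", and the bounds are illustrative assumptions; any Query/Filter combination can be used the same way.

using Lucene.Net.Search;

static class RangeFilterSketch
{
    public static void Demo(Searcher searcher)
    {
        // Inclusive numeric range on a single-valued "year" field; the int[] for the
        // field is loaded into the FieldCache on first use and reused afterwards.
        Filter byYear = FieldCacheRangeFilter.NewIntRange("year", 1990, 2000, true, true);

        // Half-open string range: lower bound "m" inclusive, no upper bound.
        Filter fromM = FieldCacheRangeFilter.NewStringRange("title", "m", null, true, false);

        System.Console.WriteLine(searcher.Search(new MatchAllDocsQuery(), byYear, 10).TotalHits);
        System.Console.WriteLine(searcher.Search(new MatchAllDocsQuery(), fromM, 10).TotalHits);
    }
}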
diff --git a/src/core/Search/FieldCacheTermsFilter.cs b/src/core/Search/FieldCacheTermsFilter.cs
new file mode 100644
index 0000000..d6526f4
--- /dev/null
+++ b/src/core/Search/FieldCacheTermsFilter.cs
@@ -0,0 +1,223 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using IndexReader = Lucene.Net.Index.IndexReader;
+using TermDocs = Lucene.Net.Index.TermDocs;
+using OpenBitSet = Lucene.Net.Util.OpenBitSet;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> A <see cref="Filter" /> that only accepts documents whose single
+ /// term value in the specified field is contained in the
+ /// provided set of allowed terms.
+ ///
+ /// <p/>
+ ///
+ /// This is the same functionality as TermsFilter (from
+ /// contrib/queries), except this filter requires that the
+ /// field contains only a single term for all documents.
+ /// Because of drastically different implementations, they
+ /// also have different performance characteristics, as
+ /// described below.
+ ///
+ /// <p/>
+ ///
+ /// The first invocation of this filter on a given field will
+ /// be slower, since a <see cref="StringIndex" /> must be
+ /// created. Subsequent invocations using the same field
+ /// will re-use this cache. However, as with all
+ /// functionality based on <see cref="FieldCache" />, persistent RAM
+ /// is consumed to hold the cache, and is not freed until the
+ /// <see cref="IndexReader" /> is closed. In contrast, TermsFilter
+ /// has no persistent RAM consumption.
+ ///
+ ///
+ /// <p/>
+ ///
+ /// With each search, this filter translates the specified
+ /// set of Terms into a private <see cref="OpenBitSet" /> keyed by
+ /// term number per unique <see cref="IndexReader" /> (normally one
+ /// reader per segment). Then, during matching, the term
+ /// number for each docID is retrieved from the cache and
+ /// then checked for inclusion using the <see cref="OpenBitSet" />.
+ /// Since all testing is done using RAM resident data
+ /// structures, performance should be very fast, most likely
+ /// fast enough to not require further caching of the
+ /// DocIdSet for each possible combination of terms.
+ /// However, because docIDs are simply scanned linearly, an
+ /// index with a great many small documents may find this
+ /// linear scan too costly.
+ ///
+ /// <p/>
+ ///
+ /// In contrast, TermsFilter builds up an <see cref="OpenBitSet" />,
+ /// keyed by docID, every time it's created, by enumerating
+ /// through all matching docs using <see cref="TermDocs" /> to seek
+ /// and scan through each term's docID list. While there is
+ /// no linear scan of all docIDs, besides the allocation of
+ /// the underlying array in the <see cref="OpenBitSet" />, this
+ /// approach requires a number of "disk seeks" in proportion
+ /// to the number of terms, which can be exceptionally costly
+ /// when there are cache misses in the OS's IO cache.
+ ///
+ /// <p/>
+ ///
+ /// Generally, this filter will be slower on the first
+ /// invocation for a given field, but subsequent invocations,
+ /// even if you change the allowed set of Terms, should be
+ /// faster than TermsFilter, especially as the number of
+ /// Terms being matched increases. If you are matching only
+ /// a very small number of terms, and those terms in turn
+ /// match a very small number of documents, TermsFilter may
+ /// perform faster.
+ ///
+ /// <p/>
+ ///
+ /// Which filter is best is very application dependent.
+ /// </summary>
+
+ [Serializable]
+ public class FieldCacheTermsFilter:Filter
+ {
+ private readonly string field;
+ private readonly string[] terms;
+
+ public FieldCacheTermsFilter(string field, params string[] terms)
+ {
+ this.field = field;
+ this.terms = terms;
+ }
+
+ public virtual FieldCache FieldCache
+ {
+ get { return FieldCache_Fields.DEFAULT; }
+ }
+
+ public override DocIdSet GetDocIdSet(IndexReader reader)
+ {
+ return new FieldCacheTermsFilterDocIdSet(this, FieldCache.GetStringIndex(reader, field));
+ }
+
+ protected internal class FieldCacheTermsFilterDocIdSet:DocIdSet
+ {
+ private void InitBlock(FieldCacheTermsFilter enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private FieldCacheTermsFilter enclosingInstance;
+ public FieldCacheTermsFilter Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ private readonly Lucene.Net.Search.StringIndex fcsi;
+
+ private readonly OpenBitSet openBitSet;
+
+ public FieldCacheTermsFilterDocIdSet(FieldCacheTermsFilter enclosingInstance, StringIndex fcsi)
+ {
+ InitBlock(enclosingInstance);
+ this.fcsi = fcsi;
+ openBitSet = new OpenBitSet(this.fcsi.lookup.Length);
+ foreach (string t in Enclosing_Instance.terms)
+ {
+ int termNumber = this.fcsi.BinarySearchLookup(t);
+ if (termNumber > 0)
+ {
+ openBitSet.FastSet(termNumber);
+ }
+ }
+ }
+
+ public override DocIdSetIterator Iterator()
+ {
+ return new FieldCacheTermsFilterDocIdSetIterator(this);
+ }
+
+ /// <summary>This DocIdSet implementation is cacheable. </summary>
+ public override bool IsCacheable
+ {
+ get { return true; }
+ }
+
+ protected internal class FieldCacheTermsFilterDocIdSetIterator:DocIdSetIterator
+ {
+ public FieldCacheTermsFilterDocIdSetIterator(FieldCacheTermsFilterDocIdSet enclosingInstance)
+ {
+ InitBlock(enclosingInstance);
+ }
+ private void InitBlock(FieldCacheTermsFilterDocIdSet enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private FieldCacheTermsFilterDocIdSet enclosingInstance;
+ public FieldCacheTermsFilterDocIdSet Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ private int doc = - 1;
+
+ public override int DocID()
+ {
+ return doc;
+ }
+
+ public override int NextDoc()
+ {
+ try
+ {
+ while (!Enclosing_Instance.openBitSet.FastGet(Enclosing_Instance.fcsi.order[++doc]))
+ {
+ }
+ }
+ catch (IndexOutOfRangeException)
+ {
+ doc = NO_MORE_DOCS;
+ }
+ return doc;
+ }
+
+ public override int Advance(int target)
+ {
+ try
+ {
+ doc = target;
+ while (!Enclosing_Instance.openBitSet.FastGet(Enclosing_Instance.fcsi.order[doc]))
+ {
+ doc++;
+ }
+ }
+ catch (IndexOutOfRangeException)
+ {
+ doc = NO_MORE_DOCS;
+ }
+ return doc;
+ }
+ }
+ }
+ }
+}
\ No newline at end of file
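A usage sketch for the filter above; the Searcher, the "category" field, and the term values are illustrative assumptions. As the summary notes, the field must hold a single term per document.

using Lucene.Net.Search;

static class TermsFilterSketch
{
    public static TopDocs Demo(Searcher searcher)
    {
        // Accepts documents whose single-term "category" field matches one of the
        // listed values; the underlying StringIndex is built once per reader and reused.
        Filter categories = new FieldCacheTermsFilter("category", "news", "sports", "weather");
        return searcher.Search(new MatchAllDocsQuery(), categories, 20);
    }
}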
diff --git a/src/core/Search/FieldComparator.cs b/src/core/Search/FieldComparator.cs
new file mode 100644
index 0000000..0eb5f52
--- /dev/null
+++ b/src/core/Search/FieldComparator.cs
@@ -0,0 +1,1065 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Support;
+using IndexReader = Lucene.Net.Index.IndexReader;
+using ByteParser = Lucene.Net.Search.ByteParser;
+using DoubleParser = Lucene.Net.Search.DoubleParser;
+using FloatParser = Lucene.Net.Search.FloatParser;
+using IntParser = Lucene.Net.Search.IntParser;
+using LongParser = Lucene.Net.Search.LongParser;
+using ShortParser = Lucene.Net.Search.ShortParser;
+using StringIndex = Lucene.Net.Search.StringIndex;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> Expert: a FieldComparator compares hits so as to determine their
+ /// sort order when collecting the top results with <see cref="TopFieldCollector" />
+ ///. The concrete public FieldComparator
+ /// classes here correspond to the SortField types.
+ ///
+ /// <p/>This API is designed to achieve high performance
+ /// sorting, by exposing a tight interaction with <see cref="FieldValueHitQueue" />
+ /// as it visits hits. Whenever a hit is
+ /// competitive, it's enrolled into a virtual slot, which is
+ /// an int ranging from 0 to numHits-1. The <see cref="FieldComparator" />
+ /// is made aware of segment transitions
+ /// during searching in case any internal state it's tracking
+ /// needs to be recomputed during these transitions.<p/>
+ ///
+ /// <p/>A comparator must define these functions:<p/>
+ ///
+ /// <list type="bullet">
+ ///
+ /// <item> <see cref="Compare" /> Compare a hit at 'slot a'
+ /// with hit 'slot b'.</item>
+ ///
+ /// <item> <see cref="SetBottom" /> This method is called by
+ /// <see cref="FieldValueHitQueue" /> to notify the
+ /// FieldComparator of the current weakest ("bottom")
+ /// slot. Note that this slot may not hold the weakest
+ /// value according to your comparator, in cases where
+ /// your comparator is not the primary one (ie, is only
+ /// used to break ties from the comparators before it).</item>
+ ///
+ /// <item> <see cref="CompareBottom" /> Compare a new hit (docID)
+ /// against the "weakest" (bottom) entry in the queue.</item>
+ ///
+ /// <item> <see cref="Copy" /> Installs a new hit into the
+ /// priority queue. The <see cref="FieldValueHitQueue" />
+ /// calls this method when a new hit is competitive.</item>
+ ///
+ /// <item> <see cref="SetNextReader" /> Invoked
+ /// when the search is switching to the next segment.
+ /// You may need to update internal state of the
+ /// comparator, for example retrieving new values from
+ /// the <see cref="FieldCache" />.</item>
+ ///
+ /// <item> <see cref="P:Lucene.Net.Search.FieldComparator.Item(System.Int32)" /> Return the sort value stored in
+ /// the specified slot. This is only called at the end
+ /// of the search, in order to populate <see cref="FieldDoc.fields" />
+ /// when returning the top results.</item>
+ /// </list>
+ ///
+ /// <b>NOTE:</b> This API is experimental and might change in
+ /// incompatible ways in the next release.
+ /// </summary>
+ public abstract class FieldComparator
+ {
+ /// <summary> Compare hit at slot1 with hit at slot2.
+ ///
+ /// </summary>
+ /// <param name="slot1">first slot to compare
+ /// </param>
+ /// <param name="slot2">second slot to compare
+ /// </param>
+ /// <returns> any N &lt; 0 if slot2's value is sorted after
+ /// slot1, any N > 0 if the slot2's value is sorted before
+ /// slot1 and 0 if they are equal
+ /// </returns>
+ public abstract int Compare(int slot1, int slot2);
+
+ /// <summary> Set the bottom slot, ie the "weakest" (sorted last)
+ /// entry in the queue. When <see cref="CompareBottom" /> is
+ /// called, you should compare against this slot. This
+ /// will always be called before <see cref="CompareBottom" />.
+ ///
+ /// </summary>
+ /// <param name="slot">the currently weakest (sorted last) slot in the queue
+ /// </param>
+ public abstract void SetBottom(int slot);
+
+		/// <summary> Compare the bottom of the queue with doc. This will
+		/// only be invoked after <see cref="SetBottom" /> has been called. This
+		/// should return the same result as <see cref="Compare(int,int)" />
+		/// as if bottom were slot1 and the new
+		/// document were slot 2.
+ ///
+ /// <p/>For a search that hits many results, this method
+ /// will be the hotspot (invoked by far the most
+ /// frequently).<p/>
+ ///
+ /// </summary>
+ /// <param name="doc">that was hit
+ /// </param>
+ /// <returns> any N &lt; 0 if the doc's value is sorted after
+ /// the bottom entry (not competitive), any N > 0 if the
+ /// doc's value is sorted before the bottom entry and 0 if
+ /// they are equal.
+ /// </returns>
+ public abstract int CompareBottom(int doc);
+
+ /// <summary> This method is called when a new hit is competitive.
+ /// You should copy any state associated with this document
+ /// that will be required for future comparisons, into the
+ /// specified slot.
+ ///
+ /// </summary>
+ /// <param name="slot">which slot to copy the hit to
+ /// </param>
+ /// <param name="doc">docID relative to current reader
+ /// </param>
+ public abstract void Copy(int slot, int doc);
+
+		/// <summary> Set a new Reader. All subsequent docs correspond to the current Reader.
+ ///
+ /// </summary>
+ /// <param name="reader">current reader
+ /// </param>
+ /// <param name="docBase">docBase of this reader
+ /// </param>
+		/// <throws> IOException </throws>
+ public abstract void SetNextReader(IndexReader reader, int docBase);
+
+ /// <summary>Sets the Scorer to use in case a document's score is
+ /// needed.
+ ///
+ /// </summary>
+ /// <param name="scorer">Scorer instance that you should use to
+ /// obtain the current hit's score, if necessary.
+ /// </param>
+ public virtual void SetScorer(Scorer scorer)
+ {
+ // Empty implementation since most comparators don't need the score. This
+ // can be overridden by those that need it.
+ }
+
+ /// <summary> Return the actual value in the slot.
+ ///
+ /// </summary>
+		/// <param name="slot">the slot whose value should be returned
+ /// </param>
+ /// <returns> value in this slot upgraded to Comparable
+ /// </returns>
+ public abstract IComparable this[int slot] { get; }
+
+		/// <summary>Parses field's values as byte (using <see cref="FieldCache.GetBytes(Lucene.Net.Index.IndexReader,string)" />)
+ /// and sorts by ascending value
+ /// </summary>
+ public sealed class ByteComparator:FieldComparator
+ {
+ private sbyte[] values;
+ private sbyte[] currentReaderValues;
+ private System.String field;
+ private ByteParser parser;
+ private sbyte bottom;
+
+ internal ByteComparator(int numHits, System.String field, Lucene.Net.Search.Parser parser)
+ {
+ values = new sbyte[numHits];
+ this.field = field;
+ this.parser = (ByteParser) parser;
+ }
+
+ public override int Compare(int slot1, int slot2)
+ {
+ return values[slot1] - values[slot2];
+ }
+
+ public override int CompareBottom(int doc)
+ {
+ return bottom - currentReaderValues[doc];
+ }
+
+ public override void Copy(int slot, int doc)
+ {
+ values[slot] = currentReaderValues[doc];
+ }
+
+ public override void SetNextReader(IndexReader reader, int docBase)
+ {
+ currentReaderValues = Lucene.Net.Search.FieldCache_Fields.DEFAULT.GetBytes(reader, field, parser);
+ }
+
+ public override void SetBottom(int bottom)
+ {
+ this.bottom = values[bottom];
+ }
+
+ public override IComparable this[int slot]
+ {
+ get { return (sbyte) values[slot]; }
+ }
+ }
+
+ /// <summary>Sorts by ascending docID </summary>
+ public sealed class DocComparator:FieldComparator
+ {
+ private int[] docIDs;
+ private int docBase;
+ private int bottom;
+
+ internal DocComparator(int numHits)
+ {
+ docIDs = new int[numHits];
+ }
+
+ public override int Compare(int slot1, int slot2)
+ {
+ // No overflow risk because docIDs are non-negative
+ return docIDs[slot1] - docIDs[slot2];
+ }
+
+ public override int CompareBottom(int doc)
+ {
+ // No overflow risk because docIDs are non-negative
+ return bottom - (docBase + doc);
+ }
+
+ public override void Copy(int slot, int doc)
+ {
+ docIDs[slot] = docBase + doc;
+ }
+
+ public override void SetNextReader(IndexReader reader, int docBase)
+ {
+ // TODO: can we "map" our docIDs to the current
+ // reader? saves having to then subtract on every
+ // compare call
+ this.docBase = docBase;
+ }
+
+ public override void SetBottom(int bottom)
+ {
+ this.bottom = docIDs[bottom];
+ }
+
+ public override IComparable this[int slot]
+ {
+ get { return (System.Int32) docIDs[slot]; }
+ }
+ }
+
+		/// <summary>Parses field's values as double (using <see cref="FieldCache.GetDoubles(Lucene.Net.Index.IndexReader,string)" />)
+ /// and sorts by ascending value
+ /// </summary>
+ public sealed class DoubleComparator:FieldComparator
+ {
+ private double[] values;
+ private double[] currentReaderValues;
+ private System.String field;
+ private DoubleParser parser;
+ private double bottom;
+
+ internal DoubleComparator(int numHits, System.String field, Lucene.Net.Search.Parser parser)
+ {
+ values = new double[numHits];
+ this.field = field;
+ this.parser = (DoubleParser) parser;
+ }
+
+ public override int Compare(int slot1, int slot2)
+ {
+ double v1 = values[slot1];
+ double v2 = values[slot2];
+ if (v1 > v2)
+ {
+ return 1;
+ }
+ else if (v1 < v2)
+ {
+ return - 1;
+ }
+ else
+ {
+ return 0;
+ }
+ }
+
+ public override int CompareBottom(int doc)
+ {
+ double v2 = currentReaderValues[doc];
+ if (bottom > v2)
+ {
+ return 1;
+ }
+ else if (bottom < v2)
+ {
+ return - 1;
+ }
+ else
+ {
+ return 0;
+ }
+ }
+
+ public override void Copy(int slot, int doc)
+ {
+ values[slot] = currentReaderValues[doc];
+ }
+
+ public override void SetNextReader(IndexReader reader, int docBase)
+ {
+ currentReaderValues = Lucene.Net.Search.FieldCache_Fields.DEFAULT.GetDoubles(reader, field, parser);
+ }
+
+ public override void SetBottom(int bottom)
+ {
+ this.bottom = values[bottom];
+ }
+
+ public override IComparable this[int slot]
+ {
+ get { return (double) values[slot]; }
+ }
+ }
+
+		/// <summary>Parses field's values as float (using <see cref="FieldCache.GetFloats(Lucene.Net.Index.IndexReader,string)" />)
+ /// and sorts by ascending value
+ /// </summary>
+ public sealed class FloatComparator:FieldComparator
+ {
+ private float[] values;
+ private float[] currentReaderValues;
+ private System.String field;
+ private FloatParser parser;
+ private float bottom;
+
+ internal FloatComparator(int numHits, System.String field, Lucene.Net.Search.Parser parser)
+ {
+ values = new float[numHits];
+ this.field = field;
+ this.parser = (FloatParser) parser;
+ }
+
+ public override int Compare(int slot1, int slot2)
+ {
+ // TODO: are there sneaky non-branch ways to compute
+ // sign of float?
+ float v1 = values[slot1];
+ float v2 = values[slot2];
+ if (v1 > v2)
+ {
+ return 1;
+ }
+ else if (v1 < v2)
+ {
+ return - 1;
+ }
+ else
+ {
+ return 0;
+ }
+ }
+
+ public override int CompareBottom(int doc)
+ {
+ // TODO: are there sneaky non-branch ways to compute
+ // sign of float?
+ float v2 = currentReaderValues[doc];
+ if (bottom > v2)
+ {
+ return 1;
+ }
+ else if (bottom < v2)
+ {
+ return - 1;
+ }
+ else
+ {
+ return 0;
+ }
+ }
+
+ public override void Copy(int slot, int doc)
+ {
+ values[slot] = currentReaderValues[doc];
+ }
+
+ public override void SetNextReader(IndexReader reader, int docBase)
+ {
+ currentReaderValues = Lucene.Net.Search.FieldCache_Fields.DEFAULT.GetFloats(reader, field, parser);
+ }
+
+ public override void SetBottom(int bottom)
+ {
+ this.bottom = values[bottom];
+ }
+
+ public override IComparable this[int slot]
+ {
+ get { return (float) values[slot]; }
+ }
+ }
+
+ /// <summary>Parses field's values as int (using <see cref="FieldCache.GetInts(Lucene.Net.Index.IndexReader,string)" />)
+ /// and sorts by ascending value
+ /// </summary>
+ public sealed class IntComparator:FieldComparator
+ {
+ private int[] values;
+ private int[] currentReaderValues;
+ private System.String field;
+ private IntParser parser;
+ private int bottom; // Value of bottom of queue
+
+ internal IntComparator(int numHits, System.String field, Lucene.Net.Search.Parser parser)
+ {
+ values = new int[numHits];
+ this.field = field;
+ this.parser = (IntParser) parser;
+ }
+
+ public override int Compare(int slot1, int slot2)
+ {
+ // TODO: there are sneaky non-branch ways to compute
+ // -1/+1/0 sign
+ // Cannot return values[slot1] - values[slot2] because that
+ // may overflow
+ int v1 = values[slot1];
+ int v2 = values[slot2];
+ if (v1 > v2)
+ {
+ return 1;
+ }
+ else if (v1 < v2)
+ {
+ return - 1;
+ }
+ else
+ {
+ return 0;
+ }
+ }
+
+ public override int CompareBottom(int doc)
+ {
+ // TODO: there are sneaky non-branch ways to compute
+ // -1/+1/0 sign
+ // Cannot return bottom - values[slot2] because that
+ // may overflow
+ int v2 = currentReaderValues[doc];
+ if (bottom > v2)
+ {
+ return 1;
+ }
+ else if (bottom < v2)
+ {
+ return - 1;
+ }
+ else
+ {
+ return 0;
+ }
+ }
+
+ public override void Copy(int slot, int doc)
+ {
+ values[slot] = currentReaderValues[doc];
+ }
+
+ public override void SetNextReader(IndexReader reader, int docBase)
+ {
+ currentReaderValues = Lucene.Net.Search.FieldCache_Fields.DEFAULT.GetInts(reader, field, parser);
+ }
+
+ public override void SetBottom(int bottom)
+ {
+ this.bottom = values[bottom];
+ }
+
+ public override IComparable this[int slot]
+ {
+ get { return (System.Int32) values[slot]; }
+ }
+ }
+
+ /// <summary>Parses field's values as long (using <see cref="FieldCache.GetLongs(Lucene.Net.Index.IndexReader,string)" />)
+ /// and sorts by ascending value
+ /// </summary>
+ public sealed class LongComparator:FieldComparator
+ {
+ private long[] values;
+ private long[] currentReaderValues;
+ private System.String field;
+ private LongParser parser;
+ private long bottom;
+
+ internal LongComparator(int numHits, System.String field, Lucene.Net.Search.Parser parser)
+ {
+ values = new long[numHits];
+ this.field = field;
+ this.parser = (LongParser) parser;
+ }
+
+ public override int Compare(int slot1, int slot2)
+ {
+ // TODO: there are sneaky non-branch ways to compute
+ // -1/+1/0 sign
+ long v1 = values[slot1];
+ long v2 = values[slot2];
+ if (v1 > v2)
+ {
+ return 1;
+ }
+ else if (v1 < v2)
+ {
+ return - 1;
+ }
+ else
+ {
+ return 0;
+ }
+ }
+
+ public override int CompareBottom(int doc)
+ {
+ // TODO: there are sneaky non-branch ways to compute
+ // -1/+1/0 sign
+ long v2 = currentReaderValues[doc];
+ if (bottom > v2)
+ {
+ return 1;
+ }
+ else if (bottom < v2)
+ {
+ return - 1;
+ }
+ else
+ {
+ return 0;
+ }
+ }
+
+ public override void Copy(int slot, int doc)
+ {
+ values[slot] = currentReaderValues[doc];
+ }
+
+ public override void SetNextReader(IndexReader reader, int docBase)
+ {
+ currentReaderValues = Lucene.Net.Search.FieldCache_Fields.DEFAULT.GetLongs(reader, field, parser);
+ }
+
+ public override void SetBottom(int bottom)
+ {
+ this.bottom = values[bottom];
+ }
+
+ public override IComparable this[int slot]
+ {
+ get { return (long) values[slot]; }
+ }
+ }
+
+ /// <summary>Sorts by descending relevance. NOTE: if you are
+ /// sorting only by descending relevance and then
+ /// secondarily by ascending docID, performance is faster
+ /// using <see cref="TopScoreDocCollector" /> directly (which <see cref="Searcher.Search(Query, int)" />
+ /// uses when no <see cref="Sort" /> is
+ /// specified).
+ /// </summary>
+ public sealed class RelevanceComparator:FieldComparator
+ {
+ private float[] scores;
+ private float bottom;
+ private Scorer scorer;
+
+ internal RelevanceComparator(int numHits)
+ {
+ scores = new float[numHits];
+ }
+
+ public override int Compare(int slot1, int slot2)
+ {
+ float score1 = scores[slot1];
+ float score2 = scores[slot2];
+ return score1 > score2?- 1:(score1 < score2?1:0);
+ }
+
+ public override int CompareBottom(int doc)
+ {
+ float score = scorer.Score();
+ return bottom > score?- 1:(bottom < score?1:0);
+ }
+
+ public override void Copy(int slot, int doc)
+ {
+ scores[slot] = scorer.Score();
+ }
+
+ public override void SetNextReader(IndexReader reader, int docBase)
+ {
+ }
+
+ public override void SetBottom(int bottom)
+ {
+ this.bottom = scores[bottom];
+ }
+
+ public override void SetScorer(Scorer scorer)
+ {
+ // wrap with a ScoreCachingWrappingScorer so that successive calls to
+ // score() will not incur score computation over and over again.
+ this.scorer = new ScoreCachingWrappingScorer(scorer);
+ }
+
+ public override IComparable this[int slot]
+ {
+ get { return (float) scores[slot]; }
+ }
+ }
+
+ /// <summary>Parses field's values as short (using <see cref="FieldCache.GetShorts(IndexReader, string)" />)
+ /// and sorts by ascending value
+ /// </summary>
+ public sealed class ShortComparator:FieldComparator
+ {
+ private short[] values;
+ private short[] currentReaderValues;
+ private System.String field;
+ private ShortParser parser;
+ private short bottom;
+
+ internal ShortComparator(int numHits, System.String field, Lucene.Net.Search.Parser parser)
+ {
+ values = new short[numHits];
+ this.field = field;
+ this.parser = (ShortParser) parser;
+ }
+
+ public override int Compare(int slot1, int slot2)
+ {
+ return values[slot1] - values[slot2];
+ }
+
+ public override int CompareBottom(int doc)
+ {
+ return bottom - currentReaderValues[doc];
+ }
+
+ public override void Copy(int slot, int doc)
+ {
+ values[slot] = currentReaderValues[doc];
+ }
+
+ public override void SetNextReader(IndexReader reader, int docBase)
+ {
+ currentReaderValues = Lucene.Net.Search.FieldCache_Fields.DEFAULT.GetShorts(reader, field, parser);
+ }
+
+ public override void SetBottom(int bottom)
+ {
+ this.bottom = values[bottom];
+ }
+
+ public override IComparable this[int slot]
+ {
+ get { return (short) values[slot]; }
+ }
+ }
+
+ /// <summary>Sorts by a field's value using the Collator for a
+ /// given Locale.
+ /// </summary>
+ public sealed class StringComparatorLocale:FieldComparator
+ {
+
+ private System.String[] values;
+ private System.String[] currentReaderValues;
+ private System.String field;
+ internal System.Globalization.CompareInfo collator;
+ private System.String bottom;
+
+ internal StringComparatorLocale(int numHits, System.String field, System.Globalization.CultureInfo locale)
+ {
+ values = new System.String[numHits];
+ this.field = field;
+ collator = locale.CompareInfo;
+ }
+
+ public override int Compare(int slot1, int slot2)
+ {
+ System.String val1 = values[slot1];
+ System.String val2 = values[slot2];
+ if (val1 == null)
+ {
+ if (val2 == null)
+ {
+ return 0;
+ }
+ return - 1;
+ }
+ else if (val2 == null)
+ {
+ return 1;
+ }
+ return collator.Compare(val1.ToString(), val2.ToString());
+ }
+
+ public override int CompareBottom(int doc)
+ {
+ System.String val2 = currentReaderValues[doc];
+ if (bottom == null)
+ {
+ if (val2 == null)
+ {
+ return 0;
+ }
+ return - 1;
+ }
+ else if (val2 == null)
+ {
+ return 1;
+ }
+ return collator.Compare(bottom.ToString(), val2.ToString());
+ }
+
+ public override void Copy(int slot, int doc)
+ {
+ values[slot] = currentReaderValues[doc];
+ }
+
+ public override void SetNextReader(IndexReader reader, int docBase)
+ {
+ currentReaderValues = Lucene.Net.Search.FieldCache_Fields.DEFAULT.GetStrings(reader, field);
+ }
+
+ public override void SetBottom(int bottom)
+ {
+ this.bottom = values[bottom];
+ }
+
+ public override IComparable this[int slot]
+ {
+ get { return values[slot]; }
+ }
+ }
+
+ /// <summary>Sorts by field's natural String sort order, using
+ /// ordinals. This is functionally equivalent to <see cref="FieldComparator.StringValComparator" />
+ ///, but it first resolves the strings
+ /// to their relative ordinal positions (using the index
+ /// returned by <see cref="FieldCache.GetStringIndex" />), and
+ /// does most comparisons using the ordinals. For medium
+ /// to large results, this comparator will be much faster
+ /// than <see cref="FieldComparator.StringValComparator" />. For very small
+ /// result sets it may be slower.
+ /// </summary>
+ public sealed class StringOrdValComparator:FieldComparator
+ {
+
+ private int[] ords;
+ private System.String[] values;
+ private int[] readerGen;
+
+ private int currentReaderGen = - 1;
+ private System.String[] lookup;
+ private int[] order;
+ private System.String field;
+
+ private int bottomSlot = - 1;
+ private int bottomOrd;
+ private System.String bottomValue;
+ private bool reversed;
+ private int sortPos;
+
+ public StringOrdValComparator(int numHits, System.String field, int sortPos, bool reversed)
+ {
+ ords = new int[numHits];
+ values = new System.String[numHits];
+ readerGen = new int[numHits];
+ this.sortPos = sortPos;
+ this.reversed = reversed;
+ this.field = field;
+ }
+
+ public override int Compare(int slot1, int slot2)
+ {
+ if (readerGen[slot1] == readerGen[slot2])
+ {
+ int cmp = ords[slot1] - ords[slot2];
+ if (cmp != 0)
+ {
+ return cmp;
+ }
+ }
+
+ System.String val1 = values[slot1];
+ System.String val2 = values[slot2];
+ if (val1 == null)
+ {
+ if (val2 == null)
+ {
+ return 0;
+ }
+ return - 1;
+ }
+ else if (val2 == null)
+ {
+ return 1;
+ }
+ return String.CompareOrdinal(val1, val2);
+ }
+
+ public override int CompareBottom(int doc)
+ {
+ System.Diagnostics.Debug.Assert(bottomSlot != - 1);
+ int order = this.order[doc];
+ int cmp = bottomOrd - order;
+ if (cmp != 0)
+ {
+ return cmp;
+ }
+
+ System.String val2 = lookup[order];
+ if (bottomValue == null)
+ {
+ if (val2 == null)
+ {
+ return 0;
+ }
+ // bottom wins
+ return - 1;
+ }
+ else if (val2 == null)
+ {
+ // doc wins
+ return 1;
+ }
+ return String.CompareOrdinal(bottomValue, val2);
+ }
+
+ private void Convert(int slot)
+ {
+ readerGen[slot] = currentReaderGen;
+ int index = 0;
+ System.String value_Renamed = values[slot];
+ if (value_Renamed == null)
+ {
+ ords[slot] = 0;
+ return ;
+ }
+
+ if (sortPos == 0 && bottomSlot != - 1 && bottomSlot != slot)
+ {
+ // Since we are the primary sort, the entries in the
+ // queue are bounded by bottomOrd:
+ System.Diagnostics.Debug.Assert(bottomOrd < lookup.Length);
+ if (reversed)
+ {
+ index = BinarySearch(lookup, value_Renamed, bottomOrd, lookup.Length - 1);
+ }
+ else
+ {
+ index = BinarySearch(lookup, value_Renamed, 0, bottomOrd);
+ }
+ }
+ else
+ {
+ // Full binary search
+ index = BinarySearch(lookup, value_Renamed);
+ }
+
+ if (index < 0)
+ {
+ index = - index - 2;
+ }
+ ords[slot] = index;
+ }
+
+ public override void Copy(int slot, int doc)
+ {
+ int ord = order[doc];
+ ords[slot] = ord;
+ System.Diagnostics.Debug.Assert(ord >= 0);
+ values[slot] = lookup[ord];
+ readerGen[slot] = currentReaderGen;
+ }
+
+ public override void SetNextReader(IndexReader reader, int docBase)
+ {
+ StringIndex currentReaderValues = Lucene.Net.Search.FieldCache_Fields.DEFAULT.GetStringIndex(reader, field);
+ currentReaderGen++;
+ order = currentReaderValues.order;
+ lookup = currentReaderValues.lookup;
+ System.Diagnostics.Debug.Assert(lookup.Length > 0);
+ if (bottomSlot != - 1)
+ {
+ Convert(bottomSlot);
+ bottomOrd = ords[bottomSlot];
+ }
+ }
+
+ public override void SetBottom(int bottom)
+ {
+ bottomSlot = bottom;
+ if (readerGen[bottom] != currentReaderGen)
+ {
+ Convert(bottomSlot);
+ }
+ bottomOrd = ords[bottom];
+ System.Diagnostics.Debug.Assert(bottomOrd >= 0);
+ System.Diagnostics.Debug.Assert(bottomOrd < lookup.Length);
+ bottomValue = values[bottom];
+ }
+
+ public override IComparable this[int slot]
+ {
+ get { return values[slot]; }
+ }
+
+ public string[] GetValues()
+ {
+ return values;
+ }
+
+ public int BottomSlot
+ {
+ get { return bottomSlot; }
+ }
+
+ public string Field
+ {
+ get { return field; }
+ }
+ }
+
+ /// <summary>Sorts by field's natural String sort order. All
+ /// comparisons are done using String.CompareOrdinal, which is
+ /// slow for medium to large result sets but possibly
+ /// very fast for very small result sets.
+ /// </summary>
+ public sealed class StringValComparator:FieldComparator
+ {
+
+ private System.String[] values;
+ private System.String[] currentReaderValues;
+ private System.String field;
+ private System.String bottom;
+
+ internal StringValComparator(int numHits, System.String field)
+ {
+ values = new System.String[numHits];
+ this.field = field;
+ }
+
+ public override int Compare(int slot1, int slot2)
+ {
+ System.String val1 = values[slot1];
+ System.String val2 = values[slot2];
+ if (val1 == null)
+ {
+ if (val2 == null)
+ {
+ return 0;
+ }
+ return - 1;
+ }
+ else if (val2 == null)
+ {
+ return 1;
+ }
+
+ return String.CompareOrdinal(val1, val2);
+ }
+
+ public override int CompareBottom(int doc)
+ {
+ System.String val2 = currentReaderValues[doc];
+ if (bottom == null)
+ {
+ if (val2 == null)
+ {
+ return 0;
+ }
+ return - 1;
+ }
+ else if (val2 == null)
+ {
+ return 1;
+ }
+ return String.CompareOrdinal(bottom, val2);
+ }
+
+ public override void Copy(int slot, int doc)
+ {
+ values[slot] = currentReaderValues[doc];
+ }
+
+ public override void SetNextReader(IndexReader reader, int docBase)
+ {
+ currentReaderValues = Lucene.Net.Search.FieldCache_Fields.DEFAULT.GetStrings(reader, field);
+ }
+
+ public override void SetBottom(int bottom)
+ {
+ this.bottom = values[bottom];
+ }
+
+ public override IComparable this[int slot]
+ {
+ get { return values[slot]; }
+ }
+ }
+
+ protected internal static int BinarySearch(System.String[] a, System.String key)
+ {
+ return BinarySearch(a, key, 0, a.Length - 1);
+ }
+
+ protected internal static int BinarySearch(System.String[] a, System.String key, int low, int high)
+ {
+
+ while (low <= high)
+ {
+ int mid = Number.URShift((low + high), 1);
+ System.String midVal = a[mid];
+ int cmp;
+ if (midVal != null)
+ {
+ cmp = String.CompareOrdinal(midVal, key);
+ }
+ else
+ {
+ cmp = - 1;
+ }
+
+ if (cmp < 0)
+ low = mid + 1;
+ else if (cmp > 0)
+ high = mid - 1;
+ else
+ return mid;
+ }
+ return - (low + 1);
+ }
+ }
+} \ No newline at end of file
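The comparators above are not normally instantiated directly; they are selected by SortField when a sorted search runs. A minimal usage sketch, assuming the usual Lucene.Net 3.x API surface; the "price" field and the tiny in-memory index are illustrative only:

    using System;
    using Lucene.Net.Analysis;
    using Lucene.Net.Documents;
    using Lucene.Net.Index;
    using Lucene.Net.Search;
    using Lucene.Net.Store;

    class SortByIntFieldSketch
    {
        static void Main()
        {
            var dir = new RAMDirectory();
            using (var writer = new IndexWriter(dir, new WhitespaceAnalyzer(),
                                                IndexWriter.MaxFieldLength.UNLIMITED))
            {
                var doc = new Document();
                // The sort field is indexed as a single untokenized term.
                doc.Add(new Field("price", "42", Field.Store.YES, Field.Index.NOT_ANALYZED));
                writer.AddDocument(doc);
            }

            var searcher = new IndexSearcher(dir, true);
            // SortField.INT routes comparisons through the IntComparator defined above.
            var sort = new Sort(new SortField("price", SortField.INT));
            TopDocs top = searcher.Search(new MatchAllDocsQuery(), null, 10, sort);
            Console.WriteLine("hits: " + top.TotalHits);
        }
    }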
diff --git a/src/core/Search/FieldComparatorSource.cs b/src/core/Search/FieldComparatorSource.cs
new file mode 100644
index 0000000..bb02fa9
--- /dev/null
+++ b/src/core/Search/FieldComparatorSource.cs
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> Provides a <see cref="FieldComparator" /> for custom field sorting.
+ ///
+ /// <b>NOTE:</b> This API is experimental and might change in
+ /// incompatible ways in the next release.
+ ///
+ /// </summary>
+ [Serializable]
+ public abstract class FieldComparatorSource
+ {
+
+ /// <summary> Creates a comparator for the field in the given index.
+ ///
+ /// </summary>
+ /// <param name="fieldname">Name of the field to create comparator for.
+ /// </param>
+ /// <returns> FieldComparator.
+ /// </returns>
+ /// <throws> IOException </throws>
+ /// <summary> If an error occurs reading the index.
+ /// </summary>
+ public abstract FieldComparator NewComparator(System.String fieldname, int numHits, int sortPos, bool reversed);
+ }
+} \ No newline at end of file
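A sketch of a custom comparator plugged in through this hook, wired up with new SortField(field, source). The "weight" field name is hypothetical and the comparator simply reuses cached ints; the overrides mirror the abstract members of FieldComparator shown above.

    using System;
    using Lucene.Net.Index;
    using Lucene.Net.Search;

    // Usage: new Sort(new SortField("weight", new IntFieldComparatorSource()))
    [Serializable]
    public class IntFieldComparatorSource : FieldComparatorSource
    {
        public override FieldComparator NewComparator(string fieldname, int numHits,
                                                      int sortPos, bool reversed)
        {
            return new SimpleIntComparator(numHits, fieldname);
        }

        private sealed class SimpleIntComparator : FieldComparator
        {
            private readonly int[] slotValues;  // values held by queue slots
            private int[] currentReaderValues;  // per-segment values from the FieldCache
            private readonly string field;
            private int bottom;                 // value of the weakest queue entry

            internal SimpleIntComparator(int numHits, string field)
            {
                slotValues = new int[numHits];
                this.field = field;
            }

            public override int Compare(int slot1, int slot2)
            {
                return slotValues[slot1].CompareTo(slotValues[slot2]);
            }

            public override int CompareBottom(int doc)
            {
                return bottom.CompareTo(currentReaderValues[doc]);
            }

            public override void Copy(int slot, int doc)
            {
                slotValues[slot] = currentReaderValues[doc];
            }

            public override void SetNextReader(IndexReader reader, int docBase)
            {
                currentReaderValues = FieldCache_Fields.DEFAULT.GetInts(reader, field);
            }

            public override void SetBottom(int bottom)
            {
                this.bottom = slotValues[bottom];
            }

            public override IComparable this[int slot]
            {
                get { return slotValues[slot]; }
            }
        }
    }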
diff --git a/src/core/Search/FieldDoc.cs b/src/core/Search/FieldDoc.cs
new file mode 100644
index 0000000..b273035
--- /dev/null
+++ b/src/core/Search/FieldDoc.cs
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> Expert: A ScoreDoc which also contains information about
+ /// how to sort the referenced document. In addition to the
+ /// document number and score, this object contains an array
+ /// of values for the document from the field(s) used to sort.
+ /// For example, if the sort criteria was to sort by fields
+ /// "a", "b" then "c", the <c>fields</c> object array
+ /// will have three elements, corresponding respectively to
+ /// the term values for the document in fields "a", "b" and "c".
+ /// The class of each element in the array will be either
+ /// Integer, Float or String depending on the type of values
+ /// in the terms of each field.
+ ///
+ /// <p/>Created: Feb 11, 2004 1:23:38 PM
+ ///
+ /// </summary>
+ /// <seealso cref="ScoreDoc"></seealso>
+ /// <seealso cref="TopFieldDocs"></seealso>
+ [Serializable]
+ public class FieldDoc:ScoreDoc
+ {
+
+ /// <summary>Expert: The values which are used to sort the referenced document.
+ /// The order of these will match the original sort criteria given by a
+ /// Sort object. Each Object will be either an Integer, Float or String,
+ /// depending on the type of values in the terms of the original field.
+ /// </summary>
+ /// <seealso cref="Sort">
+ /// </seealso>
+ /// <seealso cref="Searcher.Search(Query,Filter,int,Sort)">
+ /// </seealso>
+ [NonSerialized]
+ public System.IComparable[] fields;
+
+ /// <summary>Expert: Creates one of these objects with empty sort information. </summary>
+ public FieldDoc(int doc, float score):base(doc, score)
+ {
+ }
+
+ /// <summary>Expert: Creates one of these objects with the given sort information. </summary>
+ public FieldDoc(int doc, float score, System.IComparable[] fields):base(doc, score)
+ {
+ this.fields = fields;
+ }
+
+ // A convenience method for debugging.
+ public override System.String ToString()
+ {
+ // super.toString returns the doc and score information, so just add the
+ // fields information
+ System.Text.StringBuilder sb = new System.Text.StringBuilder(base.ToString());
+ sb.Append("[");
+ for (int i = 0; i < fields.Length; i++)
+ {
+ sb.Append(fields[i]).Append(", ");
+ }
+ sb.Length -= 2; // discard last ", "
+ sb.Append("]");
+ return sb.ToString();
+ }
+
+ #region SERIALIZATION
+ internal object[] fieldsClone = null;
+
+ [System.Runtime.Serialization.OnSerializing]
+ void OnSerializing(System.Runtime.Serialization.StreamingContext context)
+ {
+ if (fields == null) return;
+
+ // Copy "fields" to "fieldsClone"
+ fieldsClone = new object[fields.Length];
+ for (int i = 0; i < fields.Length; i++)
+ {
+ fieldsClone[i] = fields[i];
+ }
+ }
+
+ [System.Runtime.Serialization.OnDeserialized]
+ void OnDeserialized(System.Runtime.Serialization.StreamingContext context)
+ {
+ if (fieldsClone == null) return;
+
+ // Form "fields" from "fieldsClone"
+ fields = new IComparable[fieldsClone.Length];
+ for (int i = 0; i < fields.Length; i++)
+ {
+ fields[i] = (IComparable)fieldsClone[i];
+ }
+ }
+ #endregion
+ }
+} \ No newline at end of file
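When a search is sorted, each returned hit is a FieldDoc whose fields array holds the per-criterion sort values. A short sketch of reading them, reusing the hypothetical searcher and sort objects from the sketch above:

    TopFieldDocs result = searcher.Search(new MatchAllDocsQuery(), null, 10, sort);
    foreach (ScoreDoc sd in result.ScoreDocs)
    {
        var fieldDoc = (FieldDoc) sd;
        // fields[i] corresponds to the i-th SortField of the Sort object.
        Console.WriteLine(fieldDoc.Doc + " " + fieldDoc);
    }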
diff --git a/src/core/Search/FieldDocSortedHitQueue.cs b/src/core/Search/FieldDocSortedHitQueue.cs
new file mode 100644
index 0000000..46a450c
--- /dev/null
+++ b/src/core/Search/FieldDocSortedHitQueue.cs
@@ -0,0 +1,148 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> Expert: Collects sorted results from Searchables and collates them.
+ /// The elements put into this queue must be of type FieldDoc.
+ ///
+ /// <p/>Created: Feb 11, 2004 2:04:21 PM
+ ///
+ /// </summary>
+ /// <since> lucene 1.4
+ /// </since>
+ class FieldDocSortedHitQueue : PriorityQueue<FieldDoc>
+ {
+ internal volatile SortField[] fields = null;
+
+ // used in the case where the fields are sorted by locale
+ // based strings
+ internal volatile System.Globalization.CompareInfo[] collators;
+
+
+ /// <summary> Creates a hit queue sorted by the given list of fields.</summary>
+ /// <param name="size">The number of hits to retain. Must be greater than zero.</param>
+ internal FieldDocSortedHitQueue(int size)
+ {
+ Initialize(size);
+ }
+
+
+ /// <summary> Allows redefinition of sort fields if they are <c>null</c>.
+ /// This is to handle the case using ParallelMultiSearcher where the
+ /// original list contains AUTO and we don't know the actual sort
+ /// type until the values come back. The fields can only be set once.
+ /// This method is thread safe.
+ /// </summary>
+ /// <param name="fields"></param>
+ internal virtual void SetFields(SortField[] fields)
+ {
+ lock (this)
+ {
+ this.fields = fields;
+ this.collators = HasCollators(fields);
+ }
+ }
+
+ /// <summary>Returns the fields being used to sort. </summary>
+ internal virtual SortField[] GetFields()
+ {
+ return fields;
+ }
+
+
+ /// <summary>Returns an array of collators, possibly <c>null</c>. The collators
+ /// correspond to any SortFields which were given a specific locale.
+ /// </summary>
+ /// <param name="fields">Array of sort fields.</param>
+ /// <returns> Array, possibly <c>null</c>.</returns>
+ private System.Globalization.CompareInfo[] HasCollators(SortField[] fields)
+ {
+ if (fields == null)
+ return null;
+ System.Globalization.CompareInfo[] ret = new System.Globalization.CompareInfo[fields.Length];
+ for (int i = 0; i < fields.Length; ++i)
+ {
+ System.Globalization.CultureInfo locale = fields[i].Locale;
+ if (locale != null)
+ ret[i] = locale.CompareInfo;
+ }
+ return ret;
+ }
+
+
+ /// <summary> Returns whether <c>a</c> is less relevant than <c>b</c>.</summary>
+ /// <param name="docA">ScoreDoc</param>
+ /// <param name="docB">ScoreDoc</param>
+ /// <returns><c>true</c> if document <c>a</c> should be sorted after document <c>b</c>.</returns>
+ public override bool LessThan(FieldDoc docA, FieldDoc docB)
+ {
+ int n = fields.Length;
+ int c = 0;
+ for (int i = 0; i < n && c == 0; ++i)
+ {
+ int type = fields[i].Type;
+ if(type == SortField.STRING)
+ {
+ string s1 = (string) docA.fields[i];
+ string s2 = (string) docB.fields[i];
+ // null values need to be sorted first, because of how FieldCache.getStringIndex()
+ // works - in that routine, any documents without a value in the given field are
+ // put first. If both are null, the next SortField is used
+ if (s1 == null)
+ {
+ c = (s2 == null) ? 0 : -1;
+ }
+ else if (s2 == null)
+ {
+ c = 1;
+ }
+ else if (fields[i].Locale == null)
+ {
+ c = s1.CompareTo(s2);
+ }
+ else
+ {
+ c = collators[i].Compare(s1, s2);
+ }
+ }
+ else
+ {
+ c = docA.fields[i].CompareTo(docB.fields[i]);
+ if (type == SortField.SCORE)
+ {
+ c = -c;
+ }
+ }
+ if (fields[i].Reverse)
+ {
+ c = - c;
+ }
+ }
+
+ // avoid random sort order that could lead to duplicates (bug #31241):
+ if (c == 0)
+ return docA.Doc > docB.Doc;
+
+ return c > 0;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/FieldValueHitQueue.cs b/src/core/Search/FieldValueHitQueue.cs
new file mode 100644
index 0000000..752a564
--- /dev/null
+++ b/src/core/Search/FieldValueHitQueue.cs
@@ -0,0 +1,235 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> Expert: A hit queue for sorting hits by terms in more than one field.
+ /// Uses <c>FieldCache.DEFAULT</c> for maintaining
+ /// internal term lookup tables.
+ ///
+ /// <b>NOTE:</b> This API is experimental and might change in
+ /// incompatible ways in the next release.
+ ///
+ /// </summary>
+ /// <seealso cref="Searcher.Search(Query,Filter,int,Sort)"></seealso>
+ /// <seealso cref="FieldCache"></seealso>
+ public abstract class FieldValueHitQueue : PriorityQueue<FieldValueHitQueue.Entry>
+ {
+ // had to change from internal to public, due to public accessibility of FieldValueHitQueue
+ public /*internal*/ sealed class Entry : ScoreDoc
+ {
+ internal int slot;
+
+ internal Entry(int slot, int doc, float score)
+ : base(doc, score)
+ {
+
+ this.slot = slot;
+ }
+
+ public override System.String ToString()
+ {
+ return "slot:" + slot + " " + base.ToString();
+ }
+ }
+
+ /// <summary> An implementation of <see cref="FieldValueHitQueue" /> which is optimized in case
+ /// there is just one comparator.
+ /// </summary>
+ private sealed class OneComparatorFieldValueHitQueue : FieldValueHitQueue
+ {
+
+ private FieldComparator comparator;
+ private int oneReverseMul;
+
+ public OneComparatorFieldValueHitQueue(SortField[] fields, int size):base(fields)
+ {
+ if (fields.Length == 0)
+ {
+ throw new System.ArgumentException("Sort must contain at least one field");
+ }
+
+ SortField field = fields[0];
+ comparator = field.GetComparator(size, 0);
+ oneReverseMul = field.reverse?- 1:1;
+
+ comparators[0] = comparator;
+ reverseMul[0] = oneReverseMul;
+
+ Initialize(size);
+ }
+
+ /// <summary> Returns whether <c>a</c> is less relevant than <c>b</c>.</summary>
+ /// <param name="hitA">ScoreDoc</param>
+ /// <param name="hitB">ScoreDoc</param>
+ /// <returns><c>true</c> if document <c>a</c> should be sorted after document <c>b</c>.</returns>
+ public override bool LessThan(Entry hitA, Entry hitB)
+ {
+ System.Diagnostics.Debug.Assert(hitA != hitB);
+ System.Diagnostics.Debug.Assert(hitA.slot != hitB.slot);
+
+ int c = oneReverseMul * comparator.Compare(hitA.slot, hitB.slot);
+ if (c != 0)
+ {
+ return c > 0;
+ }
+
+ // avoid random sort order that could lead to duplicates (bug #31241):
+ return hitA.Doc > hitB.Doc;
+ }
+ }
+
+ /// <summary> An implementation of <see cref="FieldValueHitQueue" /> which is optimized in case
+ /// there is more than one comparator.
+ /// </summary>
+ private sealed class MultiComparatorsFieldValueHitQueue : FieldValueHitQueue
+ {
+
+ public MultiComparatorsFieldValueHitQueue(SortField[] fields, int size):base(fields)
+ {
+
+ int numComparators = comparators.Length;
+ for (int i = 0; i < numComparators; ++i)
+ {
+ SortField field = fields[i];
+
+ reverseMul[i] = field.reverse?- 1:1;
+ comparators[i] = field.GetComparator(size, i);
+ }
+
+ Initialize(size);
+ }
+
+ public override bool LessThan(Entry hitA, Entry hitB)
+ {
+ System.Diagnostics.Debug.Assert(hitA != hitB);
+ System.Diagnostics.Debug.Assert(hitA.slot != hitB.slot);
+
+ int numComparators = comparators.Length;
+ for (int i = 0; i < numComparators; ++i)
+ {
+ int c = reverseMul[i] * comparators[i].Compare(hitA.slot, hitB.slot);
+ if (c != 0)
+ {
+ // Short circuit
+ return c > 0;
+ }
+ }
+
+ // avoid random sort order that could lead to duplicates (bug #31241):
+ return hitA.Doc > hitB.Doc;
+ }
+ }
+
+ // prevent instantiation and extension.
+ private FieldValueHitQueue(SortField[] fields)
+ {
+ // When we get here, fields.length is guaranteed to be > 0, therefore no
+ // need to check it again.
+
+ // All these are required by this class's API - need to return arrays.
+ // Therefore even in the case of a single comparator, create an array
+ // anyway.
+ this.fields = fields;
+ int numComparators = fields.Length;
+ comparators = new FieldComparator[numComparators];
+ reverseMul = new int[numComparators];
+ }
+
+ /// <summary> Creates a hit queue sorted by the given list of fields.
+ ///
+ /// <p/><b>NOTE</b>: The instances returned by this method
+ /// pre-allocate a full array of length <c>numHits</c>.
+ ///
+ /// </summary>
+ /// <param name="fields">SortField array we are sorting by in priority order (highest
+ /// priority first); cannot be <c>null</c> or empty
+ /// </param>
+ /// <param name="size">The number of hits to retain. Must be greater than zero.
+ /// </param>
+ /// <throws> IOException </throws>
+ public static FieldValueHitQueue Create(SortField[] fields, int size)
+ {
+
+ if (fields.Length == 0)
+ {
+ throw new System.ArgumentException("Sort must contain at least one field");
+ }
+
+ if (fields.Length == 1)
+ {
+ return new OneComparatorFieldValueHitQueue(fields, size);
+ }
+ else
+ {
+ return new MultiComparatorsFieldValueHitQueue(fields, size);
+ }
+ }
+
+ internal virtual FieldComparator[] GetComparators()
+ {
+ return comparators;
+ }
+
+ internal virtual int[] GetReverseMul()
+ {
+ return reverseMul;
+ }
+
+ /// <summary>Stores the sort criteria being used. </summary>
+ protected internal SortField[] fields;
+ protected internal FieldComparator[] comparators;
+ protected internal int[] reverseMul;
+
+ public abstract override bool LessThan(Entry a, Entry b);
+
+ /// <summary> Given a queue Entry, creates a corresponding FieldDoc
+ /// that contains the values used to sort the given document.
+ /// These values are not the raw values out of the index, but the internal
+ /// representation of them. This is so the given search hit can be collated by
+ /// a MultiSearcher with other search hits.
+ ///
+ /// </summary>
+ /// <param name="entry">The Entry used to create a FieldDoc
+ /// </param>
+ /// <returns> The newly created FieldDoc
+ /// </returns>
+ /// <seealso cref="Searchable.Search(Weight,Filter,int,Sort)">
+ /// </seealso>
+ internal virtual FieldDoc FillFields(Entry entry)
+ {
+ int n = comparators.Length;
+ System.IComparable[] fields = new System.IComparable[n];
+ for (int i = 0; i < n; ++i)
+ {
+ fields[i] = comparators[i][entry.slot];
+ }
+ //if (maxscore > 1.0f) doc.score /= maxscore; // normalize scores
+ return new FieldDoc(entry.Doc, entry.Score, fields);
+ }
+
+ /// <summary>Returns the SortFields being used by this hit queue. </summary>
+ internal virtual SortField[] GetFields()
+ {
+ return fields;
+ }
+ }
+} \ No newline at end of file
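Collectors obtain their queue through the Create factory, which picks the single-comparator or multi-comparator implementation based on how many SortFields are supplied. A small sketch with hypothetical criteria:

    // Primary: an int field; secondary: relevance score.
    SortField[] criteria =
    {
        new SortField("price", SortField.INT),
        SortField.FIELD_SCORE
    };
    FieldValueHitQueue queue = FieldValueHitQueue.Create(criteria, 10);
    // Two fields, so the multi-comparator implementation is chosen.
    Console.WriteLine(queue.GetType().Name);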
diff --git a/src/core/Search/Filter.cs b/src/core/Search/Filter.cs
new file mode 100644
index 0000000..f4f1f24
--- /dev/null
+++ b/src/core/Search/Filter.cs
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using IndexReader = Lucene.Net.Index.IndexReader;
+using DocIdBitSet = Lucene.Net.Util.DocIdBitSet;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary>Abstract base class for restricting which documents may be returned during searching.</summary>
+ [Serializable]
+ public abstract class Filter
+ {
+ ///<summary>
+ /// <para>Creates a <see cref="DocIdSet" /> enumerating the documents that should be
+ /// permitted in search results. <b>NOTE:</b> null can be
+ /// returned if no documents are accepted by this Filter.</para>
+ /// <p/>
+ /// <para>Note: This method will be called once per segment in
+ /// the index during searching. The returned <see cref="DocIdSet" />
+ /// must refer to document IDs for that segment, not for
+ /// the top-level reader.</para>
+ ///</summary>
+ /// <returns> a DocIdSet that provides the documents which should be permitted or
+ /// prohibited in search results. <b>NOTE:</b> null can be returned if
+ /// no documents will be accepted by this Filter.
+ /// </returns>
+ /// <param name="reader">
+ /// A <see cref="IndexReader" /> instance opened on the index currently
+ /// searched on. Note, it is likely that the provided reader does not
+ /// represent the whole underlying index i.e. if the index has more than
+ /// one segment the given reader only represents a single segment.
+ /// </param>
+ /// <seealso cref="DocIdBitSet">
+ /// </seealso>
+ public abstract DocIdSet GetDocIdSet(IndexReader reader);
+ }
+} \ No newline at end of file
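A minimal sketch of a concrete Filter built on the DocIdBitSet helper referenced above; it admits only documents containing a given term. The "status"/"published" field and value are illustrative, and resource cleanup is omitted for brevity.

    using System;
    using System.Collections;
    using Lucene.Net.Index;
    using Lucene.Net.Search;
    using Lucene.Net.Util;

    [Serializable]
    public class PublishedOnlyFilter : Filter
    {
        public override DocIdSet GetDocIdSet(IndexReader reader)
        {
            // One bit per document in this segment; set bits are admitted.
            var bits = new BitArray(reader.MaxDoc);
            TermDocs termDocs = reader.TermDocs(new Term("status", "published"));
            while (termDocs.Next())
            {
                bits.Set(termDocs.Doc, true);
            }
            return new DocIdBitSet(bits);
        }
    }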
diff --git a/src/core/Search/FilterManager.cs b/src/core/Search/FilterManager.cs
new file mode 100644
index 0000000..1afdc0b
--- /dev/null
+++ b/src/core/Search/FilterManager.cs
@@ -0,0 +1,203 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> Filter caching singleton. It can be used
+ /// to save filters locally for reuse.
+ /// This class makes it possible to cache Filters even when using RMI, as it
+ /// keeps the cache on the searcher side of the RMI connection.
+ ///
+ /// Also could be used as a persistent storage for any filter as long as the
+ /// filter provides a proper hashCode(), as that is used as the key in the cache.
+ ///
+ /// The cache is periodically cleaned up from a separate thread to ensure the
+ /// cache doesn't exceed the maximum size.
+ /// </summary>
+ public class FilterManager
+ {
+
+ protected internal static FilterManager manager;
+
+ /// <summary>The default maximum number of Filters in the cache </summary>
+ protected internal const int DEFAULT_CACHE_CLEAN_SIZE = 100;
+ /// <summary>The default frequency of cache cleanup </summary>
+ protected internal const long DEFAULT_CACHE_SLEEP_TIME = 1000 * 60 * 10;
+
+ /// <summary>The cache itself </summary>
+ protected internal IDictionary<int, FilterItem> cache;
+ /// <summary>Maximum allowed cache size </summary>
+ protected internal int cacheCleanSize;
+ /// <summary>Cache cleaning frequency </summary>
+ protected internal long cleanSleepTime;
+ /// <summary>Cache cleaner that runs in a separate thread </summary>
+ protected internal FilterCleaner internalFilterCleaner;
+
+ private static readonly object _staticSyncObj = new object();
+ public static FilterManager Instance
+ {
+ get
+ {
+ lock (_staticSyncObj)
+ {
+ return manager ?? (manager = new FilterManager());
+ }
+ }
+ }
+
+ /// <summary> Sets up the FilterManager singleton.</summary>
+ protected internal FilterManager()
+ {
+ cache = new HashMap<int, FilterItem>();
+ cacheCleanSize = DEFAULT_CACHE_CLEAN_SIZE; // Let the cache get to 100 items
+ cleanSleepTime = DEFAULT_CACHE_SLEEP_TIME; // 10 minutes between cleanings
+
+ internalFilterCleaner = new FilterCleaner(this);
+ ThreadClass fcThread = new ThreadClass(new System.Threading.ThreadStart(internalFilterCleaner.Run));
+ // set to be a daemon thread so it doesn't have to be stopped
+ fcThread.IsBackground = true;
+ fcThread.Start();
+ }
+
+ /// <summary> Sets the max size that cache should reach before it is cleaned up</summary>
+ /// <param name="value"> maximum allowed cache size </param>
+ public virtual void SetCacheSize(int value)
+ {
+ this.cacheCleanSize = value;
+ }
+
+ /// <summary> Sets the cache cleaning frequency in milliseconds.</summary>
+ /// <param name="value"> cleaning frequency in millioseconds </param>
+ public virtual void SetCleanThreadSleepTime(long value)
+ {
+ this.cleanSleepTime = value;
+ }
+
+ /// <summary> Returns the cached version of the filter. Allows the caller to pass up
+ /// a small filter but this will keep a persistent version around and allow
+ /// the caching filter to do its job.
+ ///
+ /// </summary>
+ /// <param name="filter">The input filter
+ /// </param>
+ /// <returns> The cached version of the filter
+ /// </returns>
+ public virtual Filter GetFilter(Filter filter)
+ {
+ lock (cache)
+ {
+ FilterItem fi = null;
+ fi = cache[filter.GetHashCode()];
+ if (fi != null)
+ {
+ fi.timestamp = System.DateTime.UtcNow.Ticks;
+ return fi.filter;
+ }
+ cache[filter.GetHashCode()] = new FilterItem(filter);
+ return filter;
+ }
+ }
+
+ /// <summary> Holds the filter and the last time the filter was used, to make LRU-based
+ /// cache cleaning possible.
+ /// TODO: Clean this up when we switch to Java 1.5
+ /// </summary>
+ protected internal class FilterItem
+ {
+ public Filter filter;
+ public long timestamp;
+
+ public FilterItem(Filter filter)
+ {
+ this.filter = filter;
+ this.timestamp = System.DateTime.UtcNow.Ticks;
+ }
+ }
+
+
+ /// <summary> Keeps the cache from getting too big.
+ /// If we were using Java 1.5, we could use LinkedHashMap and we would not need this thread
+ /// to clean out the cache.
+ ///
+ /// The SortedSet sortedFilterItems is used only to sort the items from the cache,
+ /// so when it's time to clean up we have the TreeSet sort the FilterItems by
+ /// timestamp.
+ ///
+ /// Removes 1.5 * the number of items to make the cache smaller.
+ /// For example:
+ /// If the cache clean size is 10 and the cache is at 15, we would remove (15 - 10) * 1.5 = 7.5 items, truncated to 7 by the integer cast.
+ /// This way we clean the cache a bit more, and avoid having the cache cleaner run too frequently.
+ /// </summary>
+ protected internal class FilterCleaner : IThreadRunnable
+ {
+ private class FilterItemComparer : IComparer<KeyValuePair<int, FilterItem>>
+ {
+ #region IComparer<FilterItem> Members
+
+ public int Compare(KeyValuePair<int, FilterItem> x, KeyValuePair<int, FilterItem> y)
+ {
+ return x.Value.timestamp.CompareTo(y.Value.timestamp);
+ }
+
+ #endregion
+ }
+
+ private bool running = true;
+ private FilterManager manager;
+ private ISet<KeyValuePair<int, FilterItem>> sortedFilterItems;
+
+ public FilterCleaner(FilterManager enclosingInstance)
+ {
+ this.manager = enclosingInstance;
+ sortedFilterItems = new SortedSet<KeyValuePair<int, FilterItem>>(new FilterItemComparer());
+ }
+
+ public virtual void Run()
+ {
+ while (running)
+ {
+ // sort items from oldest to newest
+ // we delete the oldest filters
+ if (this.manager.cache.Count > this.manager.cacheCleanSize)
+ {
+ // empty the temporary set
+ sortedFilterItems.Clear();
+ lock (this.manager.cache)
+ {
+ sortedFilterItems.UnionWith(this.manager.cache);
+ int numToDelete = (int)((this.manager.cache.Count - this.manager.cacheCleanSize) * 1.5);
+
+ //delete all of the cache entries not used in a while
+ sortedFilterItems.ExceptWith(sortedFilterItems.Take(numToDelete).ToArray());
+ }
+ // empty the set so we don't tie up the memory
+ sortedFilterItems.Clear();
+ }
+ // take a nap
+ System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64)10000 * this.manager.cleanSleepTime));
+
+ }
+ }
+ }
+ }
+} \ No newline at end of file
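Callers hand their filter to the singleton and keep whatever comes back; filters with the same hash code resolve to the one cached instance. A sketch, reusing the hypothetical PublishedOnlyFilter from the Filter example above:

    Filter raw = new PublishedOnlyFilter();
    Filter shared = FilterManager.Instance.GetFilter(raw);

    // Optional tuning of the cache before heavy use.
    FilterManager.Instance.SetCacheSize(200);
    FilterManager.Instance.SetCleanThreadSleepTime(60 * 1000);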
diff --git a/src/core/Search/FilteredDocIdSet.cs b/src/core/Search/FilteredDocIdSet.cs
new file mode 100644
index 0000000..cd590d3
--- /dev/null
+++ b/src/core/Search/FilteredDocIdSet.cs
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> Abstract decorator class for a DocIdSet implementation
+ /// that provides on-demand filtering/validation
+ /// mechanism on a given DocIdSet.
+ ///
+ /// <p/>
+ ///
+ /// Technically, this same functionality could be achieved
+ /// with ChainedFilter (under contrib/misc), however the
+ /// benefit of this class is it never materializes the full
+ /// bitset for the filter. Instead, the <see cref="Match" />
+ /// method is invoked on-demand, per docID visited during
+ /// searching. If you know few docIDs will be visited, and
+ /// the logic behind <see cref="Match" /> is relatively costly,
+ /// this may be a better way to filter than ChainedFilter.
+ ///
+ /// </summary>
+ /// <seealso cref="DocIdSet">
+ /// </seealso>
+
+ public abstract class FilteredDocIdSet:DocIdSet
+ {
+ private class AnonymousClassFilteredDocIdSetIterator:FilteredDocIdSetIterator
+ {
+ public AnonymousClassFilteredDocIdSetIterator(FilteredDocIdSet enclosingInstance) : base(null)
+ {
+ System.Diagnostics.Debug.Fail("Port issue:", "Lets see if we need this"); // {{Aroush-2.9}}
+ InitBlock(enclosingInstance);
+ }
+ private void InitBlock(FilteredDocIdSet enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private FilteredDocIdSet enclosingInstance;
+ public FilteredDocIdSet Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ internal AnonymousClassFilteredDocIdSetIterator(FilteredDocIdSet enclosingInstance, Lucene.Net.Search.DocIdSetIterator Param1):base(Param1)
+ {
+ InitBlock(enclosingInstance);
+ }
+ public /*protected internal*/ override bool Match(int docid)
+ {
+ return Enclosing_Instance.Match(docid);
+ }
+ }
+ private DocIdSet _innerSet;
+
+ /// <summary> Constructor.</summary>
+ /// <param name="innerSet">Underlying DocIdSet
+ /// </param>
+ protected FilteredDocIdSet(DocIdSet innerSet)
+ {
+ _innerSet = innerSet;
+ }
+
+ /// <summary>This DocIdSet implementation is cacheable if the inner set is cacheable. </summary>
+ public override bool IsCacheable
+ {
+ get { return _innerSet.IsCacheable; }
+ }
+
+ /// <summary> Validation method to determine whether a docid should be in the result set.</summary>
+ /// <param name="docid">docid to be tested
+ /// </param>
+ /// <returns> true if input docid should be in the result set, false otherwise.
+ /// </returns>
+ public /*protected internal*/ abstract bool Match(int docid);
+
+ /// <summary> Implementation of the contract to build a DocIdSetIterator.</summary>
+ /// <seealso cref="DocIdSetIterator">
+ /// </seealso>
+ /// <seealso cref="FilteredDocIdSetIterator">
+ /// </seealso>
+ // @Override
+ public override DocIdSetIterator Iterator()
+ {
+ return new AnonymousClassFilteredDocIdSetIterator(this, _innerSet.Iterator());
+ }
+ }
+} \ No newline at end of file
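A sketch of the decorator in use; Match runs lazily for each docID visited during search rather than materializing a second bitset. The even/odd rule is purely illustrative.

    public class OddDocIdsOnly : FilteredDocIdSet
    {
        public OddDocIdsOnly(DocIdSet inner) : base(inner)
        {
        }

        // Only odd document IDs from the inner set survive.
        public override bool Match(int docid)
        {
            return (docid & 1) == 1;
        }
    }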
diff --git a/src/core/Search/FilteredDocIdSetIterator.cs b/src/core/Search/FilteredDocIdSetIterator.cs
new file mode 100644
index 0000000..29e93b5
--- /dev/null
+++ b/src/core/Search/FilteredDocIdSetIterator.cs
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> Abstract decorator class of a DocIdSetIterator
+ /// implementation that provides on-demand filter/validation
+ /// mechanism on an underlying DocIdSetIterator. See <see cref="FilteredDocIdSet" />
+ ///.
+ /// </summary>
+ public abstract class FilteredDocIdSetIterator:DocIdSetIterator
+ {
+ protected internal DocIdSetIterator internalInnerIter;
+ private int doc;
+
+ /// <summary> Constructor.</summary>
+ /// <param name="innerIter">Underlying DocIdSetIterator.
+ /// </param>
+ protected FilteredDocIdSetIterator(DocIdSetIterator innerIter)
+ {
+ if (innerIter == null)
+ {
+ throw new System.ArgumentException("null iterator");
+ }
+ internalInnerIter = innerIter;
+ doc = - 1;
+ }
+
+ /// <summary> Validation method to determine whether a docid should be in the result set.</summary>
+ /// <param name="doc">docid to be tested
+ /// </param>
+ /// <returns> true if input docid should be in the result set, false otherwise.
+ /// </returns>
+ /// <seealso cref="FilteredDocIdSetIterator(DocIdSetIterator)">
+ /// </seealso>
+ public abstract /*protected internal*/ bool Match(int doc);
+
+ public override int DocID()
+ {
+ return doc;
+ }
+
+ public override int NextDoc()
+ {
+ while ((doc = internalInnerIter.NextDoc()) != NO_MORE_DOCS)
+ {
+ if (Match(doc))
+ {
+ return doc;
+ }
+ }
+ return doc;
+ }
+
+ public override int Advance(int target)
+ {
+ doc = internalInnerIter.Advance(target);
+ if (doc != NO_MORE_DOCS)
+ {
+ if (Match(doc))
+ {
+ return doc;
+ }
+ else
+ {
+ while ((doc = internalInnerIter.NextDoc()) != NO_MORE_DOCS)
+ {
+ if (Match(doc))
+ {
+ return doc;
+ }
+ }
+ return doc;
+ }
+ }
+ return doc;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/FilteredQuery.cs b/src/core/Search/FilteredQuery.cs
new file mode 100644
index 0000000..d60a75b
--- /dev/null
+++ b/src/core/Search/FilteredQuery.cs
@@ -0,0 +1,293 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Index;
+using IndexReader = Lucene.Net.Index.IndexReader;
+using ToStringUtils = Lucene.Net.Util.ToStringUtils;
+
+namespace Lucene.Net.Search
+{
+
+
+ /// <summary> A query that applies a filter to the results of another query.
+ ///
+ /// <p/>Note: the bits are retrieved from the filter each time this
+ /// query is used in a search - use a CachingWrapperFilter to avoid
+ /// regenerating the bits every time.
+ ///
+ /// <p/>Created: Apr 20, 2004 8:58:29 AM
+ ///
+ /// </summary>
+ /// <since>1.4</since>
+ /// <seealso cref="CachingWrapperFilter"/>
+ [Serializable]
+ public class FilteredQuery:Query
+ {
+ [Serializable]
+ private class AnonymousClassWeight:Weight
+ {
+ public AnonymousClassWeight(Lucene.Net.Search.Weight weight, Lucene.Net.Search.Similarity similarity, FilteredQuery enclosingInstance)
+ {
+ InitBlock(weight, similarity, enclosingInstance);
+ }
+ private class AnonymousClassScorer:Scorer
+ {
+ private void InitBlock(Lucene.Net.Search.Scorer scorer, Lucene.Net.Search.DocIdSetIterator docIdSetIterator, AnonymousClassWeight enclosingInstance)
+ {
+ this.scorer = scorer;
+ this.docIdSetIterator = docIdSetIterator;
+ this.enclosingInstance = enclosingInstance;
+ }
+ private Lucene.Net.Search.Scorer scorer;
+ private Lucene.Net.Search.DocIdSetIterator docIdSetIterator;
+ private AnonymousClassWeight enclosingInstance;
+ public AnonymousClassWeight Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ internal AnonymousClassScorer(Lucene.Net.Search.Scorer scorer, Lucene.Net.Search.DocIdSetIterator docIdSetIterator, AnonymousClassWeight enclosingInstance, Lucene.Net.Search.Similarity Param1):base(Param1)
+ {
+ InitBlock(scorer, docIdSetIterator, enclosingInstance);
+ }
+
+ private int doc = - 1;
+
+ private int AdvanceToCommon(int scorerDoc, int disiDoc)
+ {
+ while (scorerDoc != disiDoc)
+ {
+ if (scorerDoc < disiDoc)
+ {
+ scorerDoc = scorer.Advance(disiDoc);
+ }
+ else
+ {
+ disiDoc = docIdSetIterator.Advance(scorerDoc);
+ }
+ }
+ return scorerDoc;
+ }
+
+ public override int NextDoc()
+ {
+ int scorerDoc, disiDoc;
+ return doc = (disiDoc = docIdSetIterator.NextDoc()) != NO_MORE_DOCS && (scorerDoc = scorer.NextDoc()) != NO_MORE_DOCS && AdvanceToCommon(scorerDoc, disiDoc) != NO_MORE_DOCS?scorer.DocID():NO_MORE_DOCS;
+ }
+ public override int DocID()
+ {
+ return doc;
+ }
+
+ public override int Advance(int target)
+ {
+ int disiDoc, scorerDoc;
+ return doc = (disiDoc = docIdSetIterator.Advance(target)) != NO_MORE_DOCS && (scorerDoc = scorer.Advance(disiDoc)) != NO_MORE_DOCS && AdvanceToCommon(scorerDoc, disiDoc) != NO_MORE_DOCS?scorer.DocID():NO_MORE_DOCS;
+ }
+
+ public override float Score()
+ {
+ return Enclosing_Instance.Enclosing_Instance.Boost * scorer.Score();
+ }
+ }
+ private void InitBlock(Lucene.Net.Search.Weight weight, Lucene.Net.Search.Similarity similarity, FilteredQuery enclosingInstance)
+ {
+ this.weight = weight;
+ this.similarity = similarity;
+ this.enclosingInstance = enclosingInstance;
+ }
+ private Lucene.Net.Search.Weight weight;
+ private Lucene.Net.Search.Similarity similarity;
+ private FilteredQuery enclosingInstance;
+ public FilteredQuery Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ private float value_Renamed;
+
+ // pass these methods through to enclosed query's weight
+
+ public override float Value
+ {
+ get { return value_Renamed; }
+ }
+
+ public override float GetSumOfSquaredWeights()
+ {
+ return weight.GetSumOfSquaredWeights()*Enclosing_Instance.Boost*Enclosing_Instance.Boost;
+ }
+
+ public override void Normalize(float v)
+ {
+ weight.Normalize(v);
+ value_Renamed = weight.Value * Enclosing_Instance.Boost;
+ }
+ public override Explanation Explain(IndexReader ir, int i)
+ {
+ Explanation inner = weight.Explain(ir, i);
+ if (Enclosing_Instance.Boost != 1)
+ {
+ Explanation preBoost = inner;
+ inner = new Explanation(inner.Value * Enclosing_Instance.Boost, "product of:");
+ inner.AddDetail(new Explanation(Enclosing_Instance.Boost, "boost"));
+ inner.AddDetail(preBoost);
+ }
+ Filter f = Enclosing_Instance.filter;
+ DocIdSet docIdSet = f.GetDocIdSet(ir);
+ DocIdSetIterator docIdSetIterator = docIdSet == null?DocIdSet.EMPTY_DOCIDSET.Iterator():docIdSet.Iterator();
+ if (docIdSetIterator == null)
+ {
+ docIdSetIterator = DocIdSet.EMPTY_DOCIDSET.Iterator();
+ }
+ if (docIdSetIterator.Advance(i) == i)
+ {
+ return inner;
+ }
+ else
+ {
+ Explanation result = new Explanation(0.0f, "failure to match filter: " + f.ToString());
+ result.AddDetail(inner);
+ return result;
+ }
+ }
+
+ // return this query
+
+ public override Query Query
+ {
+ get { return Enclosing_Instance; }
+ }
+
+ // return a filtering scorer
+ public override Scorer Scorer(IndexReader indexReader, bool scoreDocsInOrder, bool topScorer)
+ {
+ Scorer scorer = weight.Scorer(indexReader, true, false);
+ if (scorer == null)
+ {
+ return null;
+ }
+ DocIdSet docIdSet = Enclosing_Instance.filter.GetDocIdSet(indexReader);
+ if (docIdSet == null)
+ {
+ return null;
+ }
+ DocIdSetIterator docIdSetIterator = docIdSet.Iterator();
+ if (docIdSetIterator == null)
+ {
+ return null;
+ }
+
+ return new AnonymousClassScorer(scorer, docIdSetIterator, this, similarity);
+ }
+ }
+
+ internal Query query;
+ internal Filter filter;
+
+ /// <summary> Constructs a new query which applies a filter to the results of the original query.
+ /// Filter.getDocIdSet() will be called every time this query is used in a search.
+ /// </summary>
+ /// <param name="query"> Query to be filtered, cannot be <c>null</c>.
+ /// </param>
+ /// <param name="filter">Filter to apply to query results, cannot be <c>null</c>.
+ /// </param>
+ public FilteredQuery(Query query, Filter filter)
+ {
+ this.query = query;
+ this.filter = filter;
+ }
+
+ /// <summary> Returns a Weight that applies the filter to the enclosed query's Weight.
+ /// This is accomplished by overriding the Scorer returned by the Weight.
+ /// </summary>
+ public override Weight CreateWeight(Searcher searcher)
+ {
+ Weight weight = query.CreateWeight(searcher);
+ Similarity similarity = query.GetSimilarity(searcher);
+ return new AnonymousClassWeight(weight, similarity, this);
+ }
+
+ /// <summary>Rewrites the wrapped query. </summary>
+ public override Query Rewrite(IndexReader reader)
+ {
+ Query rewritten = query.Rewrite(reader);
+ if (rewritten != query)
+ {
+ FilteredQuery clone = (FilteredQuery) this.Clone();
+ clone.query = rewritten;
+ return clone;
+ }
+ else
+ {
+ return this;
+ }
+ }
+
+ public virtual Query Query
+ {
+ get { return query; }
+ }
+
+ public virtual Filter Filter
+ {
+ get { return filter; }
+ }
+
+ // inherit javadoc
+ public override void ExtractTerms(System.Collections.Generic.ISet<Term> terms)
+ {
+ Query.ExtractTerms(terms);
+ }
+
+ /// <summary>Prints a user-readable version of this query. </summary>
+ public override System.String ToString(System.String s)
+ {
+ System.Text.StringBuilder buffer = new System.Text.StringBuilder();
+ buffer.Append("filtered(");
+ buffer.Append(query.ToString(s));
+ buffer.Append(")->");
+ buffer.Append(filter);
+ buffer.Append(ToStringUtils.Boost(Boost));
+ return buffer.ToString();
+ }
+
+ /// <summary>Returns true iff <c>o</c> is equal to this. </summary>
+ public override bool Equals(System.Object o)
+ {
+ if (o is FilteredQuery)
+ {
+ FilteredQuery fq = (FilteredQuery) o;
+ return (query.Equals(fq.query) && filter.Equals(fq.filter) && Boost == fq.Boost);
+ }
+ return false;
+ }
+
+ /// <summary>Returns a hash code value for this object. </summary>
+ public override int GetHashCode()
+ {
+ return query.GetHashCode() ^ filter.GetHashCode() + System.Convert.ToInt32(Boost);
+ }
+ }
+} \ No newline at end of file
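The construction pattern documented above, shown as a rough usage sketch (not part of the diff; the field names, the QueryWrapperFilter-based filter and the existing IndexSearcher are only assumptions):

using Lucene.Net.Index;
using Lucene.Net.Search;

static class FilteredQueryExample
{
    // Match documents containing "mono" in "contents", restricted to those whose
    // "category" field equals "runtime"; the wrapped query still provides the scores.
    public static TopDocs Run(Searcher searcher)
    {
        Query inner = new TermQuery(new Term("contents", "mono"));
        Filter categoryFilter = new QueryWrapperFilter(new TermQuery(new Term("category", "runtime")));
        return searcher.Search(new FilteredQuery(inner, categoryFilter), 10);
    }
}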
diff --git a/src/core/Search/FilteredTermEnum.cs b/src/core/Search/FilteredTermEnum.cs
new file mode 100644
index 0000000..8c6e428
--- /dev/null
+++ b/src/core/Search/FilteredTermEnum.cs
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using Term = Lucene.Net.Index.Term;
+using TermEnum = Lucene.Net.Index.TermEnum;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary>Abstract class for enumerating a subset of all terms.
+ /// <p/>Term enumerations are always ordered by Term.compareTo(). Each term in
+ /// the enumeration is greater than all that precede it.
+ /// </summary>
+ public abstract class FilteredTermEnum:TermEnum
+ {
+ /// <summary>the current term </summary>
+ protected internal Term currentTerm = null;
+
+ /// <summary>the delegate enum - to set this member use <see cref="SetEnum" /> </summary>
+ protected internal TermEnum actualEnum = null;
+
+ protected FilteredTermEnum()
+ {
+ }
+
+ /// <summary>Equality compare on the term </summary>
+ protected internal abstract bool TermCompare(Term term);
+
+ /// <summary>Equality measure on the term </summary>
+ public abstract float Difference();
+
+ /// <summary>Indicates the end of the enumeration has been reached </summary>
+ public abstract bool EndEnum();
+
+ private bool isDisposed;
+
+ /// <summary> Use this method to set the actual TermEnum (e.g. in the constructor);
+ /// it will automatically be positioned on the first matching term.
+ /// </summary>
+ protected internal virtual void SetEnum(TermEnum actualEnum)
+ {
+ this.actualEnum = actualEnum;
+ // Find the first term that matches
+ Term term = actualEnum.Term;
+ if (term != null && TermCompare(term))
+ currentTerm = term;
+ else
+ Next();
+ }
+
+ /// <summary> Returns the docFreq of the current Term in the enumeration.
+ /// Returns -1 if no Term matches or all terms have been enumerated.
+ /// </summary>
+ public override int DocFreq()
+ {
+ if (currentTerm == null)
+ return - 1;
+ System.Diagnostics.Debug.Assert(actualEnum != null);
+ return actualEnum.DocFreq();
+ }
+
+ /// <summary>Increments the enumeration to the next element. True if one exists. </summary>
+ public override bool Next()
+ {
+ if (actualEnum == null)
+ return false; // the actual enumerator is not initialized!
+ currentTerm = null;
+ while (currentTerm == null)
+ {
+ if (EndEnum())
+ return false;
+ if (actualEnum.Next())
+ {
+ Term term = actualEnum.Term;
+ if (TermCompare(term))
+ {
+ currentTerm = term;
+ return true;
+ }
+ }
+ else
+ return false;
+ }
+ currentTerm = null;
+ return false;
+ }
+
+ /// <summary>Returns the current Term in the enumeration.
+ /// Returns null if no Term matches or all terms have been enumerated.
+ /// </summary>
+ public override Term Term
+ {
+ get { return currentTerm; }
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ if (isDisposed) return;
+
+ if (disposing)
+ {
+ if (actualEnum != null)
+ actualEnum.Close();
+ currentTerm = null;
+ actualEnum = null;
+ }
+
+ isDisposed = true;
+ }
+ }
+} \ No newline at end of file
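A minimal concrete subclass, sketched for illustration only; it assumes IndexReader.Terms(Term) positions the delegate enum at the first term at or after the given one, and that Term exposes Field and Text properties as elsewhere in this port:

using Lucene.Net.Index;
using Lucene.Net.Search;

// Enumerates only the terms of one field that start with a given prefix.
class PrefixFilteredTermEnum : FilteredTermEnum
{
    private readonly string field;
    private readonly string prefix;
    private bool endReached;

    public PrefixFilteredTermEnum(IndexReader reader, string field, string prefix)
    {
        this.field = field;
        this.prefix = prefix;
        // Position the delegate enum on the first candidate term.
        SetEnum(reader.Terms(new Term(field, prefix)));
    }

    protected internal override bool TermCompare(Term term)
    {
        if (term.Field == field && term.Text.StartsWith(prefix, System.StringComparison.Ordinal))
            return true;
        endReached = true; // terms are ordered, so nothing later can match
        return false;
    }

    public override float Difference() { return 1.0f; } // all matches weighted equally

    public override bool EndEnum() { return endReached; }
}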
diff --git a/src/core/Search/Function/ByteFieldSource.cs b/src/core/Search/Function/ByteFieldSource.cs
new file mode 100644
index 0000000..edebbdb
--- /dev/null
+++ b/src/core/Search/Function/ByteFieldSource.cs
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using IndexReader = Lucene.Net.Index.IndexReader;
+using FieldCache = Lucene.Net.Search.FieldCache;
+
+namespace Lucene.Net.Search.Function
+{
+
+ /// <summary> Expert: obtains single byte field values from the
+ /// <see cref="Lucene.Net.Search.FieldCache">FieldCache</see>
+ /// using <c>getBytes()</c> and makes those values
+ /// available as other numeric types, casting as needed.
+ ///
+ /// <p/><font color="#FF0000">
+ /// WARNING: The status of the <b>Search.Function</b> package is experimental.
+ /// The APIs introduced here might change in the future and will not be
+ /// supported anymore in such a case.</font>
+ ///
+ /// </summary>
+ /// <seealso cref="Lucene.Net.Search.Function.FieldCacheSource"> for requirements"
+ /// on the field.
+ ///
+ /// <p/><b>NOTE</b>: with the switch in 2.9 to segment-based
+ /// searching, if <see cref="FieldCacheSource.GetValues" /> is invoked with a
+ /// composite (multi-segment) reader, this can easily cause
+ /// double RAM usage for the values in the FieldCache. It's
+ /// best to switch your application to pass only atomic
+ /// (single segment) readers to this API.<p/>
+ /// </seealso>
+ [Serializable]
+ public class ByteFieldSource:FieldCacheSource
+ {
+ private class AnonymousClassDocValues:DocValues
+ {
+ public AnonymousClassDocValues(sbyte[] arr, ByteFieldSource enclosingInstance)
+ {
+ InitBlock(arr, enclosingInstance);
+ }
+ private void InitBlock(sbyte[] arr, ByteFieldSource enclosingInstance)
+ {
+ this.arr = arr;
+ this.enclosingInstance = enclosingInstance;
+ }
+ private sbyte[] arr;
+ private ByteFieldSource enclosingInstance;
+ public ByteFieldSource Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.DocValues.floatVal(int) */
+ public override float FloatVal(int doc)
+ {
+ return (float) arr[doc];
+ }
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.DocValues.intVal(int) */
+ public override int IntVal(int doc)
+ {
+ return arr[doc];
+ }
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.DocValues.toString(int) */
+ public override System.String ToString(int doc)
+ {
+ return Enclosing_Instance.Description() + '=' + IntVal(doc);
+ }
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.DocValues.getInnerArray() */
+
+ protected internal override object InnerArray
+ {
+ get { return arr; }
+ }
+ }
+ private Lucene.Net.Search.ByteParser parser;
+
+ /// <summary> Create a cached byte field source with default string-to-byte parser. </summary>
+ public ByteFieldSource(System.String field):this(field, null)
+ {
+ }
+
+ /// <summary> Create a cached byte field source with a specific string-to-byte parser. </summary>
+ public ByteFieldSource(System.String field, Lucene.Net.Search.ByteParser parser):base(field)
+ {
+ this.parser = parser;
+ }
+
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.ValueSource.description() */
+ public override System.String Description()
+ {
+ return "byte(" + base.Description() + ')';
+ }
+
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.FieldCacheSource.getCachedValues(Lucene.Net.Search.FieldCache, java.lang.String, Lucene.Net.Index.IndexReader) */
+ public override DocValues GetCachedFieldValues(FieldCache cache, System.String field, IndexReader reader)
+ {
+ sbyte[] arr = cache.GetBytes(reader, field, parser);
+ return new AnonymousClassDocValues(arr, this);
+ }
+
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.FieldCacheSource.cachedFieldSourceEquals(Lucene.Net.Search.Function.FieldCacheSource) */
+ public override bool CachedFieldSourceEquals(FieldCacheSource o)
+ {
+ if (o.GetType() != typeof(ByteFieldSource))
+ {
+ return false;
+ }
+ ByteFieldSource other = (ByteFieldSource) o;
+ return this.parser == null ? other.parser == null : this.parser.GetType() == other.parser.GetType();
+ }
+
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.FieldCacheSource.cachedFieldSourceHashCode() */
+ public override int CachedFieldSourceHashCode()
+ {
+ return parser == null ? typeof(System.SByte).GetHashCode() : parser.GetType().GetHashCode();
+ }
+ }
+} \ No newline at end of file
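A short usage sketch (the field name is hypothetical); the source is normally consumed through the inherited FieldCacheSource.GetValues:

using Lucene.Net.Index;
using Lucene.Net.Search.Function;

static class ByteFieldSourceExample
{
    // Reads the cached per-document values of a (hypothetical) untokenized "flags" field.
    public static int FlagOfFirstDoc(IndexReader reader)
    {
        ValueSource src = new ByteFieldSource("flags");
        DocValues vals = src.GetValues(reader); // ideally an atomic (single-segment) reader
        return vals.IntVal(0);                  // value of document 0, widened to int
    }
}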
diff --git a/src/core/Search/Function/CustomScoreProvider.cs b/src/core/Search/Function/CustomScoreProvider.cs
new file mode 100644
index 0000000..630edf4
--- /dev/null
+++ b/src/core/Search/Function/CustomScoreProvider.cs
@@ -0,0 +1,175 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+using Lucene.Net.Index;
+
+namespace Lucene.Net.Search.Function
+{
+ /// <summary>
+ /// An instance of this subclass should be returned by
+ /// <see cref="CustomScoreQuery.GetCustomScoreProvider" />, if you want
+ /// to modify the custom score calculation of a <see cref="CustomScoreQuery" />.
+ /// <para>Since Lucene 2.9, queries operate on each segment of an Index separately,
+ /// so overriding the similar (now deprecated) methods in <see cref="CustomScoreQuery" />
+ /// is no longer suitable, as the supplied <c>doc</c> ID is per-segment
+ /// and without knowledge of the IndexReader you cannot access the
+ /// document or <see cref="FieldCache" />.</para>
+ ///
+ /// @lucene.experimental
+ /// @since 2.9.2
+ /// </summary>
+ public class CustomScoreProvider
+ {
+
+ protected IndexReader reader;
+
+ /// <summary>
+ /// Creates a new instance of the provider class for the given IndexReader.
+ /// </summary>
+ public CustomScoreProvider(IndexReader reader)
+ {
+ this.reader = reader;
+ }
+
+ /// <summary>
+ /// Compute a custom score by the subQuery score and a number of
+ /// ValueSourceQuery scores.
+ /// <p/>
+ /// Subclasses can override this method to modify the custom score.
+ /// <p/>
+ /// If your custom scoring is different than the default herein you
+ /// should override at least one of the two customScore() methods.
+ /// If the number of ValueSourceQueries is always &lt; 2 it is
+ /// sufficient to override the other
+ /// <see cref="CustomScore(int, float, float)">CustomScore()</see>
+ /// method, which is simpler.
+ /// <p/>
+ /// The default computation herein is a multiplication of given scores:
+ /// <pre>
+ /// ModifiedScore = valSrcScore * valSrcScores[0] * valSrcScores[1] * ...
+ /// </pre>
+ /// </summary>
+ /// <param name="doc">id of scored doc</param>
+ /// <param name="subQueryScore">score of that doc by the subQuery</param>
+ /// <param name="valSrcScores">scores of that doc by the ValueSourceQuery</param>
+ /// <returns>custom score</returns>
+ public virtual float CustomScore(int doc, float subQueryScore, float[] valSrcScores)
+ {
+ if (valSrcScores.Length == 1)
+ {
+ return CustomScore(doc, subQueryScore, valSrcScores[0]);
+ }
+ if (valSrcScores.Length == 0)
+ {
+ return CustomScore(doc, subQueryScore, 1);
+ }
+ float score = subQueryScore;
+ for (int i = 0; i < valSrcScores.Length; i++)
+ {
+ score *= valSrcScores[i];
+ }
+ return score;
+ }
+
+ /// <summary>
+ /// Compute a custom score by the subQuery score and the ValueSourceQuery score.
+ /// <p/>
+ /// Subclasses can override this method to modify the custom score.
+ /// <p/>
+ /// If your custom scoring is different than the default herein you
+ /// should override at least one of the two customScore() methods.
+ /// If the number of ValueSourceQueries is always &lt; 2 it is
+ /// sufficient to override this customScore() method, which is simpler.
+ /// <p/>
+ /// The default computation herein is a multiplication of the two scores:
+ /// <pre>
+ /// ModifiedScore = subQueryScore * valSrcScore
+ /// </pre>
+ /// </summary>
+ /// <param name="doc">id of scored doc</param>
+ /// <param name="subQueryScore">score of that doc by the subQuery</param>
+ /// <param name="valSrcScore">score of that doc by the ValueSourceQuery</param>
+ /// <returns>custom score</returns>
+ public virtual float CustomScore(int doc, float subQueryScore, float valSrcScore)
+ {
+ return subQueryScore * valSrcScore;
+ }
+
+ /// <summary>
+ /// Explain the custom score.
+ /// Whenever overriding <see cref="CustomScore(int, float, float[])" />,
+ /// this method should also be overridden to provide the correct explanation
+ /// for the part of the custom scoring.
+ /// </summary>
+ /// <param name="doc">doc being explained</param>
+ /// <param name="subQueryExpl">explanation for the sub-query part</param>
+ /// <param name="valSrcExpls">explanation for the value source part</param>
+ /// <returns>an explanation for the custom score</returns>
+ public virtual Explanation CustomExplain(int doc, Explanation subQueryExpl, Explanation[] valSrcExpls)
+ {
+ if (valSrcExpls.Length == 1)
+ {
+ return CustomExplain(doc, subQueryExpl, valSrcExpls[0]);
+ }
+ if (valSrcExpls.Length == 0)
+ {
+ return subQueryExpl;
+ }
+ float valSrcScore = 1;
+ for (int i = 0; i < valSrcExpls.Length; i++)
+ {
+ valSrcScore *= valSrcExpls[i].Value;
+ }
+ Explanation exp = new Explanation(valSrcScore * subQueryExpl.Value, "custom score: product of:");
+ exp.AddDetail(subQueryExpl);
+ for (int i = 0; i < valSrcExpls.Length; i++)
+ {
+ exp.AddDetail(valSrcExpls[i]);
+ }
+ return exp;
+ }
+
+ /// <summary>
+ /// Explain the custom score.
+ /// Whenever overriding <see cref="CustomScore(int, float, float)" />,
+ /// this method should also be overridden to provide the correct explanation
+ /// for the part of the custom scoring.
+ ///
+ /// </summary>
+ /// <param name="doc">doc being explained</param>
+ /// <param name="subQueryExpl">explanation for the sub-query part</param>
+ /// <param name="valSrcExpl">explanation for the value source part</param>
+ /// <returns>an explanation for the custom score</returns>
+ public virtual Explanation CustomExplain(int doc, Explanation subQueryExpl, Explanation valSrcExpl)
+ {
+ float valSrcScore = 1;
+ if (valSrcExpl != null)
+ {
+ valSrcScore *= valSrcExpl.Value;
+ }
+ Explanation exp = new Explanation(valSrcScore * subQueryExpl.Value, "custom score: product of:");
+ exp.AddDetail(subQueryExpl);
+ exp.AddDetail(valSrcExpl);
+ return exp;
+ }
+
+ }
+}
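A sketch of how this hook is intended to be used (the additive formula is only an example, not the default behaviour):

using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.Search.Function;

// Adds the value-source score to the sub-query score instead of multiplying.
class AdditiveScoreProvider : CustomScoreProvider
{
    public AdditiveScoreProvider(IndexReader reader) : base(reader) { }

    public override float CustomScore(int doc, float subQueryScore, float valSrcScore)
    {
        return subQueryScore + valSrcScore;
    }

    public override Explanation CustomExplain(int doc, Explanation subQueryExpl, Explanation valSrcExpl)
    {
        float score = subQueryExpl.Value + (valSrcExpl == null ? 0 : valSrcExpl.Value);
        Explanation exp = new Explanation(score, "custom score: sum of:");
        exp.AddDetail(subQueryExpl);
        if (valSrcExpl != null)
            exp.AddDetail(valSrcExpl);
        return exp;
    }
}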
diff --git a/src/core/Search/Function/CustomScoreQuery.cs b/src/core/Search/Function/CustomScoreQuery.cs
new file mode 100644
index 0000000..cd6f2b2
--- /dev/null
+++ b/src/core/Search/Function/CustomScoreQuery.cs
@@ -0,0 +1,579 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Linq;
+using Lucene.Net.Index;
+using IndexReader = Lucene.Net.Index.IndexReader;
+using ToStringUtils = Lucene.Net.Util.ToStringUtils;
+using ComplexExplanation = Lucene.Net.Search.ComplexExplanation;
+using Explanation = Lucene.Net.Search.Explanation;
+using Query = Lucene.Net.Search.Query;
+using Scorer = Lucene.Net.Search.Scorer;
+using Searcher = Lucene.Net.Search.Searcher;
+using Similarity = Lucene.Net.Search.Similarity;
+using Weight = Lucene.Net.Search.Weight;
+
+namespace Lucene.Net.Search.Function
+{
+
+ /// <summary> Query that sets document score as a programmatic function of several (sub) scores:
+ /// <list type="bullet">
+ /// <item>the score of its subQuery (any query)</item>
+ /// <item>(optional) the score of its ValueSourceQuery (or queries).
+ /// For most simple/convenient use cases this query is likely to be a
+ /// <see cref="Lucene.Net.Search.Function.FieldScoreQuery">FieldScoreQuery</see></item>
+ /// </list>
+ /// Subclasses can modify the computation by overriding <see cref="GetCustomScoreProvider" />.
+ ///
+ /// <p/><font color="#FF0000">
+ /// WARNING: The status of the <b>Search.Function</b> package is experimental.
+ /// The APIs introduced here might change in the future and will not be
+ /// supported anymore in such a case.</font>
+ /// </summary>
+ [Serializable]
+ public class CustomScoreQuery:Query, System.ICloneable
+ {
+
+ private Query subQuery;
+ private ValueSourceQuery[] valSrcQueries; // never null (empty array if there are no valSrcQueries).
+ private bool strict = false; // if true, valueSource part of query does not take part in weights normalization.
+
+ /// <summary> Create a CustomScoreQuery over input subQuery.</summary>
+ /// <param name="subQuery">the sub query whose scored is being customed. Must not be null.
+ /// </param>
+ public CustomScoreQuery(Query subQuery):this(subQuery, new ValueSourceQuery[0])
+ {
+ }
+
+ /// <summary> Create a CustomScoreQuery over input subQuery and a <see cref="ValueSourceQuery" />.</summary>
+ /// <param name="subQuery">the sub query whose score is being customed. Must not be null.
+ /// </param>
+ /// <param name="valSrcQuery">a value source query whose scores are used in the custom score
+ /// computation. For most simple/convenient use cases this would be a
+ /// <see cref="Lucene.Net.Search.Function.FieldScoreQuery">FieldScoreQuery</see>.
+ /// This parameter is optional - it can be null or even an empty array.
+ /// </param>
+ public CustomScoreQuery(Query subQuery, ValueSourceQuery valSrcQuery):this(subQuery, valSrcQuery != null ? new ValueSourceQuery[]{valSrcQuery} : new ValueSourceQuery[0])
+ {
+ }
+
+ /// <summary> Create a CustomScoreQuery over input subQuery and a <see cref="ValueSourceQuery" />.</summary>
+ /// <param name="subQuery">the sub query whose score is being customized. Must not be null.
+ /// </param>
+ /// <param name="valSrcQueries">value source queries whose scores are used in the custom score
+ /// computation. For most simple/convenient use cases these would be
+ /// <see cref="Lucene.Net.Search.Function.FieldScoreQuery">FieldScoreQueries</see>.
+ /// This parameter is optional - it can be null or even an empty array.
+ /// </param>
+ public CustomScoreQuery(Query subQuery, params ValueSourceQuery[] valSrcQueries)
+ {
+ this.subQuery = subQuery;
+ this.valSrcQueries = valSrcQueries != null ? valSrcQueries : new ValueSourceQuery[0];
+ if (subQuery == null)
+ throw new System.ArgumentException("<subquery> must not be null!");
+ }
+
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Query.rewrite(Lucene.Net.Index.IndexReader) */
+ public override Query Rewrite(IndexReader reader)
+ {
+ CustomScoreQuery clone = null;
+
+ Query sq = subQuery.Rewrite(reader);
+ if (sq != subQuery)
+ {
+ clone = (CustomScoreQuery)Clone();
+ clone.subQuery = sq;
+ }
+
+ for (int i = 0; i < valSrcQueries.Length; i++)
+ {
+ ValueSourceQuery v = (ValueSourceQuery)valSrcQueries[i].Rewrite(reader);
+ if (v != valSrcQueries[i])
+ {
+ if (clone == null) clone = (CustomScoreQuery)Clone();
+ clone.valSrcQueries[i] = v;
+ }
+ }
+
+ return (clone == null) ? this : clone;
+ }
+
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Query.extractTerms(java.util.Set) */
+ public override void ExtractTerms(System.Collections.Generic.ISet<Term> terms)
+ {
+ subQuery.ExtractTerms(terms);
+ for (int i = 0; i < valSrcQueries.Length; i++)
+ {
+ valSrcQueries[i].ExtractTerms(terms);
+ }
+ }
+
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Query.clone() */
+ public override System.Object Clone()
+ {
+ CustomScoreQuery clone = (CustomScoreQuery) base.Clone();
+ clone.subQuery = (Query) subQuery.Clone();
+ clone.valSrcQueries = new ValueSourceQuery[valSrcQueries.Length];
+ for (int i = 0; i < valSrcQueries.Length; i++)
+ {
+ clone.valSrcQueries[i] = (ValueSourceQuery) valSrcQueries[i].Clone();
+ }
+ return clone;
+ }
+
+ /* (non-Javadoc) <see cref="Lucene.Net.Search.Query.toString(java.lang.String) */
+ public override System.String ToString(System.String field)
+ {
+ System.Text.StringBuilder sb = new System.Text.StringBuilder(Name()).Append("(");
+ sb.Append(subQuery.ToString(field));
+ for (int i = 0; i < valSrcQueries.Length; i++)
+ {
+ sb.Append(", ").Append(valSrcQueries[i].ToString(field));
+ }
+ sb.Append(")");
+ sb.Append(strict?" STRICT":"");
+ return sb.ToString() + ToStringUtils.Boost(Boost);
+ }
+
+ /// <summary>Returns true if <c>o</c> is equal to this. </summary>
+ public override bool Equals(System.Object o)
+ {
+ if (GetType() != o.GetType())
+ {
+ return false;
+ }
+ CustomScoreQuery other = (CustomScoreQuery) o;
+ if (this.Boost != other.Boost ||
+ !this.subQuery.Equals(other.subQuery) ||
+ this.strict != other.strict ||
+ this.valSrcQueries.Length != other.valSrcQueries.Length)
+ {
+ return false;
+ }
+
+ // SequenceEqual should properly mimic java's Array.equals()
+ return valSrcQueries.SequenceEqual(other.valSrcQueries);
+ }
+
+ /// <summary>Returns a hash code value for this object. </summary>
+ public override int GetHashCode()
+ {
+ int valSrcHash = 0;
+ for (int i = 0; i < valSrcQueries.Length; i++)
+ {
+ // TODO: Simplify this hash code generation
+ valSrcHash += valSrcQueries[i].GetHashCode();
+ }
+ return (GetType().GetHashCode() + subQuery.GetHashCode() + valSrcHash) ^
+ BitConverter.ToInt32(BitConverter.GetBytes(Boost), 0) ^ (strict ? 1234 : 4321);
+
+ }
+
+ /// <summary>
+ /// Returns a <see cref="CustomScoreProvider" /> that calculates the custom scores
+ /// for the given <see cref="IndexReader" />. The default implementation returns a default
+ /// implementation as specified in the docs of <see cref="CustomScoreProvider" />.
+ /// </summary>
+ protected virtual CustomScoreProvider GetCustomScoreProvider(IndexReader reader)
+ {
+ // when deprecated methods are removed, do not extend class here, just return new default CustomScoreProvider
+ return new AnonymousCustomScoreProvider(this, reader);
+ }
+
+ class AnonymousCustomScoreProvider : CustomScoreProvider
+ {
+ CustomScoreQuery parent;
+ public AnonymousCustomScoreProvider(CustomScoreQuery parent, IndexReader reader) : base(reader)
+ {
+ this.parent = parent;
+ }
+ public override float CustomScore(int doc, float subQueryScore, float[] valSrcScores)
+ {
+ return parent.CustomScore(doc, subQueryScore, valSrcScores);
+ }
+
+ public override float CustomScore(int doc, float subQueryScore, float valSrcScore)
+ {
+ return parent.CustomScore(doc, subQueryScore, valSrcScore);
+ }
+
+ public override Explanation CustomExplain(int doc, Explanation subQueryExpl, Explanation[] valSrcExpls)
+ {
+ return parent.CustomExplain(doc, subQueryExpl, valSrcExpls);
+ }
+
+ public override Explanation CustomExplain(int doc, Explanation subQueryExpl, Explanation valSrcExpl)
+ {
+ return parent.CustomExplain(doc, subQueryExpl, valSrcExpl);
+ }
+ }
+
+ /// <summary>
+ /// Compute a custom score by the subQuery score and a number of
+ /// ValueSourceQuery scores.
+ ///
+ /// The doc is relative to the current reader, which is
+ /// unknown to CustomScoreQuery when using per-segment search (since Lucene 2.9).
+ /// Please override <see cref="GetCustomScoreProvider" /> and return a subclass
+ /// of <see cref="CustomScoreProvider" /> for the given <see cref="IndexReader" />.
+ /// See <see cref="CustomScoreProvider.CustomScore(int,float,float[])" />.
+ /// </summary>
+ [Obsolete("Will be removed in Lucene 3.1")]
+ public virtual float CustomScore(int doc, float subQueryScore, float[] valSrcScores)
+ {
+ if (valSrcScores.Length == 1)
+ {
+ return CustomScore(doc, subQueryScore, valSrcScores[0]);
+ }
+ if (valSrcScores.Length == 0)
+ {
+ return CustomScore(doc, subQueryScore, 1);
+ }
+ float score = subQueryScore;
+ for (int i = 0; i < valSrcScores.Length; i++)
+ {
+ score *= valSrcScores[i];
+ }
+ return score;
+ }
+
+ /// <summary> Compute a custom score by the subQuery score and the ValueSourceQuery score.
+ ///
+ /// The doc is relative to the current reader, which is
+ /// unknown to CustomScoreQuery when using per-segment search (since Lucene 2.9).
+ /// Please override <see cref="GetCustomScoreProvider" /> and return a subclass
+ /// of <see cref="CustomScoreProvider" /> for the given <see cref="IndexReader" />.
+ /// </summary>
+ /// <seealso cref="CustomScoreProvider.CustomScore(int,float,float)" />
+ [Obsolete("Will be removed in Lucene 3.1")]
+ public virtual float CustomScore(int doc, float subQueryScore, float valSrcScore)
+ {
+ return subQueryScore * valSrcScore;
+ }
+
+
+
+ /// <summary> Explain the custom score.
+ ///
+ /// The doc is relative to the current reader, which is
+ /// unknown to CustomScoreQuery when using per-segment search (since Lucene 2.9).
+ /// Please override <see cref="GetCustomScoreProvider(IndexReader)" /> and return a subclass
+ /// of <see cref="CustomScoreProvider" /> for the given <see cref="IndexReader" />.
+ /// </summary>
+ [Obsolete("Will be removed in Lucene 3.1")]
+ public virtual Explanation CustomExplain(int doc, Explanation subQueryExpl, Explanation[] valSrcExpls)
+ {
+ if (valSrcExpls.Length == 1)
+ {
+ return CustomExplain(doc, subQueryExpl, valSrcExpls[0]);
+ }
+ if (valSrcExpls.Length == 0)
+ {
+ return subQueryExpl;
+ }
+ float valSrcScore = 1;
+ for (int i = 0; i < valSrcExpls.Length; i++)
+ {
+ valSrcScore *= valSrcExpls[i].Value;
+ }
+ Explanation exp = new Explanation(valSrcScore * subQueryExpl.Value, "custom score: product of:");
+ exp.AddDetail(subQueryExpl);
+ for (int i = 0; i < valSrcExpls.Length; i++)
+ {
+ exp.AddDetail(valSrcExpls[i]);
+ }
+ return exp;
+ }
+
+ /// <summary> Explain the custom score.
+ /// The doc is relative to the current reader, which is
+ /// unknown to CustomScoreQuery when using per-segment search (since Lucene 2.9).
+ /// Please override <see cref="GetCustomScoreProvider" /> and return a subclass
+ /// of <see cref="CustomScoreProvider" /> for the given <see cref="IndexReader" />.
+ /// </summary>
+ [Obsolete("Will be removed in Lucene 3.1")]
+ public virtual Explanation CustomExplain(int doc, Explanation subQueryExpl, Explanation valSrcExpl)
+ {
+ float valSrcScore = 1;
+ if (valSrcExpl != null)
+ {
+ valSrcScore *= valSrcExpl.Value;
+ }
+ Explanation exp = new Explanation(valSrcScore * subQueryExpl.Value, "custom score: product of:");
+ exp.AddDetail(subQueryExpl);
+ exp.AddDetail(valSrcExpl);
+ return exp;
+ }
+
+ //=========================== W E I G H T ============================
+
+ [Serializable]
+ private class CustomWeight:Weight
+ {
+ private void InitBlock(CustomScoreQuery enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private CustomScoreQuery enclosingInstance;
+ public CustomScoreQuery Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ internal Similarity similarity;
+ internal Weight subQueryWeight;
+ internal Weight[] valSrcWeights;
+ internal bool qStrict;
+
+ public CustomWeight(CustomScoreQuery enclosingInstance, Searcher searcher)
+ {
+ InitBlock(enclosingInstance);
+ this.similarity = Enclosing_Instance.GetSimilarity(searcher);
+ this.subQueryWeight = Enclosing_Instance.subQuery.Weight(searcher);
+ this.valSrcWeights = new Weight[Enclosing_Instance.valSrcQueries.Length];
+ for (int i = 0; i < Enclosing_Instance.valSrcQueries.Length; i++)
+ {
+ this.valSrcWeights[i] = Enclosing_Instance.valSrcQueries[i].CreateWeight(searcher);
+ }
+ this.qStrict = Enclosing_Instance.strict;
+ }
+
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Weight.getQuery() */
+
+ public override Query Query
+ {
+ get { return Enclosing_Instance; }
+ }
+
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Weight.getValue() */
+
+ public override float Value
+ {
+ get { return Enclosing_Instance.Boost; }
+ }
+
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Weight.sumOfSquaredWeights() */
+
+ public override float GetSumOfSquaredWeights()
+ {
+ float sum = subQueryWeight.GetSumOfSquaredWeights();
+ for (int i = 0; i < valSrcWeights.Length; i++)
+ {
+ if (qStrict)
+ {
+ var sumsq = valSrcWeights[i].GetSumOfSquaredWeights();
+ // do not include ValueSource part in the query normalization
+ }
+ else
+ {
+ sum += valSrcWeights[i].GetSumOfSquaredWeights();
+ }
+ }
+ sum *= Enclosing_Instance.Boost*Enclosing_Instance.Boost; // boost each sub-weight
+ return sum;
+ }
+
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Weight.normalize(float) */
+ public override void Normalize(float norm)
+ {
+ norm *= Enclosing_Instance.Boost; // incorporate boost
+ subQueryWeight.Normalize(norm);
+ for (int i = 0; i < valSrcWeights.Length; i++)
+ {
+ if (qStrict)
+ {
+ valSrcWeights[i].Normalize(1); // do not normalize the ValueSource part
+ }
+ else
+ {
+ valSrcWeights[i].Normalize(norm);
+ }
+ }
+ }
+
+ public override Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer)
+ {
+ // Pass true for "scoresDocsInOrder", because we
+ // require in-order scoring, even if caller does not,
+ // since we call advance on the valSrcScorers. Pass
+ // false for "topScorer" because we will not invoke
+ // score(Collector) on these scorers:
+ Scorer subQueryScorer = subQueryWeight.Scorer(reader, true, false);
+ if (subQueryScorer == null)
+ {
+ return null;
+ }
+ Scorer[] valSrcScorers = new Scorer[valSrcWeights.Length];
+ for (int i = 0; i < valSrcScorers.Length; i++)
+ {
+ valSrcScorers[i] = valSrcWeights[i].Scorer(reader, true, topScorer);
+ }
+ return new CustomScorer(enclosingInstance, similarity, reader, this, subQueryScorer, valSrcScorers);
+ }
+
+ public override Explanation Explain(IndexReader reader, int doc)
+ {
+ Explanation explain = DoExplain(reader, doc);
+ return explain == null ? new Explanation(0.0f, "no matching docs") : explain;
+ }
+
+ private Explanation DoExplain(IndexReader reader, int doc)
+ {
+ Explanation subQueryExpl = subQueryWeight.Explain(reader, doc);
+ if (!subQueryExpl.IsMatch)
+ {
+ return subQueryExpl;
+ }
+ // match
+ Explanation[] valSrcExpls = new Explanation[valSrcWeights.Length];
+ for (int i = 0; i < valSrcWeights.Length; i++)
+ {
+ valSrcExpls[i] = valSrcWeights[i].Explain(reader, doc);
+ }
+ Explanation customExp = Enclosing_Instance.GetCustomScoreProvider(reader).CustomExplain(doc, subQueryExpl, valSrcExpls);
+ float sc = Value * customExp.Value;
+ Explanation res = new ComplexExplanation(true, sc, Enclosing_Instance.ToString() + ", product of:");
+ res.AddDetail(customExp);
+ res.AddDetail(new Explanation(Value, "queryBoost")); // actually using the q boost as q weight (== weight value)
+ return res;
+ }
+
+ public override bool GetScoresDocsOutOfOrder()
+ {
+ return false;
+ }
+ }
+
+
+ //=========================== S C O R E R ============================
+
+ /// <summary> A scorer that applies a (callback) function on scores of the subQuery.</summary>
+ private class CustomScorer:Scorer
+ {
+ private void InitBlock(CustomScoreQuery enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private CustomScoreQuery enclosingInstance;
+ public CustomScoreQuery Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ private float qWeight;
+ private Scorer subQueryScorer;
+ private Scorer[] valSrcScorers;
+ private IndexReader reader;
+ private CustomScoreProvider provider;
+ private float[] vScores; // reused in score() to avoid allocating this array for each doc
+
+ // constructor
+ internal CustomScorer(CustomScoreQuery enclosingInstance, Similarity similarity, IndexReader reader, CustomWeight w, Scorer subQueryScorer, Scorer[] valSrcScorers):base(similarity)
+ {
+ InitBlock(enclosingInstance);
+ this.qWeight = w.Value;
+ this.subQueryScorer = subQueryScorer;
+ this.valSrcScorers = valSrcScorers;
+ this.reader = reader;
+ this.vScores = new float[valSrcScorers.Length];
+ this.provider = this.Enclosing_Instance.GetCustomScoreProvider(reader);
+ }
+
+ public override int NextDoc()
+ {
+ int doc = subQueryScorer.NextDoc();
+ if (doc != NO_MORE_DOCS)
+ {
+ for (int i = 0; i < valSrcScorers.Length; i++)
+ {
+ valSrcScorers[i].Advance(doc);
+ }
+ }
+ return doc;
+ }
+
+ public override int DocID()
+ {
+ return subQueryScorer.DocID();
+ }
+
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Scorer.score() */
+ public override float Score()
+ {
+ for (int i = 0; i < valSrcScorers.Length; i++)
+ {
+ vScores[i] = valSrcScorers[i].Score();
+ }
+ return qWeight * provider.CustomScore(subQueryScorer.DocID(), subQueryScorer.Score(), vScores);
+ }
+
+ public override int Advance(int target)
+ {
+ int doc = subQueryScorer.Advance(target);
+ if (doc != NO_MORE_DOCS)
+ {
+ for (int i = 0; i < valSrcScorers.Length; i++)
+ {
+ valSrcScorers[i].Advance(doc);
+ }
+ }
+ return doc;
+ }
+ }
+
+ public override Weight CreateWeight(Searcher searcher)
+ {
+ return new CustomWeight(this, searcher);
+ }
+
+ /// <summary> Checks if this is strict custom scoring.
+ /// In strict custom scoring, the ValueSource part does not participate in weight normalization.
+ /// This may be useful when one wants full control over how scores are modified, and does
+ /// not care about normalizing by the ValueSource part.
+ /// One particular case where this is useful is when testing this query.
+ /// <p/>
+ /// Note: only has effect when the ValueSource part is not null.
+ /// </summary>
+ public virtual bool IsStrict()
+ {
+ return strict;
+ }
+
+ /// <summary> Set the strict mode of this query. </summary>
+ /// <param name="strict">The strict mode to set.
+ /// </param>
+ /// <seealso cref="IsStrict()">
+ /// </seealso>
+ public virtual void SetStrict(bool strict)
+ {
+ this.strict = strict;
+ }
+
+ /// <summary> A short name of this query, used in <see cref="ToString(String)" />.</summary>
+ public virtual System.String Name()
+ {
+ return "custom";
+ }
+ }
+} \ No newline at end of file
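Tying the pieces together, a subclass usually only overrides GetCustomScoreProvider; a rough sketch, reusing the hypothetical AdditiveScoreProvider from the CustomScoreProvider sketch above and assuming an existing IndexSearcher, a "contents" text field and a numeric "popularity" field:

using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.Search.Function;

// Combines text relevance with a per-document popularity value.
class AdditiveScoreQuery : CustomScoreQuery
{
    public AdditiveScoreQuery(Query subQuery, ValueSourceQuery boostQuery)
        : base(subQuery, boostQuery) { }

    protected override CustomScoreProvider GetCustomScoreProvider(IndexReader reader)
    {
        return new AdditiveScoreProvider(reader); // reader is per-segment since 2.9
    }
}

static class CustomScoreQueryExample
{
    public static TopDocs Run(Searcher searcher)
    {
        Query text = new TermQuery(new Term("contents", "lucene"));
        var popularity = new FieldScoreQuery("popularity", FieldScoreQuery.Type.FLOAT);
        return searcher.Search(new AdditiveScoreQuery(text, popularity), 10);
    }
}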
diff --git a/src/core/Search/Function/DocValues.cs b/src/core/Search/Function/DocValues.cs
new file mode 100644
index 0000000..fcb5e7d
--- /dev/null
+++ b/src/core/Search/Function/DocValues.cs
@@ -0,0 +1,206 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using Explanation = Lucene.Net.Search.Explanation;
+
+namespace Lucene.Net.Search.Function
+{
+
+ /// <summary> Expert: represents field values as different types.
+ /// Normally created via a
+ /// <see cref="Lucene.Net.Search.Function.ValueSource">ValueSuorce</see>
+ /// for a particular field and reader.
+ ///
+ /// <p/><font color="#FF0000">
+ /// WARNING: The status of the <b>Search.Function</b> package is experimental.
+ /// The APIs introduced here might change in the future and will not be
+ /// supported anymore in such a case.</font>
+ ///
+ ///
+ /// </summary>
+ public abstract class DocValues
+ {
+ /*
+ * DocValues is distinct from ValueSource because
+ * there needs to be an object created at query evaluation time that
+ * is not referenced by the query itself because:
+ * - Query objects should be MT safe
+ * - For caching, Query objects are often used as keys... you don't
+ * want the Query carrying around big objects
+ */
+
+ /// <summary> Return doc value as a float.
+ /// <p/>Mandatory: every DocValues implementation must implement at least this method.
+ /// </summary>
+ /// <param name="doc">document whose float value is requested.
+ /// </param>
+ public abstract float FloatVal(int doc);
+
+ /// <summary> Return doc value as an int.
+ /// <p/>Optional: DocValues implementations can (but don't have to) override this method.
+ /// </summary>
+ /// <param name="doc">document whose int value is requested.
+ /// </param>
+ public virtual int IntVal(int doc)
+ {
+ return (int) FloatVal(doc);
+ }
+
+ /// <summary> Return doc value as a long.
+ /// <p/>Optional: DocValues implementations can (but don't have to) override this method.
+ /// </summary>
+ /// <param name="doc">document whose long value is requested.
+ /// </param>
+ public virtual long LongVal(int doc)
+ {
+ return (long) FloatVal(doc);
+ }
+
+ /// <summary> Return doc value as a double.
+ /// <p/>Optional: DocValues implementations can (but don't have to) override this method.
+ /// </summary>
+ /// <param name="doc">document whose double value is requested.
+ /// </param>
+ public virtual double DoubleVal(int doc)
+ {
+ return (double) FloatVal(doc);
+ }
+
+ /// <summary> Return doc value as a string.
+ /// <p/>Optional: DocValues implementations can (but don't have to) override this method.
+ /// </summary>
+ /// <param name="doc">document whose string value is requested.
+ /// </param>
+ public virtual System.String StrVal(int doc)
+ {
+ return FloatVal(doc).ToString();
+ }
+
+ /// <summary> Return a string representation of a doc value, as required for Explanations.</summary>
+ public abstract System.String ToString(int doc);
+
+ /// <summary> Explain the scoring value for the input doc.</summary>
+ public virtual Explanation Explain(int doc)
+ {
+ return new Explanation(FloatVal(doc), ToString(doc));
+ }
+
+ /// <summary> Expert: for test purposes only, return the inner array of values, or null if not applicable.
+ /// <p/>
+ /// Allows tests to verify that loaded values are:
+ /// <list type="bullet">
+ /// <item>indeed cached/reused.</item>
+ /// <item>stored in the expected size/type (byte/short/int/float).</item>
+ /// </list>
+ /// Note: implementations of DocValues must override this method for
+ /// these test elements to be tested; otherwise the test will not fail, it will just
+ /// print a warning.
+ /// </summary>
+ protected internal virtual object InnerArray
+ {
+ get { throw new System.NotSupportedException("this optional method is for test purposes only"); }
+ }
+
+ // --- some simple statistics on values
+ private float minVal = System.Single.NaN;
+ private float maxVal = System.Single.NaN;
+ private float avgVal = System.Single.NaN;
+ private bool computed = false;
+ // compute optional values
+ private void Compute()
+ {
+ if (computed)
+ {
+ return ;
+ }
+ float sum = 0;
+ int n = 0;
+ while (true)
+ {
+ float val;
+ try
+ {
+ val = FloatVal(n);
+ }
+ catch (System.IndexOutOfRangeException)
+ {
+ break;
+ }
+ sum += val;
+ minVal = System.Single.IsNaN(minVal) ? val : System.Math.Min(minVal, val);
+ maxVal = System.Single.IsNaN(maxVal) ? val : System.Math.Max(maxVal, val);
+ ++n;
+ }
+
+ avgVal = n == 0 ? System.Single.NaN : sum / n;
+ computed = true;
+ }
+
+ /// <summary> Returns the minimum of all values or <c>Float.NaN</c> if this
+ /// DocValues instance does not contain any value.
+ /// <p/>
+ /// This operation is optional
+ /// <p/>
+ ///
+ /// </summary>
+ /// <returns> the minimum of all values or <c>Float.NaN</c> if this
+ /// DocValues instance does not contain any value.
+ /// </returns>
+ [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
+ public virtual float GetMinValue()
+ {
+ Compute();
+ return minVal;
+ }
+
+ /// <summary> Returns the maximum of all values or <c>Float.NaN</c> if this
+ /// DocValues instance does not contain any value.
+ /// <p/>
+ /// This operation is optional
+ /// <p/>
+ ///
+ /// </summary>
+ /// <returns> the maximum of all values or <c>Float.NaN</c> if this
+ /// DocValues instance does not contain any value.
+ /// </returns>
+ [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
+ public virtual float GetMaxValue()
+ {
+ Compute();
+ return maxVal;
+ }
+
+ /// <summary> Returns the average of all values or <c>Float.NaN</c> if this
+ /// DocValues instance does not contain any value. *
+ /// <p/>
+ /// This operation is optional
+ /// <p/>
+ ///
+ /// </summary>
+ /// <returns> the average of all values or <c>Float.NaN</c> if this
+ /// DocValues instance does not contain any value
+ /// </returns>
+ [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
+ public virtual float GetAverageValue()
+ {
+ Compute();
+ return avgVal;
+ }
+ }
+} \ No newline at end of file
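A minimal array-backed implementation, sketched for illustration; note that FloatVal must eventually throw for out-of-range docs so that the optional statistics computed above can terminate:

using Lucene.Net.Search.Function;

// Exposes a precomputed float per document; only FloatVal and ToString(int) are mandatory.
class ArrayDocValues : DocValues
{
    private readonly float[] values;

    public ArrayDocValues(float[] values) { this.values = values; }

    public override float FloatVal(int doc)
    {
        return values[doc]; // throws IndexOutOfRangeException past the last doc
    }

    public override string ToString(int doc)
    {
        return "array=" + FloatVal(doc);
    }

    protected internal override object InnerArray
    {
        get { return values; } // lets tests inspect the backing array
    }
}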
diff --git a/src/core/Search/Function/FieldCacheSource.cs b/src/core/Search/Function/FieldCacheSource.cs
new file mode 100644
index 0000000..f5ccf1b
--- /dev/null
+++ b/src/core/Search/Function/FieldCacheSource.cs
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using IndexReader = Lucene.Net.Index.IndexReader;
+using FieldCache = Lucene.Net.Search.FieldCache;
+
+namespace Lucene.Net.Search.Function
+{
+
+ /// <summary> Expert: A base class for ValueSource implementations that retrieve values for
+ /// a single field from the <see cref="Lucene.Net.Search.FieldCache">FieldCache</see>.
+ /// <p/>
+ /// Fields used herein must be indexed (doesn't matter if these fields are stored or not).
+ /// <p/>
+ /// It is assumed that each such indexed field is untokenized, or at least has a single token in a document.
+ /// For documents with multiple tokens of the same field, behavior is undefined (It is likely that current
+ /// code would use the value of one of these tokens, but this is not guaranteed).
+ /// <p/>
+ /// Documents with no tokens in this field are assigned the <c>Zero</c> value.
+ ///
+ /// <p/><font color="#FF0000">
+ /// WARNING: The status of the <b>Search.Function</b> package is experimental.
+ /// The APIs introduced here might change in the future and will not be
+ /// supported anymore in such a case.</font>
+ ///
+ /// <p/><b>NOTE</b>: with the switch in 2.9 to segment-based
+ /// searching, if <see cref="GetValues" /> is invoked with a
+ /// composite (multi-segment) reader, this can easily cause
+ /// double RAM usage for the values in the FieldCache. It's
+ /// best to switch your application to pass only atomic
+ /// (single segment) readers to this API.<p/>
+ /// </summary>
+ [Serializable]
+ public abstract class FieldCacheSource:ValueSource
+ {
+ private System.String field;
+
+ /// <summary> Create a cached field source for the input field. </summary>
+ protected FieldCacheSource(System.String field)
+ {
+ this.field = field;
+ }
+
+ /* (non-Javadoc) <see cref="Lucene.Net.Search.Function.ValueSource.getValues(Lucene.Net.Index.IndexReader) */
+ public override DocValues GetValues(IndexReader reader)
+ {
+ return GetCachedFieldValues(Lucene.Net.Search.FieldCache_Fields.DEFAULT, field, reader);
+ }
+
+ /* (non-Javadoc) <see cref="Lucene.Net.Search.Function.ValueSource.description() */
+ public override System.String Description()
+ {
+ return field;
+ }
+
+ /// <summary> Return cached DocValues for input field and reader.</summary>
+ /// <param name="cache">FieldCache so that values of a field are loaded once per reader (RAM allowing)
+ /// </param>
+ /// <param name="field">Field for which values are required.
+ /// </param>
+ /// <seealso cref="ValueSource">
+ /// </seealso>
+ public abstract DocValues GetCachedFieldValues(FieldCache cache, System.String field, IndexReader reader);
+
+ /*(non-Javadoc) <see cref="java.lang.Object.equals(java.lang.Object) */
+ public override bool Equals(System.Object o)
+ {
+ if (!(o is FieldCacheSource))
+ {
+ return false;
+ }
+ FieldCacheSource other = (FieldCacheSource) o;
+ return this.field.Equals(other.field) && CachedFieldSourceEquals(other);
+ }
+
+ /*(non-Javadoc) <see cref="java.lang.Object.hashCode() */
+ public override int GetHashCode()
+ {
+ return field.GetHashCode() + CachedFieldSourceHashCode();
+ }
+
+ /// <summary> Check if equals to another <see cref="FieldCacheSource" />, already knowing that cache and field are equal. </summary>
+ /// <seealso cref="Object.Equals(Object)">
+ /// </seealso>
+ public abstract bool CachedFieldSourceEquals(FieldCacheSource other);
+
+ /// <summary> Return a hash code of a <see cref="FieldCacheSource" />, without the hash-codes of the field
+ /// and the cache (those are taken care of elsewhere).
+ /// </summary>
+ /// <seealso cref="Object.GetHashCode()">
+ /// </seealso>
+ public abstract int CachedFieldSourceHashCode();
+ }
+} \ No newline at end of file
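Because values are cached per reader, the per-segment note above matters in practice; a sketch of the intended pattern, assuming GetSequentialSubReaders() is available on the composite reader and a hypothetical numeric "price" field:

using Lucene.Net.Index;
using Lucene.Net.Search.Function;

static class PerSegmentValuesExample
{
    // Obtains cached values segment by segment instead of for the composite reader.
    public static void Consume(IndexReader compositeReader)
    {
        ValueSource src = new FloatFieldSource("price");
        foreach (IndexReader segment in compositeReader.GetSequentialSubReaders())
        {
            DocValues vals = src.GetValues(segment); // loaded and cached once per segment
            // ... use vals.FloatVal(docInSegment) for the docs of this segment ...
        }
    }
}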
diff --git a/src/core/Search/Function/FieldScoreQuery.cs b/src/core/Search/Function/FieldScoreQuery.cs
new file mode 100644
index 0000000..403fe5b
--- /dev/null
+++ b/src/core/Search/Function/FieldScoreQuery.cs
@@ -0,0 +1,139 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Search.Function
+{
+
+ /// <summary> A query that scores each document as the value of the numeric input field.
+ /// <p/>
+ /// The query matches all documents, and scores each document according to the numeric
+ /// value of that field.
+ /// <p/>
+ /// It is assumed, and expected, that:
+ /// <list type="bullet">
+ /// <item>The field used here is indexed, and has exactly
+ /// one token in every scored document.</item>
+ /// <item>Best if this field is un_tokenized.</item>
+ /// <item>That token is parsable to the selected type.</item>
+ /// </list>
+ /// <p/>
+ /// Combining this query in a FunctionQuery allows much freedom in affecting document scores.
+ /// Note, that with this freedom comes responsibility: it is more than likely that the
+ /// default Lucene scoring is superior in quality to scoring modified as explained here.
+ /// However, in some cases, and certainly for research experiments, this capability may turn useful.
+ /// <p/>
+ /// When constructing this query, select the appropriate type. That type should match the data stored in the
+ /// field, so in fact the "right" type should be selected before indexing. Type selection
+ /// has an effect on RAM usage:
+ /// <list type="bullet">
+ /// <item><see cref="Type.BYTE" /> consumes 1 * maxDocs bytes.</item>
+ /// <item><see cref="Type.SHORT" /> consumes 2 * maxDocs bytes.</item>
+ /// <item><see cref="Type.INT" /> consumes 4 * maxDocs bytes.</item>
+ /// <item><see cref="Type.FLOAT" /> consumes 8 * maxDocs bytes.</item>
+ /// </list>
+ /// <p/>
+ /// <b>Caching:</b>
+ /// Values for the numeric field are loaded once and cached in memory for further use with the same IndexReader.
+ /// To take advantage of this, it is extremely important to reuse index-readers or index-searchers,
+ /// otherwise, for instance if for each query a new index reader is opened, large penalties would be
+ /// paid for loading the field values into memory over and over again!
+ ///
+ /// <p/><font color="#FF0000">
+ /// WARNING: The status of the <b>Search.Function</b> package is experimental.
+ /// The APIs introduced here might change in the future and will not be
+ /// supported anymore in such a case.</font>
+ /// </summary>
+ [Serializable]
+ public class FieldScoreQuery:ValueSourceQuery
+ {
+
+ /// <summary> Type of score field, indicating how field values are interpreted/parsed.
+ /// <p/>
+ /// The type selected at search time should match the data stored in the field.
+ /// Different types have different RAM requirements:
+ /// <list type="bullet">
+ /// <item><see cref="BYTE" /> consumes 1 * maxDocs bytes.</item>
+ /// <item><see cref="SHORT" /> consumes 2 * maxDocs bytes.</item>
+ /// <item><see cref="INT" /> consumes 4 * maxDocs bytes.</item>
+ /// <item><see cref="FLOAT" /> consumes 8 * maxDocs bytes.</item>
+ /// </list>
+ /// </summary>
+ public class Type
+ {
+
+ /// <summary>field values are interpreted as numeric byte values. </summary>
+ public static readonly Type BYTE = new Type("byte");
+
+ /// <summary>field values are interpreted as numeric short values. </summary>
+ public static readonly Type SHORT = new Type("short");
+
+ /// <summary>field values are interpreted as numeric int values. </summary>
+ public static readonly Type INT = new Type("int");
+
+ /// <summary>field values are interpreted as numeric float values. </summary>
+ public static readonly Type FLOAT = new Type("float");
+
+ private System.String typeName;
+ internal Type(System.String name)
+ {
+ this.typeName = name;
+ }
+ /*(non-Javadoc) <see cref="java.lang.Object.toString() */
+ public override System.String ToString()
+ {
+ return GetType().FullName + "::" + typeName;
+ }
+ }
+
+ /// <summary> Create a FieldScoreQuery - a query that scores each document as the value of the numeric input field.
+ /// <p/>
+ /// The <c>type</c> param tells how to parse the field string values into a numeric score value.
+ /// </summary>
+ /// <param name="field">the numeric field to be used.
+ /// </param>
+ /// <param name="type">the type of the field: either
+ /// <see cref="Type.BYTE" />, <see cref="Type.SHORT" />, <see cref="Type.INT" />, or <see cref="Type.FLOAT" />.
+ /// </param>
+ public FieldScoreQuery(System.String field, Type type):base(GetValueSource(field, type))
+ {
+ }
+
+ // create the appropriate (cached) field value source.
+ private static ValueSource GetValueSource(System.String field, Type type)
+ {
+ if (type == Type.BYTE)
+ {
+ return new ByteFieldSource(field);
+ }
+ if (type == Type.SHORT)
+ {
+ return new ShortFieldSource(field);
+ }
+ if (type == Type.INT)
+ {
+ return new IntFieldSource(field);
+ }
+ if (type == Type.FLOAT)
+ {
+ return new FloatFieldSource(field);
+ }
+ throw new System.ArgumentException(type + " is not a known Field Score Query Type!");
+ }
+ }
+} \ No newline at end of file
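A short usage sketch (field name hypothetical, searcher an existing IndexSearcher); used on its own, every document matches and its score is simply the parsed field value:

using Lucene.Net.Search;
using Lucene.Net.Search.Function;

static class FieldScoreQueryExample
{
    // Ranks all documents by the numeric value of their "popularity" field.
    public static TopDocs TopByPopularity(Searcher searcher)
    {
        var byPopularity = new FieldScoreQuery("popularity", FieldScoreQuery.Type.INT);
        return searcher.Search(byPopularity, 10);
    }
}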
diff --git a/src/core/Search/Function/FloatFieldSource.cs b/src/core/Search/Function/FloatFieldSource.cs
new file mode 100644
index 0000000..45577ac
--- /dev/null
+++ b/src/core/Search/Function/FloatFieldSource.cs
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using IndexReader = Lucene.Net.Index.IndexReader;
+using FieldCache = Lucene.Net.Search.FieldCache;
+
+namespace Lucene.Net.Search.Function
+{
+
+ /// <summary> Expert: obtains float field values from the
+ /// <see cref="Lucene.Net.Search.FieldCache">FieldCache</see>
+ /// using <c>getFloats()</c> and makes those values
+ /// available as other numeric types, casting as needed.
+ ///
+ /// <p/><font color="#FF0000">
+ /// WARNING: The status of the <b>Search.Function</b> package is experimental.
+ /// The APIs introduced here might change in the future and will not be
+ /// supported anymore in such a case.</font>
+ ///
+ /// </summary>
+ /// <seealso cref="Lucene.Net.Search.Function.FieldCacheSource"> for requirements"
+ /// on the field.
+ ///
+ /// <p/><b>NOTE</b>: with the switch in 2.9 to segment-based
+ /// searching, if <see cref="FieldCacheSource.GetValues" /> is invoked with a
+ /// composite (multi-segment) reader, this can easily cause
+ /// double RAM usage for the values in the FieldCache. It's
+ /// best to switch your application to pass only atomic
+ /// (single segment) readers to this API.<p/>
+ /// </seealso>
+ [Serializable]
+ public class FloatFieldSource:FieldCacheSource
+ {
+ private class AnonymousClassDocValues:DocValues
+ {
+ public AnonymousClassDocValues(float[] arr, FloatFieldSource enclosingInstance)
+ {
+ InitBlock(arr, enclosingInstance);
+ }
+ private void InitBlock(float[] arr, FloatFieldSource enclosingInstance)
+ {
+ this.arr = arr;
+ this.enclosingInstance = enclosingInstance;
+ }
+ private float[] arr;
+ private FloatFieldSource enclosingInstance;
+ public FloatFieldSource Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.DocValues.floatVal(int) */
+ public override float FloatVal(int doc)
+ {
+ return arr[doc];
+ }
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.DocValues.toString(int) */
+ public override System.String ToString(int doc)
+ {
+ return Enclosing_Instance.Description() + '=' + arr[doc];
+ }
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.DocValues.getInnerArray() */
+
+ protected internal override object InnerArray
+ {
+ get { return arr; }
+ }
+ }
+ private Lucene.Net.Search.FloatParser parser;
+
+ /// <summary> Create a cached float field source with default string-to-float parser. </summary>
+ public FloatFieldSource(System.String field):this(field, null)
+ {
+ }
+
+ /// <summary> Create a cached float field source with a specific string-to-float parser. </summary>
+ public FloatFieldSource(System.String field, Lucene.Net.Search.FloatParser parser):base(field)
+ {
+ this.parser = parser;
+ }
+
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.ValueSource.description() */
+ public override System.String Description()
+ {
+ return "float(" + base.Description() + ')';
+ }
+
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.FieldCacheSource.getCachedValues(Lucene.Net.Search.FieldCache, java.lang.String, Lucene.Net.Index.IndexReader) */
+ public override DocValues GetCachedFieldValues(FieldCache cache, System.String field, IndexReader reader)
+ {
+ float[] arr = cache.GetFloats(reader, field, parser);
+ return new AnonymousClassDocValues(arr, this);
+ }
+
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.FieldCacheSource.cachedFieldSourceEquals(Lucene.Net.Search.Function.FieldCacheSource) */
+ public override bool CachedFieldSourceEquals(FieldCacheSource o)
+ {
+ if (o.GetType() != typeof(FloatFieldSource))
+ {
+ return false;
+ }
+ FloatFieldSource other = (FloatFieldSource) o;
+ return this.parser == null?other.parser == null:this.parser.GetType() == other.parser.GetType();
+ }
+
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.FieldCacheSource.cachedFieldSourceHashCode() */
+ public override int CachedFieldSourceHashCode()
+ {
+ return parser == null?typeof(System.Single).GetHashCode():parser.GetType().GetHashCode();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/Function/IntFieldSource.cs b/src/core/Search/Function/IntFieldSource.cs
new file mode 100644
index 0000000..4572f89
--- /dev/null
+++ b/src/core/Search/Function/IntFieldSource.cs
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using IndexReader = Lucene.Net.Index.IndexReader;
+using FieldCache = Lucene.Net.Search.FieldCache;
+
+namespace Lucene.Net.Search.Function
+{
+
+ /// <summary> Expert: obtains int field values from the
+ /// <see cref="Lucene.Net.Search.FieldCache">FieldCache</see>
+ /// using <c>getInts()</c> and makes those values
+ /// available as other numeric types, casting as needed.
+ ///
+ /// <p/><font color="#FF0000">
+ /// WARNING: The status of the <b>Search.Function</b> package is experimental.
+ /// The APIs introduced here might change in the future and will not be
+ /// supported anymore in such a case.</font>
+ ///
+ /// </summary>
+ /// <seealso cref="Lucene.Net.Search.Function.FieldCacheSource"> for requirements
+ /// on the field.
+ ///
+ /// <p/><b>NOTE</b>: with the switch in 2.9 to segment-based
+ /// searching, if <see cref="FieldCacheSource.GetValues" /> is invoked with a
+ /// composite (multi-segment) reader, this can easily cause
+ /// double RAM usage for the values in the FieldCache. It's
+ /// best to switch your application to pass only atomic
+ /// (single segment) readers to this API.<p/>
+ /// </seealso>
+ [Serializable]
+ public class IntFieldSource:FieldCacheSource
+ {
+ private class AnonymousClassDocValues:DocValues
+ {
+ public AnonymousClassDocValues(int[] arr, IntFieldSource enclosingInstance)
+ {
+ InitBlock(arr, enclosingInstance);
+ }
+ private void InitBlock(int[] arr, IntFieldSource enclosingInstance)
+ {
+ this.arr = arr;
+ this.enclosingInstance = enclosingInstance;
+ }
+ private int[] arr;
+ private IntFieldSource enclosingInstance;
+ public IntFieldSource Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.DocValues.floatVal(int) */
+ public override float FloatVal(int doc)
+ {
+ return (float) arr[doc];
+ }
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.DocValues.intVal(int) */
+ public override int IntVal(int doc)
+ {
+ return arr[doc];
+ }
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.DocValues.toString(int) */
+ public override System.String ToString(int doc)
+ {
+ return Enclosing_Instance.Description() + '=' + IntVal(doc);
+ }
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.DocValues.getInnerArray() */
+
+ protected internal override object InnerArray
+ {
+ get { return arr; }
+ }
+ }
+ private Lucene.Net.Search.IntParser parser;
+
+ /// <summary> Create a cached int field source with default string-to-int parser. </summary>
+ public IntFieldSource(System.String field):this(field, null)
+ {
+ }
+
+ /// <summary> Create a cached int field source with a specific string-to-int parser. </summary>
+ public IntFieldSource(System.String field, Lucene.Net.Search.IntParser parser):base(field)
+ {
+ this.parser = parser;
+ }
+
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.ValueSource.description() */
+ public override System.String Description()
+ {
+ return "int(" + base.Description() + ')';
+ }
+
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.FieldCacheSource.getCachedValues(Lucene.Net.Search.FieldCache, java.lang.String, Lucene.Net.Index.IndexReader) */
+ public override DocValues GetCachedFieldValues(FieldCache cache, System.String field, IndexReader reader)
+ {
+ int[] arr = cache.GetInts(reader, field, parser);
+ return new AnonymousClassDocValues(arr, this);
+ }
+
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.FieldCacheSource.cachedFieldSourceEquals(Lucene.Net.Search.Function.FieldCacheSource) */
+ public override bool CachedFieldSourceEquals(FieldCacheSource o)
+ {
+ if (o.GetType() != typeof(IntFieldSource))
+ {
+ return false;
+ }
+ IntFieldSource other = (IntFieldSource) o;
+ return this.parser == null?other.parser == null:this.parser.GetType() == other.parser.GetType();
+ }
+
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.FieldCacheSource.cachedFieldSourceHashCode() */
+ public override int CachedFieldSourceHashCode()
+ {
+ return parser == null?typeof(System.Int32).GetHashCode():parser.GetType().GetHashCode();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/Function/OrdFieldSource.cs b/src/core/Search/Function/OrdFieldSource.cs
new file mode 100644
index 0000000..798d948
--- /dev/null
+++ b/src/core/Search/Function/OrdFieldSource.cs
@@ -0,0 +1,146 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using IndexReader = Lucene.Net.Index.IndexReader;
+using FieldCache = Lucene.Net.Search.FieldCache;
+
+namespace Lucene.Net.Search.Function
+{
+
+ /// <summary> Expert: obtains the ordinal of the field value from the default Lucene
+ /// <see cref="Lucene.Net.Search.FieldCache">FieldCache</see> using getStringIndex().
+ /// <p/>
+ /// The native lucene index order is used to assign an ordinal value for each field value.
+ /// <p/>
+ /// Field values (terms) are lexicographically ordered by unicode value, and numbered starting at 1.
+ /// <p/>
+ /// Example:
+ /// <br/>If there were only three field values: "apple","banana","pear"
+ /// <br/>then ord("apple")=1, ord("banana")=2, ord("pear")=3
+ /// <p/>
+ /// WARNING:
+ /// ord() depends on the position in an index and can thus change
+ /// when other documents are inserted or deleted,
+ /// or if a MultiSearcher is used.
+ ///
+ /// <p/><font color="#FF0000">
+ /// WARNING: The status of the <b>Search.Function</b> package is experimental.
+ /// The APIs introduced here might change in the future and will not be
+ /// supported anymore in such a case.</font>
+ ///
+ /// <p/><b>NOTE</b>: with the switch in 2.9 to segment-based
+ /// searching, if <see cref="GetValues" /> is invoked with a
+ /// composite (multi-segment) reader, this can easily cause
+ /// double RAM usage for the values in the FieldCache. It's
+ /// best to switch your application to pass only atomic
+ /// (single segment) readers to this API.<p/>
+ /// </summary>
+
+ [Serializable]
+ public class OrdFieldSource:ValueSource
+ {
+ private class AnonymousClassDocValues:DocValues
+ {
+ public AnonymousClassDocValues(int[] arr, OrdFieldSource enclosingInstance)
+ {
+ InitBlock(arr, enclosingInstance);
+ }
+ private void InitBlock(int[] arr, OrdFieldSource enclosingInstance)
+ {
+ this.arr = arr;
+ this.enclosingInstance = enclosingInstance;
+ }
+ private int[] arr;
+ private OrdFieldSource enclosingInstance;
+ public OrdFieldSource Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.DocValues.floatVal(int) */
+ public override float FloatVal(int doc)
+ {
+ return (float) arr[doc];
+ }
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.DocValues.strVal(int) */
+ public override System.String StrVal(int doc)
+ {
+ // the string value of the ordinal, not the string itself
+ return System.Convert.ToString(arr[doc]);
+ }
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.DocValues.toString(int) */
+ public override System.String ToString(int doc)
+ {
+ return Enclosing_Instance.Description() + '=' + IntVal(doc);
+ }
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.DocValues.getInnerArray() */
+
+ protected internal override object InnerArray
+ {
+ get { return arr; }
+ }
+ }
+ protected internal System.String field;
+
+ /// <summary> Constructor for a certain field.</summary>
+ /// <param name="field">field whose values order is used.
+ /// </param>
+ public OrdFieldSource(System.String field)
+ {
+ this.field = field;
+ }
+
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.ValueSource.description() */
+ public override System.String Description()
+ {
+ return "ord(" + field + ')';
+ }
+
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.ValueSource.getValues(Lucene.Net.Index.IndexReader) */
+ public override DocValues GetValues(IndexReader reader)
+ {
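+ // order[doc] holds the ordinal (1-based, in term sort order) of the term indexed for this document.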
+ int[] arr = Lucene.Net.Search.FieldCache_Fields.DEFAULT.GetStringIndex(reader, field).order;
+ return new AnonymousClassDocValues(arr, this);
+ }
+
+ /*(non-Javadoc) <see cref="java.lang.Object.equals(java.lang.Object) */
+ public override bool Equals(System.Object o)
+ {
+ if (o.GetType() != typeof(OrdFieldSource))
+ return false;
+ OrdFieldSource other = (OrdFieldSource) o;
+ return this.field.Equals(other.field);
+ }
+
+ private static readonly int hcode;
+
+ /*(non-Javadoc) <see cref="java.lang.Object.hashCode() */
+ public override int GetHashCode()
+ {
+ return hcode + field.GetHashCode();
+ }
+ static OrdFieldSource()
+ {
+ hcode = typeof(OrdFieldSource).GetHashCode();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/Function/ReverseOrdFieldSource.cs b/src/core/Search/Function/ReverseOrdFieldSource.cs
new file mode 100644
index 0000000..ec69b46
--- /dev/null
+++ b/src/core/Search/Function/ReverseOrdFieldSource.cs
@@ -0,0 +1,158 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using IndexReader = Lucene.Net.Index.IndexReader;
+using FieldCache = Lucene.Net.Search.FieldCache;
+
+namespace Lucene.Net.Search.Function
+{
+
+ /// <summary> Expert: obtains the ordinal of the field value from the default Lucene
+ /// <see cref="Lucene.Net.Search.FieldCache">FieldCache</see> using getStringIndex()
+ /// and reverses the order.
+ /// <p/>
+ /// The native lucene index order is used to assign an ordinal value for each field value.
+ /// <p/>
+ /// Field values (terms) are lexicographically ordered by unicode value, and numbered starting at 1.
+ /// <br/>
+ /// Example of reverse ordinal (rord):
+ /// <br/>If there were only three field values: "apple","banana","pear"
+ /// <br/>then rord("apple")=3, rord("banana")=2, rord("pear")=1
+ /// <p/>
+ /// WARNING:
+ /// rord() depends on the position in an index and can thus change
+ /// when other documents are inserted or deleted,
+ /// or if a MultiSearcher is used.
+ ///
+ /// <p/><font color="#FF0000">
+ /// WARNING: The status of the <b>Search.Function</b> package is experimental.
+ /// The APIs introduced here might change in the future and will not be
+ /// supported anymore in such a case.</font>
+ ///
+ /// <p/><b>NOTE</b>: with the switch in 2.9 to segment-based
+ /// searching, if <see cref="GetValues" /> is invoked with a
+ /// composite (multi-segment) reader, this can easily cause
+ /// double RAM usage for the values in the FieldCache. It's
+ /// best to switch your application to pass only atomic
+ /// (single segment) readers to this API.<p/>
+ /// </summary>
+
+ [Serializable]
+ public class ReverseOrdFieldSource:ValueSource
+ {
+ private class AnonymousClassDocValues:DocValues
+ {
+ public AnonymousClassDocValues(int end, int[] arr, ReverseOrdFieldSource enclosingInstance)
+ {
+ InitBlock(end, arr, enclosingInstance);
+ }
+ private void InitBlock(int end, int[] arr, ReverseOrdFieldSource enclosingInstance)
+ {
+ this.end = end;
+ this.arr = arr;
+ this.enclosingInstance = enclosingInstance;
+ }
+ private int end;
+ private int[] arr;
+ private ReverseOrdFieldSource enclosingInstance;
+ public ReverseOrdFieldSource Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.DocValues.floatVal(int) */
+ public override float FloatVal(int doc)
+ {
+ return (float) (end - arr[doc]);
+ }
+ /* (non-Javadoc) <see cref="Lucene.Net.Search.Function.DocValues.intVal(int) */
+ public override int IntVal(int doc)
+ {
+ return end - arr[doc];
+ }
+ /* (non-Javadoc) <see cref="Lucene.Net.Search.Function.DocValues.strVal(int) */
+ public override System.String StrVal(int doc)
+ {
+ // the string value of the ordinal, not the string itself
+ return System.Convert.ToString(IntVal(doc));
+ }
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.DocValues.toString(int) */
+ public override System.String ToString(int doc)
+ {
+ return Enclosing_Instance.Description() + '=' + StrVal(doc);
+ }
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.DocValues.getInnerArray() */
+
+ protected internal override object InnerArray
+ {
+ get { return arr; }
+ }
+ }
+ public System.String field;
+
+ /// <summary> Constructor for a certain field.</summary>
+ /// <param name="field">field whose values' reverse order is used.
+ /// </param>
+ public ReverseOrdFieldSource(System.String field)
+ {
+ this.field = field;
+ }
+
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.ValueSource.description() */
+ public override System.String Description()
+ {
+ return "rord(" + field + ')';
+ }
+
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.ValueSource.getValues(Lucene.Net.Index.IndexReader) */
+ public override DocValues GetValues(IndexReader reader)
+ {
+ Lucene.Net.Search.StringIndex sindex = Lucene.Net.Search.FieldCache_Fields.DEFAULT.GetStringIndex(reader, field);
+
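+ // order[doc] is the ordinal of the doc's term; lookup.Length is one larger than the number of
+ // distinct terms (slot 0 is reserved), so "end - ordinal" yields the reverse ordinal described
+ // in the class comment above.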
+ int[] arr = sindex.order;
+ int end = sindex.lookup.Length;
+
+ return new AnonymousClassDocValues(end, arr, this);
+ }
+
+ /*(non-Javadoc) <see cref="java.lang.Object.equals(java.lang.Object) */
+ public override bool Equals(System.Object o)
+ {
+ if (o.GetType() != typeof(ReverseOrdFieldSource))
+ return false;
+ ReverseOrdFieldSource other = (ReverseOrdFieldSource) o;
+ return this.field.Equals(other.field);
+ }
+
+ private static readonly int hcode;
+
+ /*(non-Javadoc) <see cref="java.lang.Object.hashCode() */
+ public override int GetHashCode()
+ {
+ return hcode + field.GetHashCode();
+ }
+ static ReverseOrdFieldSource()
+ {
+ hcode = typeof(ReverseOrdFieldSource).GetHashCode();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/Function/ShortFieldSource.cs b/src/core/Search/Function/ShortFieldSource.cs
new file mode 100644
index 0000000..6f4953e
--- /dev/null
+++ b/src/core/Search/Function/ShortFieldSource.cs
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using IndexReader = Lucene.Net.Index.IndexReader;
+using FieldCache = Lucene.Net.Search.FieldCache;
+
+namespace Lucene.Net.Search.Function
+{
+
+ /// <summary> Expert: obtains short field values from the
+ /// <see cref="Lucene.Net.Search.FieldCache">FieldCache</see>
+ /// using <c>getShorts()</c> and makes those values
+ /// available as other numeric types, casting as needed.
+ ///
+ /// <p/><font color="#FF0000">
+ /// WARNING: The status of the <b>Search.Function</b> package is experimental.
+ /// The APIs introduced here might change in the future and will not be
+ /// supported anymore in such a case.</font>
+ ///
+ /// </summary>
+ /// <seealso cref="Lucene.Net.Search.Function.FieldCacheSource"> for requirements
+ /// on the field.
+ ///
+ /// <p/><b>NOTE</b>: with the switch in 2.9 to segment-based
+ /// searching, if <see cref="FieldCacheSource.GetValues" /> is invoked with a
+ /// composite (multi-segment) reader, this can easily cause
+ /// double RAM usage for the values in the FieldCache. It's
+ /// best to switch your application to pass only atomic
+ /// (single segment) readers to this API.<p/>
+ /// </seealso>
+ [Serializable]
+ public class ShortFieldSource:FieldCacheSource
+ {
+ private class AnonymousClassDocValues:DocValues
+ {
+ public AnonymousClassDocValues(short[] arr, ShortFieldSource enclosingInstance)
+ {
+ InitBlock(arr, enclosingInstance);
+ }
+ private void InitBlock(short[] arr, ShortFieldSource enclosingInstance)
+ {
+ this.arr = arr;
+ this.enclosingInstance = enclosingInstance;
+ }
+ private short[] arr;
+ private ShortFieldSource enclosingInstance;
+ public ShortFieldSource Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.DocValues.floatVal(int) */
+ public override float FloatVal(int doc)
+ {
+ return (float) arr[doc];
+ }
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.DocValues.intVal(int) */
+ public override int IntVal(int doc)
+ {
+ return arr[doc];
+ }
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.DocValues.toString(int) */
+ public override System.String ToString(int doc)
+ {
+ return Enclosing_Instance.Description() + '=' + IntVal(doc);
+ }
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.DocValues.getInnerArray() */
+
+ protected internal override object InnerArray
+ {
+ get { return arr; }
+ }
+ }
+ private Lucene.Net.Search.ShortParser parser;
+
+ /// <summary> Create a cached short field source with default string-to-short parser. </summary>
+ public ShortFieldSource(System.String field):this(field, null)
+ {
+ }
+
+ /// <summary> Create a cached short field source with a specific string-to-short parser. </summary>
+ public ShortFieldSource(System.String field, Lucene.Net.Search.ShortParser parser):base(field)
+ {
+ this.parser = parser;
+ }
+
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.ValueSource.description() */
+ public override System.String Description()
+ {
+ return "short(" + base.Description() + ')';
+ }
+
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.FieldCacheSource.getCachedValues(Lucene.Net.Search.FieldCache, java.lang.String, Lucene.Net.Index.IndexReader) */
+ public override DocValues GetCachedFieldValues(FieldCache cache, System.String field, IndexReader reader)
+ {
+ short[] arr = cache.GetShorts(reader, field, parser);
+ return new AnonymousClassDocValues(arr, this);
+ }
+
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.FieldCacheSource.cachedFieldSourceEquals(Lucene.Net.Search.Function.FieldCacheSource) */
+ public override bool CachedFieldSourceEquals(FieldCacheSource o)
+ {
+ if (o.GetType() != typeof(ShortFieldSource))
+ {
+ return false;
+ }
+ ShortFieldSource other = (ShortFieldSource) o;
+ return this.parser == null?other.parser == null:this.parser.GetType() == other.parser.GetType();
+ }
+
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Function.FieldCacheSource.cachedFieldSourceHashCode() */
+ public override int CachedFieldSourceHashCode()
+ {
+ return parser == null?typeof(System.Int16).GetHashCode():parser.GetType().GetHashCode();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/Function/ValueSource.cs b/src/core/Search/Function/ValueSource.cs
new file mode 100644
index 0000000..c570d27
--- /dev/null
+++ b/src/core/Search/Function/ValueSource.cs
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using IndexReader = Lucene.Net.Index.IndexReader;
+
+namespace Lucene.Net.Search.Function
+{
+
+ /// <summary> Expert: source of values for basic function queries.
+ /// <p/>At its default/simplest form, values - one per doc - are used as the score of that doc.
+ /// <p/>Values are instantiated as
+ /// <see cref="Lucene.Net.Search.Function.DocValues">DocValues</see> for a particular reader.
+ /// <p/>ValueSource implementations differ in RAM requirements: usage is always proportional
+ /// to the number of documents, but the number of bytes per document can be 1, 2, 4, or 8.
+ ///
+ /// <p/><font color="#FF0000">
+ /// WARNING: The status of the <b>Search.Function</b> package is experimental.
+ /// The APIs introduced here might change in the future and will not be
+ /// supported anymore in such a case.</font>
+ ///
+ ///
+ /// </summary>
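+ // Editorial sketch (not part of the original patch): a minimal custom ValueSource that
+ // scores every document with the same constant. All names below are assumptions made
+ // for illustration only.
+ //
+ //   [Serializable]
+ //   public class ConstantValueSource : ValueSource
+ //   {
+ //       private class ConstantDocValues : DocValues
+ //       {
+ //           private readonly float v;
+ //           internal ConstantDocValues(float v) { this.v = v; }
+ //           public override float FloatVal(int doc) { return v; }
+ //           public override System.String ToString(int doc) { return "const=" + v; }
+ //       }
+ //       private readonly float v;
+ //       public ConstantValueSource(float v) { this.v = v; }
+ //       public override DocValues GetValues(IndexReader reader) { return new ConstantDocValues(v); }
+ //       public override System.String Description() { return "const(" + v + ')'; }
+ //       public override bool Equals(System.Object o) { return o is ConstantValueSource && ((ConstantValueSource) o).v == v; }
+ //       public override int GetHashCode() { return typeof(ConstantValueSource).GetHashCode() ^ v.GetHashCode(); }
+ //   }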
+ [Serializable]
+ public abstract class ValueSource
+ {
+
+ /// <summary> Return the DocValues used by the function query.</summary>
+ /// <param name="reader">the IndexReader used to read these values.
+ /// If any caching is involved, that caching would also be IndexReader based.
+ /// </param>
+ /// <throws> IOException for any error. </throws>
+ public abstract DocValues GetValues(IndexReader reader);
+
+ /// <summary> description of field, used in explain() </summary>
+ public abstract System.String Description();
+
+ /* (non-Javadoc) <see cref="java.lang.Object.toString() */
+ public override System.String ToString()
+ {
+ return Description();
+ }
+
+ /// <summary> Needed for possible caching of query results - used by <see cref="ValueSourceQuery.Equals(Object)" />.</summary>
+ /// <seealso cref="Object.Equals(Object)">
+ /// </seealso>
+ abstract public override bool Equals(System.Object o);
+
+ /// <summary> Needed for possible caching of query results - used by <see cref="ValueSourceQuery.GetHashCode()" />.</summary>
+ /// <seealso cref="Object.GetHashCode()">
+ /// </seealso>
+ abstract public override int GetHashCode();
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/Function/ValueSourceQuery.cs b/src/core/Search/Function/ValueSourceQuery.cs
new file mode 100644
index 0000000..66593a4
--- /dev/null
+++ b/src/core/Search/Function/ValueSourceQuery.cs
@@ -0,0 +1,235 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Index;
+using IndexReader = Lucene.Net.Index.IndexReader;
+using TermDocs = Lucene.Net.Index.TermDocs;
+using ToStringUtils = Lucene.Net.Util.ToStringUtils;
+using Lucene.Net.Search;
+
+namespace Lucene.Net.Search.Function
+{
+
+ /// <summary> Expert: A Query that sets the scores of document to the
+ /// values obtained from a <see cref="Lucene.Net.Search.Function.ValueSource">ValueSource</see>.
+ /// <p/>
+ /// This query provides a score for <em>each and every</em> undeleted document in the index.
+ /// <p/>
+ /// The value source can be based on a (cached) value of an indexed field, but it
+ /// can also be based on an external source, e.g. values read from an external database.
+ /// <p/>
+ /// Score is set as: Score(doc,query) = query.getBoost()<sup>2</sup> * valueSource(doc).
+ ///
+ /// <p/><font color="#FF0000">
+ /// WARNING: The status of the <b>Search.Function</b> package is experimental.
+ /// The APIs introduced here might change in the future and will not be
+ /// supported anymore in such a case.</font>
+ /// </summary>
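+ // Editorial note (illustrative, not part of the original patch): applying the formula
+ // above, a query boost of 2.0 over a source that yields 3.0 for a document gives that
+ // document a score of 2.0 * 2.0 * 3.0 = 12.0. Typical usage, with "popularity" as an
+ // assumed field name:
+ //
+ //   Query q = new ValueSourceQuery(new IntFieldSource("popularity"));
+ //   q.Boost = 2.0f;
+ //   TopDocs hits = searcher.Search(q, 10);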
+ [Serializable]
+ public class ValueSourceQuery:Query
+ {
+ internal ValueSource valSrc;
+
+ /// <summary> Create a value source query</summary>
+ /// <param name="valSrc">provides the values defines the function to be used for scoring
+ /// </param>
+ public ValueSourceQuery(ValueSource valSrc)
+ {
+ this.valSrc = valSrc;
+ }
+
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Query.rewrite(Lucene.Net.Index.IndexReader) */
+ public override Query Rewrite(IndexReader reader)
+ {
+ return this;
+ }
+
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Query.extractTerms(java.util.Set) */
+ public override void ExtractTerms(System.Collections.Generic.ISet<Term> terms)
+ {
+ // no terms involved here
+ }
+
+ [Serializable]
+ internal class ValueSourceWeight:Weight
+ {
+ private void InitBlock(ValueSourceQuery enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private ValueSourceQuery enclosingInstance;
+ public ValueSourceQuery Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ internal Similarity similarity;
+ internal float queryNorm;
+ internal float queryWeight;
+
+ public ValueSourceWeight(ValueSourceQuery enclosingInstance, Searcher searcher)
+ {
+ InitBlock(enclosingInstance);
+ this.similarity = Enclosing_Instance.GetSimilarity(searcher);
+ }
+
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Weight.getQuery() */
+
+ public override Query Query
+ {
+ get { return Enclosing_Instance; }
+ }
+
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Weight.getValue() */
+
+ public override float Value
+ {
+ get { return queryWeight; }
+ }
+
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Weight.sumOfSquaredWeights() */
+
+ public override float GetSumOfSquaredWeights()
+ {
+ queryWeight = Enclosing_Instance.Boost;
+ return queryWeight*queryWeight;
+ }
+
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Weight.normalize(float) */
+ public override void Normalize(float norm)
+ {
+ this.queryNorm = norm;
+ queryWeight *= this.queryNorm;
+ }
+
+ public override Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer)
+ {
+ return new ValueSourceScorer(enclosingInstance, similarity, reader, this);
+ }
+
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Weight.explain(Lucene.Net.Index.IndexReader, int) */
+ public override Explanation Explain(IndexReader reader, int doc)
+ {
+ DocValues vals = enclosingInstance.valSrc.GetValues(reader);
+ float sc = queryWeight*vals.FloatVal(doc);
+
+ Explanation result = new ComplexExplanation(true, sc, enclosingInstance.ToString() + ", product of:");
+ result.AddDetail(vals.Explain(doc));
+ result.AddDetail(new Explanation(enclosingInstance.Boost, "boost"));
+ result.AddDetail(new Explanation(queryNorm, "queryNorm"));
+ return result;
+ }
+ }
+
+ /// <summary> A scorer that (simply) matches all documents, and scores each document with
+ /// the value of the value source in effect. As an example, if the value source
+ /// is a (cached) field source, then the value of that field in that document will
+ /// be used. (This assumes the field is indexed for this doc, with a single token.)
+ /// </summary>
+ private class ValueSourceScorer : Scorer
+ {
+ private void InitBlock(ValueSourceQuery enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private ValueSourceQuery enclosingInstance;
+ public ValueSourceQuery Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ private ValueSourceWeight weight;
+ private float qWeight;
+ private DocValues vals;
+ private TermDocs termDocs;
+ private int doc = -1;
+
+ // constructor
+ internal ValueSourceScorer(ValueSourceQuery enclosingInstance, Similarity similarity, IndexReader reader, ValueSourceWeight w)
+ : base(similarity)
+ {
+ InitBlock(enclosingInstance);
+ this.weight = w;
+ this.qWeight = w.Value;
+ // this is when/where the values are first created.
+ vals = Enclosing_Instance.valSrc.GetValues(reader);
+ termDocs = reader.TermDocs(null);
+ }
+
+ public override int NextDoc()
+ {
+ return doc = termDocs.Next() ? termDocs.Doc : NO_MORE_DOCS;
+ }
+
+ public override int DocID()
+ {
+ return doc;
+ }
+
+ public override int Advance(int target)
+ {
+ return doc = termDocs.SkipTo(target) ? termDocs.Doc : NO_MORE_DOCS;
+ }
+
+ /*(non-Javadoc) <see cref="Lucene.Net.Search.Scorer.explain(int) */
+ public override float Score()
+ {
+ return qWeight * vals.FloatVal(termDocs.Doc);
+ }
+ }
+
+ public override Weight CreateWeight(Searcher searcher)
+ {
+ return new ValueSourceQuery.ValueSourceWeight(this, searcher);
+ }
+
+ public override System.String ToString(System.String field)
+ {
+ return valSrc.ToString() + ToStringUtils.Boost(Boost);
+ }
+
+ /// <summary>Returns true if <c>o</c> is equal to this. </summary>
+ public override bool Equals(System.Object o)
+ {
+ if (GetType() != o.GetType())
+ {
+ return false;
+ }
+ ValueSourceQuery other = (ValueSourceQuery) o;
+ return this.Boost == other.Boost && this.valSrc.Equals(other.valSrc);
+ }
+
+ /// <summary>Returns a hash code value for this object. </summary>
+ public override int GetHashCode()
+ {
+ return (GetType().GetHashCode() + valSrc.GetHashCode()) ^ BitConverter.ToInt32(BitConverter.GetBytes(Boost), 0);
+ }
+
+ override public System.Object Clone()
+ {
+ return this.MemberwiseClone();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/FuzzyQuery.cs b/src/core/Search/FuzzyQuery.cs
new file mode 100644
index 0000000..bdf5af7
--- /dev/null
+++ b/src/core/Search/FuzzyQuery.cs
@@ -0,0 +1,256 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Lucene.Net.Index;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using IndexReader = Lucene.Net.Index.IndexReader;
+using Single = Lucene.Net.Support.Single;
+using Term = Lucene.Net.Index.Term;
+using ToStringUtils = Lucene.Net.Util.ToStringUtils;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary>Implements the fuzzy search query. The similarity measurement
+ /// is based on the Levenshtein (edit distance) algorithm.
+ ///
+ /// Warning: this query is not very scalable with its default prefix
+ /// length of 0 - in this case, *every* term will be enumerated and
+ /// cause an edit score calculation.
+ ///
+ /// </summary>
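+ // Editorial note (illustrative, not part of the original patch): the field name, term
+ // text and searcher below are assumptions. A non-zero prefix length keeps the term
+ // enumeration manageable on large indexes:
+ //
+ //   Term term = new Term("name", "lucene");
+ //   Query q = new FuzzyQuery(term, 0.6f, 2);   // minimum similarity 0.6, 2-character fixed prefix
+ //   TopDocs hits = searcher.Search(q, 10);     // only terms starting with "lu" are enumerated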
+ [Serializable]
+ public class FuzzyQuery : MultiTermQuery
+ {
+
+ public const float defaultMinSimilarity = 0.5f;
+ public const int defaultPrefixLength = 0;
+
+ private float minimumSimilarity;
+ private int prefixLength;
+ private bool termLongEnough = false;
+
+ /// <summary> Returns the pattern term.</summary>
+ public Term Term { get; protected internal set; }
+
+ /// <summary> Create a new FuzzyQuery that will match terms with a similarity
+ /// of at least <c>minimumSimilarity</c> to <c>term</c>.
+ /// If a <c>prefixLength</c> &gt; 0 is specified, a common prefix
+ /// of that length is also required.
+ ///
+ /// </summary>
+ /// <param name="term">the term to search for
+ /// </param>
+ /// <param name="minimumSimilarity">a value between 0 and 1 to set the required similarity
+ /// between the query term and the matching terms. For example, for a
+ /// <c>minimumSimilarity</c> of <c>0.5</c> a term of the same length
+ /// as the query term is considered similar to the query term if the edit distance
+ /// between both terms is less than <c>length(term)*0.5</c>
+ /// </param>
+ /// <param name="prefixLength">length of common (non-fuzzy) prefix
+ /// </param>
+ /// <throws> IllegalArgumentException if minimumSimilarity is &gt;= 1 or &lt; 0,
+ /// or if prefixLength &lt; 0
+ /// </throws>
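+ // Editorial example: with minimumSimilarity = 0.5 and a 6-character query term, a candidate
+ // term of the same length matches only if its edit distance to the query term is less than
+ // 6 * 0.5 = 3, i.e. at most 2 edits.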
+ public FuzzyQuery(Term term, float minimumSimilarity, int prefixLength)
+ {
+ this.Term = term;
+
+ if (minimumSimilarity >= 1.0f)
+ throw new System.ArgumentException("minimumSimilarity >= 1");
+ else if (minimumSimilarity < 0.0f)
+ throw new System.ArgumentException("minimumSimilarity < 0");
+ if (prefixLength < 0)
+ throw new System.ArgumentException("prefixLength < 0");
+
+ if (term.Text.Length > 1.0f / (1.0f - minimumSimilarity))
+ {
+ this.termLongEnough = true;
+ }
+
+ this.minimumSimilarity = minimumSimilarity;
+ this.prefixLength = prefixLength;
+ internalRewriteMethod = SCORING_BOOLEAN_QUERY_REWRITE;
+ }
+
+ /// <summary> Calls <see cref="FuzzyQuery(Index.Term, float)">FuzzyQuery(term, minimumSimilarity, 0)</see>.</summary>
+ public FuzzyQuery(Term term, float minimumSimilarity):this(term, minimumSimilarity, defaultPrefixLength)
+ {
+ }
+
+ /// <summary> Calls <see cref="FuzzyQuery(Index.Term, float)">FuzzyQuery(term, 0.5f, 0)</see>.</summary>
+ public FuzzyQuery(Term term):this(term, defaultMinSimilarity, defaultPrefixLength)
+ {
+ }
+
+ /// <summary> Returns the minimum similarity that is required for this query to match.</summary>
+ /// <value> float value between 0.0 and 1.0 </value>
+ public virtual float MinSimilarity
+ {
+ get { return minimumSimilarity; }
+ }
+
+ /// <summary> Returns the non-fuzzy prefix length. This is the number of characters at the start
+ /// of a term that must be identical (not fuzzy) to the query term if the query
+ /// is to match that term.
+ /// </summary>
+ public virtual int PrefixLength
+ {
+ get { return prefixLength; }
+ }
+
+ protected internal override FilteredTermEnum GetEnum(IndexReader reader)
+ {
+ return new FuzzyTermEnum(reader, Term, minimumSimilarity, prefixLength);
+ }
+
+ public override RewriteMethod RewriteMethod
+ {
+ set { throw new System.NotSupportedException("FuzzyQuery cannot change rewrite method"); }
+ }
+
+ public override Query Rewrite(IndexReader reader)
+ {
+ if (!termLongEnough)
+ {
+ // can only match if it's exact
+ return new TermQuery(Term);
+ }
+
+ int maxSize = BooleanQuery.MaxClauseCount;
+
+ // TODO: Java uses a PriorityQueue. Using LINQ we can emulate it, but it is
+ // considerably slower than the Java counterpart. This should be a temporary
+ // solution, fixed before release.
+ SortedList<ScoreTerm, ScoreTerm> stQueue = new SortedList<ScoreTerm, ScoreTerm>();
+ FilteredTermEnum enumerator = GetEnum(reader);
+
+ try
+ {
+ ScoreTerm st = new ScoreTerm();
+ do
+ {
+ Term t = enumerator.Term;
+ if (t == null) break;
+ float score = enumerator.Difference();
+ // ignore uncompetitive hits
+ if (stQueue.Count >= maxSize && score <= stQueue.Keys.First().score)
+ continue;
+ // add new entry in PQ
+ st.term = t;
+ st.score = score;
+ stQueue.Add(st, st);
+ // possibly drop entries from queue
+ if (stQueue.Count > maxSize)
+ {
+ st = stQueue.Keys.First();
+ stQueue.Remove(st);
+ }
+ else
+ {
+ st = new ScoreTerm();
+ }
+ }
+ while (enumerator.Next());
+ }
+ finally
+ {
+ enumerator.Close();
+ }
+
+ BooleanQuery query = new BooleanQuery(true);
+ foreach(ScoreTerm st in stQueue.Keys)
+ {
+ TermQuery tq = new TermQuery(st.term); // found a match
+ tq.Boost = Boost * st.score; // set the boost
+ query.Add(tq, Occur.SHOULD); // add to query
+ }
+
+ return query;
+ }
+
+ public override System.String ToString(System.String field)
+ {
+ System.Text.StringBuilder buffer = new System.Text.StringBuilder();
+ if (!Term.Field.Equals(field))
+ {
+ buffer.Append(Term.Field);
+ buffer.Append(":");
+ }
+ buffer.Append(Term.Text);
+ buffer.Append('~');
+ buffer.Append(Single.ToString(minimumSimilarity));
+ buffer.Append(ToStringUtils.Boost(Boost));
+ return buffer.ToString();
+ }
+
+ protected internal class ScoreTerm : IComparable<ScoreTerm>
+ {
+ public Term term;
+ public float score;
+
+ public int CompareTo(ScoreTerm other)
+ {
+ if (Comparer<float>.Default.Compare(this.score, other.score) == 0)
+ {
+ return other.term.CompareTo(this.term);
+ }
+ else
+ {
+ return Comparer<float>.Default.Compare(this.score, other.score);
+ }
+ }
+ }
+
+ public override int GetHashCode()
+ {
+ int prime = 31;
+ int result = base.GetHashCode();
+ result = prime * result + BitConverter.ToInt32(BitConverter.GetBytes(minimumSimilarity), 0);
+ result = prime * result + prefixLength;
+ result = prime * result + ((Term == null)?0:Term.GetHashCode());
+ return result;
+ }
+
+ public override bool Equals(System.Object obj)
+ {
+ if (this == obj)
+ return true;
+ if (!base.Equals(obj))
+ return false;
+ if (GetType() != obj.GetType())
+ return false;
+ FuzzyQuery other = (FuzzyQuery) obj;
+ if (BitConverter.ToInt32(BitConverter.GetBytes(minimumSimilarity), 0) != BitConverter.ToInt32(BitConverter.GetBytes(other.minimumSimilarity), 0))
+ return false;
+ if (prefixLength != other.prefixLength)
+ return false;
+ if (Term == null)
+ {
+ if (other.Term != null)
+ return false;
+ }
+ else if (!Term.Equals(other.Term))
+ return false;
+ return true;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/FuzzyTermEnum.cs b/src/core/Search/FuzzyTermEnum.cs
new file mode 100644
index 0000000..6e4fc7b
--- /dev/null
+++ b/src/core/Search/FuzzyTermEnum.cs
@@ -0,0 +1,318 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using IndexReader = Lucene.Net.Index.IndexReader;
+using Term = Lucene.Net.Index.Term;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary>Subclass of FilteredTermEnum for enumerating all terms that are similar
+ /// to the specified filter term.
+ ///
+ /// <p/>Term enumerations are always ordered by Term.compareTo(). Each term in
+ /// the enumeration is greater than all that precede it.
+ /// </summary>
+ public sealed class FuzzyTermEnum:FilteredTermEnum
+ {
+ /* Allows us to save the time required to create a new array
+ * every time similarity is called.
+ */
+ private int[] p;
+ private int[] d;
+
+ private float similarity;
+ private bool endEnum = false;
+
+ private bool isDisposed;
+
+ private Term searchTerm = null;
+ private System.String field;
+ private System.String text;
+ private System.String prefix;
+
+ private float minimumSimilarity;
+ private float scale_factor;
+
+ /// <summary> Creates a FuzzyTermEnum with an empty prefix and a minSimilarity of 0.5f.
+ /// <p/>
+ /// After calling the constructor the enumeration is already pointing to the first
+ /// valid term if such a term exists.
+ ///
+ /// </summary>
+ /// <param name="reader">
+ /// </param>
+ /// <param name="term">
+ /// </param>
+ /// <throws> IOException </throws>
+ /// <seealso cref="FuzzyTermEnum(IndexReader, Term, float, int)">
+ /// </seealso>
+ public FuzzyTermEnum(IndexReader reader, Term term):this(reader, term, FuzzyQuery.defaultMinSimilarity, FuzzyQuery.defaultPrefixLength)
+ {
+ }
+
+ /// <summary> Creates a FuzzyTermEnum with an empty prefix.
+ /// <p/>
+ /// After calling the constructor the enumeration is already pointing to the first
+ /// valid term if such a term exists.
+ ///
+ /// </summary>
+ /// <param name="reader">
+ /// </param>
+ /// <param name="term">
+ /// </param>
+ /// <param name="minSimilarity">
+ /// </param>
+ /// <throws> IOException </throws>
+ /// <seealso cref="FuzzyTermEnum(IndexReader, Term, float, int)">
+ /// </seealso>
+ public FuzzyTermEnum(IndexReader reader, Term term, float minSimilarity):this(reader, term, minSimilarity, FuzzyQuery.defaultPrefixLength)
+ {
+ }
+
+ /// <summary> Constructor for enumeration of all terms from specified <c>reader</c> which share a prefix of
+ /// length <c>prefixLength</c> with <c>term</c> and which have a fuzzy similarity &gt;
+ /// <c>minSimilarity</c>.
+ /// <p/>
+ /// After calling the constructor the enumeration is already pointing to the first
+ /// valid term if such a term exists.
+ ///
+ /// </summary>
+ /// <param name="reader">Delivers terms.
+ /// </param>
+ /// <param name="term">Pattern term.
+ /// </param>
+ /// <param name="minSimilarity">Minimum required similarity for terms from the reader. Default value is 0.5f.
+ /// </param>
+ /// <param name="prefixLength">Length of required common prefix. Default value is 0.
+ /// </param>
+ /// <throws> IOException </throws>
+ public FuzzyTermEnum(IndexReader reader, Term term, float minSimilarity, int prefixLength):base()
+ {
+
+ if (minSimilarity >= 1.0f)
+ throw new System.ArgumentException("minimumSimilarity cannot be greater than or equal to 1");
+ else if (minSimilarity < 0.0f)
+ throw new System.ArgumentException("minimumSimilarity cannot be less than 0");
+ if (prefixLength < 0)
+ throw new System.ArgumentException("prefixLength cannot be less than 0");
+
+ this.minimumSimilarity = minSimilarity;
+ this.scale_factor = 1.0f / (1.0f - minimumSimilarity);
+ this.searchTerm = term;
+ this.field = searchTerm.Field;
+
+ //The prefix could be longer than the word.
+ //It's kind of silly though. It means we must match the entire word.
+ int fullSearchTermLength = searchTerm.Text.Length;
+ int realPrefixLength = prefixLength > fullSearchTermLength?fullSearchTermLength:prefixLength;
+
+ this.text = searchTerm.Text.Substring(realPrefixLength);
+ this.prefix = searchTerm.Text.Substring(0, realPrefixLength);
+
+ this.p = new int[this.text.Length + 1];
+ this.d = new int[this.text.Length + 1];
+
+ SetEnum(reader.Terms(new Term(searchTerm.Field, prefix)));
+ }
+
+ /// <summary> The termCompare method in FuzzyTermEnum uses Levenshtein distance to
+ /// calculate the distance between the given term and the comparing term.
+ /// </summary>
+ protected internal override bool TermCompare(Term term)
+ {
+ if ((System.Object) field == (System.Object) term.Field && term.Text.StartsWith(prefix))
+ {
+ System.String target = term.Text.Substring(prefix.Length);
+ this.similarity = Similarity(target);
+ return (similarity > minimumSimilarity);
+ }
+ endEnum = true;
+ return false;
+ }
+
+ public override float Difference()
+ {
+ return ((similarity - minimumSimilarity) * scale_factor);
+ }
+
+ public override bool EndEnum()
+ {
+ return endEnum;
+ }
+
+ // <summary>
+ // ***************************
+ // Compute Levenshtein distance
+ // ****************************
+ // </summary>
+
+ /// <summary> <p/>Similarity returns a number that is 1.0f or less (including negative numbers)
+ /// based on how similar the Term is compared to a target term. It returns
+ /// exactly 0.0f when
+ /// <c>
+ /// editDistance &gt; maximumEditDistance</c>
+ /// Otherwise it returns:
+ /// <c>
+ /// 1 - (editDistance / length)</c>
+ /// where length is the length of the shortest term (text or target), including the
+ /// identical prefix, and editDistance is the Levenshtein distance between
+ /// the two words.<p/>
+ ///
+ /// <p/>Embedded within this algorithm is a fail-fast Levenshtein distance
+ /// algorithm. The fail-fast algorithm differs from the standard Levenshtein
+ /// distance algorithm in that it is aborted if it is discovered that the
+ /// minimum distance between the words is greater than some threshold.
+ ///
+ /// <p/>To calculate the maximum distance threshold we use the following formula:
+ /// <c>
+ /// (1 - minimumSimilarity) * length</c>
+ /// where length is the shortest term including any prefix that is not part of the
+ /// similarity comparison. This formula was derived by solving for what maximum value
+ /// of distance returns false for the following statements:
+ /// <code>
+ /// similarity = 1 - ((float)distance / (float) (prefixLength + Math.min(textlen, targetlen)));
+ /// return (similarity > minimumSimilarity);</code>
+ /// where distance is the Levenshtein distance for the two words.
+ /// <p/>
+ /// <p/>Levenshtein distance (also known as edit distance) is a measure of similarity
+ /// between two strings where the distance is measured as the number of character
+ /// deletions, insertions or substitutions required to transform one string to
+ /// the other string.
+ /// </summary>
+ /// <param name="target">the target word or phrase
+ /// </param>
+ /// <returns> the similarity, 0.0 or less indicates that it matches less than the required
+ /// threshold and 1.0 indicates that the text and target are identical
+ /// </returns>
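+ // Editorial worked example (not part of the original patch): with an empty prefix, text
+ // "kitten" (n = 6) and target "sitting" (m = 7) have an edit distance of 3, so
+ // similarity = 1 - 3 / min(6, 7) = 0.5; with minimumSimilarity = 0.5 that candidate is
+ // rejected, because TermCompare requires the similarity to be strictly greater than the minimum.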
+ private float Similarity(System.String target)
+ {
+
+ int m = target.Length;
+ int n = text.Length;
+ if (n == 0)
+ {
+ //we don't have anything to compare. That means if we just add
+ //the letters for m we get the new word
+ return prefix.Length == 0 ? 0.0f : 1.0f - ((float)m / prefix.Length);
+ }
+ if (m == 0)
+ {
+ return prefix.Length == 0 ? 0.0f : 1.0f - ((float)n / prefix.Length);
+ }
+
+ int maxDistance = CalculateMaxDistance(m);
+
+ if (maxDistance < System.Math.Abs(m - n))
+ {
+ //just adding the characters of m to n or vice-versa results in
+ //too many edits
+ //for example "pre" length is 3 and "prefixes" length is 8. We can see that
+ //given this optimal circumstance, the edit distance cannot be less than 5.
+ //which is 8-3, or more precisely Math.abs(3-8).
+ //if our maximum edit distance is 4, then we can discard this word
+ //without looking at it.
+ return 0.0f;
+ }
+
+ // init the 'previous row' distance array p
+ for (int i = 0; i < n; ++i)
+ {
+ p[i] = i;
+ }
+
+ // start computing edit distance
+ for (int j = 1; j <= m; ++j)
+ {
+ int bestPossibleEditDistance = m;
+ char t_j = target[j - 1];
+ d[0] = j;
+ for (int i = 1; i <= n; ++i)
+ {
+ // minimum of cell to the left+1, to the top+1, diagonally left and up +(0|1)
+ if (t_j != text[i - 1])
+ {
+ d[i] = Math.Min(Math.Min(d[i - 1], p[i]), p[i - 1]) + 1;
+ }
+ else
+ {
+ d[i] = Math.Min(Math.Min(d[i - 1] + 1, p[i] + 1), p[i - 1]);
+ }
+ bestPossibleEditDistance = System.Math.Min(bestPossibleEditDistance, d[i]);
+ }
+
+ //After calculating row i, the best possible edit distance
+ //can be found by finding the smallest value in a given column.
+ //If the bestPossibleEditDistance is greater than the max distance, abort.
+
+ if (j > maxDistance && bestPossibleEditDistance > maxDistance)
+ {
+ //equal is okay, but not greater
+ //the closest the target can be to the text is just too far away.
+ //this target is leaving the party early.
+ return 0.0f;
+ }
+
+ // copy current distance counts to 'previous row' distance counts: swap p and d
+ int[] _d = p;
+ p = d;
+ d = _d;
+ }
+
+ // our last action in the above loop was to switch d and p, so p now
+ // actually has the most recent cost counts
+
+ // this will return less than 0.0 when the edit distance is
+ // greater than the number of characters in the shorter word.
+ // but this was the formula that was previously used in FuzzyTermEnum,
+ // so it has not been changed (even though minimumSimilarity must be
+ // greater than 0.0)
+ return 1.0f - (p[n] / (float)(prefix.Length + System.Math.Min(n, m)));
+
+ }
+
+ /// <summary> The max Distance is the maximum Levenshtein distance for the text
+ /// compared to some other value that results in score that is
+ /// better than the minimum similarity.
+ /// </summary>
+ /// <param name="m">the length of the "other value"
+ /// </param>
+ /// <returns> the maximum levenshtein distance that we care about
+ /// </returns>
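+ // Editorial example: with minimumSimilarity = 0.5, an empty prefix, text length 6 and m = 7,
+ // the threshold is (1 - 0.5) * (min(6, 7) + 0) = 3; a candidate needing more than 3 edits can
+ // never reach the required similarity.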
+ private int CalculateMaxDistance(int m)
+ {
+ return (int) ((1 - minimumSimilarity) * (System.Math.Min(text.Length, m) + prefix.Length));
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ if (isDisposed) return;
+
+ if (disposing)
+ {
+ p = null;
+ d = null;
+ searchTerm = null;
+ }
+
+ isDisposed = true;
+ base.Dispose(disposing); //call super.close() and let the garbage collector do its work.
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/HitQueue.cs b/src/core/Search/HitQueue.cs
new file mode 100644
index 0000000..925d3a6
--- /dev/null
+++ b/src/core/Search/HitQueue.cs
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Search
+{
+
+ public sealed class HitQueue : PriorityQueue<ScoreDoc>
+ {
+
+ private bool prePopulate;
+
+ /// <summary> Creates a new instance with <c>size</c> elements. If
+ /// <c>prePopulate</c> is set to true, the queue will pre-populate itself
+ /// with sentinel objects and set its <see cref="PriorityQueue{T}.Size()" /> to <c>size</c>. In
+ /// that case, you should not rely on <see cref="PriorityQueue{T}.Size()" /> to get the number of
+ /// actual elements that were added to the queue, but keep track yourself.<br/>
+ /// <b>NOTE:</b> in case <c>prePopulate</c> is true, you should pop
+ /// elements from the queue using the following code example:
+ ///
+ /// <code>
+ /// PriorityQueue pq = new HitQueue(10, true); // pre-populate.
+ /// ScoreDoc top = pq.top();
+ ///
+ /// // Add/Update one element.
+ /// top.score = 1.0f;
+ /// top.doc = 0;
+ /// top = (ScoreDoc) pq.updateTop();
+ /// int totalHits = 1;
+ ///
+ /// // Now pop only the elements that were *truly* inserted.
+ /// // First, pop all the sentinel elements (there are pq.size() - totalHits).
+ /// for (int i = pq.size() - totalHits; i &gt; 0; i--) pq.pop();
+ ///
+ /// // Now pop the truly added elements.
+ /// ScoreDoc[] results = new ScoreDoc[totalHits];
+ /// for (int i = totalHits - 1; i &gt;= 0; i--) {
+ /// results[i] = (ScoreDoc) pq.pop();
+ /// }
+ /// </code>
+ ///
+ /// <p/><b>NOTE</b>: This class pre-allocates a full array of
+ /// length <c>size</c>.
+ ///
+ /// </summary>
+ /// <param name="size">the requested size of this queue.
+ /// </param>
+ /// <param name="prePopulate">specifies whether to pre-populate the queue with sentinel values.
+ /// </param>
+ /// <seealso cref="SentinelObject">
+ /// </seealso>
+ public /*internal*/ HitQueue(int size, bool prePopulate)
+ {
+ this.prePopulate = prePopulate;
+ Initialize(size);
+ }
+
+ // Returns null if prePopulate is false.
+
+ protected internal override ScoreDoc SentinelObject
+ {
+ get
+ {
+ // Always set the doc Id to MAX_VALUE so that it won't be favored by
+ // lessThan. This generally should not happen since if score is not NEG_INF,
+ // TopScoreDocCollector will always add the object to the queue.
+ return !prePopulate ? null : new ScoreDoc(System.Int32.MaxValue, System.Single.NegativeInfinity);
+ }
+ }
+
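+ // On equal scores the hit with the larger doc id is considered "less than", so documents that
+ // appear earlier in the index win ties and stay higher in the queue.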
+ public override bool LessThan(ScoreDoc hitA, ScoreDoc hitB)
+ {
+ if (hitA.Score == hitB.Score)
+ return hitA.Doc > hitB.Doc;
+ else
+ return hitA.Score < hitB.Score;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/IndexSearcher.cs b/src/core/Search/IndexSearcher.cs
new file mode 100644
index 0000000..f77ff20
--- /dev/null
+++ b/src/core/Search/IndexSearcher.cs
@@ -0,0 +1,343 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Linq;
+using Lucene.Net.Index;
+using Document = Lucene.Net.Documents.Document;
+using FieldSelector = Lucene.Net.Documents.FieldSelector;
+using CorruptIndexException = Lucene.Net.Index.CorruptIndexException;
+using IndexReader = Lucene.Net.Index.IndexReader;
+using Term = Lucene.Net.Index.Term;
+using Directory = Lucene.Net.Store.Directory;
+using ReaderUtil = Lucene.Net.Util.ReaderUtil;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary>Implements search over a single IndexReader.
+ ///
+ /// <p/>Applications usually need only call the inherited <see cref="Searcher.Search(Query,int)" />
+ /// or <see cref="Searcher.Search(Query,Filter,int)" /> methods. For performance reasons it is
+ /// recommended to open only one IndexSearcher and use it for all of your searches.
+ ///
+ /// <a name="thread-safety"></a><p/><b>NOTE</b>:
+ /// <see cref="IndexSearcher" /> instances are completely
+ /// thread safe, meaning multiple threads can call any of their
+ /// methods concurrently. If your application requires
+ /// external synchronization, you should <b>not</b>
+ /// synchronize on the <c>IndexSearcher</c> instance;
+ /// use your own (non-Lucene) objects instead.<p/>
+ /// </summary>
+ [Serializable]
+ public class IndexSearcher : Searcher
+ {
+ internal IndexReader reader;
+ private bool closeReader;
+ private bool isDisposed;
+
+ // NOTE: these members might change in incompatible ways
+ // in the next release
+ private IndexReader[] subReaders;
+ private int[] docStarts;
+
+ /// <summary>Creates a searcher searching the index in the named
+ /// directory, with readOnly=true</summary>
+ /// <throws>CorruptIndexException if the index is corrupt</throws>
+ /// <throws>IOException if there is a low-level IO error</throws>
+ public IndexSearcher(Directory path)
+ : this(IndexReader.Open(path, true), true)
+ {
+ }
+
+ /// <summary>Creates a searcher searching the index in the named
+ /// directory. You should pass readOnly=true, since it
+ /// gives much better concurrent performance, unless you
+ /// intend to do write operations (delete documents or
+ /// change norms) with the underlying IndexReader.
+ /// </summary>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
+ /// <param name="path">directory where IndexReader will be opened
+ /// </param>
+ /// <param name="readOnly">if true, the underlying IndexReader
+ /// will be opened readOnly
+ /// </param>
+ public IndexSearcher(Directory path, bool readOnly):this(IndexReader.Open(path, readOnly), true)
+ {
+ }
+
+ /// <summary>Creates a searcher searching the provided index.
+ /// <para>
+ /// Note that the underlying IndexReader is not closed if the
+ /// IndexSearcher was constructed with IndexSearcher(IndexReader r).
+ /// If the IndexReader was supplied implicitly by specifying a directory,
+ /// then the IndexReader is closed when this searcher is disposed.
+ /// </para>
+ /// </summary>
+ public IndexSearcher(IndexReader r):this(r, false)
+ {
+ }
+
+ /// <summary>
+ /// Expert: directly specify the reader, subReaders and their
+ /// DocID starts
+ /// <p/>
+ /// <b>NOTE:</b> This API is experimental and
+ /// might change in incompatible ways in the next
+ /// release<p/>
+ /// </summary>
+ public IndexSearcher(IndexReader reader, IndexReader[] subReaders, int[] docStarts)
+ {
+ this.reader = reader;
+ this.subReaders = subReaders;
+ this.docStarts = docStarts;
+ this.closeReader = false;
+ }
+
+ private IndexSearcher(IndexReader r, bool closeReader)
+ {
+ reader = r;
+ this.closeReader = closeReader;
+
+ System.Collections.Generic.IList<IndexReader> subReadersList = new System.Collections.Generic.List<IndexReader>();
+ GatherSubReaders(subReadersList, reader);
+ subReaders = subReadersList.ToArray();
+ docStarts = new int[subReaders.Length];
+ int maxDoc = 0;
+ for (int i = 0; i < subReaders.Length; i++)
+ {
+ docStarts[i] = maxDoc;
+ maxDoc += subReaders[i].MaxDoc;
+ }
+ }
+
+ protected internal virtual void GatherSubReaders(System.Collections.Generic.IList<IndexReader> allSubReaders, IndexReader r)
+ {
+ ReaderUtil.GatherSubReaders(allSubReaders, r);
+ }
+
+ /// <summary>Return the <see cref="Index.IndexReader" /> this searches. </summary>
+ public virtual IndexReader IndexReader
+ {
+ get { return reader; }
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ if (isDisposed) return;
+
+ if (disposing)
+ {
+ if (closeReader)
+ reader.Close();
+ }
+
+ isDisposed = true;
+ }
+
+ // inherit javadoc
+ public override int DocFreq(Term term)
+ {
+ return reader.DocFreq(term);
+ }
+
+ // inherit javadoc
+ public override Document Doc(int i)
+ {
+ return reader.Document(i);
+ }
+
+ // inherit javadoc
+ public override Document Doc(int i, FieldSelector fieldSelector)
+ {
+ return reader.Document(i, fieldSelector);
+ }
+
+ // inherit javadoc
+ public override int MaxDoc
+ {
+ get { return reader.MaxDoc; }
+ }
+
+ // inherit javadoc
+ public override TopDocs Search(Weight weight, Filter filter, int nDocs)
+ {
+
+ if (nDocs <= 0)
+ {
+ throw new System.ArgumentException("nDocs must be > 0");
+ }
+ nDocs = Math.Min(nDocs, reader.MaxDoc);
+
+ TopScoreDocCollector collector = TopScoreDocCollector.Create(nDocs, !weight.GetScoresDocsOutOfOrder());
+ Search(weight, filter, collector);
+ return collector.TopDocs();
+ }
+
+ public override TopFieldDocs Search(Weight weight, Filter filter, int nDocs, Sort sort)
+ {
+ return Search(weight, filter, nDocs, sort, true);
+ }
+
+ /// <summary> Just like <see cref="Search(Weight, Filter, int, Sort)" />, but you choose
+ /// whether or not the fields in the returned <see cref="FieldDoc" /> instances
+ /// should be set by specifying fillFields.
+ /// <p/>
+ /// NOTE: this does not compute scores by default. If you need scores, create
+ /// a <see cref="TopFieldCollector" /> instance by calling
+ /// <see cref="TopFieldCollector.Create" /> and then pass that to
+ /// <see cref="Search(Weight, Filter, Collector)" />.
+ /// <p/>
+ /// </summary>
+ public virtual TopFieldDocs Search(Weight weight, Filter filter, int nDocs, Sort sort, bool fillFields)
+ {
+ nDocs = Math.Min(nDocs, reader.MaxDoc);
+
+ TopFieldCollector collector2 = TopFieldCollector.Create(sort, nDocs, fillFields, fieldSortDoTrackScores, fieldSortDoMaxScore, !weight.GetScoresDocsOutOfOrder());
+ Search(weight, filter, collector2);
+ return (TopFieldDocs) collector2.TopDocs();
+ }
+
+ public override void Search(Weight weight, Filter filter, Collector collector)
+ {
+
+ if (filter == null)
+ {
+ for (int i = 0; i < subReaders.Length; i++)
+ {
+ // search each subreader
+ collector.SetNextReader(subReaders[i], docStarts[i]);
+ Scorer scorer = weight.Scorer(subReaders[i], !collector.AcceptsDocsOutOfOrder, true);
+ if (scorer != null)
+ {
+ scorer.Score(collector);
+ }
+ }
+ }
+ else
+ {
+ for (int i = 0; i < subReaders.Length; i++)
+ {
+ // search each subreader
+ collector.SetNextReader(subReaders[i], docStarts[i]);
+ SearchWithFilter(subReaders[i], weight, filter, collector);
+ }
+ }
+ }
+
+ private void SearchWithFilter(IndexReader reader, Weight weight, Filter filter, Collector collector)
+ {
+
+ System.Diagnostics.Debug.Assert(filter != null);
+
+ Scorer scorer = weight.Scorer(reader, true, false);
+ if (scorer == null)
+ {
+ return ;
+ }
+
+ int docID = scorer.DocID();
+ System.Diagnostics.Debug.Assert(docID == - 1 || docID == DocIdSetIterator.NO_MORE_DOCS);
+
+ // CHECKME: use ConjunctionScorer here?
+ DocIdSet filterDocIdSet = filter.GetDocIdSet(reader);
+ if (filterDocIdSet == null)
+ {
+ // this means the filter does not accept any documents.
+ return ;
+ }
+
+ DocIdSetIterator filterIter = filterDocIdSet.Iterator();
+ if (filterIter == null)
+ {
+ // this means the filter does not accept any documents.
+ return ;
+ }
+ int filterDoc = filterIter.NextDoc();
+ int scorerDoc = scorer.Advance(filterDoc);
+
+ collector.SetScorer(scorer);
+ while (true)
+ {
+ if (scorerDoc == filterDoc)
+ {
+ // Check if scorer has exhausted, only before collecting.
+ if (scorerDoc == DocIdSetIterator.NO_MORE_DOCS)
+ {
+ break;
+ }
+ collector.Collect(scorerDoc);
+ filterDoc = filterIter.NextDoc();
+ scorerDoc = scorer.Advance(filterDoc);
+ }
+ else if (scorerDoc > filterDoc)
+ {
+ filterDoc = filterIter.Advance(scorerDoc);
+ }
+ else
+ {
+ scorerDoc = scorer.Advance(filterDoc);
+ }
+ }
+ }
+
+ public override Query Rewrite(Query original)
+ {
+ Query query = original;
+ for (Query rewrittenQuery = query.Rewrite(reader); rewrittenQuery != query; rewrittenQuery = query.Rewrite(reader))
+ {
+ query = rewrittenQuery;
+ }
+ return query;
+ }
+
+ public override Explanation Explain(Weight weight, int doc)
+ {
+ int n = ReaderUtil.SubIndex(doc, docStarts);
+ int deBasedDoc = doc - docStarts[n];
+
+ return weight.Explain(subReaders[n], deBasedDoc);
+ }
+
+ private bool fieldSortDoTrackScores;
+ private bool fieldSortDoMaxScore;
+
+ /// <summary> By default, no scores are computed when sorting by field (using
+ /// <see cref="Searcher.Search(Query,Filter,int,Sort)" />). You can change that, per
+ /// IndexSearcher instance, by calling this method. Note that this will incur
+ /// a CPU cost.
+ ///
+ /// </summary>
+ /// <param name="doTrackScores">If true, then scores are returned for every matching document
+ /// in <see cref="TopFieldDocs" />.
+ ///
+ /// </param>
+ /// <param name="doMaxScore">If true, then the max score for all matching docs is computed.
+ /// </param>
+ public virtual void SetDefaultFieldSortScoring(bool doTrackScores, bool doMaxScore)
+ {
+ fieldSortDoTrackScores = doTrackScores;
+ fieldSortDoMaxScore = doMaxScore;
+ }
+
+ public IndexReader reader_ForNUnit
+ {
+ get { return reader; }
+ }
+ }
+} \ No newline at end of file
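
For reference, a minimal usage sketch of the searcher above. This is only a sketch: it assumes FSDirectory and TermQuery from the same port, and "index-dir" plus the field and term names are placeholders.

    using System;
    using Lucene.Net.Index;
    using Lucene.Net.Search;
    using Lucene.Net.Store;

    class IndexSearcherSketch
    {
        static void Main()
        {
            // Open the index read-only, as the class summary recommends.
            Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo("index-dir"));
            IndexSearcher searcher = new IndexSearcher(dir, true);
            try
            {
                Query query = new TermQuery(new Term("contents", "lucene"));
                TopDocs hits = searcher.Search(query, 10);
                foreach (ScoreDoc sd in hits.ScoreDocs)
                    Console.WriteLine("doc={0} score={1}", sd.Doc, sd.Score);
            }
            finally
            {
                // Disposing the searcher also closes the reader it opened from the directory.
                searcher.Dispose();
            }
        }
    }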
diff --git a/src/core/Search/MatchAllDocsQuery.cs b/src/core/Search/MatchAllDocsQuery.cs
new file mode 100644
index 0000000..a380b35
--- /dev/null
+++ b/src/core/Search/MatchAllDocsQuery.cs
@@ -0,0 +1,198 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Index;
+using IndexReader = Lucene.Net.Index.IndexReader;
+using TermDocs = Lucene.Net.Index.TermDocs;
+using ToStringUtils = Lucene.Net.Util.ToStringUtils;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> A query that matches all documents.
+ ///
+ /// </summary>
+ [Serializable]
+ public class MatchAllDocsQuery:Query
+ {
+
+ public MatchAllDocsQuery():this(null)
+ {
+ }
+
+ private System.String normsField;
+
+ /// <param name="normsField">Field used for the normalization factor (document boost). Null if none.
+ /// </param>
+ public MatchAllDocsQuery(System.String normsField)
+ {
+ this.normsField = normsField;
+ }
+
+ private class MatchAllScorer:Scorer
+ {
+ private void InitBlock(MatchAllDocsQuery enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private MatchAllDocsQuery enclosingInstance;
+ public MatchAllDocsQuery Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ internal TermDocs termDocs;
+ internal float score;
+ internal byte[] norms;
+ private int doc = - 1;
+
+ internal MatchAllScorer(MatchAllDocsQuery enclosingInstance, IndexReader reader, Similarity similarity, Weight w, byte[] norms):base(similarity)
+ {
+ InitBlock(enclosingInstance);
+ this.termDocs = reader.TermDocs(null);
+ score = w.Value;
+ this.norms = norms;
+ }
+
+ public override int DocID()
+ {
+ return doc;
+ }
+
+ public override int NextDoc()
+ {
+ return doc = termDocs.Next() ? termDocs.Doc : NO_MORE_DOCS;
+ }
+
+ public override float Score()
+ {
+ return norms == null ? score : score * Similarity.DecodeNorm(norms[DocID()]);
+ }
+
+ public override int Advance(int target)
+ {
+ return doc = termDocs.SkipTo(target) ? termDocs.Doc : NO_MORE_DOCS;
+ }
+ }
+
+ [Serializable]
+ private class MatchAllDocsWeight:Weight
+ {
+ private void InitBlock(MatchAllDocsQuery enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private MatchAllDocsQuery enclosingInstance;
+ public MatchAllDocsQuery Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ private Similarity similarity;
+ private float queryWeight;
+ private float queryNorm;
+
+ public MatchAllDocsWeight(MatchAllDocsQuery enclosingInstance, Searcher searcher)
+ {
+ InitBlock(enclosingInstance);
+ this.similarity = searcher.Similarity;
+ }
+
+ public override System.String ToString()
+ {
+ return "weight(" + Enclosing_Instance + ")";
+ }
+
+ public override Query Query
+ {
+ get { return Enclosing_Instance; }
+ }
+
+ public override float Value
+ {
+ get { return queryWeight; }
+ }
+
+ public override float GetSumOfSquaredWeights()
+ {
+ queryWeight = Enclosing_Instance.Boost;
+ return queryWeight*queryWeight;
+ }
+
+ public override void Normalize(float queryNorm)
+ {
+ this.queryNorm = queryNorm;
+ queryWeight *= this.queryNorm;
+ }
+
+ public override Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer)
+ {
+ return new MatchAllScorer(enclosingInstance, reader, similarity, this,
+     Enclosing_Instance.normsField != null ? reader.Norms(Enclosing_Instance.normsField) : null);
+ }
+
+ public override Explanation Explain(IndexReader reader, int doc)
+ {
+ // explain query weight
+ Explanation queryExpl = new ComplexExplanation(true, Value, "MatchAllDocsQuery, product of:");
+ if (Enclosing_Instance.Boost != 1.0f)
+ {
+ queryExpl.AddDetail(new Explanation(Enclosing_Instance.Boost, "boost"));
+ }
+ queryExpl.AddDetail(new Explanation(queryNorm, "queryNorm"));
+
+ return queryExpl;
+ }
+ }
+
+ public override Weight CreateWeight(Searcher searcher)
+ {
+ return new MatchAllDocsWeight(this, searcher);
+ }
+
+ public override void ExtractTerms(System.Collections.Generic.ISet<Term> terms)
+ {
+ }
+
+ public override System.String ToString(System.String field)
+ {
+ System.Text.StringBuilder buffer = new System.Text.StringBuilder();
+ buffer.Append("*:*");
+ buffer.Append(ToStringUtils.Boost(Boost));
+ return buffer.ToString();
+ }
+
+ public override bool Equals(System.Object o)
+ {
+ if (!(o is MatchAllDocsQuery))
+ return false;
+ MatchAllDocsQuery other = (MatchAllDocsQuery) o;
+ return this.Boost == other.Boost;
+ }
+
+ public override int GetHashCode()
+ {
+ return BitConverter.ToInt32(BitConverter.GetBytes(Boost), 0) ^ 0x1AA71190;
+ }
+ }
+} \ No newline at end of file
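
A short sketch of the query above; it reuses the searcher from the IndexSearcher sketch earlier, so everything here is illustrative rather than part of the sources.

    // Match every document, e.g. to apply only a Filter or to page through the index.
    Query all = new MatchAllDocsQuery();
    TopDocs everything = searcher.Search(all, 100);

    // The overload taking a norms field factors document/field boosts into each score.
    Query boosted = new MatchAllDocsQuery("contents");
    TopDocs boostedHits = searcher.Search(boosted, 100);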
diff --git a/src/core/Search/MultiPhraseQuery.cs b/src/core/Search/MultiPhraseQuery.cs
new file mode 100644
index 0000000..38e98ca
--- /dev/null
+++ b/src/core/Search/MultiPhraseQuery.cs
@@ -0,0 +1,496 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Support;
+using IndexReader = Lucene.Net.Index.IndexReader;
+using MultipleTermPositions = Lucene.Net.Index.MultipleTermPositions;
+using Term = Lucene.Net.Index.Term;
+using TermPositions = Lucene.Net.Index.TermPositions;
+using ToStringUtils = Lucene.Net.Util.ToStringUtils;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> MultiPhraseQuery is a generalized version of PhraseQuery, with an added
+ /// method <see cref="Add(Term[])" />.
+ /// To use this class to search for the phrase "Microsoft app*", first call
+ /// Add(Term) with the term "Microsoft", then find all terms that have "app" as
+ /// a prefix using IndexReader.Terms(Term), and use MultiPhraseQuery.Add(Term[]
+ /// terms) to add them to the query.
+ ///
+ /// </summary>
+ /// <version> 1.0
+ /// </version>
+ [Serializable]
+ public class MultiPhraseQuery:Query
+ {
+ private System.String field;
+ private System.Collections.Generic.List<Term[]> termArrays = new System.Collections.Generic.List<Term[]>();
+ private System.Collections.Generic.List<int> positions = new System.Collections.Generic.List<int>();
+
+ private int slop = 0;
+
+ /// <summary>Gets or sets the phrase slop for this query.</summary>
+ /// <seealso cref="PhraseQuery.Slop">
+ /// </seealso>
+ public virtual int Slop
+ {
+ get { return slop; }
+ set { slop = value; }
+ }
+
+ /// <summary>Add a single term at the next position in the phrase.</summary>
+ /// <seealso cref="PhraseQuery.Add(Term)">
+ /// </seealso>
+ public virtual void Add(Term term)
+ {
+ Add(new Term[]{term});
+ }
+
+ /// <summary>Add multiple terms at the next position in the phrase. Any of the terms
+ /// may match.
+ ///
+ /// </summary>
+ /// <seealso cref="PhraseQuery.Add(Term)">
+ /// </seealso>
+ public virtual void Add(Term[] terms)
+ {
+ int position = 0;
+ if (positions.Count > 0)
+ position = positions[positions.Count - 1] + 1;
+
+ Add(terms, position);
+ }
+
+ /// <summary> Allows specifying the relative position of terms within the phrase.
+ ///
+ /// </summary>
+ /// <seealso cref="PhraseQuery.Add(Term, int)">
+ /// </seealso>
+ /// <param name="terms">
+ /// </param>
+ /// <param name="position">
+ /// </param>
+ public virtual void Add(Term[] terms, int position)
+ {
+ if (termArrays.Count == 0)
+ field = terms[0].Field;
+
+ for (int i = 0; i < terms.Length; i++)
+ {
+ if ((System.Object) terms[i].Field != (System.Object) field)
+ {
+ throw new System.ArgumentException("All phrase terms must be in the same field (" + field + "): " + terms[i]);
+ }
+ }
+
+ termArrays.Add(terms);
+ positions.Add(position);
+ }
+
+ /// <summary> Returns a List&lt;Term[]&gt; of the terms in the multiphrase.
+ /// Do not modify the List or its contents.
+ /// </summary>
+ [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
+ public virtual System.Collections.Generic.IList<Term[]> GetTermArrays()
+ {
+ return termArrays.AsReadOnly();
+ }
+
+ /// <summary> Returns the relative positions of terms in this phrase.</summary>
+ public virtual int[] GetPositions()
+ {
+ int[] result = new int[positions.Count];
+ for (int i = 0; i < positions.Count; i++)
+ result[i] = positions[i];
+ return result;
+ }
+
+ // inherit javadoc
+ public override void ExtractTerms(System.Collections.Generic.ISet<Term> terms)
+ {
+ foreach(Term[] arr in termArrays)
+ {
+ terms.UnionWith(arr);
+ }
+ }
+
+
+ [Serializable]
+ private class MultiPhraseWeight:Weight
+ {
+ private void InitBlock(MultiPhraseQuery enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private MultiPhraseQuery enclosingInstance;
+ public MultiPhraseQuery Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ private Similarity similarity;
+ private float value_Renamed;
+ private float idf;
+ private float queryNorm;
+ private float queryWeight;
+
+ public MultiPhraseWeight(MultiPhraseQuery enclosingInstance, Searcher searcher)
+ {
+ InitBlock(enclosingInstance);
+ this.similarity = Enclosing_Instance.GetSimilarity(searcher);
+
+ // compute idf
+ int maxDoc = searcher.MaxDoc;
+ foreach (Term[] terms in enclosingInstance.termArrays)
+ {
+ foreach (Term term in terms)
+ {
+ idf += similarity.Idf(searcher.DocFreq(term), maxDoc);
+ }
+ }
+ }
+
+ public override Query Query
+ {
+ get { return Enclosing_Instance; }
+ }
+
+ public override float Value
+ {
+ get { return value_Renamed; }
+ }
+
+ public override float GetSumOfSquaredWeights()
+ {
+ queryWeight = idf*Enclosing_Instance.Boost; // compute query weight
+ return queryWeight*queryWeight; // square it
+ }
+
+ public override void Normalize(float queryNorm)
+ {
+ this.queryNorm = queryNorm;
+ queryWeight *= queryNorm; // normalize query weight
+ value_Renamed = queryWeight * idf; // idf for document
+ }
+
+ public override Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer)
+ {
+ if (Enclosing_Instance.termArrays.Count == 0)
+ // optimize zero-term case
+ return null;
+
+ TermPositions[] tps = new TermPositions[Enclosing_Instance.termArrays.Count];
+ for (int i = 0; i < tps.Length; i++)
+ {
+ Term[] terms = Enclosing_Instance.termArrays[i];
+
+ TermPositions p;
+ if (terms.Length > 1)
+ p = new MultipleTermPositions(reader, terms);
+ else
+ p = reader.TermPositions(terms[0]);
+
+ if (p == null)
+ return null;
+
+ tps[i] = p;
+ }
+
+ if (Enclosing_Instance.slop == 0)
+ return new ExactPhraseScorer(this, tps, Enclosing_Instance.GetPositions(), similarity, reader.Norms(Enclosing_Instance.field));
+ else
+ return new SloppyPhraseScorer(this, tps, Enclosing_Instance.GetPositions(), similarity, Enclosing_Instance.slop, reader.Norms(Enclosing_Instance.field));
+ }
+
+ public override Explanation Explain(IndexReader reader, int doc)
+ {
+ ComplexExplanation result = new ComplexExplanation();
+ result.Description = "weight(" + Query + " in " + doc + "), product of:";
+
+ Explanation idfExpl = new Explanation(idf, "idf(" + Query + ")");
+
+ // explain query weight
+ Explanation queryExpl = new Explanation();
+ queryExpl.Description = "queryWeight(" + Query + "), product of:";
+
+ Explanation boostExpl = new Explanation(Enclosing_Instance.Boost, "boost");
+ if (Enclosing_Instance.Boost != 1.0f)
+ queryExpl.AddDetail(boostExpl);
+
+ queryExpl.AddDetail(idfExpl);
+
+ Explanation queryNormExpl = new Explanation(queryNorm, "queryNorm");
+ queryExpl.AddDetail(queryNormExpl);
+
+ queryExpl.Value = boostExpl.Value * idfExpl.Value * queryNormExpl.Value;
+
+ result.AddDetail(queryExpl);
+
+ // explain field weight
+ ComplexExplanation fieldExpl = new ComplexExplanation();
+ fieldExpl.Description = "fieldWeight(" + Query + " in " + doc + "), product of:";
+
+ PhraseScorer scorer = (PhraseScorer)Scorer(reader, true, false);
+ if (scorer == null)
+ {
+ return new Explanation(0.0f, "no matching docs");
+ }
+ Explanation tfExplanation = new Explanation();
+ int d = scorer.Advance(doc);
+ float phraseFreq = (d == doc) ? scorer.CurrentFreq() : 0.0f;
+ tfExplanation.Value = similarity.Tf(phraseFreq);
+ tfExplanation.Description = "tf(phraseFreq=" + phraseFreq + ")";
+ fieldExpl.AddDetail(tfExplanation);
+ fieldExpl.AddDetail(idfExpl);
+
+ Explanation fieldNormExpl = new Explanation();
+ byte[] fieldNorms = reader.Norms(Enclosing_Instance.field);
+ float fieldNorm = fieldNorms != null ? Similarity.DecodeNorm(fieldNorms[doc]) : 1.0f;
+ fieldNormExpl.Value = fieldNorm;
+ fieldNormExpl.Description = "fieldNorm(field=" + Enclosing_Instance.field + ", doc=" + doc + ")";
+ fieldExpl.AddDetail(fieldNormExpl);
+
+ fieldExpl.Match = tfExplanation.IsMatch;
+ fieldExpl.Value = tfExplanation.Value * idfExpl.Value * fieldNormExpl.Value;
+
+ result.AddDetail(fieldExpl);
+ System.Boolean? tempAux = fieldExpl.Match;
+ result.Match = tempAux;
+
+ // combine them
+ result.Value = queryExpl.Value * fieldExpl.Value;
+
+ if (queryExpl.Value == 1.0f)
+ return fieldExpl;
+
+ return result;
+ }
+ }
+
+ public override Query Rewrite(IndexReader reader)
+ {
+ if (termArrays.Count == 1)
+ {
+ // optimize one-term case
+ Term[] terms = termArrays[0];
+ BooleanQuery boq = new BooleanQuery(true);
+ for (int i = 0; i < terms.Length; i++)
+ {
+ boq.Add(new TermQuery(terms[i]), Occur.SHOULD);
+ }
+ boq.Boost = Boost;
+ return boq;
+ }
+ else
+ {
+ return this;
+ }
+ }
+
+ public override Weight CreateWeight(Searcher searcher)
+ {
+ return new MultiPhraseWeight(this, searcher);
+ }
+
+ /// <summary>Prints a user-readable version of this query. </summary>
+ public override System.String ToString(System.String f)
+ {
+ System.Text.StringBuilder buffer = new System.Text.StringBuilder();
+ if (!field.Equals(f))
+ {
+ buffer.Append(field);
+ buffer.Append(":");
+ }
+
+ buffer.Append("\"");
+ System.Collections.Generic.IEnumerator<Term[]> i = termArrays.GetEnumerator();
+ bool first = true;
+ while (i.MoveNext())
+ {
+ if (first)
+ {
+ first = false;
+ }
+ else
+ {
+ buffer.Append(" ");
+ }
+
+ Term[] terms = i.Current;
+ if (terms.Length > 1)
+ {
+ buffer.Append("(");
+ for (int j = 0; j < terms.Length; j++)
+ {
+ buffer.Append(terms[j].Text);
+ if (j < terms.Length - 1)
+ buffer.Append(" ");
+ }
+ buffer.Append(")");
+ }
+ else
+ {
+ buffer.Append(terms[0].Text);
+ }
+ }
+ buffer.Append("\"");
+
+ if (slop != 0)
+ {
+ buffer.Append("~");
+ buffer.Append(slop);
+ }
+
+ buffer.Append(ToStringUtils.Boost(Boost));
+
+ return buffer.ToString();
+ }
+
+
+ /// <summary>Returns true if <c>o</c> is equal to this. </summary>
+ public override bool Equals(System.Object o)
+ {
+ if (!(o is MultiPhraseQuery))
+ return false;
+ MultiPhraseQuery other = (MultiPhraseQuery) o;
+ bool eq = this.Boost == other.Boost && this.slop == other.slop;
+ if(!eq)
+ {
+ return false;
+ }
+ eq = this.termArrays.Count.Equals(other.termArrays.Count);
+ if (!eq)
+ {
+ return false;
+ }
+
+ for (int i = 0; i < this.termArrays.Count; i++)
+ {
+ if (!Compare.CompareTermArrays((Term[])this.termArrays[i], (Term[])other.termArrays[i]))
+ {
+ return false;
+ }
+ }
+ eq = this.positions.Count.Equals(other.positions.Count);
+ if (!eq)
+ {
+ return false;
+ }
+ for (int i = 0; i < this.positions.Count; i++)
+ {
+ if (!((int)this.positions[i] == (int)other.positions[i]))
+ {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ /// <summary>Returns a hash code value for this object.</summary>
+ public override int GetHashCode()
+ {
+ int posHash = 0;
+ foreach(int pos in positions)
+ {
+ posHash += pos.GetHashCode();
+ }
+ return BitConverter.ToInt32(BitConverter.GetBytes(Boost), 0) ^ slop ^ TermArraysHashCode() ^ posHash ^ 0x4AC65113;
+ }
+
+ // Breakout calculation of the termArrays hashcode
+ private int TermArraysHashCode()
+ {
+ int hashCode = 1;
+ foreach(Term[] termArray in termArrays)
+ {
+ // Java uses Arrays.hashCode(termArray)
+ hashCode = 31*hashCode + (termArray == null ? 0 : ArraysHashCode(termArray));
+ }
+ return hashCode;
+ }
+
+ private int ArraysHashCode(Term[] termArray)
+ {
+ if (termArray == null)
+ return 0;
+
+ int result = 1;
+
+ for (int i = 0; i < termArray.Length; i++)
+ {
+ Term term = termArray[i];
+ result = 31 * result + (term == null?0:term.GetHashCode());
+ }
+
+ return result;
+ }
+
+ // Breakout calculation of the termArrays equals
+ private bool TermArraysEquals(System.Collections.Generic.List<Term[]> termArrays1, System.Collections.Generic.List<Term[]> termArrays2)
+ {
+ if (termArrays1.Count != termArrays2.Count)
+ {
+ return false;
+ }
+ var iterator1 = termArrays1.GetEnumerator();
+ var iterator2 = termArrays2.GetEnumerator();
+ while (iterator1.MoveNext())
+ {
+ Term[] termArray1 = iterator1.Current;
+ Term[] termArray2 = iterator2.Current;
+ if (!(termArray1 == null ? termArray2 == null : TermEquals(termArray1, termArray2)))
+ {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ public static bool TermEquals(System.Array array1, System.Array array2)
+ {
+ bool result = false;
+ if ((array1 == null) && (array2 == null))
+ result = true;
+ else if ((array1 != null) && (array2 != null))
+ {
+ if (array1.Length == array2.Length)
+ {
+ int length = array1.Length;
+ result = true;
+ for (int index = 0; index < length; index++)
+ {
+ if (!(array1.GetValue(index).Equals(array2.GetValue(index))))
+ {
+ result = false;
+ break;
+ }
+ }
+ }
+ }
+ return result;
+ }
+ }
+} \ No newline at end of file
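
A sketch of the "Microsoft app*" usage described in the class summary. It assumes an open IndexReader (reader) and IndexSearcher (searcher); the field name "body" is a placeholder, and the prefix is assumed to expand to at least one term.

    MultiPhraseQuery mpq = new MultiPhraseQuery();
    mpq.Add(new Term("body", "microsoft"));            // position 0: the literal term

    // Expand the prefix "app" into concrete terms via the reader's term dictionary.
    var expansions = new System.Collections.Generic.List<Term>();
    TermEnum te = reader.Terms(new Term("body", "app"));
    try
    {
        do
        {
            Term t = te.Term;
            if (t == null || t.Field != "body" || !t.Text.StartsWith("app"))
                break;
            expansions.Add(t);
        }
        while (te.Next());
    }
    finally { te.Close(); }

    mpq.Add(expansions.ToArray());                     // position 1: any expansion may match
    TopDocs hits = searcher.Search(mpq, 10);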
diff --git a/src/core/Search/MultiSearcher.cs b/src/core/Search/MultiSearcher.cs
new file mode 100644
index 0000000..3d6ef07
--- /dev/null
+++ b/src/core/Search/MultiSearcher.cs
@@ -0,0 +1,458 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using Document = Lucene.Net.Documents.Document;
+using FieldSelector = Lucene.Net.Documents.FieldSelector;
+using CorruptIndexException = Lucene.Net.Index.CorruptIndexException;
+using IndexReader = Lucene.Net.Index.IndexReader;
+using Term = Lucene.Net.Index.Term;
+using ReaderUtil = Lucene.Net.Util.ReaderUtil;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary>Implements search over a set of <c>Searchables</c>.
+ ///
+ /// <p/>Applications usually need only call the inherited <see cref="Searcher.Search(Query, int)" />
+ /// or <see cref="Searcher.Search(Query,Filter, int)" /> methods.
+ /// </summary>
+ public class MultiSearcher:Searcher
+ {
+ private class AnonymousClassCollector:Collector
+ {
+ public AnonymousClassCollector(Lucene.Net.Search.Collector collector, int start, MultiSearcher enclosingInstance)
+ {
+ InitBlock(collector, start, enclosingInstance);
+ }
+ private void InitBlock(Lucene.Net.Search.Collector collector, int start, MultiSearcher enclosingInstance)
+ {
+ this.collector = collector;
+ this.start = start;
+ this.enclosingInstance = enclosingInstance;
+ }
+ private Lucene.Net.Search.Collector collector;
+ private int start;
+ private MultiSearcher enclosingInstance;
+ public MultiSearcher Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ public override void SetScorer(Scorer scorer)
+ {
+ collector.SetScorer(scorer);
+ }
+ public override void Collect(int doc)
+ {
+ collector.Collect(doc);
+ }
+ public override void SetNextReader(IndexReader reader, int docBase)
+ {
+ collector.SetNextReader(reader, start + docBase);
+ }
+
+ public override bool AcceptsDocsOutOfOrder
+ {
+ get { return collector.AcceptsDocsOutOfOrder; }
+ }
+ }
+
+ /// <summary> Document frequency cache acting as a dummy Searcher. This class is not a
+ /// full-fledged Searcher; it only supports the methods necessary to
+ /// initialize Weights.
+ /// </summary>
+ private class CachedDfSource:Searcher
+ {
+ private readonly Dictionary<Term,int> dfMap; // Map from Terms to corresponding doc freqs
+ private readonly int maxDoc; // document count
+
+ public CachedDfSource(Dictionary<Term,int> dfMap, int maxDoc, Similarity similarity)
+ {
+ this.dfMap = dfMap;
+ this.maxDoc = maxDoc;
+ Similarity = similarity;
+ }
+
+ public override int DocFreq(Term term)
+ {
+ int df;
+ try
+ {
+ df = dfMap[term];
+ }
+ catch (KeyNotFoundException) // C# equiv. of java code.
+ {
+ throw new System.ArgumentException("df for term " + term.Text + " not available");
+ }
+ return df;
+ }
+
+ public override int[] DocFreqs(Term[] terms)
+ {
+ int[] result = new int[terms.Length];
+ for (int i = 0; i < terms.Length; i++)
+ {
+ result[i] = DocFreq(terms[i]);
+ }
+ return result;
+ }
+
+ public override int MaxDoc
+ {
+ get { return maxDoc; }
+ }
+
+ public override Query Rewrite(Query query)
+ {
+ // this is a bit of a hack. We know that a query which
+ // creates a Weight based on this Dummy-Searcher is
+ // always already rewritten (see CreateWeight()).
+ // Therefore we just return the unmodified query here.
+ return query;
+ }
+
+ // TODO: This probably shouldn't throw an exception?
+ protected override void Dispose(bool disposing)
+ {
+ throw new System.NotSupportedException();
+ }
+
+ public override Document Doc(int i)
+ {
+ throw new System.NotSupportedException();
+ }
+
+ public override Document Doc(int i, FieldSelector fieldSelector)
+ {
+ throw new System.NotSupportedException();
+ }
+
+ public override Explanation Explain(Weight weight, int doc)
+ {
+ throw new System.NotSupportedException();
+ }
+
+ public override void Search(Weight weight, Filter filter, Collector results)
+ {
+ throw new System.NotSupportedException();
+ }
+
+ public override TopDocs Search(Weight weight, Filter filter, int n)
+ {
+ throw new System.NotSupportedException();
+ }
+
+ public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
+ {
+ throw new System.NotSupportedException();
+ }
+ }
+
+ private Searchable[] searchables;
+ private int[] starts;
+ private int maxDoc = 0;
+
+ private bool isDisposed;
+
+ /// <summary>Creates a searcher which searches <i>searchers</i>. </summary>
+ public MultiSearcher(params Searchable[] searchables)
+ {
+ this.searchables = searchables;
+
+ starts = new int[searchables.Length + 1]; // build starts array
+ for (int i = 0; i < searchables.Length; i++)
+ {
+ starts[i] = maxDoc;
+ maxDoc += searchables[i].MaxDoc; // compute maxDocs
+ }
+ starts[searchables.Length] = maxDoc;
+ }
+
+ /// <summary>Return the array of <see cref="Searchable" />s this searches. </summary>
+ public virtual Searchable[] GetSearchables()
+ {
+ return searchables;
+ }
+
+ protected internal virtual int[] GetStarts()
+ {
+ return starts;
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ if (isDisposed) return;
+
+ if (disposing)
+ {
+ for (int i = 0; i < searchables.Length; i++)
+ searchables[i].Close();
+ }
+
+ isDisposed = true;
+ }
+
+ public override int DocFreq(Term term)
+ {
+ int docFreq = 0;
+ for (int i = 0; i < searchables.Length; i++)
+ docFreq += searchables[i].DocFreq(term);
+ return docFreq;
+ }
+
+ // inherit javadoc
+ public override Document Doc(int n)
+ {
+ int i = SubSearcher(n); // find searcher index
+ return searchables[i].Doc(n - starts[i]); // dispatch to searcher
+ }
+
+ // inherit javadoc
+ public override Document Doc(int n, FieldSelector fieldSelector)
+ {
+ int i = SubSearcher(n); // find searcher index
+ return searchables[i].Doc(n - starts[i], fieldSelector); // dispatch to searcher
+ }
+
+ /// <summary>Returns index of the searcher for document <c>n</c> in the array
+ /// used to construct this searcher.
+ /// </summary>
+ public virtual int SubSearcher(int n)
+ {
+ // find searcher for doc n:
+ return ReaderUtil.SubIndex(n, starts);
+ }
+
+ /// <summary>Returns the document number of document <c>n</c> within its
+ /// sub-index.
+ /// </summary>
+ public virtual int SubDoc(int n)
+ {
+ return n - starts[SubSearcher(n)];
+ }
+
+ public override int MaxDoc
+ {
+ get { return maxDoc; }
+ }
+
+ public override TopDocs Search(Weight weight, Filter filter, int nDocs)
+ {
+ HitQueue hq = new HitQueue(nDocs, false);
+ int totalHits = 0;
+
+ var lockObj = new object();
+ for (int i = 0; i < searchables.Length; i++)
+ {
+ // search each searcher
+ // use NullLock, we don't care about synchronization for these
+ TopDocs docs = MultiSearcherCallableNoSort(ThreadLock.NullLock, lockObj, searchables[i], weight, filter, nDocs, hq, i, starts);
+ totalHits += docs.TotalHits; // update totalHits
+ }
+
+ ScoreDoc[] scoreDocs2 = new ScoreDoc[hq.Size()];
+ for (int i = hq.Size() - 1; i >= 0; i--)
+ // put docs in array
+ scoreDocs2[i] = hq.Pop();
+
+ float maxScore = (totalHits == 0) ? System.Single.NegativeInfinity : scoreDocs2[0].Score;
+
+ return new TopDocs(totalHits, scoreDocs2, maxScore);
+ }
+
+ public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
+ {
+ var hq = new FieldDocSortedHitQueue(n);
+ int totalHits = 0;
+
+ float maxScore = System.Single.NegativeInfinity;
+
+ var lockObj = new object();
+ for (int i = 0; i < searchables.Length; i++)
+ {
+ // search each searcher
+ // use NullLock, we don't care about synchronization for these
+ TopFieldDocs docs = MultiSearcherCallableWithSort(ThreadLock.NullLock, lockObj, searchables[i], weight, filter, n, hq, sort,
+ i, starts);
+ totalHits += docs.TotalHits;
+ maxScore = System.Math.Max(maxScore, docs.MaxScore);
+ }
+
+ ScoreDoc[] scoreDocs2 = new ScoreDoc[hq.Size()];
+ for (int i = hq.Size() - 1; i >= 0; i--)
+ // put docs in array
+ scoreDocs2[i] = hq.Pop();
+
+ return new TopFieldDocs(totalHits, scoreDocs2, hq.GetFields(), maxScore);
+ }
+
+ ///<inheritdoc />
+ public override void Search(Weight weight, Filter filter, Collector collector)
+ {
+ for (int i = 0; i < searchables.Length; i++)
+ {
+ int start = starts[i];
+
+ Collector hc = new AnonymousClassCollector(collector, start, this);
+ searchables[i].Search(weight, filter, hc);
+ }
+ }
+
+ public override Query Rewrite(Query original)
+ {
+ Query[] queries = new Query[searchables.Length];
+ for (int i = 0; i < searchables.Length; i++)
+ {
+ queries[i] = searchables[i].Rewrite(original);
+ }
+ return queries[0].Combine(queries);
+ }
+
+ public override Explanation Explain(Weight weight, int doc)
+ {
+ int i = SubSearcher(doc); // find searcher index
+ return searchables[i].Explain(weight, doc - starts[i]); // dispatch to searcher
+ }
+
+ /// <summary> Creates a Weight in a multiple-index scenario.
+ ///
+ /// Distributed query processing is done in the following steps:
+ /// 1. rewrite query
+ /// 2. extract necessary terms
+ /// 3. collect dfs for these terms from the Searchables
+ /// 4. create query weight using aggregate dfs.
+ /// 5. distribute that weight to Searchables
+ /// 6. merge results
+ ///
+ /// Steps 1-4 are done here, 5+6 in the search() methods
+ ///
+ /// </summary>
+ /// <returns> the Weight built from the rewritten query and the aggregated document frequencies
+ /// </returns>
+ public /*protected internal*/ override Weight CreateWeight(Query original)
+ {
+ // step 1
+ Query rewrittenQuery = Rewrite(original);
+
+ // step 2
+ ISet<Term> terms = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet<Term>();
+ rewrittenQuery.ExtractTerms(terms);
+
+ // step3
+ Term[] allTermsArray = terms.ToArray();
+ int[] aggregatedDfs = new int[terms.Count];
+ for (int i = 0; i < searchables.Length; i++)
+ {
+ int[] dfs = searchables[i].DocFreqs(allTermsArray);
+ for (int j = 0; j < aggregatedDfs.Length; j++)
+ {
+ aggregatedDfs[j] += dfs[j];
+ }
+ }
+
+ var dfMap = new Dictionary<Term, int>();
+ for (int i = 0; i < allTermsArray.Length; i++)
+ {
+ dfMap[allTermsArray[i]] = aggregatedDfs[i];
+ }
+
+ // step4
+ int numDocs = MaxDoc;
+ CachedDfSource cacheSim = new CachedDfSource(dfMap, numDocs, Similarity);
+
+ return rewrittenQuery.Weight(cacheSim);
+ }
+
+ internal Func<ThreadLock, object, Searchable, Weight, Filter, int, HitQueue, int, int[], TopDocs> MultiSearcherCallableNoSort =
+ (threadLock, lockObj, searchable, weight, filter, nDocs, hq, i, starts) =>
+ {
+ TopDocs docs = searchable.Search(weight, filter, nDocs);
+ ScoreDoc[] scoreDocs = docs.ScoreDocs;
+ for(int j = 0; j < scoreDocs.Length; j++) // merge scoreDocs into hq
+ {
+ ScoreDoc scoreDoc = scoreDocs[j];
+ scoreDoc.Doc += starts[i]; //convert doc
+ //it would be so nice if we had a thread-safe insert
+ try
+ {
+ threadLock.Enter(lockObj);
+ if (scoreDoc == hq.InsertWithOverflow(scoreDoc))
+ break;
+ }
+ finally
+ {
+ threadLock.Exit(lockObj);
+ }
+ }
+ return docs;
+ };
+
+ internal Func<ThreadLock, object, Searchable, Weight, Filter, int, FieldDocSortedHitQueue, Sort, int, int[], TopFieldDocs>
+ MultiSearcherCallableWithSort = (threadLock, lockObj, searchable, weight, filter, nDocs, hq, sort, i, starts) =>
+ {
+ TopFieldDocs docs = searchable.Search(weight, filter, nDocs, sort);
+ // if one of the Sort fields is FIELD_DOC, need to fix its values, so that
+ // it will break ties by doc Id properly. Otherwise it would compare
+ // 'relative' doc ids that belong to two different searchables.
+ for (int j = 0; j < docs.fields.Length; j++)
+ {
+ if (docs.fields[j].Type == SortField.DOC)
+ {
+ // iterate over the score docs and change their fields value
+ for (int j2 = 0; j2 < docs.ScoreDocs.Length; j2++)
+ {
+ FieldDoc fd = (FieldDoc) docs.ScoreDocs[j2];
+ fd.fields[j] = (int)fd.fields[j] + starts[i];
+ }
+ break;
+ }
+ }
+ try
+ {
+ threadLock.Enter(lockObj);
+ hq.SetFields(docs.fields);
+ }
+ finally
+ {
+ threadLock.Exit(lockObj);
+ }
+
+ ScoreDoc[] scoreDocs = docs.ScoreDocs;
+ for (int j = 0; j < scoreDocs.Length; j++) // merge scoreDocs into hq
+ {
+ FieldDoc fieldDoc = (FieldDoc) scoreDocs[j];
+ fieldDoc.Doc += starts[i]; //convert doc
+ //it would be so nice if we had a thread-safe insert
+ lock (lockObj)
+ {
+ if (fieldDoc == hq.InsertWithOverflow(fieldDoc))
+ break;
+
+ }
+ }
+ return docs;
+ };
+ }
+} \ No newline at end of file
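
A sketch of searching two indexes as one, per the class summary; dir1 and dir2 are placeholder Directory instances and the field/term are illustrative.

    MultiSearcher multi = new MultiSearcher(
        new IndexSearcher(dir1, true),
        new IndexSearcher(dir2, true));

    TopDocs merged = multi.Search(new TermQuery(new Term("contents", "lucene")), 20);

    // Doc ids in the merged result are global; map them back when needed.
    int whichSearcher = multi.SubSearcher(merged.ScoreDocs[0].Doc);  // index into the searchables
    int localDoc = multi.SubDoc(merged.ScoreDocs[0].Doc);            // doc id within that sub-index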
diff --git a/src/core/Search/MultiTermQuery.cs b/src/core/Search/MultiTermQuery.cs
new file mode 100644
index 0000000..430a521
--- /dev/null
+++ b/src/core/Search/MultiTermQuery.cs
@@ -0,0 +1,465 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+using IndexReader = Lucene.Net.Index.IndexReader;
+using Term = Lucene.Net.Index.Term;
+using QueryParser = Lucene.Net.QueryParsers.QueryParser;
+using ToStringUtils = Lucene.Net.Util.ToStringUtils;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> An abstract <see cref="Query" /> that matches documents
+ /// containing a subset of terms provided by a <see cref="FilteredTermEnum" />
+ /// enumeration.
+ ///
+ /// <p/>This query cannot be used directly; you must subclass
+ /// it and define <see cref="GetEnum" /> to provide a <see cref="FilteredTermEnum" />
+ /// that iterates through the terms to be
+ /// matched.
+ ///
+ /// <p/><b>NOTE</b>: if <see cref="RewriteMethod" /> is either
+ /// <see cref="CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE" /> or <see cref="SCORING_BOOLEAN_QUERY_REWRITE" />
+ ///, you may encounter a
+ /// <see cref="BooleanQuery.TooManyClauses" /> exception during
+ /// searching, which happens when the number of terms to be
+ /// searched exceeds <see cref="BooleanQuery.MaxClauseCount" />
+ ///. Setting <see cref="RewriteMethod" />
+ /// to <see cref="CONSTANT_SCORE_FILTER_REWRITE" />
+ /// prevents this.
+ ///
+ /// <p/>The recommended rewrite method is <see cref="CONSTANT_SCORE_AUTO_REWRITE_DEFAULT" />
+ ///: it doesn't spend CPU
+ /// computing unhelpful scores, and it tries to pick the most
+ /// performant rewrite method given the query.
+ ///
+ /// Note that <see cref="QueryParser" /> produces
+ /// MultiTermQueries using <see cref="CONSTANT_SCORE_AUTO_REWRITE_DEFAULT" />
+ /// by default.
+ /// </summary>
+ [Serializable]
+ public abstract class MultiTermQuery:Query
+ {
+ [Serializable]
+ public class AnonymousClassConstantScoreAutoRewrite:ConstantScoreAutoRewrite
+ {
+ public override int TermCountCutoff
+ {
+ set { throw new System.NotSupportedException("Please create a private instance"); }
+ }
+
+ public override double DocCountPercent
+ {
+ set { throw new System.NotSupportedException("Please create a private instance"); }
+ }
+
+ // Make sure we are still a singleton even after deserializing
+ protected internal virtual System.Object ReadResolve()
+ {
+ return Lucene.Net.Search.MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
+ }
+ }
+ protected internal RewriteMethod internalRewriteMethod = CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
+ [NonSerialized]
+ internal int numberOfTerms = 0;
+
+ [Serializable]
+ private sealed class ConstantScoreFilterRewrite:RewriteMethod
+ {
+ public override Query Rewrite(IndexReader reader, MultiTermQuery query)
+ {
+ Query result = new ConstantScoreQuery(new MultiTermQueryWrapperFilter<MultiTermQuery>(query));
+ result.Boost = query.Boost;
+ return result;
+ }
+
+ // Make sure we are still a singleton even after deserializing
+ internal System.Object ReadResolve()
+ {
+ return Lucene.Net.Search.MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE;
+ }
+ }
+
+ /// <summary>A rewrite method that first creates a private Filter,
+ /// by visiting each term in sequence and marking all docs
+ /// for that term. Matching documents are assigned a
+ /// constant score equal to the query's boost.
+ ///
+ /// <p/> This method is faster than the BooleanQuery
+ /// rewrite methods when the number of matched terms or
+ /// matched documents is non-trivial. Also, it will never
+ /// hit an errant <see cref="BooleanQuery.TooManyClauses" />
+ /// exception.
+ ///
+ /// </summary>
+ /// <seealso cref="RewriteMethod">
+ /// </seealso>
+ public static readonly RewriteMethod CONSTANT_SCORE_FILTER_REWRITE = new ConstantScoreFilterRewrite();
+
+ [Serializable]
+ private class ScoringBooleanQueryRewrite:RewriteMethod
+ {
+ public override Query Rewrite(IndexReader reader, MultiTermQuery query)
+ {
+
+ FilteredTermEnum enumerator = query.GetEnum(reader);
+ BooleanQuery result = new BooleanQuery(true);
+ int count = 0;
+ try
+ {
+ do
+ {
+ Term t = enumerator.Term;
+ if (t != null)
+ {
+ TermQuery tq = new TermQuery(t); // found a match
+ tq.Boost = query.Boost * enumerator.Difference(); // set the boost
+ result.Add(tq, Occur.SHOULD); // add to query
+ count++;
+ }
+ }
+ while (enumerator.Next());
+ }
+ finally
+ {
+ enumerator.Close();
+ }
+ query.IncTotalNumberOfTerms(count);
+ return result;
+ }
+
+ // Make sure we are still a singleton even after deserializing
+ protected internal virtual System.Object ReadResolve()
+ {
+ return Lucene.Net.Search.MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE;
+ }
+ }
+
+ /// <summary>A rewrite method that first translates each term into
+ /// a <see cref="Occur.SHOULD" /> clause in a
+ /// BooleanQuery, and keeps the scores as computed by the
+ /// query. Note that typically such scores are
+ /// meaningless to the user, and require non-trivial CPU
+ /// to compute, so it's almost always better to use <see cref="CONSTANT_SCORE_AUTO_REWRITE_DEFAULT" />
+ /// instead.
+ ///
+ /// <p/><b>NOTE</b>: This rewrite method will hit <see cref="BooleanQuery.TooManyClauses" />
+ /// if the number of terms
+ /// exceeds <see cref="BooleanQuery.MaxClauseCount" />.
+ ///
+ /// </summary>
+ /// <seealso cref="RewriteMethod">
+ /// </seealso>
+ public static readonly RewriteMethod SCORING_BOOLEAN_QUERY_REWRITE = new ScoringBooleanQueryRewrite();
+
+ [Serializable]
+ private class ConstantScoreBooleanQueryRewrite:ScoringBooleanQueryRewrite
+ {
+ public override Query Rewrite(IndexReader reader, MultiTermQuery query)
+ {
+ // strip the scores off
+ Query result = new ConstantScoreQuery(new QueryWrapperFilter(base.Rewrite(reader, query)));
+ result.Boost = query.Boost;
+ return result;
+ }
+
+ // Make sure we are still a singleton even after deserializing
+ protected internal override System.Object ReadResolve()
+ {
+ return Lucene.Net.Search.MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE;
+ }
+ }
+
+ /// <summary>Like <see cref="SCORING_BOOLEAN_QUERY_REWRITE" /> except
+ /// scores are not computed. Instead, each matching
+ /// document receives a constant score equal to the
+ /// query's boost.
+ ///
+ /// <p/><b>NOTE</b>: This rewrite method will hit <see cref="BooleanQuery.TooManyClauses" />
+ /// if the number of terms
+ /// exceeds <see cref="BooleanQuery.MaxClauseCount" />.
+ ///
+ /// </summary>
+ /// <seealso cref="RewriteMethod">
+ /// </seealso>
+ public static readonly RewriteMethod CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE = new ConstantScoreBooleanQueryRewrite();
+
+
+ /// <summary>A rewrite method that tries to pick the best
+ /// constant-score rewrite method based on term and
+ /// document counts from the query. If both the number of
+ /// terms and documents is small enough, then <see cref="CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE" />
+ /// is used.
+ /// Otherwise, <see cref="CONSTANT_SCORE_FILTER_REWRITE" /> is
+ /// used.
+ /// </summary>
+ [Serializable]
+ public class ConstantScoreAutoRewrite:RewriteMethod
+ {
+ public ConstantScoreAutoRewrite()
+ {
+ InitBlock();
+ }
+ private void InitBlock()
+ {
+ termCountCutoff = DEFAULT_TERM_COUNT_CUTOFF;
+ docCountPercent = DEFAULT_DOC_COUNT_PERCENT;
+ }
+
+ // Defaults derived from rough tests with a 20.0 million
+ // doc Wikipedia index. With more than 350 terms in the
+ // query, the filter method is fastest:
+ public static int DEFAULT_TERM_COUNT_CUTOFF = 350;
+
+ // If the query will hit more than 1 in 1000 of the docs
+ // in the index (0.1%), the filter method is fastest:
+ public static double DEFAULT_DOC_COUNT_PERCENT = 0.1;
+
+ private int termCountCutoff;
+ private double docCountPercent;
+
+ /// <summary>If the number of terms in this query is equal to or
+ /// larger than this setting then <see cref="CONSTANT_SCORE_FILTER_REWRITE" />
+ /// is used.
+ /// </summary>
+ public virtual int TermCountCutoff
+ {
+ get { return termCountCutoff; }
+ set { termCountCutoff = value; }
+ }
+
+ /// <summary>If the number of documents to be visited in the
+ /// postings exceeds this specified percentage of the
+ /// MaxDoc for the index, then <see cref="CONSTANT_SCORE_FILTER_REWRITE" />
+ /// is used.
+ /// </summary>
+ /// <value> 0.0 to 100.0 </value>
+ public virtual double DocCountPercent
+ {
+ get { return docCountPercent; }
+ set { docCountPercent = value; }
+ }
+
+ public override Query Rewrite(IndexReader reader, MultiTermQuery query)
+ {
+ // Get the enum and start visiting terms. If we
+ // exhaust the enum before hitting either of the
+ // cutoffs, we use ConstantBooleanQueryRewrite; else,
+ // ConstantFilterRewrite:
+ ICollection<Term> pendingTerms = new List<Term>();
+ int docCountCutoff = (int) ((docCountPercent / 100.0) * reader.MaxDoc);
+ int termCountLimit = System.Math.Min(BooleanQuery.MaxClauseCount, termCountCutoff);
+ int docVisitCount = 0;
+
+ FilteredTermEnum enumerator = query.GetEnum(reader);
+ try
+ {
+ while (true)
+ {
+ Term t = enumerator.Term;
+ if (t != null)
+ {
+ pendingTerms.Add(t);
+ // Loading the TermInfo from the terms dict here
+ // should not be costly, because 1) the
+ // query/filter will load the TermInfo when it
+ // runs, and 2) the terms dict has a cache:
+ docVisitCount += reader.DocFreq(t);
+ }
+
+ if (pendingTerms.Count >= termCountLimit || docVisitCount >= docCountCutoff)
+ {
+ // Too many terms -- make a filter.
+ Query result = new ConstantScoreQuery(new MultiTermQueryWrapperFilter<MultiTermQuery>(query));
+ result.Boost = query.Boost;
+ return result;
+ }
+ else if (!enumerator.Next())
+ {
+ // Enumeration is done, and we hit a small
+ // enough number of terms & docs -- just make a
+ // BooleanQuery, now
+ BooleanQuery bq = new BooleanQuery(true);
+ foreach(Term term in pendingTerms)
+ {
+ TermQuery tq = new TermQuery(term);
+ bq.Add(tq, Occur.SHOULD);
+ }
+ // Strip scores
+ Query result = new ConstantScoreQuery(new QueryWrapperFilter(bq));
+ result.Boost = query.Boost;
+ query.IncTotalNumberOfTerms(pendingTerms.Count);
+ return result;
+ }
+ }
+ }
+ finally
+ {
+ enumerator.Close();
+ }
+ }
+
+ public override int GetHashCode()
+ {
+ int prime = 1279;
+ return (int) (prime * termCountCutoff + BitConverter.DoubleToInt64Bits(docCountPercent));
+ }
+
+ public override bool Equals(System.Object obj)
+ {
+ if (this == obj)
+ return true;
+ if (obj == null)
+ return false;
+ if (GetType() != obj.GetType())
+ return false;
+
+ ConstantScoreAutoRewrite other = (ConstantScoreAutoRewrite) obj;
+ if (other.termCountCutoff != termCountCutoff)
+ {
+ return false;
+ }
+
+ if (BitConverter.DoubleToInt64Bits(other.docCountPercent) != BitConverter.DoubleToInt64Bits(docCountPercent))
+ {
+ return false;
+ }
+
+ return true;
+ }
+ }
+
+ /// <summary>Read-only default instance of <see cref="ConstantScoreAutoRewrite" />
+ ///, with <see cref="ConstantScoreAutoRewrite.TermCountCutoff" />
+ /// set to
+ /// <see cref="ConstantScoreAutoRewrite.DEFAULT_TERM_COUNT_CUTOFF" />
+ ///
+ /// and <see cref="ConstantScoreAutoRewrite.DocCountPercent" />
+ /// set to
+ /// <see cref="ConstantScoreAutoRewrite.DEFAULT_DOC_COUNT_PERCENT" />
+ ///.
+ /// Note that you cannot alter the configuration of this
+ /// instance; you'll need to create a private instance
+ /// instead.
+ /// </summary>
+ public static readonly RewriteMethod CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
+
+ /// <summary> Constructs a query matching terms that cannot be represented with a single
+ /// Term.
+ /// </summary>
+ protected MultiTermQuery()
+ {
+ }
+
+ /// <summary>Construct the enumeration to be used, expanding the pattern term. </summary>
+ protected internal abstract FilteredTermEnum GetEnum(IndexReader reader);
+
+ /// <summary> Expert: Return the number of unique terms visited during execution of the query.
+ /// If there are many of them, you may consider using another query type
+ /// or optimizing the total term count of your index.
+ /// <p/>This method is not thread safe; be sure to only call it when no query is running!
+ /// If you re-use the same query instance for another
+ /// search, be sure to first reset the term counter
+ /// with <see cref="ClearTotalNumberOfTerms" />.
+ /// <p/>On optimized indexes / no MultiReaders, you get the correct number of
+ /// unique terms for the whole index. Use this number to compare different queries.
+ /// For non-optimized indexes this number can also be achieved in
+ /// non-constant-score mode. In constant-score mode you get the total number of
+ /// terms visited across all segments / sub-readers.
+ /// </summary>
+ /// <seealso cref="ClearTotalNumberOfTerms">
+ /// </seealso>
+ public virtual int TotalNumberOfTerms
+ {
+ get { return numberOfTerms; }
+ }
+
+ /// <summary> Expert: Resets the counting of unique terms.
+ /// Do this before executing the query/filter.
+ /// </summary>
+ /// <seealso cref="TotalNumberOfTerms">
+ /// </seealso>
+ public virtual void ClearTotalNumberOfTerms()
+ {
+ numberOfTerms = 0;
+ }
+
+ protected internal virtual void IncTotalNumberOfTerms(int inc)
+ {
+ numberOfTerms += inc;
+ }
+
+ public override Query Rewrite(IndexReader reader)
+ {
+ return internalRewriteMethod.Rewrite(reader, this);
+ }
+
+ /// <summary> Sets the rewrite method to be used when executing the
+ /// query. You can use one of the four core methods, or
+ /// implement your own subclass of <see cref="Search.RewriteMethod" />.
+ /// </summary>
+ public virtual RewriteMethod RewriteMethod
+ {
+ get { return internalRewriteMethod; }
+ set { internalRewriteMethod = value; }
+ }
+
+ //@Override
+ public override int GetHashCode()
+ {
+ int prime = 31;
+ int result = 1;
+ result = prime * result + System.Convert.ToInt32(Boost);
+ result = prime * result;
+ result += internalRewriteMethod.GetHashCode();
+ return result;
+ }
+
+ //@Override
+ public override bool Equals(System.Object obj)
+ {
+ if (this == obj)
+ return true;
+ if (obj == null)
+ return false;
+ if (GetType() != obj.GetType())
+ return false;
+ MultiTermQuery other = (MultiTermQuery) obj;
+ if (System.Convert.ToInt32(Boost) != System.Convert.ToInt32(other.Boost))
+ return false;
+ if (!internalRewriteMethod.Equals(other.internalRewriteMethod))
+ {
+ return false;
+ }
+ return true;
+ }
+ static MultiTermQuery()
+ {
+ CONSTANT_SCORE_AUTO_REWRITE_DEFAULT = new AnonymousClassConstantScoreAutoRewrite();
+ }
+ }
+
+ /// <summary>Abstract class that defines how the query is rewritten. </summary>
+ [Serializable]
+ public abstract class RewriteMethod
+ {
+ public abstract Query Rewrite(IndexReader reader, MultiTermQuery query);
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/MultiTermQueryWrapperFilter.cs b/src/core/Search/MultiTermQueryWrapperFilter.cs
new file mode 100644
index 0000000..3cffb1a
--- /dev/null
+++ b/src/core/Search/MultiTermQueryWrapperFilter.cs
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using IndexReader = Lucene.Net.Index.IndexReader;
+using Term = Lucene.Net.Index.Term;
+using TermDocs = Lucene.Net.Index.TermDocs;
+using TermEnum = Lucene.Net.Index.TermEnum;
+using OpenBitSet = Lucene.Net.Util.OpenBitSet;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> A wrapper for <see cref="MultiTermQuery" /> that exposes its
+ /// functionality as a <see cref="Filter" />.
+ /// <p/>
+ /// <c>MultiTermQueryWrapperFilter</c> is not designed to
+ /// be used by itself. Normally you subclass it to provide a Filter
+ /// counterpart for a <see cref="MultiTermQuery" /> subclass.
+ /// <p/>
+ /// For example, <see cref="TermRangeFilter" /> and <see cref="PrefixFilter" /> extend
+ /// <c>MultiTermQueryWrapperFilter</c>.
+ /// This class also provides the functionality behind
+ /// <see cref="MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE" />;
+ /// this is why it is not abstract.
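+ /// <p/>A minimal subclass might look like this (an illustrative sketch; <c>PrefixQuery</c> is
+ /// assumed to be the usual <see cref="MultiTermQuery" /> subclass for prefix matching, and the
+ /// class name is hypothetical):
+ /// <code>
+ /// [Serializable]
+ /// public class SimplePrefixFilter : MultiTermQueryWrapperFilter&lt;PrefixQuery&gt;
+ /// {
+ ///     public SimplePrefixFilter(Term prefix)
+ ///         : base(new PrefixQuery(prefix))
+ ///     {
+ ///     }
+ /// }
+ /// </code>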
+ /// </summary>
+ [Serializable]
+ public class MultiTermQueryWrapperFilter<T> : Filter
+ where T : MultiTermQuery
+ {
+ protected internal T query;
+
+ /// <summary> Wrap a <see cref="MultiTermQuery" /> as a Filter.</summary>
+ protected internal MultiTermQueryWrapperFilter(T query)
+ {
+ this.query = query;
+ }
+
+ //@Override
+ public override System.String ToString()
+ {
+ // query.toString should be ok for the filter, too, if the query boost is 1.0f
+ return query.ToString();
+ }
+
+ //@Override
+ public override bool Equals(System.Object o)
+ {
+ if (o == this)
+ return true;
+ if (o == null)
+ return false;
+ if (this.GetType().Equals(o.GetType()))
+ {
+ return this.query.Equals(((MultiTermQueryWrapperFilter<T>) o).query);
+ }
+ return false;
+ }
+
+ //@Override
+ public override int GetHashCode()
+ {
+ return query.GetHashCode();
+ }
+
+ /// <summary> Expert: Return the number of unique terms visited during execution of the filter.
+ /// If there are many of them, you may consider using another filter type
+ /// or optimizing your total term count in the index.
+ /// <p/>This method is not thread safe; be sure to only call it when no filter is running!
+ /// If you re-use the same filter instance for another
+ /// search, be sure to first reset the term counter
+ /// with <see cref="ClearTotalNumberOfTerms" />.
+ /// </summary>
+ /// <seealso cref="ClearTotalNumberOfTerms">
+ /// </seealso>
+ public virtual int TotalNumberOfTerms
+ {
+ get { return query.TotalNumberOfTerms; }
+ }
+
+ /// <summary> Expert: Resets the counting of unique terms.
+ /// Do this before executing the filter.
+ /// </summary>
+ /// <seealso cref="TotalNumberOfTerms">
+ /// </seealso>
+ public virtual void ClearTotalNumberOfTerms()
+ {
+ query.ClearTotalNumberOfTerms();
+ }
+
+ public override DocIdSet GetDocIdSet(IndexReader reader)
+ {
+ TermEnum enumerator = query.GetEnum(reader);
+ try
+ {
+ // if current term in enum is null, the enum is empty -> shortcut
+ if (enumerator.Term == null)
+ return DocIdSet.EMPTY_DOCIDSET;
+ // else fill into an OpenBitSet
+ OpenBitSet bitSet = new OpenBitSet(reader.MaxDoc);
+ int[] docs = new int[32];
+ int[] freqs = new int[32];
+ TermDocs termDocs = reader.TermDocs();
+ try
+ {
+ int termCount = 0;
+ do
+ {
+ Term term = enumerator.Term;
+ if (term == null)
+ break;
+ termCount++;
+ termDocs.Seek(term);
+ while (true)
+ {
+ int count = termDocs.Read(docs, freqs);
+ if (count != 0)
+ {
+ for (int i = 0; i < count; i++)
+ {
+ bitSet.Set(docs[i]);
+ }
+ }
+ else
+ {
+ break;
+ }
+ }
+ } while (enumerator.Next());
+
+ query.IncTotalNumberOfTerms(termCount); // {{Aroush-2.9}} is the use of 'temp' as is right?
+ }
+ finally
+ {
+ termDocs.Close();
+ }
+
+ return bitSet;
+ }
+ finally
+ {
+ enumerator.Close();
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/NumericRangeFilter.cs b/src/core/Search/NumericRangeFilter.cs
new file mode 100644
index 0000000..5ca0790
--- /dev/null
+++ b/src/core/Search/NumericRangeFilter.cs
@@ -0,0 +1,185 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using NumericTokenStream = Lucene.Net.Analysis.NumericTokenStream;
+using NumericField = Lucene.Net.Documents.NumericField;
+using NumericUtils = Lucene.Net.Util.NumericUtils;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> A <see cref="Filter" /> that only accepts numeric values within
+ /// a specified range. To use this, you must first index the
+ /// numeric values using <see cref="NumericField" /> (expert: <see cref="NumericTokenStream" />).
+ ///
+ /// <p/>You create a new NumericRangeFilter with the static
+ /// factory methods, e.g.:
+ ///
+ /// <code>
+ /// Filter f = NumericRangeFilter.NewFloatRange("weight",
+ ///                                             0.03f, 0.10f,
+ ///                                             true, true);
+ /// </code>
+ ///
+ /// accepts all documents whose float valued "weight" field
+ /// ranges from 0.03 to 0.10, inclusive.
+ /// See <see cref="NumericRangeQuery{T}" /> for details on how Lucene
+ /// indexes and searches numeric valued fields.
+ ///
+ /// <p/><font color="red"><b>NOTE:</b> This API is experimental and
+ /// might change in incompatible ways in the next
+ /// release.</font>
+ ///
+ /// </summary>
+ /// <since> 2.9
+ ///
+ /// </since>
+ [Serializable]
+ public sealed class NumericRangeFilter<T> : MultiTermQueryWrapperFilter<NumericRangeQuery<T>>
+ where T : struct, IComparable<T>
+ // real numbers in C# are structs and IComparable with themselves, best constraint we have
+ {
+ internal NumericRangeFilter(NumericRangeQuery<T> query)
+ : base(query)
+ {
+ }
+
+ /// <summary>Returns the field name for this filter </summary>
+ public string Field
+ {
+ get { return query.Field; }
+ }
+
+ /// <summary>Returns <c>true</c> if the lower endpoint is inclusive </summary>
+ public bool IncludesMin
+ {
+ get { return query.IncludesMin; }
+ }
+
+ /// <summary>Returns <c>true</c> if the upper endpoint is inclusive </summary>
+ public bool IncludesMax
+ {
+ get { return query.IncludesMax; }
+ }
+
+ /// <summary>Returns the lower value of this range filter </summary>
+ public T? Min
+ {
+ get { return query.Min; }
+ }
+
+ /// <summary>Returns the upper value of this range filter </summary>
+ public T? Max
+ {
+ get { return query.Max; }
+ }
+ }
+
+ public static class NumericRangeFilter
+ {
+ /// <summary> Factory that creates a <c>NumericRangeFilter</c> that filters a <c>long</c>
+ /// range using the given <a href="NumericRangeQuery.html#precisionStepDesc"><c>precisionStep</c></a>.
+ /// You can have half-open ranges (which are in fact &lt;/&#8804; or &gt;/&#8805; queries)
+ /// by setting the min or max value to <c>null</c>. With inclusive set to false it will
+ /// match all documents excluding the bounds; with inclusive set to true, the boundaries are hits, too.
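+ /// <p/>For example (an illustrative sketch; the field name and precision step are arbitrary),
+ /// a half-open filter matching <c>timestamp &#8805; 1000</c>:
+ /// <code>
+ /// Filter f = NumericRangeFilter.NewLongRange("timestamp", 4, 1000L, null, true, false);
+ /// </code>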
+ /// </summary>
+ public static NumericRangeFilter<long> NewLongRange(System.String field, int precisionStep, long? min, long? max, bool minInclusive, bool maxInclusive)
+ {
+ return new NumericRangeFilter<long>(NumericRangeQuery.NewLongRange(field, precisionStep, min, max, minInclusive, maxInclusive));
+ }
+
+ /// <summary> Factory that creates a <c>NumericRangeFilter</c> that queries a <c>long</c>
+ /// range using the default <c>precisionStep</c> <see cref="NumericUtils.PRECISION_STEP_DEFAULT" /> (4).
+ /// You can have half-open ranges (which are in fact &lt;/&#8804; or &gt;/&#8805; queries)
+ /// by setting the min or max value to <c>null</c>. With inclusive set to false it will
+ /// match all documents excluding the bounds; with inclusive set to true, the boundaries are hits, too.
+ /// </summary>
+ public static NumericRangeFilter<long> NewLongRange(System.String field, long? min, long? max, bool minInclusive, bool maxInclusive)
+ {
+ return new NumericRangeFilter<long>(NumericRangeQuery.NewLongRange(field, min, max, minInclusive, maxInclusive));
+ }
+
+ /// <summary> Factory that creates a <c>NumericRangeFilter</c> that filters an <c>int</c>
+ /// range using the given <a href="NumericRangeQuery.html#precisionStepDesc"><c>precisionStep</c></a>.
+ /// You can have half-open ranges (which are in fact &lt;/&#8804; or &gt;/&#8805; queries)
+ /// by setting the min or max value to <c>null</c>. With inclusive set to false it will
+ /// match all documents excluding the bounds; with inclusive set to true, the boundaries are hits, too.
+ /// </summary>
+ public static NumericRangeFilter<int> NewIntRange(System.String field, int precisionStep, int? min, int? max, bool minInclusive, bool maxInclusive)
+ {
+ return new NumericRangeFilter<int>(NumericRangeQuery.NewIntRange(field, precisionStep, min, max, minInclusive, maxInclusive));
+ }
+
+ /// <summary> Factory that creates a <c>NumericRangeFilter</c> that queries an <c>int</c>
+ /// range using the default <c>precisionStep</c> <see cref="NumericUtils.PRECISION_STEP_DEFAULT" /> (4).
+ /// You can have half-open ranges (which are in fact &lt;/&#8804; or &gt;/&#8805; queries)
+ /// by setting the min or max value to <c>null</c>. With inclusive set to false it will
+ /// match all documents excluding the bounds; with inclusive set to true, the boundaries are hits, too.
+ /// </summary>
+ public static NumericRangeFilter<int> NewIntRange(System.String field, int? min, int? max, bool minInclusive, bool maxInclusive)
+ {
+ return new NumericRangeFilter<int>(NumericRangeQuery.NewIntRange(field, min, max, minInclusive, maxInclusive));
+ }
+
+ /// <summary> Factory that creates a <c>NumericRangeFilter</c> that filters a <c>double</c>
+ /// range using the given <a href="NumericRangeQuery.html#precisionStepDesc"><c>precisionStep</c></a>.
+ /// You can have half-open ranges (which are in fact &lt;/&#8804; or &gt;/&#8805; queries)
+ /// by setting the min or max value to <c>null</c>. With inclusive set to false it will
+ /// match all documents excluding the bounds; with inclusive set to true, the boundaries are hits, too.
+ /// </summary>
+ public static NumericRangeFilter<double> NewDoubleRange(System.String field, int precisionStep, double? min, double? max, bool minInclusive, bool maxInclusive)
+ {
+ return new NumericRangeFilter<double>(NumericRangeQuery.NewDoubleRange(field, precisionStep, min, max, minInclusive, maxInclusive));
+ }
+
+ /// <summary> Factory that creates a <c>NumericRangeFilter</c> that queries a <c>double</c>
+ /// range using the default <c>precisionStep</c> <see cref="NumericUtils.PRECISION_STEP_DEFAULT" /> (4).
+ /// You can have half-open ranges (which are in fact &lt;/&#8804; or &gt;/&#8805; queries)
+ /// by setting the min or max value to <c>null</c>. With inclusive set to false it will
+ /// match all documents excluding the bounds; with inclusive set to true, the boundaries are hits, too.
+ /// </summary>
+ public static NumericRangeFilter<double> NewDoubleRange(System.String field, double? min, double? max, bool minInclusive, bool maxInclusive)
+ {
+ return new NumericRangeFilter<double>(NumericRangeQuery.NewDoubleRange(field, min, max, minInclusive, maxInclusive));
+ }
+
+ /// <summary> Factory that creates a <c>NumericRangeFilter</c> that filters a <c>float</c>
+ /// range using the given <a href="NumericRangeQuery.html#precisionStepDesc"><c>precisionStep</c></a>.
+ /// You can have half-open ranges (which are in fact &lt;/&#8804; or &gt;/&#8805; queries)
+ /// by setting the min or max value to <c>null</c>. With inclusive set to false it will
+ /// match all documents excluding the bounds; with inclusive set to true, the boundaries are hits, too.
+ /// </summary>
+ public static NumericRangeFilter<float> NewFloatRange(System.String field, int precisionStep, float? min, float? max, bool minInclusive, bool maxInclusive)
+ {
+ return new NumericRangeFilter<float>(NumericRangeQuery.NewFloatRange(field, precisionStep, min, max, minInclusive, maxInclusive));
+ }
+
+ /// <summary> Factory that creates a <c>NumericRangeFilter</c> that queries a <c>float</c>
+ /// range using the default <c>precisionStep</c> <see cref="NumericUtils.PRECISION_STEP_DEFAULT" /> (4).
+ /// You can have half-open ranges (which are in fact &lt;/&#8804; or &gt;/&#8805; queries)
+ /// by setting the min or max value to <c>null</c>. With inclusive set to false it will
+ /// match all documents excluding the bounds; with inclusive set to true, the boundaries are hits, too.
+ /// </summary>
+ public static NumericRangeFilter<float> NewFloatRange(System.String field, float? min, float? max, bool minInclusive, bool maxInclusive)
+ {
+ return new NumericRangeFilter<float>(NumericRangeQuery.NewFloatRange(field, min, max, minInclusive, maxInclusive));
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/NumericRangeQuery.cs b/src/core/Search/NumericRangeQuery.cs
new file mode 100644
index 0000000..46b2025
--- /dev/null
+++ b/src/core/Search/NumericRangeQuery.cs
@@ -0,0 +1,665 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Index;
+using NumericTokenStream = Lucene.Net.Analysis.NumericTokenStream;
+using NumericField = Lucene.Net.Documents.NumericField;
+using IndexReader = Lucene.Net.Index.IndexReader;
+using Term = Lucene.Net.Index.Term;
+using NumericUtils = Lucene.Net.Util.NumericUtils;
+using StringHelper = Lucene.Net.Util.StringHelper;
+using ToStringUtils = Lucene.Net.Util.ToStringUtils;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> <p/>A <see cref="Query" /> that matches numeric values within a
+ /// specified range. To use this, you must first index the
+ /// numeric values using <see cref="NumericField" /> (expert: <see cref="NumericTokenStream" />).
+ /// If your terms are instead textual,
+ /// you should use <see cref="TermRangeQuery" />. <see cref="NumericRangeFilter{T}" />
+ /// is the filter equivalent of this
+ /// query.<p/>
+ ///
+ /// <p/>You create a new NumericRangeQuery with the static
+ /// factory methods, e.g.:
+ ///
+ /// <code>
+ /// Query q = NumericRangeQuery.NewFloatRange("weight",
+ ///                                           0.03f, 0.10f,
+ ///                                           true, true);
+ /// </code>
+ ///
+ /// matches all documents whose float valued "weight" field
+ /// ranges from 0.03 to 0.10, inclusive.
+ ///
+ /// <p/>The performance of NumericRangeQuery is much better
+ /// than the corresponding <see cref="TermRangeQuery" /> because the
+ /// number of terms that must be searched is usually far
+ /// fewer, thanks to trie indexing, described below.<p/>
+ ///
+ /// <p/>You can optionally specify a <a
+ /// href="#precisionStepDesc"><c>precisionStep</c></a>
+ /// when creating this query. This is necessary if you've
+ /// changed this configuration from its default (4) during
+ /// indexing. Lower values consume more disk space but speed
+ /// up searching. Suitable values are between <b>1</b> and
+ /// <b>8</b>. A good starting point to test is <b>4</b>,
+ /// which is the default value for all <c>Numeric*</c>
+ /// classes. See <a href="#precisionStepDesc">below</a> for
+ /// details.
+ ///
+ /// <p/>This query defaults to
+ /// <see cref="MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT"/> for
+ /// 32 bit (int/float) ranges with precisionStep &#8804;8 and 64
+ /// bit (long/double) ranges with precisionStep &#8804;6.
+ /// Otherwise it uses
+ /// <see cref="MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE"/> as the
+ /// number of terms is likely to be high. With precision
+ /// steps of &lt;4, this query can be run with one of the
+ /// BooleanQuery rewrite methods without changing
+ /// BooleanQuery's default max clause count.
+ ///
+ /// <p/><font color="red"><b>NOTE:</b> This API is experimental and
+ /// might change in incompatible ways in the next release.</font>
+ ///
+ /// <br/><h3>How it works</h3>
+ ///
+ /// <p/>See the publication about <a target="_blank" href="http://www.panfmp.org">panFMP</a>,
+ /// where this algorithm was described (referred to as <c>TrieRangeQuery</c>):
+ ///
+ /// <blockquote><strong>Schindler, U, Diepenbroek, M</strong>, 2008.
+ /// <em>Generic XML-based Framework for Metadata Portals.</em>
+ /// Computers &amp; Geosciences 34 (12), 1947-1955.
+ /// <a href="http://dx.doi.org/10.1016/j.cageo.2008.02.023"
+ /// target="_blank">doi:10.1016/j.cageo.2008.02.023</a></blockquote>
+ ///
+ /// <p/><em>A quote from this paper:</em> Because Apache Lucene is a full-text
+ /// search engine and not a conventional database, it cannot handle numerical ranges
+ /// (e.g., field value is inside user defined bounds, even dates are numerical values).
+ /// We have developed an extension to Apache Lucene that stores
+ /// the numerical values in a special string-encoded format with variable precision
+ /// (all numerical values like doubles, longs, floats, and ints are converted to
+ /// lexicographically sortable string representations and stored with different precisions)
+ /// (for a more detailed description of how the values are stored,
+ /// see <see cref="NumericUtils" />). A range is then divided recursively into multiple intervals for searching:
+ /// The center of the range is searched only with the lowest possible precision in the <em>trie</em>,
+ /// while the boundaries are matched more exactly. This reduces the number of terms dramatically.<p/>
+ ///
+ /// <p/>For the variant that stores long values in 8 different precisions (each reduced by 8 bits) that
+ /// uses a lowest precision of 1 byte, the index contains only a maximum of 256 distinct values in the
+ /// lowest precision. Overall, a range could consist of a theoretical maximum of
+ /// <c>7*255*2 + 255 = 3825</c> distinct terms (when there is a term for every distinct value of an
+ /// 8-byte-number in the index and the range covers almost all of them; a maximum of 255 distinct values is used
+ /// because it would always be possible to reduce the full 256 values to one term with degraded precision).
+ /// In practice, we have seen up to 300 terms in most cases (index with 500,000 metadata records
+ /// and a uniform value distribution).<p/>
+ ///
+ /// <a name="precisionStepDesc"/><h3>Precision Step</h3>
+ /// <p/>You can choose any <c>precisionStep</c> when encoding values.
+ /// Lower step values mean more precisions and thus more terms in the index (and a larger index).
+ /// On the other hand, the maximum number of terms to match is reduced, which optimizes query speed.
+ /// The formula to calculate the maximum term count is:
+ /// <code>
+ /// n = [ (bitsPerValue/precisionStep - 1) * (2^precisionStep - 1 ) * 2 ] + (2^precisionStep - 1 )
+ /// </code>
+ /// <p/><em>(this formula is only correct when <c>bitsPerValue/precisionStep</c> is an integer;
+ /// in other cases, the value must be rounded up and the last summand must contain the modulo of the division as
+ /// precision step)</em>.
+ /// For longs stored using a precision step of 4, <c>n = 15*15*2 + 15 = 465</c>, and for a precision
+ /// step of 2, <c>n = 31*3*2 + 3 = 189</c>. But the faster search speed is reduced by more seeking
+ /// in the term enum of the index. Because of this, the ideal <c>precisionStep</c> value can only
+ /// be found out by testing. <b>Important:</b> You can index with a lower precision step value and test search speed
+ /// using a multiple of the original step value.<p/>
+ ///
+ /// <p/>Good values for <c>precisionStep</c> depend on usage and data type (see the sketch after this list):
+ /// <list type="bullet">
+ /// <item>The default for all data types is <b>4</b>, which is used, when no <c>precisionStep</c> is given.</item>
+ /// <item>Ideal value in most cases for <em>64 bit</em> data types <em>(long, double)</em> is <b>6</b> or <b>8</b>.</item>
+ /// <item>Ideal value in most cases for <em>32 bit</em> data types <em>(int, float)</em> is <b>4</b>.</item>
+ /// <item>Steps <b>&#8805;64</b> for <em>long/double</em> and <b>&#8805;32</b> for <em>int/float</em> produce one token
+ /// per value in the index, and querying is as slow as a conventional <see cref="TermRangeQuery" />. But it can be used
+ /// to produce fields that are solely used for sorting (in this case simply use <see cref="int.MaxValue" /> as
+ /// <c>precisionStep</c>). Using <see cref="NumericField">NumericFields</see> for sorting
+ /// is ideal, because building the field cache is much faster than with text-only numbers.
+ /// Sorting is also possible with range query optimized fields using one of the above <c>precisionSteps</c>.</item>
+ /// </list>
+ ///
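+ /// <p/>An end-to-end sketch of indexing and querying with a non-default <c>precisionStep</c>
+ /// (illustrative only; <c>doc</c> is an existing <c>Document</c>, the <see cref="NumericField" />
+ /// constructor taking a precision step is assumed, and the field name and values are arbitrary):
+ /// <code>
+ /// // index time: encode the value with precisionStep 6
+ /// doc.Add(new NumericField("price", 6, Field.Store.YES, true).SetLongValue(4999L));
+ ///
+ /// // search time: query with the same precisionStep
+ /// Query q = NumericRangeQuery.NewLongRange("price", 6, 1000L, 5000L, true, true);
+ /// </code>
+ ///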
+ /// <p/>Comparisons of the different types of RangeQueries on an index with about 500,000 docs showed
+ /// that <see cref="TermRangeQuery" /> in boolean rewrite mode (with raised <see cref="BooleanQuery" /> clause count)
+ /// took about 30-40 secs to complete; <see cref="TermRangeQuery" /> in constant score filter rewrite mode took 5 secs;
+ /// and executing this class took &lt;100ms to complete (on an Opteron64 machine, Java 1.5, 8 bit
+ /// precision step). This query type was developed for a geographic portal, where the performance for
+ /// e.g. bounding boxes or exact date/time stamps is important.<p/>
+ ///
+ /// </summary>
+ /// <since> 2.9
+ ///
+ /// </since>
+ [Serializable]
+ public sealed class NumericRangeQuery<T> : MultiTermQuery
+ where T : struct, IComparable<T> // best equiv constraint for java's number class
+ {
+ internal NumericRangeQuery(System.String field, int precisionStep, int valSize, T? min, T? max, bool minInclusive, bool maxInclusive)
+ {
+ System.Diagnostics.Debug.Assert((valSize == 32 || valSize == 64));
+ if (precisionStep < 1)
+ throw new System.ArgumentException("precisionStep must be >=1");
+ this.field = StringHelper.Intern(field);
+ this.precisionStep = precisionStep;
+ this.valSize = valSize;
+ this.min = min;
+ this.max = max;
+ this.minInclusive = minInclusive;
+ this.maxInclusive = maxInclusive;
+
+ // For bigger precisionSteps this query likely
+ // hits too many terms, so set to CONSTANT_SCORE_FILTER right off
+ // (especially as the FilteredTermEnum is costly if wasted only for AUTO tests because it
+ // creates new enums from IndexReader for each sub-range)
+ switch (valSize)
+ {
+
+ case 64:
+ RewriteMethod = (precisionStep > 6)?CONSTANT_SCORE_FILTER_REWRITE:CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
+ break;
+
+ case 32:
+ RewriteMethod = (precisionStep > 8)?CONSTANT_SCORE_FILTER_REWRITE:CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
+ break;
+
+ default:
+ // should never happen
+ throw new System.ArgumentException("valSize must be 32 or 64");
+
+ }
+
+ // shortcut if upper bound == lower bound
+ if (min != null && min.Equals(max))
+ {
+ RewriteMethod = CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE;
+ }
+ }
+
+ //@Override
+ protected internal override FilteredTermEnum GetEnum(IndexReader reader)
+ {
+ return new NumericRangeTermEnum(this, reader);
+ }
+
+ /// <summary>Returns the field name for this query </summary>
+ public string Field
+ {
+ get { return field; }
+ }
+
+ /// <summary>Returns <c>true</c> if the lower endpoint is inclusive </summary>
+ public bool IncludesMin
+ {
+ get { return minInclusive; }
+ }
+
+ /// <summary>Returns <c>true</c> if the upper endpoint is inclusive </summary>
+ public bool IncludesMax
+ {
+ get { return maxInclusive; }
+ }
+
+ /// <summary>Returns the lower value of this range query </summary>
+ public T? Min
+ {
+ get { return min; }
+ }
+
+ /// <summary>Returns the upper value of this range query </summary>
+ public T? Max
+ {
+ get { return max; }
+ }
+
+ public override System.String ToString(System.String field)
+ {
+ System.Text.StringBuilder sb = new System.Text.StringBuilder();
+ if (!this.field.Equals(field))
+ sb.Append(this.field).Append(':');
+ return sb.Append(minInclusive ? '[' : '{').Append((min == null) ? "*" : min.ToString()).Append(" TO ").Append((max == null) ? "*" : max.ToString()).Append(maxInclusive ? ']' : '}').Append(ToStringUtils.Boost(Boost)).ToString();
+ }
+
+ public override bool Equals(System.Object o)
+ {
+ if (o == this)
+ return true;
+ if (!base.Equals(o))
+ return false;
+ if (o is NumericRangeQuery<T>)
+ {
+ NumericRangeQuery<T> q = (NumericRangeQuery<T>)o;
+ return ((System.Object)field == (System.Object)q.field && (q.min == null ? min == null : q.min.Equals(min)) && (q.max == null ? max == null : q.max.Equals(max)) && minInclusive == q.minInclusive && maxInclusive == q.maxInclusive && precisionStep == q.precisionStep);
+ }
+ return false;
+ }
+
+ public override int GetHashCode()
+ {
+ int hash = base.GetHashCode();
+ hash += (field.GetHashCode() ^ 0x4565fd66 + precisionStep ^ 0x64365465);
+ if (min != null)
+ hash += (min.GetHashCode() ^ 0x14fa55fb);
+ if (max != null)
+ hash += (max.GetHashCode() ^ 0x733fa5fe);
+ return hash + (minInclusive.GetHashCode() ^ 0x14fa55fb) + (maxInclusive.GetHashCode() ^ 0x733fa5fe);
+ }
+
+ // field must be interned after reading from stream
+ //private void ReadObject(java.io.ObjectInputStream in)
+ //{
+ // in.defaultReadObject();
+ // field = StringHelper.intern(field);
+ //}
+
+
+ [System.Runtime.Serialization.OnDeserialized]
+ internal void OnDeserialized(System.Runtime.Serialization.StreamingContext context)
+ {
+ field = StringHelper.Intern(field);
+ }
+
+ // members (package private, to be also fast accessible by NumericRangeTermEnum)
+ internal System.String field;
+ internal int precisionStep;
+ internal int valSize;
+ internal T? min;
+ internal T? max;
+ internal bool minInclusive;
+ internal bool maxInclusive;
+
+ /// <summary> Subclass of FilteredTermEnum for enumerating all terms that match the
+ /// sub-ranges for trie range queries.
+ /// <p/>
+ /// WARNING: This term enumeration is not guaranteed to be always ordered by
+ /// <see cref="Term.CompareTo(Term)" />.
+ /// The ordering depends on how <see cref="NumericUtils.SplitLongRange" /> and
+ /// <see cref="NumericUtils.SplitIntRange" /> generates the sub-ranges. For
+ /// <see cref="MultiTermQuery" /> ordering is not relevant.
+ /// </summary>
+ private sealed class NumericRangeTermEnum:FilteredTermEnum
+ {
+ private class AnonymousClassLongRangeBuilder:NumericUtils.LongRangeBuilder
+ {
+ public AnonymousClassLongRangeBuilder(NumericRangeTermEnum enclosingInstance)
+ {
+ InitBlock(enclosingInstance);
+ }
+ private void InitBlock(NumericRangeTermEnum enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private NumericRangeTermEnum enclosingInstance;
+ public NumericRangeTermEnum Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ //@Override
+ public override void AddRange(System.String minPrefixCoded, System.String maxPrefixCoded)
+ {
+ Enclosing_Instance.rangeBounds.AddLast(minPrefixCoded);
+ Enclosing_Instance.rangeBounds.AddLast(maxPrefixCoded);
+ }
+ }
+ private class AnonymousClassIntRangeBuilder:NumericUtils.IntRangeBuilder
+ {
+ public AnonymousClassIntRangeBuilder(NumericRangeTermEnum enclosingInstance)
+ {
+ InitBlock(enclosingInstance);
+ }
+ private void InitBlock(NumericRangeTermEnum enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private NumericRangeTermEnum enclosingInstance;
+ public NumericRangeTermEnum Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ //@Override
+ public override void AddRange(System.String minPrefixCoded, System.String maxPrefixCoded)
+ {
+ Enclosing_Instance.rangeBounds.AddLast(minPrefixCoded);
+ Enclosing_Instance.rangeBounds.AddLast(maxPrefixCoded);
+ }
+ }
+ private void InitBlock(NumericRangeQuery<T> enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ termTemplate = new Term(Enclosing_Instance.field);
+ }
+ private NumericRangeQuery<T> enclosingInstance;
+ public NumericRangeQuery<T> Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+
+ private IndexReader reader;
+ private LinkedList<string> rangeBounds = new LinkedList<string>();
+ private Term termTemplate;
+ private System.String currentUpperBound = null;
+
+ private bool isDisposed;
+
+ internal NumericRangeTermEnum(NumericRangeQuery<T> enclosingInstance, IndexReader reader)
+ {
+ InitBlock(enclosingInstance);
+ this.reader = reader;
+
+ Type rangeType = Nullable.GetUnderlyingType(typeof(T?));
+ switch (Enclosing_Instance.valSize)
+ {
+ case 64: {
+ // lower
+ long minBound = System.Int64.MinValue;
+ if (rangeType == typeof(System.Int64))
+ {
+ // these checks were added to emulate Java: passing null gave it no type (in the old code),
+ // but .NET identifies it via generics and would set the bounds to 0, causing tests to fail
+ if (Enclosing_Instance.min != null)
+ minBound = System.Convert.ToInt64(Enclosing_Instance.min);
+ }
+ else if (rangeType == typeof(System.Double))
+ {
+ if (Enclosing_Instance.min != null)
+ minBound = NumericUtils.DoubleToSortableLong(System.Convert.ToDouble(Enclosing_Instance.min));
+ }
+ if (!Enclosing_Instance.minInclusive && Enclosing_Instance.min != null)
+ {
+ if (minBound == System.Int64.MaxValue)
+ break;
+ minBound++;
+ }
+
+ // upper
+ long maxBound = System.Int64.MaxValue;
+ if (rangeType == typeof(System.Int64))
+ {
+ if (Enclosing_Instance.max != null)
+ maxBound = System.Convert.ToInt64(Enclosing_Instance.max);
+ }
+ else if (rangeType == typeof(System.Double))
+ {
+ if (Enclosing_Instance.max != null)
+ maxBound = NumericUtils.DoubleToSortableLong(System.Convert.ToDouble(Enclosing_Instance.max));
+ }
+ if (!Enclosing_Instance.maxInclusive && Enclosing_Instance.max != null)
+ {
+ if (maxBound == System.Int64.MinValue)
+ break;
+ maxBound--;
+ }
+
+ NumericUtils.SplitLongRange(new AnonymousClassLongRangeBuilder(this), Enclosing_Instance.precisionStep, minBound, maxBound);
+ break;
+ }
+
+
+ case 32: {
+ // lower
+ int minBound = System.Int32.MinValue;
+ if (rangeType == typeof(System.Int32))
+ {
+ if (Enclosing_Instance.min != null)
+ minBound = System.Convert.ToInt32(Enclosing_Instance.min);
+ }
+ else if (rangeType == typeof(System.Single))
+ {
+ if (Enclosing_Instance.min != null)
+ minBound = NumericUtils.FloatToSortableInt(System.Convert.ToSingle(Enclosing_Instance.min));
+ }
+ if (!Enclosing_Instance.minInclusive && Enclosing_Instance.min != null)
+ {
+ if (minBound == System.Int32.MaxValue)
+ break;
+ minBound++;
+ }
+
+ // upper
+ int maxBound = System.Int32.MaxValue;
+ if (rangeType == typeof(System.Int32))
+ {
+ if (Enclosing_Instance.max != null)
+ maxBound = System.Convert.ToInt32(Enclosing_Instance.max);
+ }
+ else if (rangeType == typeof(System.Single))
+ {
+ if (Enclosing_Instance.max != null)
+ maxBound = NumericUtils.FloatToSortableInt(System.Convert.ToSingle(Enclosing_Instance.max));
+ }
+ if (!Enclosing_Instance.maxInclusive && Enclosing_Instance.max != null)
+ {
+ if (maxBound == System.Int32.MinValue)
+ break;
+ maxBound--;
+ }
+
+ NumericUtils.SplitIntRange(new AnonymousClassIntRangeBuilder(this), Enclosing_Instance.precisionStep, minBound, maxBound);
+ break;
+ }
+
+
+ default:
+ // should never happen
+ throw new System.ArgumentException("valSize must be 32 or 64");
+
+ }
+
+ // seek to first term
+ Next();
+ }
+
+ //@Override
+ public override float Difference()
+ {
+ return 1.0f;
+ }
+
+ /// <summary>This is a dummy; it is not used by this class. </summary>
+ //@Override
+ public override bool EndEnum()
+ {
+ throw new NotSupportedException("not implemented");
+ }
+
+ /// <summary>This is a dummy; it is not used by this class. </summary>
+ protected internal override void SetEnum(TermEnum tenum)
+ {
+ throw new NotSupportedException("not implemented");
+ }
+
+ /// <summary> Compares whether the current upper bound is reached;
+ /// this also updates the term count for statistics.
+ /// In contrast to <see cref="FilteredTermEnum" />, a return value
+ /// of <c>false</c> ends iterating the current enum
+ /// and forwards to the next sub-range.
+ /// </summary>
+ //@Override
+ protected internal override bool TermCompare(Term term)
+ {
+ return (term.Field == Enclosing_Instance.field && String.CompareOrdinal(term.Text, currentUpperBound) <= 0);
+ }
+
+ /// <summary>Increments the enumeration to the next element. True if one exists. </summary>
+ //@Override
+ public override bool Next()
+ {
+ // if a current term exists, the actual enum is initialized:
+ // try change to next term, if no such term exists, fall-through
+ if (currentTerm != null)
+ {
+ System.Diagnostics.Debug.Assert(actualEnum != null);
+ if (actualEnum.Next())
+ {
+ currentTerm = actualEnum.Term;
+ if (TermCompare(currentTerm))
+ return true;
+ }
+ }
+ // if all above fails, we go forward to the next enum,
+ // if one is available
+ currentTerm = null;
+ while (rangeBounds.Count >= 2)
+ {
+ // close the current enum and read next bounds
+ if (actualEnum != null)
+ {
+ actualEnum.Close();
+ actualEnum = null;
+ }
+ string lowerBound = rangeBounds.First.Value;
+ rangeBounds.RemoveFirst();
+ this.currentUpperBound = rangeBounds.First.Value;
+ rangeBounds.RemoveFirst();
+ // create a new enum
+ actualEnum = reader.Terms(termTemplate.CreateTerm(lowerBound));
+ currentTerm = actualEnum.Term;
+ if (currentTerm != null && TermCompare(currentTerm))
+ return true;
+ // clear the current term for next iteration
+ currentTerm = null;
+ }
+
+ // no more sub-range enums available
+ System.Diagnostics.Debug.Assert(rangeBounds.Count == 0 && currentTerm == null);
+ return false;
+ }
+
+ /// <summary>Closes the enumeration to further activity, freeing resources. </summary>
+ protected override void Dispose(bool disposing)
+ {
+ if (isDisposed) return;
+
+ rangeBounds.Clear();
+ currentUpperBound = null;
+
+ isDisposed = true;
+ base.Dispose(disposing);
+ }
+ }
+ }
+
+ public static class NumericRangeQuery
+ {
+ /// <summary> Factory that creates a <c>NumericRangeQuery</c> that queries a <c>long</c>
+ /// range using the given <a href="#precisionStepDesc"><c>precisionStep</c></a>.
+ /// You can have half-open ranges (which are in fact &lt;/&#8804; or &gt;/&#8805; queries)
+ /// by setting the min or max value to <c>null</c>. With inclusive set to false it will
+ /// match all documents excluding the bounds; with inclusive set to true, the boundaries are hits, too.
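+ /// <p/>For example (an illustrative sketch; the field name and precision step are arbitrary),
+ /// a half-open query matching <c>timestamp &#8804; 2000</c>:
+ /// <code>
+ /// Query q = NumericRangeQuery.NewLongRange("timestamp", 4, null, 2000L, false, true);
+ /// </code>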
+ /// </summary>
+ public static NumericRangeQuery<long> NewLongRange(System.String field, int precisionStep, long? min, long? max, bool minInclusive, bool maxInclusive)
+ {
+ return new NumericRangeQuery<long>(field, precisionStep, 64, min, max, minInclusive, maxInclusive);
+ }
+
+ /// <summary> Factory that creates a <c>NumericRangeQuery</c> that queries a <c>long</c>
+ /// range using the default <c>precisionStep</c> <see cref="NumericUtils.PRECISION_STEP_DEFAULT" /> (4).
+ /// You can have half-open ranges (which are in fact &lt;/&#8804; or &gt;/&#8805; queries)
+ /// by setting the min or max value to <c>null</c>. With inclusive set to false it will
+ /// match all documents excluding the bounds; with inclusive set to true, the boundaries are hits, too.
+ /// </summary>
+ public static NumericRangeQuery<long> NewLongRange(System.String field, long? min, long? max, bool minInclusive, bool maxInclusive)
+ {
+ return new NumericRangeQuery<long>(field, NumericUtils.PRECISION_STEP_DEFAULT, 64, min, max, minInclusive, maxInclusive);
+ }
+
+ /// <summary> Factory that creates a <c>NumericRangeQuery</c> that queries an <c>int</c>
+ /// range using the given <a href="#precisionStepDesc"><c>precisionStep</c></a>.
+ /// You can have half-open ranges (which are in fact &lt;/&#8804; or &gt;/&#8805; queries)
+ /// by setting the min or max value to <c>null</c>. With inclusive set to false it will
+ /// match all documents excluding the bounds; with inclusive set to true, the boundaries are hits, too.
+ /// </summary>
+ public static NumericRangeQuery<int> NewIntRange(System.String field, int precisionStep, int? min, int? max, bool minInclusive, bool maxInclusive)
+ {
+ return new NumericRangeQuery<int>(field, precisionStep, 32, min, max, minInclusive, maxInclusive);
+ }
+
+ /// <summary> Factory that creates a <c>NumericRangeQuery</c> that queries an <c>int</c>
+ /// range using the default <c>precisionStep</c> <see cref="NumericUtils.PRECISION_STEP_DEFAULT" /> (4).
+ /// You can have half-open ranges (which are in fact &lt;/&#8804; or &gt;/&#8805; queries)
+ /// by setting the min or max value to <c>null</c>. With inclusive set to false it will
+ /// match all documents excluding the bounds; with inclusive set to true, the boundaries are hits, too.
+ /// </summary>
+ public static NumericRangeQuery<int> NewIntRange(System.String field, int? min, int? max, bool minInclusive, bool maxInclusive)
+ {
+ return new NumericRangeQuery<int>(field, NumericUtils.PRECISION_STEP_DEFAULT, 32, min, max, minInclusive, maxInclusive);
+ }
+
+ /// <summary> Factory that creates a <c>NumericRangeQuery</c> that queries a <c>double</c>
+ /// range using the given <a href="#precisionStepDesc"><c>precisionStep</c></a>.
+ /// You can have half-open ranges (which are in fact &lt;/&#8804; or &gt;/&#8805; queries)
+ /// by setting the min or max value to <c>null</c>. With inclusive set to false it will
+ /// match all documents excluding the bounds; with inclusive set to true, the boundaries are hits, too.
+ /// </summary>
+ public static NumericRangeQuery<double> NewDoubleRange(System.String field, int precisionStep, double? min, double? max, bool minInclusive, bool maxInclusive)
+ {
+ return new NumericRangeQuery<double>(field, precisionStep, 64, min, max, minInclusive, maxInclusive);
+ }
+
+ /// <summary> Factory that creates a <c>NumericRangeQuery</c> that queries a <c>double</c>
+ /// range using the default <c>precisionStep</c> <see cref="NumericUtils.PRECISION_STEP_DEFAULT" /> (4).
+ /// You can have half-open ranges (which are in fact &lt;/&#8804; or &gt;/&#8805; queries)
+ /// by setting the min or max value to <c>null</c>. With inclusive set to false it will
+ /// match all documents excluding the bounds; with inclusive set to true, the boundaries are hits, too.
+ /// </summary>
+ public static NumericRangeQuery<double> NewDoubleRange(System.String field, double? min, double? max, bool minInclusive, bool maxInclusive)
+ {
+ return new NumericRangeQuery<double>(field, NumericUtils.PRECISION_STEP_DEFAULT, 64, min, max, minInclusive, maxInclusive);
+ }
+
+ /// <summary> Factory that creates a <c>NumericRangeQuery</c> that queries a <c>float</c>
+ /// range using the given <a href="#precisionStepDesc"><c>precisionStep</c></a>.
+ /// You can have half-open ranges (which are in fact &lt;/&#8804; or &gt;/&#8805; queries)
+ /// by setting the min or max value to <c>null</c>. With inclusive set to false it will
+ /// match all documents excluding the bounds; with inclusive set to true, the boundaries are hits, too.
+ /// </summary>
+ public static NumericRangeQuery<float> NewFloatRange(System.String field, int precisionStep, float? min, float? max, bool minInclusive, bool maxInclusive)
+ {
+ return new NumericRangeQuery<float>(field, precisionStep, 32, min, max, minInclusive, maxInclusive);
+ }
+
+ /// <summary> Factory that creates a <c>NumericRangeQuery</c> that queries a <c>float</c>
+ /// range using the default <c>precisionStep</c> <see cref="NumericUtils.PRECISION_STEP_DEFAULT" /> (4).
+ /// You can have half-open ranges (which are in fact &lt;/&#8804; or &gt;/&#8805; queries)
+ /// by setting the min or max value to <c>null</c>. With inclusive set to false it will
+ /// match all documents excluding the bounds; with inclusive set to true, the boundaries are hits, too.
+ /// </summary>
+ public static NumericRangeQuery<float> NewFloatRange(System.String field, float? min, float? max, bool minInclusive, bool maxInclusive)
+ {
+ return new NumericRangeQuery<float>(field, NumericUtils.PRECISION_STEP_DEFAULT, 32, min, max, minInclusive, maxInclusive);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/ParallelMultiSearcher.cs b/src/core/Search/ParallelMultiSearcher.cs
new file mode 100644
index 0000000..def231a
--- /dev/null
+++ b/src/core/Search/ParallelMultiSearcher.cs
@@ -0,0 +1,217 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if !NET35
+
+using System;
+using System.Threading;
+using System.Threading.Tasks;
+using System.Linq;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using IndexReader = Lucene.Net.Index.IndexReader;
+using Term = Lucene.Net.Index.Term;
+
+namespace Lucene.Net.Search
+{
+ /// <summary>Implements parallel search over a set of <c>Searchables</c>.
+ ///
+ /// <p/>Applications usually need only call the inherited <see cref="Searcher.Search(Query, int)" />
+ /// or <see cref="Searcher.Search(Query,Filter,int)" /> methods.
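+ /// <p/>For example (an illustrative sketch; the two <c>IndexSearcher</c> instances and the
+ /// query are assumed to be created elsewhere):
+ /// <code>
+ /// Searcher searcher = new ParallelMultiSearcher(indexSearcher1, indexSearcher2);
+ /// TopDocs hits = searcher.Search(query, 10);
+ /// </code>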
+ /// </summary>
+ public class ParallelMultiSearcher : MultiSearcher/*, IDisposable*/ //No need to implement IDisposable like java, nothing to dispose with the TPL
+ {
+ private class AnonymousClassCollector1:Collector
+ {
+ public AnonymousClassCollector1(Lucene.Net.Search.Collector collector, int start, ParallelMultiSearcher enclosingInstance)
+ {
+ InitBlock(collector, start, enclosingInstance);
+ }
+ private void InitBlock(Lucene.Net.Search.Collector collector, int start, ParallelMultiSearcher enclosingInstance)
+ {
+ this.collector = collector;
+ this.start = start;
+ this.enclosingInstance = enclosingInstance;
+ }
+ private Lucene.Net.Search.Collector collector;
+ private int start;
+ private ParallelMultiSearcher enclosingInstance;
+ public ParallelMultiSearcher Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ public override void SetScorer(Scorer scorer)
+ {
+ collector.SetScorer(scorer);
+ }
+ public override void Collect(int doc)
+ {
+ collector.Collect(doc);
+ }
+ public override void SetNextReader(IndexReader reader, int docBase)
+ {
+ collector.SetNextReader(reader, start + docBase);
+ }
+
+ public override bool AcceptsDocsOutOfOrder
+ {
+ get { return collector.AcceptsDocsOutOfOrder; }
+ }
+ }
+
+ private Searchable[] searchables;
+ private int[] starts;
+
+ /// <summary>Creates a <see cref="Searchable"/> which searches <i>searchables</i>. </summary>
+ public ParallelMultiSearcher(params Searchable[] searchables)
+ : base(searchables)
+ {
+ this.searchables = searchables;
+ this.starts = GetStarts();
+ }
+
+ /// <summary>
+ /// Executes each <see cref="Searchable"/>'s <c>DocFreq()</c> in its own task,
+ /// waits for all of them to complete, and sums the results.
+ /// </summary>
+ public override int DocFreq(Term term)
+ {
+ Task<int>[] tasks = new Task<int>[searchables.Length];
+ for (int i = 0; i < searchables.Length; i++)
+ {
+ Searchable searchable = searchables[i];
+ tasks[i] = Task.Factory.StartNew(() => searchable.DocFreq(term));
+ }
+
+ Task.WaitAll(tasks);
+ return tasks.Sum(task => task.Result);
+ }
+
+ /// <summary> A search implementation which executes each
+ /// <see cref="Searchable"/> in its own thread and waits for each search to complete
+ /// and merges the results back together.
+ /// </summary>
+ public override TopDocs Search(Weight weight, Filter filter, int nDocs)
+ {
+ HitQueue hq = new HitQueue(nDocs, false);
+ object lockObj = new object();
+
+ Task<TopDocs>[] tasks = new Task<TopDocs>[searchables.Length];
+ //search each searchable
+ for (int i = 0; i < searchables.Length; i++)
+ {
+ int cur = i;
+ tasks[i] =
+ Task.Factory.StartNew(() => MultiSearcherCallableNoSort(ThreadLock.MonitorLock, lockObj, searchables[cur], weight, filter,
+ nDocs, hq, cur, starts));
+ }
+
+ int totalHits = 0;
+ float maxScore = float.NegativeInfinity;
+
+
+ Task.WaitAll(tasks);
+ foreach(TopDocs topDocs in tasks.Select(x => x.Result))
+ {
+ totalHits += topDocs.TotalHits;
+ maxScore = Math.Max(maxScore, topDocs.MaxScore);
+ }
+
+ ScoreDoc[] scoreDocs = new ScoreDoc[hq.Size()];
+ for (int i = hq.Size() - 1; i >= 0; i--) // put docs in array
+ scoreDocs[i] = hq.Pop();
+
+ return new TopDocs(totalHits, scoreDocs, maxScore);
+ }
+
+ /// <summary> A search implementation allowing sorting which spawns a new thread for each
+ /// Searchable, waits for each search to complete, and merges
+ /// the results back together.
+ /// </summary>
+ public override TopFieldDocs Search(Weight weight, Filter filter, int nDocs, Sort sort)
+ {
+ if (sort == null) throw new ArgumentNullException("sort");
+
+ FieldDocSortedHitQueue hq = new FieldDocSortedHitQueue(nDocs);
+ object lockObj = new object();
+
+ Task<TopFieldDocs>[] tasks = new Task<TopFieldDocs>[searchables.Length];
+ for (int i = 0; i < searchables.Length; i++) // search each searchable
+ {
+ int cur = i;
+ tasks[i] =
+ Task<TopFieldDocs>.Factory.StartNew(
+ () => MultiSearcherCallableWithSort(ThreadLock.MonitorLock, lockObj, searchables[cur], weight, filter, nDocs, hq, sort, cur,
+ starts));
+ }
+
+ int totalHits = 0;
+ float maxScore = float.NegativeInfinity;
+
+ Task.WaitAll(tasks);
+ foreach (TopFieldDocs topFieldDocs in tasks.Select(x => x.Result))
+ {
+ totalHits += topFieldDocs.TotalHits;
+ maxScore = Math.Max(maxScore, topFieldDocs.MaxScore);
+ }
+
+ ScoreDoc[] scoreDocs = new ScoreDoc[hq.Size()];
+ for (int i = hq.Size() - 1; i >= 0; i--)
+ scoreDocs[i] = hq.Pop();
+
+ return new TopFieldDocs(totalHits, scoreDocs, hq.GetFields(), maxScore);
+ }
+
+ /// <summary>Lower-level search API.
+ ///
+ /// <p/><see cref="Collector.Collect(int)" /> is called for every matching document.
+ ///
+ /// <p/>Applications should only use this if they need <i>all</i> of the
+ /// matching documents. The high-level search API (<see cref="Searcher.Search(Query, int)" />)
+ /// is usually more efficient, as it skips
+ /// non-high-scoring hits.
+ /// <p/>This method cannot be parallelized, because <see cref="Collector"/>
+ /// supports no concurrent access.
+ /// </summary>
+ /// <param name="weight">to match documents
+ /// </param>
+ /// <param name="filter">if non-null, a bitset used to eliminate some documents
+ /// </param>
+ /// <param name="collector">to receive hits
+ ///
+ /// TODO: parallelize this one too
+ /// </param>
+ public override void Search(Weight weight, Filter filter, Collector collector)
+ {
+ for (int i = 0; i < searchables.Length; i++)
+ {
+
+ int start = starts[i];
+
+ Collector hc = new AnonymousClassCollector1(collector, start, this);
+
+ searchables[i].Search(weight, filter, hc);
+ }
+ }
+ }
+}
+
+#endif \ No newline at end of file
diff --git a/src/core/Search/Payloads/AveragePayloadFunction.cs b/src/core/Search/Payloads/AveragePayloadFunction.cs
new file mode 100644
index 0000000..b262867
--- /dev/null
+++ b/src/core/Search/Payloads/AveragePayloadFunction.cs
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Search.Payloads
+{
+
+
+ /// <summary> Calculates the final score as the average of all payload scores seen.
+ /// <p/>
+ /// Is thread safe and completely reusable.
+ ///
+ ///
+ /// </summary>
+ [Serializable]
+ public class AveragePayloadFunction:PayloadFunction
+ {
+
+ public override float CurrentScore(int docId, System.String field, int start, int end, int numPayloadsSeen, float currentScore, float currentPayloadScore)
+ {
+ return currentPayloadScore + currentScore;
+ }
+
+ public override float DocScore(int docId, System.String field, int numPayloadsSeen, float payloadScore)
+ {
+ return numPayloadsSeen > 0?(payloadScore / numPayloadsSeen):1;
+ }
+
+ public override int GetHashCode()
+ {
+ int prime = 31;
+ int result = 1;
+ result = prime * result + this.GetType().GetHashCode();
+ return result;
+ }
+
+ public override bool Equals(System.Object obj)
+ {
+ if (this == obj)
+ return true;
+ if (obj == null)
+ return false;
+ if (GetType() != obj.GetType())
+ return false;
+ return true;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/Payloads/MaxPayloadFunction.cs b/src/core/Search/Payloads/MaxPayloadFunction.cs
new file mode 100644
index 0000000..3c02a80
--- /dev/null
+++ b/src/core/Search/Payloads/MaxPayloadFunction.cs
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Search.Payloads
+{
+
+
+ /// <summary> Returns the maximum payload score seen, else 1 if there are no payloads on the doc.
+ /// <p/>
+ /// Is thread safe and completely reusable.
+ ///
+ ///
+ /// </summary>
+ [Serializable]
+ public class MaxPayloadFunction:PayloadFunction
+ {
+ public override float CurrentScore(int docId, System.String field, int start, int end, int numPayloadsSeen, float currentScore, float currentPayloadScore)
+ {
+ if (numPayloadsSeen == 0)
+ {
+ return currentPayloadScore;
+ }
+ else
+ {
+ return System.Math.Max(currentPayloadScore, currentScore);
+ }
+ }
+
+ public override float DocScore(int docId, System.String field, int numPayloadsSeen, float payloadScore)
+ {
+ return numPayloadsSeen > 0?payloadScore:1;
+ }
+
+ public override int GetHashCode()
+ {
+ int prime = 31;
+ int result = 1;
+ result = prime * result + this.GetType().GetHashCode();
+ return result;
+ }
+
+ public override bool Equals(System.Object obj)
+ {
+ if (this == obj)
+ return true;
+ if (obj == null)
+ return false;
+ if (GetType() != obj.GetType())
+ return false;
+ return true;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/Payloads/MinPayloadFunction.cs b/src/core/Search/Payloads/MinPayloadFunction.cs
new file mode 100644
index 0000000..0dfa82d
--- /dev/null
+++ b/src/core/Search/Payloads/MinPayloadFunction.cs
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Search.Payloads
+{
+
+ /// <summary> Calculates the minimum payload score seen.
+ ///
+ ///
+ /// </summary>
+ [Serializable]
+ public class MinPayloadFunction:PayloadFunction
+ {
+
+ public override float CurrentScore(int docId, System.String field, int start, int end, int numPayloadsSeen, float currentScore, float currentPayloadScore)
+ {
+ if (numPayloadsSeen == 0)
+ {
+ return currentPayloadScore;
+ }
+ else
+ {
+ return System.Math.Min(currentPayloadScore, currentScore);
+ }
+ }
+
+ public override float DocScore(int docId, System.String field, int numPayloadsSeen, float payloadScore)
+ {
+ return numPayloadsSeen > 0?payloadScore:1;
+ }
+
+ public override int GetHashCode()
+ {
+ int prime = 31;
+ int result = 1;
+ result = prime * result + this.GetType().GetHashCode();
+ return result;
+ }
+
+ public override bool Equals(System.Object obj)
+ {
+ if (this == obj)
+ return true;
+ if (obj == null)
+ return false;
+ if (GetType() != obj.GetType())
+ return false;
+ return true;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/Payloads/PayloadFunction.cs b/src/core/Search/Payloads/PayloadFunction.cs
new file mode 100644
index 0000000..c4a522e
--- /dev/null
+++ b/src/core/Search/Payloads/PayloadFunction.cs
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Search.Payloads
+{
+
+
+	/// <summary> An abstract class that defines a way for Payload*Query instances
+	/// to transform the cumulative effects of payload scores for a document.
+	/// <p/>
+	/// This class and its derivations are experimental and subject to change.
+	/// </summary>
+	/// <seealso cref="Lucene.Net.Search.Payloads.PayloadTermQuery">for more information</seealso>
+ [Serializable]
+ public abstract class PayloadFunction
+ {
+
+ /// <summary> Calculate the score up to this point for this doc and field</summary>
+ /// <param name="docId">The current doc
+ /// </param>
+ /// <param name="field">The field
+ /// </param>
+ /// <param name="start">The start position of the matching Span
+ /// </param>
+ /// <param name="end">The end position of the matching Span
+ /// </param>
+ /// <param name="numPayloadsSeen">The number of payloads seen so far
+ /// </param>
+ /// <param name="currentScore">The current score so far
+ /// </param>
+ /// <param name="currentPayloadScore">The score for the current payload
+ /// </param>
+ /// <returns> The new current Score
+ ///
+ /// </returns>
+ /// <seealso cref="Lucene.Net.Search.Spans.Spans">
+ /// </seealso>
+ public abstract float CurrentScore(int docId, System.String field, int start, int end, int numPayloadsSeen, float currentScore, float currentPayloadScore);
+
+ /// <summary> Calculate the final score for all the payloads seen so far for this doc/field</summary>
+ /// <param name="docId">The current doc
+ /// </param>
+ /// <param name="field">The current field
+ /// </param>
+ /// <param name="numPayloadsSeen">The total number of payloads seen on this document
+ /// </param>
+ /// <param name="payloadScore">The raw score for those payloads
+ /// </param>
+ /// <returns> The final score for the payloads
+ /// </returns>
+ public abstract float DocScore(int docId, System.String field, int numPayloadsSeen, float payloadScore);
+
+ abstract public override int GetHashCode();
+
+ abstract public override bool Equals(System.Object o);
+ }
+} \ No newline at end of file
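
As an illustration of this contract, a minimal hypothetical implementation that keeps only the first payload score seen per doc/field could look like the sketch below (not part of this commit; the built-in Min/Max/Average functions follow the same shape, and the fragment assumes the same usings as the file above):

    [Serializable]
    public class FirstPayloadFunction : PayloadFunction
    {
        public override float CurrentScore(int docId, string field, int start, int end,
                                           int numPayloadsSeen, float currentScore, float currentPayloadScore)
        {
            // keep the first payload score for this doc/field, ignore later ones
            return numPayloadsSeen == 0 ? currentPayloadScore : currentScore;
        }

        public override float DocScore(int docId, string field, int numPayloadsSeen, float payloadScore)
        {
            // mirror the built-in functions: default to 1 when no payloads were seen
            return numPayloadsSeen > 0 ? payloadScore : 1;
        }

        public override int GetHashCode() { return GetType().GetHashCode(); }
        public override bool Equals(object obj) { return obj != null && GetType() == obj.GetType(); }
    }
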
diff --git a/src/core/Search/Payloads/PayloadNearQuery.cs b/src/core/Search/Payloads/PayloadNearQuery.cs
new file mode 100644
index 0000000..6b99f59
--- /dev/null
+++ b/src/core/Search/Payloads/PayloadNearQuery.cs
@@ -0,0 +1,284 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using IndexReader = Lucene.Net.Index.IndexReader;
+using ToStringUtils = Lucene.Net.Util.ToStringUtils;
+using Explanation = Lucene.Net.Search.Explanation;
+using Scorer = Lucene.Net.Search.Scorer;
+using Searcher = Lucene.Net.Search.Searcher;
+using Similarity = Lucene.Net.Search.Similarity;
+using Weight = Lucene.Net.Search.Weight;
+using NearSpansOrdered = Lucene.Net.Search.Spans.NearSpansOrdered;
+using NearSpansUnordered = Lucene.Net.Search.Spans.NearSpansUnordered;
+using SpanNearQuery = Lucene.Net.Search.Spans.SpanNearQuery;
+using SpanQuery = Lucene.Net.Search.Spans.SpanQuery;
+using SpanScorer = Lucene.Net.Search.Spans.SpanScorer;
+using SpanWeight = Lucene.Net.Search.Spans.SpanWeight;
+
+namespace Lucene.Net.Search.Payloads
+{
+
+ /// <summary> This class is very similar to
+ /// <see cref="Lucene.Net.Search.Spans.SpanNearQuery" /> except that it factors
+ /// in the value of the payloads located at each of the positions where the
+ /// <see cref="Lucene.Net.Search.Spans.TermSpans" /> occurs.
+ /// <p/>
+ /// In order to take advantage of this, you must override
+ /// <see cref="Lucene.Net.Search.Similarity.ScorePayload" />
+ /// which returns 1 by default.
+ /// <p/>
+ /// Payload scores are aggregated using a pluggable <see cref="PayloadFunction" />.
+ ///
+ /// </summary>
+ /// <seealso cref="Lucene.Net.Search.Similarity.ScorePayload">
+ /// </seealso>
+ [Serializable]
+ public class PayloadNearQuery:SpanNearQuery, System.ICloneable
+ {
+ protected internal System.String fieldName;
+ protected internal PayloadFunction function;
+
+ public PayloadNearQuery(SpanQuery[] clauses, int slop, bool inOrder):this(clauses, slop, inOrder, new AveragePayloadFunction())
+ {
+ }
+
+ public PayloadNearQuery(SpanQuery[] clauses, int slop, bool inOrder, PayloadFunction function):base(clauses, slop, inOrder)
+ {
+ fieldName = clauses[0].Field; // all clauses must have same field
+ this.function = function;
+ }
+
+ public override Weight CreateWeight(Searcher searcher)
+ {
+ return new PayloadNearSpanWeight(this, this, searcher);
+ }
+
+ public override System.Object Clone()
+ {
+ int sz = clauses.Count;
+ SpanQuery[] newClauses = new SpanQuery[sz];
+
+ for (int i = 0; i < sz; i++)
+ {
+ newClauses[i] = clauses[i];
+ }
+ PayloadNearQuery boostingNearQuery = new PayloadNearQuery(newClauses, internalSlop, inOrder);
+ boostingNearQuery.Boost = Boost;
+ return boostingNearQuery;
+ }
+
+ public override System.String ToString(System.String field)
+ {
+ System.Text.StringBuilder buffer = new System.Text.StringBuilder();
+ buffer.Append("payloadNear([");
+ var i = clauses.GetEnumerator();
+ while (i.MoveNext())
+ {
+ SpanQuery clause = i.Current;
+ buffer.Append(clause.ToString(field));
+ if (i.MoveNext())
+ {
+ buffer.Append(", ");
+ }
+ }
+ buffer.Append("], ");
+ buffer.Append(internalSlop);
+ buffer.Append(", ");
+ buffer.Append(inOrder);
+ buffer.Append(")");
+ buffer.Append(ToStringUtils.Boost(Boost));
+ return buffer.ToString();
+ }
+
+ // @Override
+ public override int GetHashCode()
+ {
+ int prime = 31;
+ int result = base.GetHashCode();
+ result = prime * result + ((fieldName == null)?0:fieldName.GetHashCode());
+ result = prime * result + ((function == null)?0:function.GetHashCode());
+ return result;
+ }
+
+ // @Override
+ public override bool Equals(System.Object obj)
+ {
+ if (this == obj)
+ return true;
+ if (!base.Equals(obj))
+ return false;
+ if (GetType() != obj.GetType())
+ return false;
+ PayloadNearQuery other = (PayloadNearQuery) obj;
+ if (fieldName == null)
+ {
+ if (other.fieldName != null)
+ return false;
+ }
+ else if (!fieldName.Equals(other.fieldName))
+ return false;
+ if (function == null)
+ {
+ if (other.function != null)
+ return false;
+ }
+ else if (!function.Equals(other.function))
+ return false;
+ return true;
+ }
+
+ [Serializable]
+ public class PayloadNearSpanWeight:SpanWeight
+ {
+ private void InitBlock(PayloadNearQuery enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private PayloadNearQuery enclosingInstance;
+ public PayloadNearQuery Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ public PayloadNearSpanWeight(PayloadNearQuery enclosingInstance, SpanQuery query, Searcher searcher):base(query, searcher)
+ {
+ InitBlock(enclosingInstance);
+ }
+
+ public override Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer)
+ {
+ return new PayloadNearSpanScorer(enclosingInstance, internalQuery.GetSpans(reader), this, similarity, reader.Norms(internalQuery.Field));
+ }
+ }
+
+ public class PayloadNearSpanScorer:SpanScorer
+ {
+ private void InitBlock(PayloadNearQuery enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ similarity = Similarity;
+ }
+ private PayloadNearQuery enclosingInstance;
+ public PayloadNearQuery Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ new internal Lucene.Net.Search.Spans.Spans spans;
+
+ protected internal float payloadScore;
+ private int payloadsSeen;
+ internal Similarity similarity;
+
+ protected internal PayloadNearSpanScorer(PayloadNearQuery enclosingInstance, Lucene.Net.Search.Spans.Spans spans, Weight weight, Similarity similarity, byte[] norms):base(spans, weight, similarity, norms)
+ {
+ InitBlock(enclosingInstance);
+ this.spans = spans;
+ }
+
+ // Get the payloads associated with all underlying subspans
+ public virtual void GetPayloads(Lucene.Net.Search.Spans.Spans[] subSpans)
+ {
+ for (int i = 0; i < subSpans.Length; i++)
+ {
+ if (subSpans[i] is NearSpansOrdered)
+ {
+ if (((NearSpansOrdered) subSpans[i]).IsPayloadAvailable())
+ {
+ ProcessPayloads(((NearSpansOrdered) subSpans[i]).GetPayload(), subSpans[i].Start(), subSpans[i].End());
+ }
+ GetPayloads(((NearSpansOrdered) subSpans[i]).GetSubSpans());
+ }
+ else if (subSpans[i] is NearSpansUnordered)
+ {
+ if (((NearSpansUnordered) subSpans[i]).IsPayloadAvailable())
+ {
+ ProcessPayloads(((NearSpansUnordered) subSpans[i]).GetPayload(), subSpans[i].Start(), subSpans[i].End());
+ }
+ GetPayloads(((NearSpansUnordered) subSpans[i]).GetSubSpans());
+ }
+ }
+ }
+
+ /// <summary> By default, uses the <see cref="PayloadFunction" /> to score the payloads, but
+ /// can be overridden to do other things.
+ ///
+ /// </summary>
+ /// <param name="payLoads">The payloads
+ /// </param>
+ /// <param name="start">The start position of the span being scored
+ /// </param>
+ /// <param name="end">The end position of the span being scored
+ ///
+ /// </param>
+ /// <seealso cref="Spans">
+ /// </seealso>
+ protected internal virtual void ProcessPayloads(System.Collections.Generic.ICollection<byte[]> payLoads, int start, int end)
+ {
+ foreach (byte[] thePayload in payLoads)
+ {
+ payloadScore = Enclosing_Instance.function.CurrentScore(doc, Enclosing_Instance.fieldName, start, end, payloadsSeen, payloadScore, similarity.ScorePayload(doc, Enclosing_Instance.fieldName, spans.Start(), spans.End(), thePayload, 0, thePayload.Length));
+ ++payloadsSeen;
+ }
+ }
+
+ //
+ public /*protected internal*/ override bool SetFreqCurrentDoc()
+ {
+ if (!more)
+ {
+ return false;
+ }
+ Lucene.Net.Search.Spans.Spans[] spansArr = new Lucene.Net.Search.Spans.Spans[1];
+ spansArr[0] = spans;
+ payloadScore = 0;
+ payloadsSeen = 0;
+ GetPayloads(spansArr);
+ return base.SetFreqCurrentDoc();
+ }
+
+ public override float Score()
+ {
+
+ return base.Score() * Enclosing_Instance.function.DocScore(doc, Enclosing_Instance.fieldName, payloadsSeen, payloadScore);
+ }
+
+ protected internal override Explanation Explain(int doc)
+ {
+ Explanation result = new Explanation();
+ Explanation nonPayloadExpl = base.Explain(doc);
+ result.AddDetail(nonPayloadExpl);
+ Explanation payloadBoost = new Explanation();
+ result.AddDetail(payloadBoost);
+ float avgPayloadScore = (payloadsSeen > 0?(payloadScore / payloadsSeen):1);
+ payloadBoost.Value = avgPayloadScore;
+ payloadBoost.Description = "scorePayload(...)";
+ result.Value = nonPayloadExpl.Value * avgPayloadScore;
+ result.Description = "bnq, product of:";
+ return result;
+ }
+ }
+ }
+} \ No newline at end of file
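
A minimal usage sketch for PayloadNearQuery (the field name, terms, and searcher are hypothetical; it assumes an IndexSearcher over an index whose analysis chain stored payloads and a Similarity whose ScorePayload has been overridden):

    SpanQuery[] clauses =
    {
        new SpanTermQuery(new Term("body", "payload")),
        new SpanTermQuery(new Term("body", "scoring"))
    };
    // slop 0, in order; payload scores from the matching spans are averaged
    Query q = new PayloadNearQuery(clauses, 0, true, new AveragePayloadFunction());
    TopDocs hits = searcher.Search(q, 10);
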
diff --git a/src/core/Search/Payloads/PayloadSpanUtil.cs b/src/core/Search/Payloads/PayloadSpanUtil.cs
new file mode 100644
index 0000000..f7cd2aa
--- /dev/null
+++ b/src/core/Search/Payloads/PayloadSpanUtil.cs
@@ -0,0 +1,211 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using IndexReader = Lucene.Net.Index.IndexReader;
+using Term = Lucene.Net.Index.Term;
+using BooleanClause = Lucene.Net.Search.BooleanClause;
+using BooleanQuery = Lucene.Net.Search.BooleanQuery;
+using DisjunctionMaxQuery = Lucene.Net.Search.DisjunctionMaxQuery;
+using FilteredQuery = Lucene.Net.Search.FilteredQuery;
+using MultiPhraseQuery = Lucene.Net.Search.MultiPhraseQuery;
+using PhraseQuery = Lucene.Net.Search.PhraseQuery;
+using Query = Lucene.Net.Search.Query;
+using TermQuery = Lucene.Net.Search.TermQuery;
+using SpanNearQuery = Lucene.Net.Search.Spans.SpanNearQuery;
+using SpanOrQuery = Lucene.Net.Search.Spans.SpanOrQuery;
+using SpanQuery = Lucene.Net.Search.Spans.SpanQuery;
+using SpanTermQuery = Lucene.Net.Search.Spans.SpanTermQuery;
+
+namespace Lucene.Net.Search.Payloads
+{
+
+	/// <summary> Experimental class that gathers the set of payloads for most standard Lucene queries.
+	/// Operates like the Highlighter: the IndexReader should contain only the document of interest
+	/// (a MemoryIndex works best).
+ ///
+ /// <p/>
+ /// <font color="#FF0000">
+ /// WARNING: The status of the <b>Payloads</b> feature is experimental.
+ /// The APIs introduced here might change in the future and will not be
+ /// supported anymore in such a case.</font>
+ ///
+ /// </summary>
+ public class PayloadSpanUtil
+ {
+ private IndexReader reader;
+
+ /// <param name="reader">that contains doc with payloads to extract
+ /// </param>
+ public PayloadSpanUtil(IndexReader reader)
+ {
+ this.reader = reader;
+ }
+
+ /// <summary> Query should be rewritten for wild/fuzzy support.
+ ///
+ /// </summary>
+ /// <param name="query">
+ /// </param>
+ /// <returns> payloads Collection
+ /// </returns>
+ /// <throws> IOException </throws>
+ public virtual ICollection<byte[]> GetPayloadsForQuery(Query query)
+ {
+ ICollection<byte[]> payloads = new List<byte[]>();
+ QueryToSpanQuery(query, payloads);
+ return payloads;
+ }
+
+ private void QueryToSpanQuery(Query query, ICollection<byte[]> payloads)
+ {
+ if (query is BooleanQuery)
+ {
+ BooleanClause[] queryClauses = ((BooleanQuery) query).GetClauses();
+
+ for (int i = 0; i < queryClauses.Length; i++)
+ {
+ if (!queryClauses[i].IsProhibited)
+ {
+ QueryToSpanQuery(queryClauses[i].Query, payloads);
+ }
+ }
+ }
+ else if (query is PhraseQuery)
+ {
+ Term[] phraseQueryTerms = ((PhraseQuery) query).GetTerms();
+ SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.Length];
+ for (int i = 0; i < phraseQueryTerms.Length; i++)
+ {
+ clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
+ }
+
+ int slop = ((PhraseQuery) query).Slop;
+ bool inorder = false;
+
+ if (slop == 0)
+ {
+ inorder = true;
+ }
+
+ SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder);
+ sp.Boost = query.Boost;
+ GetPayloads(payloads, sp);
+ }
+ else if (query is TermQuery)
+ {
+ SpanTermQuery stq = new SpanTermQuery(((TermQuery) query).Term);
+ stq.Boost = query.Boost;
+ GetPayloads(payloads, stq);
+ }
+ else if (query is SpanQuery)
+ {
+ GetPayloads(payloads, (SpanQuery) query);
+ }
+ else if (query is FilteredQuery)
+ {
+ QueryToSpanQuery(((FilteredQuery) query).Query, payloads);
+ }
+ else if (query is DisjunctionMaxQuery)
+ {
+
+ for (IEnumerator<Query> iterator = ((DisjunctionMaxQuery)query).GetEnumerator(); iterator.MoveNext(); )
+ {
+ QueryToSpanQuery(iterator.Current, payloads);
+ }
+ }
+ else if (query is MultiPhraseQuery)
+ {
+ MultiPhraseQuery mpq = (MultiPhraseQuery) query;
+ System.Collections.Generic.IList<Term[]> termArrays = mpq.GetTermArrays();
+ int[] positions = mpq.GetPositions();
+ if (positions.Length > 0)
+ {
+
+ int maxPosition = positions[positions.Length - 1];
+ for (int i = 0; i < positions.Length - 1; ++i)
+ {
+ if (positions[i] > maxPosition)
+ {
+ maxPosition = positions[i];
+ }
+ }
+
+ IList<Query>[] disjunctLists = new IList<Query>[maxPosition + 1];
+ int distinctPositions = 0;
+
+ for (int i = 0; i < termArrays.Count; ++i)
+ {
+ Term[] termArray = termArrays[i];
+ IList<Query> disjuncts = disjunctLists[positions[i]];
+ if (disjuncts == null)
+ {
+ disjuncts = (disjunctLists[positions[i]] = new List<Query>(termArray.Length));
+ ++distinctPositions;
+ }
+ foreach(Term term in termArray)
+ {
+ disjuncts.Add(new SpanTermQuery(term));
+ }
+ }
+
+ int positionGaps = 0;
+ int position = 0;
+ SpanQuery[] clauses = new SpanQuery[distinctPositions];
+ for (int i = 0; i < disjunctLists.Length; ++i)
+ {
+ IList<Query> disjuncts = disjunctLists[i];
+ if (disjuncts != null)
+ {
+ clauses[position++] = new SpanOrQuery((SpanQuery[]) (disjuncts.ToArray()));
+ }
+ else
+ {
+ ++positionGaps;
+ }
+ }
+
+ int slop = mpq.Slop;
+ bool inorder = (slop == 0);
+
+ SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
+ sp.Boost = query.Boost;
+ GetPayloads(payloads, sp);
+ }
+ }
+ }
+
+ private void GetPayloads(ICollection<byte[]> payloads, SpanQuery query)
+ {
+ Spans.Spans spans = query.GetSpans(reader);
+
+ while (spans.Next() == true)
+ {
+ if (spans.IsPayloadAvailable())
+ {
+ ICollection<byte[]> payload = spans.GetPayload();
+ foreach (byte[] bytes in payload)
+ {
+ payloads.Add(bytes);
+ }
+ }
+ }
+ }
+ }
+} \ No newline at end of file
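
A hedged usage sketch for PayloadSpanUtil (the reader variable and field are hypothetical; as the summary notes, the reader should see only the document of interest):

    PayloadSpanUtil psu = new PayloadSpanUtil(reader);
    Query q = new TermQuery(new Term("body", "payload"));   // rewrite the query first for wildcard/fuzzy support
    ICollection<byte[]> payloads = psu.GetPayloadsForQuery(q);
    foreach (byte[] payload in payloads)
    {
        // each byte[] is whatever the indexing chain stored as the payload at a matching position
    }
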
diff --git a/src/core/Search/Payloads/PayloadTermQuery.cs b/src/core/Search/Payloads/PayloadTermQuery.cs
new file mode 100644
index 0000000..d6ec5bd
--- /dev/null
+++ b/src/core/Search/Payloads/PayloadTermQuery.cs
@@ -0,0 +1,255 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using IndexReader = Lucene.Net.Index.IndexReader;
+using Term = Lucene.Net.Index.Term;
+using TermPositions = Lucene.Net.Index.TermPositions;
+using ComplexExplanation = Lucene.Net.Search.ComplexExplanation;
+using Explanation = Lucene.Net.Search.Explanation;
+using Scorer = Lucene.Net.Search.Scorer;
+using Searcher = Lucene.Net.Search.Searcher;
+using Similarity = Lucene.Net.Search.Similarity;
+using Weight = Lucene.Net.Search.Weight;
+using SpanScorer = Lucene.Net.Search.Spans.SpanScorer;
+using SpanTermQuery = Lucene.Net.Search.Spans.SpanTermQuery;
+using SpanWeight = Lucene.Net.Search.Spans.SpanWeight;
+using TermSpans = Lucene.Net.Search.Spans.TermSpans;
+
+namespace Lucene.Net.Search.Payloads
+{
+
+ /// <summary> This class is very similar to
+ /// <see cref="Lucene.Net.Search.Spans.SpanTermQuery" /> except that it factors
+ /// in the value of the payload located at each of the positions where the
+ /// <see cref="Lucene.Net.Index.Term" /> occurs.
+ /// <p/>
+ /// In order to take advantage of this, you must override
+ /// <see cref="Lucene.Net.Search.Similarity.ScorePayload(int, String, int, int, byte[],int,int)" />
+ /// which returns 1 by default.
+ /// <p/>
+ /// Payload scores are aggregated using a pluggable <see cref="PayloadFunction" />.
+ ///
+ /// </summary>
+ [Serializable]
+ public class PayloadTermQuery:SpanTermQuery
+ {
+ protected internal PayloadFunction function;
+ private bool includeSpanScore;
+
+ public PayloadTermQuery(Term term, PayloadFunction function):this(term, function, true)
+ {
+ }
+
+ public PayloadTermQuery(Term term, PayloadFunction function, bool includeSpanScore):base(term)
+ {
+ this.function = function;
+ this.includeSpanScore = includeSpanScore;
+ }
+
+ public override Weight CreateWeight(Searcher searcher)
+ {
+ return new PayloadTermWeight(this, this, searcher);
+ }
+
+ [Serializable]
+ protected internal class PayloadTermWeight:SpanWeight
+ {
+ private void InitBlock(PayloadTermQuery enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private PayloadTermQuery enclosingInstance;
+ public PayloadTermQuery Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+
+ public PayloadTermWeight(PayloadTermQuery enclosingInstance, PayloadTermQuery query, Searcher searcher):base(query, searcher)
+ {
+ InitBlock(enclosingInstance);
+ }
+
+ public override Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer)
+ {
+ return new PayloadTermSpanScorer(this, (TermSpans) internalQuery.GetSpans(reader), this, similarity, reader.Norms(internalQuery.Field));
+ }
+
+ protected internal class PayloadTermSpanScorer:SpanScorer
+ {
+ private void InitBlock(PayloadTermWeight enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private PayloadTermWeight enclosingInstance;
+ public PayloadTermWeight Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ // TODO: is this the best way to allocate this?
+ protected internal byte[] payload = new byte[256];
+ protected internal TermPositions positions;
+ protected internal float payloadScore;
+ protected internal int payloadsSeen;
+
+ public PayloadTermSpanScorer(PayloadTermWeight enclosingInstance, TermSpans spans, Weight weight, Similarity similarity, byte[] norms):base(spans, weight, similarity, norms)
+ {
+ InitBlock(enclosingInstance);
+ positions = spans.Positions;
+ }
+
+ public /*protected internal*/ override bool SetFreqCurrentDoc()
+ {
+ if (!more)
+ {
+ return false;
+ }
+ doc = spans.Doc();
+ freq = 0.0f;
+ payloadScore = 0;
+ payloadsSeen = 0;
+ Similarity similarity1 = Similarity;
+ while (more && doc == spans.Doc())
+ {
+ int matchLength = spans.End() - spans.Start();
+
+ freq += similarity1.SloppyFreq(matchLength);
+ ProcessPayload(similarity1);
+
+ more = spans.Next(); // this moves positions to the next match in this
+ // document
+ }
+ return more || (freq != 0);
+ }
+
+ protected internal virtual void ProcessPayload(Similarity similarity)
+ {
+ if (positions.IsPayloadAvailable)
+ {
+ payload = positions.GetPayload(payload, 0);
+ payloadScore = Enclosing_Instance.Enclosing_Instance.function.CurrentScore(doc, Enclosing_Instance.Enclosing_Instance.internalTerm.Field, spans.Start(), spans.End(), payloadsSeen, payloadScore, similarity.ScorePayload(doc, Enclosing_Instance.Enclosing_Instance.internalTerm.Field, spans.Start(), spans.End(), payload, 0, positions.PayloadLength));
+ payloadsSeen++;
+ }
+ else
+ {
+ // zero out the payload?
+ }
+ }
+
+ /// <summary> </summary>
+ /// <returns> <see cref="GetSpanScore()" /> * <see cref="GetPayloadScore()" />
+ /// </returns>
+ /// <throws> IOException </throws>
+ public override float Score()
+ {
+
+ return Enclosing_Instance.Enclosing_Instance.includeSpanScore?GetSpanScore() * GetPayloadScore():GetPayloadScore();
+ }
+
+ /// <summary> Returns the SpanScorer score only.
+ /// <p/>
+			/// Should not be overridden without good cause!
+ ///
+ /// </summary>
+ /// <returns> the score for just the Span part w/o the payload
+ /// </returns>
+ /// <throws> IOException </throws>
+ /// <summary>
+ /// </summary>
+ /// <seealso cref="Score()">
+ /// </seealso>
+ [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
+ protected internal virtual float GetSpanScore()
+ {
+ return base.Score();
+ }
+
+ /// <summary> The score for the payload
+ ///
+ /// </summary>
+ /// <returns> The score, as calculated by
+ /// <see cref="PayloadFunction.DocScore(int, String, int, float)" />
+ /// </returns>
+ [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
+ protected internal virtual float GetPayloadScore()
+ {
+ return Enclosing_Instance.Enclosing_Instance.function.DocScore(doc, Enclosing_Instance.Enclosing_Instance.internalTerm.Field, payloadsSeen, payloadScore);
+ }
+
+ protected internal override Explanation Explain(int doc)
+ {
+ ComplexExplanation result = new ComplexExplanation();
+ Explanation nonPayloadExpl = base.Explain(doc);
+ result.AddDetail(nonPayloadExpl);
+ // QUESTION: Is there a way to avoid this skipTo call? We need to know
+ // whether to load the payload or not
+ Explanation payloadBoost = new Explanation();
+ result.AddDetail(payloadBoost);
+
+ float payloadScore = GetPayloadScore();
+ payloadBoost.Value = payloadScore;
+ // GSI: I suppose we could toString the payload, but I don't think that
+ // would be a good idea
+ payloadBoost.Description = "scorePayload(...)";
+ result.Value = nonPayloadExpl.Value * payloadScore;
+ result.Description = "btq, product of:";
+ result.Match = nonPayloadExpl.Value == 0?false:true; // LUCENE-1303
+ return result;
+ }
+ }
+ }
+
+ public override int GetHashCode()
+ {
+ int prime = 31;
+ int result = base.GetHashCode();
+ result = prime * result + ((function == null)?0:function.GetHashCode());
+ result = prime * result + (includeSpanScore?1231:1237);
+ return result;
+ }
+
+ public override bool Equals(System.Object obj)
+ {
+ if (this == obj)
+ return true;
+ if (!base.Equals(obj))
+ return false;
+ if (GetType() != obj.GetType())
+ return false;
+ PayloadTermQuery other = (PayloadTermQuery) obj;
+ if (function == null)
+ {
+ if (other.function != null)
+ return false;
+ }
+ else if (!function.Equals(other.function))
+ return false;
+ if (includeSpanScore != other.includeSpanScore)
+ return false;
+ return true;
+ }
+ }
+} \ No newline at end of file
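
Since ScorePayload returns 1 by default, the payload only influences ranking once a Similarity override is installed. A hedged sketch, assuming the searcher exposes a settable Similarity (as in this port) and that the index stored a single weight byte per position (the encoding and field name are hypothetical):

    class PayloadSimilarity : DefaultSimilarity
    {
        public override float ScorePayload(int docId, string fieldName, int start, int end,
                                           byte[] payload, int offset, int length)
        {
            return length > 0 ? payload[offset] : 1.0f;   // hypothetical 0-255 weight written at index time
        }
    }

    // ...
    searcher.Similarity = new PayloadSimilarity();
    Query q = new PayloadTermQuery(new Term("body", "payload"), new MinPayloadFunction());
    TopDocs hits = searcher.Search(q, 10);
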
diff --git a/src/core/Search/PhrasePositions.cs b/src/core/Search/PhrasePositions.cs
new file mode 100644
index 0000000..5614aed
--- /dev/null
+++ b/src/core/Search/PhrasePositions.cs
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using Lucene.Net.Index;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> Position of a term in a document that takes into account the term offset within the phrase. </summary>
+ sealed class PhrasePositions
+ {
+ internal int doc; // current doc
+ internal int position; // position in doc
+ internal int count; // remaining pos in this doc
+ internal int offset; // position in phrase
+ internal TermPositions tp; // stream of positions
+ internal PhrasePositions next; // used to make lists
+ internal bool repeats; // there's other pp for same term (e.g. query="1st word 2nd word"~1)
+
+ internal PhrasePositions(TermPositions t, int o)
+ {
+ tp = t;
+ offset = o;
+ }
+
+ internal bool Next()
+ {
+ // increments to next doc
+ if (!tp.Next())
+ {
+ tp.Close(); // close stream
+ doc = System.Int32.MaxValue; // sentinel value
+ return false;
+ }
+ doc = tp.Doc;
+ position = 0;
+ return true;
+ }
+
+ internal bool SkipTo(int target)
+ {
+ if (!tp.SkipTo(target))
+ {
+ tp.Close(); // close stream
+ doc = System.Int32.MaxValue; // sentinel value
+ return false;
+ }
+ doc = tp.Doc;
+ position = 0;
+ return true;
+ }
+
+
+ internal void FirstPosition()
+ {
+ count = tp.Freq; // read first pos
+ NextPosition();
+ }
+
+		/// <summary> Go to the next location of this term in the current document, and set
+ /// <c>position</c> as <c>location - offset</c>, so that a
+ /// matching exact phrase is easily identified when all PhrasePositions
+ /// have exactly the same <c>position</c>.
+ /// </summary>
+ internal bool NextPosition()
+ {
+ if (count-- > 0)
+ {
+ // read subsequent pos's
+ position = tp.NextPosition() - offset;
+ return true;
+ }
+ else
+ return false;
+ }
+ }
+} \ No newline at end of file
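
A quick worked example of the offset adjustment described above: for the phrase "new york", "new" gets offset 0 and "york" gets offset 1. If a document contains "new" at term position 12 and "york" at position 13, NextPosition() reports 12 - 0 = 12 and 13 - 1 = 12 respectively, so the exact match is detected simply by all PhrasePositions agreeing on the same adjusted position.
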
diff --git a/src/core/Search/PhraseQuery.cs b/src/core/Search/PhraseQuery.cs
new file mode 100644
index 0000000..9e96180
--- /dev/null
+++ b/src/core/Search/PhraseQuery.cs
@@ -0,0 +1,370 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Support;
+using IndexReader = Lucene.Net.Index.IndexReader;
+using Term = Lucene.Net.Index.Term;
+using TermPositions = Lucene.Net.Index.TermPositions;
+using ToStringUtils = Lucene.Net.Util.ToStringUtils;
+using IDFExplanation = Lucene.Net.Search.Explanation.IDFExplanation;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary>A Query that matches documents containing a particular sequence of terms.
+ /// A PhraseQuery is built by QueryParser for input like <c>"new york"</c>.
+ ///
+ /// <p/>This query may be combined with other terms or queries with a <see cref="BooleanQuery" />.
+ /// </summary>
+ [Serializable]
+ public class PhraseQuery:Query
+ {
+ private System.String field;
+ private EquatableList<Term> terms = new EquatableList<Term>(4);
+ private EquatableList<int> positions = new EquatableList<int>(4);
+ private int maxPosition = 0;
+ private int slop = 0;
+
+ /// <summary>Constructs an empty phrase query. </summary>
+ public PhraseQuery()
+ {
+ }
+
+ /// <summary>Sets the number of other words permitted between words in query phrase.
+ /// If zero, then this is an exact phrase search. For larger values this works
+ /// like a <c>WITHIN</c> or <c>NEAR</c> operator.
+ /// <p/>The slop is in fact an edit-distance, where the units correspond to
+ /// moves of terms in the query phrase out of position. For example, to switch
+ /// the order of two words requires two moves (the first move places the words
+ /// atop one another), so to permit re-orderings of phrases, the slop must be
+ /// at least two.
+ /// <p/>More exact matches are scored higher than sloppier matches, thus search
+ /// results are sorted by exactness.
+ /// <p/>The slop is zero by default, requiring exact matches.
+ /// </summary>
+ public virtual int Slop
+ {
+ get { return slop; }
+ set { slop = value; }
+ }
+
+ /// <summary> Adds a term to the end of the query phrase.
+ /// The relative position of the term is the one immediately after the last term added.
+ /// </summary>
+ public virtual void Add(Term term)
+ {
+ int position = 0;
+ if (positions.Count > 0)
+ position = positions[positions.Count - 1] + 1;
+
+ Add(term, position);
+ }
+
+ /// <summary> Adds a term to the end of the query phrase.
+ /// The relative position of the term within the phrase is specified explicitly.
+ /// This allows e.g. phrases with more than one term at the same position
+ /// or phrases with gaps (e.g. in connection with stopwords).
+ ///
+ /// </summary>
+ /// <param name="term">
+ /// </param>
+ /// <param name="position">
+ /// </param>
+ public virtual void Add(Term term, int position)
+ {
+ if (terms.Count == 0)
+ field = term.Field;
+ else if ((System.Object) term.Field != (System.Object) field)
+ {
+ throw new System.ArgumentException("All phrase terms must be in the same field: " + term);
+ }
+
+ terms.Add(term);
+ positions.Add(position);
+ if (position > maxPosition)
+ maxPosition = position;
+ }
+
+ /// <summary>Returns the set of terms in this phrase. </summary>
+ public virtual Term[] GetTerms()
+ {
+ return terms.ToArray();
+ }
+
+ /// <summary> Returns the relative positions of terms in this phrase.</summary>
+ public virtual int[] GetPositions()
+ {
+ int[] result = new int[positions.Count];
+ for (int i = 0; i < positions.Count; i++)
+ result[i] = positions[i];
+ return result;
+ }
+
+ [Serializable]
+ private class PhraseWeight:Weight
+ {
+ private void InitBlock(PhraseQuery enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private PhraseQuery enclosingInstance;
+ public PhraseQuery Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ private Similarity similarity;
+ private float value_Renamed;
+ private float idf;
+ private float queryNorm;
+ private float queryWeight;
+ private IDFExplanation idfExp;
+
+ public PhraseWeight(PhraseQuery enclosingInstance, Searcher searcher)
+ {
+ InitBlock(enclosingInstance);
+ this.similarity = Enclosing_Instance.GetSimilarity(searcher);
+
+ idfExp = similarity.IdfExplain(Enclosing_Instance.terms, searcher);
+ idf = idfExp.Idf;
+ }
+
+ public override System.String ToString()
+ {
+ return "weight(" + Enclosing_Instance + ")";
+ }
+
+ public override Query Query
+ {
+ get { return Enclosing_Instance; }
+ }
+
+ public override float Value
+ {
+ get { return value_Renamed; }
+ }
+
+ public override float GetSumOfSquaredWeights()
+ {
+ queryWeight = idf*Enclosing_Instance.Boost; // compute query weight
+ return queryWeight*queryWeight; // square it
+ }
+
+ public override void Normalize(float queryNorm)
+ {
+ this.queryNorm = queryNorm;
+ queryWeight *= queryNorm; // normalize query weight
+ value_Renamed = queryWeight * idf; // idf for document
+ }
+
+ public override Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer)
+ {
+ if (Enclosing_Instance.terms.Count == 0)
+ // optimize zero-term case
+ return null;
+
+ TermPositions[] tps = new TermPositions[Enclosing_Instance.terms.Count];
+ for (int i = 0; i < Enclosing_Instance.terms.Count; i++)
+ {
+ TermPositions p = reader.TermPositions(Enclosing_Instance.terms[i]);
+ if (p == null)
+ return null;
+ tps[i] = p;
+ }
+
+ if (Enclosing_Instance.slop == 0)
+ // optimize exact case
+ return new ExactPhraseScorer(this, tps, Enclosing_Instance.GetPositions(), similarity, reader.Norms(Enclosing_Instance.field));
+ else
+ return new SloppyPhraseScorer(this, tps, Enclosing_Instance.GetPositions(), similarity, Enclosing_Instance.slop, reader.Norms(Enclosing_Instance.field));
+ }
+
+ public override Explanation Explain(IndexReader reader, int doc)
+ {
+
+ Explanation result = new Explanation();
+ result.Description = "weight(" + Query + " in " + doc + "), product of:";
+
+ System.Text.StringBuilder docFreqs = new System.Text.StringBuilder();
+ System.Text.StringBuilder query = new System.Text.StringBuilder();
+ query.Append('\"');
+ docFreqs.Append(idfExp.Explain());
+ for (int i = 0; i < Enclosing_Instance.terms.Count; i++)
+ {
+ if (i != 0)
+ {
+ query.Append(" ");
+ }
+
+ Term term = Enclosing_Instance.terms[i];
+
+ query.Append(term.Text);
+ }
+ query.Append('\"');
+
+ Explanation idfExpl = new Explanation(idf, "idf(" + Enclosing_Instance.field + ":" + docFreqs + ")");
+
+ // explain query weight
+ Explanation queryExpl = new Explanation();
+ queryExpl.Description = "queryWeight(" + Query + "), product of:";
+
+ Explanation boostExpl = new Explanation(Enclosing_Instance.Boost, "boost");
+ if (Enclosing_Instance.Boost != 1.0f)
+ queryExpl.AddDetail(boostExpl);
+ queryExpl.AddDetail(idfExpl);
+
+ Explanation queryNormExpl = new Explanation(queryNorm, "queryNorm");
+ queryExpl.AddDetail(queryNormExpl);
+
+ queryExpl.Value = boostExpl.Value * idfExpl.Value * queryNormExpl.Value;
+
+ result.AddDetail(queryExpl);
+
+ // explain field weight
+ Explanation fieldExpl = new Explanation();
+ fieldExpl.Description = "fieldWeight(" + Enclosing_Instance.field + ":" + query + " in " + doc + "), product of:";
+
+ PhraseScorer scorer = (PhraseScorer)Scorer(reader, true, false);
+ if (scorer == null)
+ {
+ return new Explanation(0.0f, "no matching docs");
+ }
+ Explanation tfExplanation = new Explanation();
+ int d = scorer.Advance(doc);
+ float phraseFreq = (d == doc) ? scorer.CurrentFreq() : 0.0f;
+ tfExplanation.Value = similarity.Tf(phraseFreq);
+ tfExplanation.Description = "tf(phraseFreq=" + phraseFreq + ")";
+
+ fieldExpl.AddDetail(tfExplanation);
+ fieldExpl.AddDetail(idfExpl);
+
+ Explanation fieldNormExpl = new Explanation();
+ byte[] fieldNorms = reader.Norms(Enclosing_Instance.field);
+ float fieldNorm = fieldNorms != null?Similarity.DecodeNorm(fieldNorms[doc]):1.0f;
+ fieldNormExpl.Value = fieldNorm;
+ fieldNormExpl.Description = "fieldNorm(field=" + Enclosing_Instance.field + ", doc=" + doc + ")";
+ fieldExpl.AddDetail(fieldNormExpl);
+
+ fieldExpl.Value = tfExplanation.Value * idfExpl.Value * fieldNormExpl.Value;
+
+ result.AddDetail(fieldExpl);
+
+ // combine them
+ result.Value = queryExpl.Value * fieldExpl.Value;
+
+ if (queryExpl.Value == 1.0f)
+ return fieldExpl;
+
+ return result;
+ }
+ }
+
+ public override Weight CreateWeight(Searcher searcher)
+ {
+ if (terms.Count == 1)
+ {
+ // optimize one-term case
+ Term term = terms[0];
+ Query termQuery = new TermQuery(term);
+ termQuery.Boost = Boost;
+ return termQuery.CreateWeight(searcher);
+ }
+ return new PhraseWeight(this, searcher);
+ }
+
+ /// <seealso cref="Lucene.Net.Search.Query.ExtractTerms(System.Collections.Generic.ISet{Term})">
+ /// </seealso>
+ public override void ExtractTerms(System.Collections.Generic.ISet<Term> queryTerms)
+ {
+ queryTerms.UnionWith(terms);
+ }
+
+ /// <summary>Prints a user-readable version of this query. </summary>
+ public override System.String ToString(System.String f)
+ {
+ System.Text.StringBuilder buffer = new System.Text.StringBuilder();
+ if (field != null && !field.Equals(f))
+ {
+ buffer.Append(field);
+ buffer.Append(":");
+ }
+
+ buffer.Append("\"");
+ System.String[] pieces = new System.String[maxPosition + 1];
+ for (int i = 0; i < terms.Count; i++)
+ {
+ int pos = positions[i];
+ System.String s = pieces[pos];
+ if (s == null)
+ {
+ s = terms[i].Text;
+ }
+ else
+ {
+ s = s + "|" + terms[i].Text;
+ }
+ pieces[pos] = s;
+ }
+ for (int i = 0; i < pieces.Length; i++)
+ {
+ if (i > 0)
+ {
+ buffer.Append(' ');
+ }
+ System.String s = pieces[i];
+ if (s == null)
+ {
+ buffer.Append('?');
+ }
+ else
+ {
+ buffer.Append(s);
+ }
+ }
+ buffer.Append("\"");
+
+ if (slop != 0)
+ {
+ buffer.Append("~");
+ buffer.Append(slop);
+ }
+
+ buffer.Append(ToStringUtils.Boost(Boost));
+
+ return buffer.ToString();
+ }
+
+ /// <summary>Returns true iff <c>o</c> is equal to this. </summary>
+ public override bool Equals(System.Object o)
+ {
+ if (!(o is PhraseQuery))
+ return false;
+ PhraseQuery other = (PhraseQuery) o;
+ return (this.Boost == other.Boost) && (this.slop == other.slop) && this.terms.Equals(other.terms) && this.positions.Equals(other.positions);
+ }
+
+ /// <summary>Returns a hash code value for this object.</summary>
+ public override int GetHashCode()
+ {
+ return BitConverter.ToInt32(BitConverter.GetBytes(Boost), 0) ^ slop ^ terms.GetHashCode() ^ positions.GetHashCode();
+ }
+ }
+} \ No newline at end of file
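
A minimal usage sketch for PhraseQuery (field and terms are hypothetical; searcher is assumed to be an IndexSearcher over the target index):

    PhraseQuery pq = new PhraseQuery();
    pq.Add(new Term("body", "new"));
    pq.Add(new Term("body", "york"));
    pq.Slop = 2;                               // allow re-ordering / small gaps between the terms
    TopDocs hits = searcher.Search(pq, 10);
    // pq.ToString("body") prints: "new york"~2
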
diff --git a/src/core/Search/PhraseQueue.cs b/src/core/Search/PhraseQueue.cs
new file mode 100644
index 0000000..d603df5
--- /dev/null
+++ b/src/core/Search/PhraseQueue.cs
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Search
+{
+
+ sealed class PhraseQueue : PriorityQueue<PhrasePositions>
+ {
+ internal PhraseQueue(int size)
+ {
+ Initialize(size);
+ }
+
+ public override bool LessThan(PhrasePositions pp1, PhrasePositions pp2)
+ {
+ if (pp1.doc == pp2.doc)
+ if (pp1.position == pp2.position)
+ // same doc and pp.position, so decide by actual term positions.
+ // rely on: pp.position == tp.position - offset.
+ return pp1.offset < pp2.offset;
+ else
+ return pp1.position < pp2.position;
+ else
+ return pp1.doc < pp2.doc;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/PhraseScorer.cs b/src/core/Search/PhraseScorer.cs
new file mode 100644
index 0000000..59c9771
--- /dev/null
+++ b/src/core/Search/PhraseScorer.cs
@@ -0,0 +1,224 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using TermPositions = Lucene.Net.Index.TermPositions;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary>Expert: Scoring functionality for phrase queries.
+ /// <br/>A document is considered matching if it contains the phrase-query terms
+ /// at "valid" positons. What "valid positions" are
+ /// depends on the type of the phrase query: for an exact phrase query terms are required
+ /// to appear in adjacent locations, while for a sloppy phrase query some distance between
+ /// the terms is allowed. The abstract method <see cref="PhraseFreq()" /> of extending classes
+ /// is invoked for each document containing all the phrase query terms, in order to
+	/// compute the frequency of the phrase query in that document. A non-zero frequency
+ /// means a match.
+ /// </summary>
+ abstract class PhraseScorer:Scorer
+ {
+ private Weight weight;
+ protected internal byte[] norms;
+ protected internal float value_Renamed;
+
+ private bool firstTime = true;
+ private bool more = true;
+ protected internal PhraseQueue pq;
+ protected internal PhrasePositions first, last;
+
+		private float freq; // phrase frequency in the current doc as computed by PhraseFreq().
+
+ internal PhraseScorer(Weight weight, TermPositions[] tps, int[] offsets, Similarity similarity, byte[] norms):base(similarity)
+ {
+ this.norms = norms;
+ this.weight = weight;
+ this.value_Renamed = weight.Value;
+
+ // convert tps to a list of phrase positions.
+ // note: phrase-position differs from term-position in that its position
+ // reflects the phrase offset: pp.pos = tp.pos - offset.
+			// this makes it easy to identify a matching (exact) phrase
+ // when all PhrasePositions have exactly the same position.
+ for (int i = 0; i < tps.Length; i++)
+ {
+ PhrasePositions pp = new PhrasePositions(tps[i], offsets[i]);
+ if (last != null)
+ {
+ // add next to end of list
+ last.next = pp;
+ }
+ else
+ {
+ first = pp;
+ }
+ last = pp;
+ }
+
+ pq = new PhraseQueue(tps.Length); // construct empty pq
+ first.doc = - 1;
+ }
+
+ public override int DocID()
+ {
+ return first.doc;
+ }
+
+ public override int NextDoc()
+ {
+ if (firstTime)
+ {
+ Init();
+ firstTime = false;
+ }
+ else if (more)
+ {
+ more = last.Next(); // trigger further scanning
+ }
+ if (!DoNext())
+ {
+ first.doc = NO_MORE_DOCS;
+ }
+ return first.doc;
+ }
+
+ // next without initial increment
+ private bool DoNext()
+ {
+ while (more)
+ {
+ while (more && first.doc < last.doc)
+ {
+ // find doc w/ all the terms
+ more = first.SkipTo(last.doc); // skip first upto last
+ FirstToLast(); // and move it to the end
+ }
+
+ if (more)
+ {
+ // found a doc with all of the terms
+ freq = PhraseFreq(); // check for phrase
+ if (freq == 0.0f)
+ // no match
+ more = last.Next();
+ // trigger further scanning
+ else
+ return true; // found a match
+ }
+ }
+ return false; // no more matches
+ }
+
+ public override float Score()
+ {
+ //System.out.println("scoring " + first.doc);
+ float raw = Similarity.Tf(freq) * value_Renamed; // raw score
+ return norms == null?raw:raw * Similarity.DecodeNorm(norms[first.doc]); // normalize
+ }
+
+ public override int Advance(int target)
+ {
+ firstTime = false;
+ for (PhrasePositions pp = first; more && pp != null; pp = pp.next)
+ {
+ more = pp.SkipTo(target);
+ }
+ if (more)
+ {
+ Sort(); // re-sort
+ }
+ if (!DoNext())
+ {
+ first.doc = NO_MORE_DOCS;
+ }
+ return first.doc;
+ }
+
+ /// <summary>
+ /// Phrase frequency in current doc as computed by PhraseFreq()
+ /// </summary>
+ /// <returns></returns>
+ public float CurrentFreq()
+ {
+ return freq;
+ }
+
+ /// <summary> For a document containing all the phrase query terms, compute the
+ /// frequency of the phrase in that document.
+		/// A non-zero frequency means a match.
+		/// <br/>Note that containing all phrase terms does not guarantee a match - they have to be found in matching locations.
+ /// </summary>
+ /// <returns> frequency of the phrase in current doc, 0 if not found.
+ /// </returns>
+ protected internal abstract float PhraseFreq();
+
+ private void Init()
+ {
+ for (PhrasePositions pp = first; more && pp != null; pp = pp.next)
+ {
+ more = pp.Next();
+ }
+ if (more)
+ {
+ Sort();
+ }
+ }
+
+ private void Sort()
+ {
+ pq.Clear();
+ for (PhrasePositions pp = first; pp != null; pp = pp.next)
+ {
+ pq.Add(pp);
+ }
+ PqToList();
+ }
+
+ protected internal void PqToList()
+ {
+ last = first = null;
+ while (pq.Top() != null)
+ {
+ PhrasePositions pp = pq.Pop();
+ if (last != null)
+ {
+ // add next to end of list
+ last.next = pp;
+ }
+ else
+ first = pp;
+ last = pp;
+ pp.next = null;
+ }
+ }
+
+ protected internal void FirstToLast()
+ {
+ last.next = first; // move first to end of list
+ last = first;
+ first = first.next;
+ last.next = null;
+ }
+
+ public override System.String ToString()
+ {
+ return "scorer(" + weight + ")";
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/PositiveScoresOnlyCollector.cs b/src/core/Search/PositiveScoresOnlyCollector.cs
new file mode 100644
index 0000000..c9f1ca2
--- /dev/null
+++ b/src/core/Search/PositiveScoresOnlyCollector.cs
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using IndexReader = Lucene.Net.Index.IndexReader;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> A <see cref="Collector" /> implementation which wraps another
+ /// <see cref="Collector" /> and makes sure only documents with
+ /// scores &gt; 0 are collected.
+ /// </summary>
+ public class PositiveScoresOnlyCollector:Collector
+ {
+
+ private Collector c;
+ private Scorer scorer;
+
+ public PositiveScoresOnlyCollector(Collector c)
+ {
+ this.c = c;
+ }
+
+ public override void Collect(int doc)
+ {
+ if (scorer.Score() > 0)
+ {
+ c.Collect(doc);
+ }
+ }
+
+ public override void SetNextReader(IndexReader reader, int docBase)
+ {
+ c.SetNextReader(reader, docBase);
+ }
+
+ public override void SetScorer(Scorer scorer)
+ {
+ // Set a ScoreCachingWrappingScorer in case the wrapped Collector will call
+ // score() also.
+ this.scorer = new ScoreCachingWrappingScorer(scorer);
+ c.SetScorer(this.scorer);
+ }
+
+ public override bool AcceptsDocsOutOfOrder
+ {
+ get { return c.AcceptsDocsOutOfOrder; }
+ }
+ }
+} \ No newline at end of file
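
A hedged usage sketch, wrapping the standard top-docs collector from this port so that only positively scored documents are kept (query and searcher are hypothetical):

    TopScoreDocCollector top = TopScoreDocCollector.Create(10, true);   // 10 hits, docs scored in order
    searcher.Search(query, new PositiveScoresOnlyCollector(top));
    TopDocs hits = top.TopDocs();
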
diff --git a/src/core/Search/PrefixFilter.cs b/src/core/Search/PrefixFilter.cs
new file mode 100644
index 0000000..d398466
--- /dev/null
+++ b/src/core/Search/PrefixFilter.cs
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using Term = Lucene.Net.Index.Term;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> A Filter that restricts search results to values that have a matching prefix in a given
+ /// field.
+ /// </summary>
+ [Serializable]
+ public class PrefixFilter:MultiTermQueryWrapperFilter<PrefixQuery>
+ {
+
+ public PrefixFilter(Term prefix):base(new PrefixQuery(prefix))
+ {
+ }
+
+ public virtual Term Prefix
+ {
+ get { return query.Prefix; }
+ }
+
+ /// <summary>Prints a user-readable version of this query. </summary>
+ public override System.String ToString()
+ {
+ System.Text.StringBuilder buffer = new System.Text.StringBuilder();
+ buffer.Append("PrefixFilter(");
+ buffer.Append(Prefix.ToString());
+ buffer.Append(")");
+ return buffer.ToString();
+ }
+ }
+} \ No newline at end of file
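
A minimal sketch, restricting an arbitrary query to documents whose field value starts with a given prefix (field name and value are hypothetical):

    Filter filter = new PrefixFilter(new Term("path", "src/core/Search/"));
    TopDocs hits = searcher.Search(new MatchAllDocsQuery(), filter, 10);
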
diff --git a/src/core/Search/PrefixQuery.cs b/src/core/Search/PrefixQuery.cs
new file mode 100644
index 0000000..d1a013a
--- /dev/null
+++ b/src/core/Search/PrefixQuery.cs
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using IndexReader = Lucene.Net.Index.IndexReader;
+using Term = Lucene.Net.Index.Term;
+using ToStringUtils = Lucene.Net.Util.ToStringUtils;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary>A Query that matches documents containing terms with a specified prefix. A PrefixQuery
+ /// is built by QueryParser for input like <c>app*</c>.
+ ///
+ /// <p/>This query uses the
+ /// <see cref="MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT"/>
+ /// rewrite method.
+ /// </summary>
+ [Serializable]
+ public class PrefixQuery:MultiTermQuery
+ {
+ private Term prefix;
+
+ /// <summary>Constructs a query for terms starting with <c>prefix</c>. </summary>
+ public PrefixQuery(Term prefix)
+ { //will be removed in 3.0
+ this.prefix = prefix;
+ }
+
+ /// <summary>Returns the prefix of this query. </summary>
+ public virtual Term Prefix
+ {
+ get { return prefix; }
+ }
+
+ protected internal override FilteredTermEnum GetEnum(IndexReader reader)
+ {
+ return new PrefixTermEnum(reader, prefix);
+ }
+
+ /// <summary>Prints a user-readable version of this query. </summary>
+ public override System.String ToString(System.String field)
+ {
+ System.Text.StringBuilder buffer = new System.Text.StringBuilder();
+ if (!prefix.Field.Equals(field))
+ {
+ buffer.Append(prefix.Field);
+ buffer.Append(":");
+ }
+ buffer.Append(prefix.Text);
+ buffer.Append('*');
+ buffer.Append(ToStringUtils.Boost(Boost));
+ return buffer.ToString();
+ }
+
+ //@Override
+ public override int GetHashCode()
+ {
+ int prime = 31;
+ int result = base.GetHashCode();
+ result = prime * result + ((prefix == null)?0:prefix.GetHashCode());
+ return result;
+ }
+
+ //@Override
+ public override bool Equals(System.Object obj)
+ {
+ if (this == obj)
+ return true;
+ if (!base.Equals(obj))
+ return false;
+ if (GetType() != obj.GetType())
+ return false;
+ PrefixQuery other = (PrefixQuery) obj;
+ if (prefix == null)
+ {
+ if (other.prefix != null)
+ return false;
+ }
+ else if (!prefix.Equals(other.prefix))
+ return false;
+ return true;
+ }
+ }
+} \ No newline at end of file
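
A minimal sketch, equivalent to what QueryParser builds for the input app* (field name is hypothetical):

    Query q = new PrefixQuery(new Term("title", "app"));
    TopDocs hits = searcher.Search(q, 10);
    // q.ToString("title") prints: app*
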
diff --git a/src/core/Search/PrefixTermEnum.cs b/src/core/Search/PrefixTermEnum.cs
new file mode 100644
index 0000000..c92195c
--- /dev/null
+++ b/src/core/Search/PrefixTermEnum.cs
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using IndexReader = Lucene.Net.Index.IndexReader;
+using Term = Lucene.Net.Index.Term;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> Subclass of FilteredTermEnum for enumerating all terms that match the
+ /// specified prefix filter term.
+ /// <p/>
+ /// Term enumerations are always ordered by Term.compareTo(). Each term in
+ /// the enumeration is greater than all that precede it.
+ ///
+ /// </summary>
+ public class PrefixTermEnum:FilteredTermEnum
+ {
+
+ private Term prefix;
+ private bool endEnum = false;
+
+ public PrefixTermEnum(IndexReader reader, Term prefix)
+ {
+ this.prefix = prefix;
+
+ SetEnum(reader.Terms(new Term(prefix.Field, prefix.Text)));
+ }
+
+ public override float Difference()
+ {
+ return 1.0f;
+ }
+
+ public override bool EndEnum()
+ {
+ return endEnum;
+ }
+
+ protected internal virtual Term PrefixTerm
+ {
+ get { return prefix; }
+ }
+
+ protected internal override bool TermCompare(Term term)
+ {
+ if ((System.Object) term.Field == (System.Object) prefix.Field && term.Text.StartsWith(prefix.Text))
+ {
+ return true;
+ }
+ endEnum = true;
+ return false;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/Query.cs b/src/core/Search/Query.cs
new file mode 100644
index 0000000..b00d16b
--- /dev/null
+++ b/src/core/Search/Query.cs
@@ -0,0 +1,257 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Linq;
+using Lucene.Net.Index;
+using IndexReader = Lucene.Net.Index.IndexReader;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary>The abstract base class for queries.
+ /// <p/>Instantiable subclasses are:
+ /// <list type="bullet">
+ /// <item> <see cref="TermQuery" /> </item>
+ /// <item> <see cref="MultiTermQuery" /> </item>
+ /// <item> <see cref="BooleanQuery" /> </item>
+ /// <item> <see cref="WildcardQuery" /> </item>
+ /// <item> <see cref="PhraseQuery" /> </item>
+ /// <item> <see cref="PrefixQuery" /> </item>
+ /// <item> <see cref="MultiPhraseQuery" /> </item>
+ /// <item> <see cref="FuzzyQuery" /> </item>
+ /// <item> <see cref="TermRangeQuery" /> </item>
+ /// <item> <see cref="NumericRangeQuery{T}" /> </item>
+ /// <item> <see cref="Lucene.Net.Search.Spans.SpanQuery" /> </item>
+ /// </list>
+ /// <p/>A parser for queries is contained in:
+ /// <list type="bullet">
+ /// <item><see cref="Lucene.Net.QueryParsers.QueryParser">QueryParser</see> </item>
+ /// </list>
+ /// </summary>
+ [Serializable]
+ public abstract class Query : System.ICloneable
+ {
+ private float boost = 1.0f; // query boost factor
+
+ /// <summary>Gets or sets the boost for this query clause to <c>b</c>. Documents
+ /// matching this clause will (in addition to the normal weightings) have
+ /// their score multiplied by <c>b</c>. The boost is 1.0 by default.
+ /// </summary>
+ public virtual float Boost
+ {
+ get { return boost; }
+ set { boost = value; }
+ }
+
+ /// <summary>Prints a query to a string, with <c>field</c> assumed to be the
+ /// default field and omitted.
+ /// <p/>The representation used is one that is supposed to be readable
+ /// by <see cref="Lucene.Net.QueryParsers.QueryParser">QueryParser</see>. However,
+ /// there are the following limitations:
+ /// <list type="bullet">
+ /// <item>If the query was created by the parser, the printed
+ /// representation may not be exactly what was parsed. For example,
+ /// characters that need to be escaped will be represented without
+ /// the required backslash.</item>
+ /// <item>Some of the more complicated queries (e.g. span queries)
+ /// don't have a representation that can be parsed by QueryParser.</item>
+ /// </list>
+ /// </summary>
+ public abstract System.String ToString(System.String field);
+
+ /// <summary>Prints a query to a string. </summary>
+ public override System.String ToString()
+ {
+ return ToString("");
+ }
+
+ /// <summary> Expert: Constructs an appropriate Weight implementation for this query.
+ ///
+ /// <p/>
+ /// Only implemented by primitive queries, which re-write to themselves.
+ /// </summary>
+ public virtual Weight CreateWeight(Searcher searcher)
+ {
+ throw new System.NotSupportedException();
+ }
+
+ /// <summary> Expert: Constructs and initializes a Weight for a top-level query.</summary>
+ public virtual Weight Weight(Searcher searcher)
+ {
+ Query query = searcher.Rewrite(this);
+ Weight weight = query.CreateWeight(searcher);
+ float sum = weight.GetSumOfSquaredWeights();
+ float norm = GetSimilarity(searcher).QueryNorm(sum);
+ if (float.IsInfinity(norm) || float.IsNaN(norm))
+ norm = 1.0f;
+ weight.Normalize(norm);
+ return weight;
+ }
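+
+		// Note (added commentary, not part of the original source): the pipeline above is
+		// rewrite -> CreateWeight -> GetSumOfSquaredWeights -> QueryNorm -> Normalize; an
+		// infinite or NaN query norm is replaced by 1.0f so that a degenerate sum of squared
+		// weights cannot zero out or blow up every document score.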
+
+
+ /// <summary>Expert: called to re-write queries into primitive queries. For example,
+ /// a PrefixQuery will be rewritten into a BooleanQuery that consists
+ /// of TermQuerys.
+ /// </summary>
+ public virtual Query Rewrite(IndexReader reader)
+ {
+ return this;
+ }
+
+
+ /// <summary>Expert: called when re-writing queries under MultiSearcher.
+ ///
+ /// Create a single query suitable for use by all subsearchers (in 1-1
+ /// correspondence with queries). This is an optimization of the OR of
+ /// all queries. We handle the common optimization cases of equal
+ /// queries and overlapping clauses of boolean OR queries (as generated
+ /// by MultiTermQuery.rewrite()).
+ /// Be careful overriding this method as queries[0] determines which
+ /// method will be called and is not necessarily of the same type as
+ /// the other queries.
+ /// </summary>
+ public virtual Query Combine(Query[] queries)
+ {
+ var uniques = new System.Collections.Generic.HashSet<Query>();
+ for (int i = 0; i < queries.Length; i++)
+ {
+ Query query = queries[i];
+ BooleanClause[] clauses = null;
+ // check if we can split the query into clauses
+ bool splittable = (query is BooleanQuery);
+ if (splittable)
+ {
+ BooleanQuery bq = (BooleanQuery) query;
+ splittable = bq.IsCoordDisabled();
+ clauses = bq.GetClauses();
+ for (int j = 0; splittable && j < clauses.Length; j++)
+ {
+ splittable = (clauses[j].Occur == Occur.SHOULD);
+ }
+ }
+ if (splittable)
+ {
+ for (int j = 0; j < clauses.Length; j++)
+ {
+ uniques.Add(clauses[j].Query);
+ }
+ }
+ else
+ {
+ uniques.Add(query);
+ }
+ }
+ // optimization: if we have just one query, just return it
+ if (uniques.Count == 1)
+ {
+ return uniques.First();
+ }
+ BooleanQuery result = new BooleanQuery(true);
+ foreach (Query key in uniques)
+ {
+ result.Add(key, Occur.SHOULD);
+ }
+ return result;
+ }
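+
+		// Illustrative example (added commentary, not part of the original source): combining the
+		// per-searcher queries { (A OR B), (A OR C) }, both coord-disabled BooleanQueries whose
+		// clauses are all SHOULD, splits them into clauses, deduplicates them in the HashSet and
+		// returns a single coord-disabled BooleanQuery (A OR B OR C); if every searcher produced
+		// the identical query, that query is returned unchanged.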
+
+
+		/// <summary> Expert: adds all terms occurring in this query to the terms set. Only
+ /// works if this query is in its <see cref="Rewrite">rewritten</see> form.
+ ///
+ /// </summary>
+ /// <throws> UnsupportedOperationException if this query is not yet rewritten </throws>
+ public virtual void ExtractTerms(System.Collections.Generic.ISet<Term> terms)
+ {
+ // needs to be implemented by query subclasses
+ throw new System.NotSupportedException();
+ }
+
+
+
+ /// <summary>Expert: merges the clauses of a set of BooleanQuery's into a single
+ /// BooleanQuery.
+ ///
+ /// <p/>A utility for use by <see cref="Combine(Query[])" /> implementations.
+ /// </summary>
+ public static Query MergeBooleanQueries(params BooleanQuery[] queries)
+ {
+ var allClauses = new System.Collections.Generic.HashSet<BooleanClause>();
+ foreach (BooleanQuery booleanQuery in queries)
+ {
+ foreach (BooleanClause clause in booleanQuery)
+ {
+ allClauses.Add(clause);
+ }
+ }
+
+ bool coordDisabled = queries.Length == 0?false:queries[0].IsCoordDisabled();
+ BooleanQuery result = new BooleanQuery(coordDisabled);
+ foreach(BooleanClause clause in allClauses)
+ {
+ result.Add(clause);
+ }
+ return result;
+ }
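+
+		// Illustrative example (added commentary, not part of the original source): merging two
+		// BooleanQueries that both contain the clause +title:lucene keeps a single copy of it,
+		// because clauses that compare equal collapse in the HashSet; coord-disabling is taken
+		// from the first query in the argument list, or defaults to false when none are passed.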
+
+
+ /// <summary>Expert: Returns the Similarity implementation to be used for this query.
+ /// Subclasses may override this method to specify their own Similarity
+ /// implementation, perhaps one that delegates through that of the Searcher.
+ /// By default the Searcher's Similarity implementation is returned.
+ /// </summary>
+ public virtual Similarity GetSimilarity(Searcher searcher)
+ {
+ return searcher.Similarity;
+ }
+
+ /// <summary>Returns a clone of this query. </summary>
+ public virtual System.Object Clone()
+ {
+ try
+ {
+ return base.MemberwiseClone();
+ }
+ catch (System.Exception e)
+ {
+ throw new System.SystemException("Clone not supported: " + e.Message);
+ }
+ }
+
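+		// Note (added commentary, not part of the original source): BitConverter.GetBytes(float)
+		// followed by BitConverter.ToInt32 reproduces the float's raw bit pattern, mirroring the
+		// intent of Java's Float.floatToIntBits, so GetHashCode and Equals below compare the
+		// boost bitwise instead of using floating-point equality.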
+ public override int GetHashCode()
+ {
+ int prime = 31;
+ int result = 1;
+ result = prime * result + BitConverter.ToInt32(BitConverter.GetBytes(boost), 0);
+ return result;
+ }
+
+ public override bool Equals(System.Object obj)
+ {
+ if (this == obj)
+ return true;
+ if (obj == null)
+ return false;
+ if (GetType() != obj.GetType())
+ return false;
+ Query other = (Query) obj;
+ if (BitConverter.ToInt32(BitConverter.GetBytes(boost), 0) != BitConverter.ToInt32(BitConverter.GetBytes(other.boost), 0))
+ return false;
+ return true;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/QueryTermVector.cs b/src/core/Search/QueryTermVector.cs
new file mode 100644
index 0000000..a52a6a3
--- /dev/null
+++ b/src/core/Search/QueryTermVector.cs
@@ -0,0 +1,167 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Index;
+using Lucene.Net.Support;
+using Analyzer = Lucene.Net.Analysis.Analyzer;
+using TokenStream = Lucene.Net.Analysis.TokenStream;
+
+namespace Lucene.Net.Search
+{
+
+	/// <summary> A term frequency vector built from the terms of a query rather than from an
+	/// indexed document.
+	/// </summary>
+ public class QueryTermVector : ITermFreqVector
+ {
+ private System.String[] terms = new System.String[0];
+ private int[] termFreqs = new int[0];
+
+ public virtual string Field
+ {
+ get { return null; }
+ }
+
+		/// <summary>Builds the vector from an explicit list of query terms.</summary>
+		/// <param name="queryTerms">The original list of terms from the query; may contain duplicates.
+		/// </param>
+ public QueryTermVector(System.String[] queryTerms)
+ {
+
+ ProcessTerms(queryTerms);
+ }
+
+ public QueryTermVector(System.String queryString, Analyzer analyzer)
+ {
+ if (analyzer != null)
+ {
+ TokenStream stream = analyzer.TokenStream("", new System.IO.StringReader(queryString));
+ if (stream != null)
+ {
+ IList<string> terms = new List<string>();
+ try
+ {
+ bool hasMoreTokens = false;
+
+ stream.Reset();
+ ITermAttribute termAtt = stream.AddAttribute<ITermAttribute>();
+
+ hasMoreTokens = stream.IncrementToken();
+ while (hasMoreTokens)
+ {
+ terms.Add(termAtt.Term);
+ hasMoreTokens = stream.IncrementToken();
+ }
+ ProcessTerms(terms.ToArray());
+ }
+ catch (System.IO.IOException)
+ {
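+					// the IO error is swallowed and the term vector is left empty (added commentary, not in the original source)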
+ }
+ }
+ }
+ }
+
+ private void ProcessTerms(System.String[] queryTerms)
+ {
+ if (queryTerms != null)
+ {
+ System.Array.Sort(queryTerms);
+ IDictionary<string, int> tmpSet = new HashMap<string, int>(queryTerms.Length);
+ //filter out duplicates
+ IList<string> tmpList = new List<string>(queryTerms.Length);
+ IList<int> tmpFreqs = new List<int>(queryTerms.Length);
+ int j = 0;
+ for (int i = 0; i < queryTerms.Length; i++)
+ {
+					var term = queryTerms[i];
+					if (!tmpSet.ContainsKey(term)) // first occurrence of this term
+					{
+						tmpSet[term] = j++;
+						tmpList.Add(term);
+						tmpFreqs.Add(1);
+					}
+					else
+					{
+						// repeat occurrence: bump the frequency recorded at this term's position
+						int position = tmpSet[term];
+						tmpFreqs[position] = tmpFreqs[position] + 1;
+					}
+ }
+ terms = tmpList.ToArray();
+ //termFreqs = (int[])tmpFreqs.toArray(termFreqs);
+ termFreqs = new int[tmpFreqs.Count];
+ int i2 = 0;
+ foreach (int integer in tmpFreqs)
+ {
+ termFreqs[i2++] = integer;
+ }
+ }
+ }
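+
+		// Illustrative example (added commentary, not part of the original source):
+		// ProcessTerms(new[] { "b", "a", "a" }) sorts the input, collapses the duplicate and
+		// yields terms == { "a", "b" } with termFreqs == { 2, 1 }; sorting up front is also
+		// what lets IndexOf below use a binary search.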
+
+ public override System.String ToString()
+ {
+ System.Text.StringBuilder sb = new System.Text.StringBuilder();
+ sb.Append('{');
+ for (int i = 0; i < terms.Length; i++)
+ {
+ if (i > 0)
+ sb.Append(", ");
+ sb.Append(terms[i]).Append('/').Append(termFreqs[i]);
+ }
+ sb.Append('}');
+ return sb.ToString();
+ }
+
+
+ public virtual int Size
+ {
+ get { return terms.Length; }
+ }
+
+ public virtual System.String[] GetTerms()
+ {
+ return terms;
+ }
+
+ public virtual int[] GetTermFrequencies()
+ {
+ return termFreqs;
+ }
+
+ public virtual int IndexOf(System.String term)
+ {
+ int res = System.Array.BinarySearch(terms, term);
+ return res >= 0?res:- 1;
+ }
+
+ public virtual int[] IndexesOf(System.String[] terms, int start, int len)
+ {
+ int[] res = new int[len];
+
+ for (int i = 0; i < len; i++)
+ {
+ res[i] = IndexOf(terms[i]);
+ }
+ return res;
+ }
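+
+		// Illustrative example (added commentary, not part of the original source): with the
+		// vector from the ProcessTerms example above (terms { "a", "b" }), IndexOf("b") returns 1,
+		// IndexOf("z") returns -1, and IndexesOf(new[] { "b", "z" }, 0, 2) returns { 1, -1 }.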
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/QueryWrapperFilter.cs b/src/core/Search/QueryWrapperFilter.cs
new file mode 100644
index 0000000..1642c97
--- /dev/null
+++ b/src/core/Search/QueryWrapperFilter.cs
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using IndexReader = Lucene.Net.Index.IndexReader;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> Constrains search results to only match those which also match a provided
+ /// query.
+ ///
+ /// <p/> This could be used, for example, with a <see cref="TermRangeQuery" /> on a suitably
+ /// formatted date field to implement date filtering. One could re-use a single
+ /// QueryFilter that matches, e.g., only documents modified within the last
+ /// week. The QueryFilter and TermRangeQuery would only need to be reconstructed
+ /// once per day.
+ ///
+ /// </summary>
+ /// <version> $Id:$
+ /// </version>
+ [Serializable]
+ public class QueryWrapperFilter:Filter
+ {
+ private class AnonymousClassDocIdSet:DocIdSet
+ {
+ public AnonymousClassDocIdSet(Lucene.Net.Search.Weight weight, Lucene.Net.Index.IndexReader reader, QueryWrapperFilter enclosingInstance)
+ {
+ InitBlock(weight, reader, enclosingInstance);
+ }
+ private void InitBlock(Lucene.Net.Search.Weight weight, Lucene.Net.Index.IndexReader reader, QueryWrapperFilter enclosingInstance)
+ {
+ this.weight = weight;
+ this.reader = reader;
+ this.enclosingInstance = enclosingInstance;
+ }
+ private Lucene.Net.Search.Weight weight;
+ private Lucene.Net.Index.IndexReader reader;
+ private QueryWrapperFilter enclosingInstance;
+ public QueryWrapperFilter Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ public override DocIdSetIterator Iterator()
+ {
+ return weight.Scorer(reader, true, false);
+ }
+
+ public override bool IsCacheable
+ {
+ get { return false; }
+ }
+ }
+ private Query query;
+
+ /// <summary>Constructs a filter which only matches documents matching
+ /// <c>query</c>.
+ /// </summary>
+ public QueryWrapperFilter(Query query)
+ {
+ this.query = query;
+ }
+
+ public override DocIdSet GetDocIdSet(IndexReader reader)
+ {
+ Weight weight = query.Weight(new IndexSearcher(reader));
+ return new AnonymousClassDocIdSet(weight, reader, this);
+ }
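+
+		// Usage sketch (added commentary, not part of the original source; assumes the
+		// five-argument TermRangeQuery constructor field/lower/upper/includeLower/includeUpper):
+		//   var filter = new QueryWrapperFilter(new TermRangeQuery("modified", "20150101", "20150611", true, true));
+		// GetDocIdSet then weights the wrapped query against the given reader and exposes the
+		// matching doc ids through the anonymous DocIdSet above.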
+
+ public override System.String ToString()
+ {
+ return "QueryWrapperFilter(" + query + ")";
+ }
+
+ public override bool Equals(System.Object o)
+ {
+ if (!(o is QueryWrapperFilter))
+ return false;
+ return this.query.Equals(((QueryWrapperFilter) o).query);
+ }
+
+ public override int GetHashCode()
+ {
+ return query.GetHashCode() ^ unchecked((int) 0x923F64B9);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/ReqExclScorer.cs b/src/core/Search/ReqExclScorer.cs
new file mode 100644
index 0000000..a569362
--- /dev/null
+++ b/src/core/Search/ReqExclScorer.cs
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Search
+{
+
+
+ /// <summary>A Scorer for queries with a required subscorer
+ /// and an excluding (prohibited) sub DocIdSetIterator.
+ /// <br/>
+ /// This <c>Scorer</c> implements <see cref="DocIdSetIterator.Advance(int)" />,
+ /// and it uses the skipTo() on the given scorers.
+ /// </summary>
+ class ReqExclScorer:Scorer
+ {
+ private Scorer reqScorer;
+ private DocIdSetIterator exclDisi;
+ private int doc = - 1;
+
+ /// <summary>Construct a <c>ReqExclScorer</c>.</summary>
+		/// <param name="reqScorer">The scorer that must match, except where excluded.
+		/// </param>
+		/// <param name="exclDisi">The iterator over the documents to be excluded.
+		/// </param>
+ public ReqExclScorer(Scorer reqScorer, DocIdSetIterator exclDisi):base(null)
+ { // No similarity used.
+ this.reqScorer = reqScorer;
+ this.exclDisi = exclDisi;
+ }
+
+ public override int NextDoc()
+ {
+ if (reqScorer == null)
+ {
+ return doc;
+ }
+ doc = reqScorer.NextDoc();
+ if (doc == NO_MORE_DOCS)
+ {
+ reqScorer = null; // exhausted, nothing left
+ return doc;
+ }
+ if (exclDisi == null)
+ {
+ return doc;
+ }
+ return doc = ToNonExcluded();
+ }
+
+ /// <summary>Advance to non excluded doc.
+ /// <br/>On entry:
+ /// <list type="bullet">
+ /// <item>reqScorer != null, </item>
+ /// <item>exclScorer != null, </item>
+ /// <item>reqScorer was advanced once via next() or skipTo()
+ /// and reqScorer.doc() may still be excluded.</item>
+ /// </list>
+		/// Advances reqScorer to a non-excluded required doc, if any.
+		/// </summary>
+		/// <returns> The doc id of the first non-excluded required document, or NO_MORE_DOCS
+		/// if none remains.
+		/// </returns>
+ private int ToNonExcluded()
+ {
+ int exclDoc = exclDisi.DocID();
+ int reqDoc = reqScorer.DocID(); // may be excluded
+ do
+ {
+ if (reqDoc < exclDoc)
+ {
+ return reqDoc; // reqScorer advanced to before exclScorer, ie. not excluded
+ }
+ else if (reqDoc > exclDoc)
+ {
+ exclDoc = exclDisi.Advance(reqDoc);
+ if (exclDoc == NO_MORE_DOCS)
+ {
+ exclDisi = null; // exhausted, no more exclusions
+ return reqDoc;
+ }
+ if (exclDoc > reqDoc)
+ {
+ return reqDoc; // not excluded
+ }
+ }
+ }
+ while ((reqDoc = reqScorer.NextDoc()) != NO_MORE_DOCS);
+ reqScorer = null; // exhausted, nothing left
+ return NO_MORE_DOCS;
+ }
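+
+		// Illustrative walk-through (added commentary, not part of the original source): if the
+		// required scorer matches docs { 1, 3, 5 } and the exclusion iterator matches { 3 },
+		// NextDoc() returns 1, the next call skips 3 (required but excluded) and returns 5, and
+		// once the exclusion iterator is exhausted the remaining required docs pass through as-is.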
+
+ public override int DocID()
+ {
+ return doc;
+ }
+
+ /// <summary>Returns the score of the current document matching the query.
+ /// Initially invalid, until <see cref="NextDoc()" /> is called the first time.
+ /// </summary>
+ /// <returns> The score of the required scorer.
+ /// </returns>
+ public override float Score()
+ {
+ return reqScorer.Score(); // reqScorer may be null when next() or skipTo() already return false
+ }
+
+ public override int Advance(int target)
+ {
+ if (reqScorer == null)
+ {
+ return doc = NO_MORE_DOCS;
+ }
+ if (exclDisi == null)
+ {
+ return doc = reqScorer.Advance(target);
+ }
+ if (reqScorer.Advance(target) == NO_MORE_DOCS)
+ {
+ reqScorer = null;
+ return doc = NO_MORE_DOCS;
+ }
+ return doc = ToNonExcluded();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/ReqOptSumScorer.cs b/src/core/Search/ReqOptSumScorer.cs
new file mode 100644
index 0000000..b432f13
--- /dev/null
+++ b/src/core/Search/ReqOptSumScorer.cs
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary>A Scorer for queries with a required part and an optional part.
+ /// Delays skipTo() on the optional part until a score() is needed.
+ /// <br/>
+ /// This <c>Scorer</c> implements <see cref="DocIdSetIterator.Advance(int)" />.
+ /// </summary>
+ class ReqOptSumScorer:Scorer
+ {
+ /// <summary>The scorers passed from the constructor.
+ /// These are set to null as soon as their next() or skipTo() returns false.
+ /// </summary>
+ private Scorer reqScorer;
+ private Scorer optScorer;
+
+ /// <summary>Construct a <c>ReqOptScorer</c>.</summary>
+ /// <param name="reqScorer">The required scorer. This must match.
+ /// </param>
+ /// <param name="optScorer">The optional scorer. This is used for scoring only.
+ /// </param>
+ public ReqOptSumScorer(Scorer reqScorer, Scorer optScorer):base(null)
+ { // No similarity used.
+ this.reqScorer = reqScorer;
+ this.optScorer = optScorer;
+ }
+
+ public override int NextDoc()
+ {
+ return reqScorer.NextDoc();
+ }
+
+ public override int Advance(int target)
+ {
+ return reqScorer.Advance(target);
+ }
+
+ public override int DocID()
+ {
+ return reqScorer.DocID();
+ }
+
+ /// <summary>Returns the score of the current document matching the query.
+ /// Initially invalid, until <see cref="NextDoc()" /> is called the first time.
+ /// </summary>
+		/// <returns> The score of the required scorer, increased by the score of the
+		/// optional scorer when the optional scorer also matches the current document.
+		/// </returns>
+ public override float Score()
+ {
+ int curDoc = reqScorer.DocID();
+ float reqScore = reqScorer.Score();
+ if (optScorer == null)
+ {
+ return reqScore;
+ }
+
+ int optScorerDoc = optScorer.DocID();
+ if (optScorerDoc < curDoc && (optScorerDoc = optScorer.Advance(curDoc)) == NO_MORE_DOCS)
+ {
+ optScorer = null;
+ return reqScore;
+ }
+
+ return optScorerDoc == curDoc?reqScore + optScorer.Score():reqScore;
+ }
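+
+		// Note (added commentary, not part of the original source): the optional scorer is only
+		// advanced here inside Score(), never in NextDoc()/Advance(), so documents a collector
+		// skips without scoring never pay for positioning the optional scorer; this is the
+		// "delays skipTo()" behaviour described in the class summary.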
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/ScoreCachingWrappingScorer.cs b/src/core/Search/ScoreCachingWrappingScorer.cs
new file mode 100644
index 0000000..c60357b
--- /dev/null
+++ b/src/core/Search/ScoreCachingWrappingScorer.cs
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> A <see cref="Scorer" /> which wraps another scorer and caches the score of the
+ /// current document. Successive calls to <see cref="Score()" /> will return the same
+ /// result and will not invoke the wrapped Scorer's score() method, unless the
+ /// current document has changed.<br/>
+ /// This class might be useful due to the changes done to the <see cref="Collector" />
+ /// interface, in which the score is not computed for a document by default, only
+ /// if the collector requests it. Some collectors may need to use the score in
+ /// several places, however all they have in hand is a <see cref="Scorer" /> object, and
+ /// might end up computing the score of a document more than once.
+ /// </summary>
+ public class ScoreCachingWrappingScorer:Scorer
+ {
+
+ private Scorer scorer;
+ private int curDoc = - 1;
+ private float curScore;
+
+ /// <summary>Creates a new instance by wrapping the given scorer. </summary>
+ public ScoreCachingWrappingScorer(Scorer scorer):base(scorer.Similarity)
+ {
+ this.scorer = scorer;
+ }
+
+ public /*protected internal*/ override bool Score(Collector collector, int max, int firstDocID)
+ {
+ return scorer.Score(collector, max, firstDocID);
+ }
+
+ public override Similarity Similarity
+ {
+ get { return scorer.Similarity; }
+ }
+
+ public override float Score()
+ {
+ int doc = scorer.DocID();
+ if (doc != curDoc)
+ {
+ curScore = scorer.Score();
+ curDoc = doc;
+ }
+
+ return curScore;
+ }
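+
+		// Illustrative example (added commentary, not part of the original source): a collector
+		// that calls Score() twice for the same document, say once to track the maximum score and
+		// once to accumulate a sum, triggers only one scorer.Score() call; the second call is
+		// served from curScore because curDoc still equals scorer.DocID().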
+
+ public override int DocID()
+ {
+ return scorer.DocID();
+ }
+
+ public override int NextDoc()
+ {
+ return scorer.NextDoc();
+ }
+
+ public override void Score(Collector collector)
+ {
+ scorer.Score(collector);
+ }
+
+ public override int Advance(int target)
+ {
+ return scorer.Advance(target);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/ScoreDoc.cs b/src/core/Search/ScoreDoc.cs
new file mode 100644
index 0000000..2ac3417
--- /dev/null
+++ b/src/core/Search/ScoreDoc.cs
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary>Expert: Returned by low-level search implementations.</summary>
+ /// <seealso cref="TopDocs">
+ /// </seealso>
+ [Serializable]
+ public class ScoreDoc
+ {
+ /// <summary>Expert: The score of this document for the query. </summary>
+ public float Score { get; set; }
+
+ /// <summary>Expert: A hit document's number.</summary>
+ /// <seealso cref="Searcher.Doc(int)">
+ /// </seealso>
+ public int Doc { get; set; }
+
+ /// <summary>Expert: Constructs a ScoreDoc. </summary>
+ public ScoreDoc(int doc, float score)
+ {
+ this.Doc = doc;
+ this.Score = score;
+ }
+
+ // A convenience method for debugging.
+ public override System.String ToString()
+ {
+ return "doc=" + Doc + " score=" + Score;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/Scorer.cs b/src/core/Search/Scorer.cs
new file mode 100644
index 0000000..17885ac
--- /dev/null
+++ b/src/core/Search/Scorer.cs
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Index;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> Expert: Common scoring functionality for different types of queries.
+ ///
+ /// <p/>
+ /// A <c>Scorer</c> iterates over documents matching a
+ /// query in increasing order of doc Id.
+ /// <p/>
+ /// <p/>
+ /// Document scores are computed using a given <c>Similarity</c>
+ /// implementation.
+ /// <p/>
+ ///
+	/// <p/><b>NOTE</b>: The values float.NaN,
+	/// float.NegativeInfinity and float.PositiveInfinity are
+	/// not valid scores. Certain collectors (e.g. <see cref="TopScoreDocCollector" />)
+	/// will not properly collect hits
+	/// with these scores.
+ /// </summary>
+ public abstract class Scorer:DocIdSetIterator
+ {
+ private Similarity similarity;
+
+ /// <summary>Constructs a Scorer.</summary>
+ /// <param name="similarity">The <c>Similarity</c> implementation used by this scorer.
+ /// </param>
+ protected internal Scorer(Similarity similarity)
+ {
+ this.similarity = similarity;
+ }
+
+ /// <summary>Returns the Similarity implementation used by this scorer. </summary>
+ public virtual Similarity Similarity
+ {
+ get { return this.similarity; }
+ }
+
+ /// <summary>Scores and collects all matching documents.</summary>
+ /// <param name="collector">The collector to which all matching documents are passed.
+ /// </param>
+ public virtual void Score(Collector collector)
+ {
+ collector.SetScorer(this);
+ int doc;
+ while ((doc = NextDoc()) != NO_MORE_DOCS)
+ {
+ collector.Collect(doc);
+ }
+ }
+
+ /// <summary> Expert: Collects matching documents in a range. Hook for optimization.
+ /// Note, <paramref name="firstDocID" /> is added to ensure that <see cref="DocIdSetIterator.NextDoc()" />
+ /// was called before this method.
+ ///
+ /// </summary>
+ /// <param name="collector">The collector to which all matching documents are passed.
+ /// </param>
+ /// <param name="max">Do not score documents past this.
+ /// </param>
+ /// <param name="firstDocID">
+ /// The first document ID (ensures <see cref="DocIdSetIterator.NextDoc()" /> is called before
+		/// this method).
+ /// </param>
+ /// <returns> true if more matching documents may remain.
+ /// </returns>
+ public /*protected internal*/ virtual bool Score(Collector collector, int max, int firstDocID)
+ {
+ collector.SetScorer(this);
+ int doc = firstDocID;
+ while (doc < max)
+ {
+ collector.Collect(doc);
+ doc = NextDoc();
+ }
+ return doc != NO_MORE_DOCS;
+ }
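+
+		// Note (added commentary, not part of the original source): this hook collects documents
+		// with ids below max, starting from firstDocID (the document the caller already positioned
+		// on via NextDoc()), and reports whether the iterator may still hold documents at or
+		// beyond max.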
+
+ /// <summary>Returns the score of the current document matching the query.
+ /// Initially invalid, until <see cref="DocIdSetIterator.NextDoc()" /> or <see cref="DocIdSetIterator.Advance(int)" />
+ /// is called the first time, or when called from within
+ /// <see cref="Collector.Collect(int)" />.
+ /// </summary>
+ public abstract float Score();
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/Searchable.cs b/src/core/Search/Searchable.cs
new file mode 100644
index 0000000..3c595df
--- /dev/null
+++ b/src/core/Search/Searchable.cs
@@ -0,0 +1,169 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Documents;
+using Document = Lucene.Net.Documents.Document;
+using FieldSelector = Lucene.Net.Documents.FieldSelector;
+using CorruptIndexException = Lucene.Net.Index.CorruptIndexException;
+using Term = Lucene.Net.Index.Term;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> The interface for search implementations.
+ ///
+ /// <p/>
+ /// Searchable is the abstract network protocol for searching. Implementations
+ /// provide search over a single index, over multiple indices, and over indices
+ /// on remote servers.
+ ///
+ /// <p/>
+ /// Queries, filters and sort criteria are designed to be compact so that they
+ /// may be efficiently passed to a remote index, with only the top-scoring hits
+ /// being returned, rather than every matching hit.
+ ///
+ /// <b>NOTE:</b> this interface is kept public for convenience. Since it is not
+ /// expected to be implemented directly, it may be changed unexpectedly between
+ /// releases.
+ /// </summary>
+ public interface Searchable : IDisposable
+ {
+ /// <summary> Lower-level search API.
+ ///
+ /// <p/>
+ /// <see cref="Collector.Collect(int)" /> is called for every document. <br/>
+ /// Collector-based access to remote indexes is discouraged.
+ ///
+ /// <p/>
+ /// Applications should only use this if they need <i>all</i> of the matching
+ /// documents. The high-level search API (<see cref="Searcher.Search(Query,int)" />) is
+ /// usually more efficient, as it skips non-high-scoring hits.
+ ///
+ /// </summary>
+ /// <param name="weight">to match documents
+ /// </param>
+ /// <param name="filter">if non-null, used to permit documents to be collected.
+ /// </param>
+ /// <param name="collector">to receive hits
+ /// </param>
+ /// <throws> BooleanQuery.TooManyClauses </throws>
+ void Search(Weight weight, Filter filter, Collector collector);
+
+ /// <summary>Frees resources associated with this Searcher.
+ /// Be careful not to call this method while you are still using objects
+ /// that reference this searchable
+ /// </summary>
+ void Close();
+
+ /// <summary>Expert: Returns the number of documents containing <c>term</c>.
+ /// Called by search code to compute term weights.
+ /// </summary>
+ /// <seealso cref="Lucene.Net.Index.IndexReader.DocFreq(Term)">
+ /// </seealso>
+ int DocFreq(Term term);
+
+ /// <summary>Expert: For each term in the terms array, calculates the number of
+ /// documents containing <c>term</c>. Returns an array with these
+ /// document frequencies. Used to minimize number of remote calls.
+ /// </summary>
+ int[] DocFreqs(Term[] terms);
+
+ /// <summary>Expert: Returns one greater than the largest possible document number.
+ /// Called by search code to compute term weights.
+ /// </summary>
+ /// <seealso cref="Lucene.Net.Index.IndexReader.MaxDoc">
+ /// </seealso>
+ int MaxDoc { get; }
+
+ /// <summary>
+ /// Expert: Low-level search implementation. Finds the top <c>n</c>
+ /// hits for <c>query</c>, applying <c>filter</c> if non-null.
+ ///
+ /// <p/>Applications should usually call <see cref="Searcher.Search(Query, int)" /> or
+ /// <see cref="Searcher.Search(Query,Filter,int)" /> instead.
+ /// </summary>
+ /// <throws> BooleanQuery.TooManyClauses </throws>
+ TopDocs Search(Weight weight, Filter filter, int n);
+
+ /// <summary>Expert: Returns the stored fields of document <c>i</c>.</summary>
+ /// <seealso cref="Lucene.Net.Index.IndexReader.Document(int)" />
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
+ Document Doc(int i);
+
+ /// <summary> Get the <see cref="Lucene.Net.Documents.Document" />at the <c>n</c><sup>th</sup> position. The <see cref="Lucene.Net.Documents.FieldSelector"/>
+ /// may be used to determine what <see cref="Lucene.Net.Documents.Field" />s to load and how they should be loaded.
+ ///
+ /// <b>NOTE:</b> If the underlying Reader (more specifically, the underlying <c>FieldsReader</c>) is closed before the lazy <see cref="Lucene.Net.Documents.Field" /> is
+ /// loaded an exception may be thrown. If you want the value of a lazy <see cref="Lucene.Net.Documents.Field" /> to be available after closing you must
+ /// explicitly load it or fetch the Document again with a new loader.
+ ///
+ ///
+ /// </summary>
+ /// <param name="n">Get the document at the <c>n</c><sup>th</sup> position
+ /// </param>
+ /// <param name="fieldSelector">The <see cref="Lucene.Net.Documents.FieldSelector" /> to use to determine what Fields should be loaded on the Document. May be null, in which case all Fields will be loaded.
+ /// </param>
+ /// <returns> The stored fields of the <see cref="Lucene.Net.Documents.Document" /> at the nth position
+ /// </returns>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
+ /// <seealso cref="Lucene.Net.Index.IndexReader.Document(int, FieldSelector)">
+ /// </seealso>
+ /// <seealso cref="IFieldable">
+ /// </seealso>
+ /// <seealso cref="Lucene.Net.Documents.FieldSelector">
+ /// </seealso>
+ /// <seealso cref="Lucene.Net.Documents.SetBasedFieldSelector">
+ /// </seealso>
+ /// <seealso cref="Lucene.Net.Documents.LoadFirstFieldSelector">
+ /// </seealso>
+ Document Doc(int n, FieldSelector fieldSelector);
+
+ /// <summary>Expert: called to re-write queries into primitive queries.</summary>
+ /// <throws> BooleanQuery.TooManyClauses </throws>
+ Query Rewrite(Query query);
+
+ /// <summary>Expert: low-level implementation method
+ /// Returns an Explanation that describes how <c>doc</c> scored against
+ /// <c>weight</c>.
+ ///
+ /// <p/>This is intended to be used in developing Similarity implementations,
+ /// and, for good performance, should not be displayed with every hit.
+ /// Computing an explanation is as expensive as executing the query over the
+ /// entire index.
+ /// <p/>Applications should call <see cref="Searcher.Explain(Query, int)" />.
+ /// </summary>
+ /// <throws> BooleanQuery.TooManyClauses </throws>
+ Explanation Explain(Weight weight, int doc);
+
+ /// <summary>Expert: Low-level search implementation with arbitrary sorting. Finds
+ /// the top <c>n</c> hits for <c>query</c>, applying
+ /// <c>filter</c> if non-null, and sorting the hits by the criteria in
+ /// <c>sort</c>.
+ ///
+ /// <p/>Applications should usually call
+ /// <see cref="Searcher.Search(Query,Filter,int,Sort)" /> instead.
+ ///
+ /// </summary>
+ /// <throws> BooleanQuery.TooManyClauses </throws>
+ TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort);
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/Searcher.cs b/src/core/Search/Searcher.cs
new file mode 100644
index 0000000..a545e11
--- /dev/null
+++ b/src/core/Search/Searcher.cs
@@ -0,0 +1,192 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Documents;
+using Document = Lucene.Net.Documents.Document;
+using CorruptIndexException = Lucene.Net.Index.CorruptIndexException;
+using Term = Lucene.Net.Index.Term;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> An abstract base class for search implementations. Implements the main search
+ /// methods.
+ ///
+ /// <p/>
+ /// Note that you can only access hits from a Searcher as long as it is not yet
+ /// closed, otherwise an IOException will be thrown.
+ /// </summary>
+ public abstract class Searcher : System.MarshalByRefObject, Searchable, System.IDisposable
+ {
+ protected Searcher()
+ {
+ InitBlock();
+ }
+ private void InitBlock()
+ {
+ similarity = Net.Search.Similarity.Default;
+ }
+
+ /// <summary>Search implementation with arbitrary sorting. Finds
+ /// the top <c>n</c> hits for <c>query</c>, applying
+ /// <c>filter</c> if non-null, and sorting the hits by the criteria in
+ /// <c>sort</c>.
+ ///
+ /// <p/>NOTE: this does not compute scores by default; use
+ /// <see cref="IndexSearcher.SetDefaultFieldSortScoring(bool,bool)" /> to enable scoring.
+ ///
+ /// </summary>
+ /// <throws> BooleanQuery.TooManyClauses </throws>
+ public virtual TopFieldDocs Search(Query query, Filter filter, int n, Sort sort)
+ {
+ return Search(CreateWeight(query), filter, n, sort);
+ }
+
+ /// <summary>Lower-level search API.
+ ///
+ /// <p/><see cref="Collector.Collect(int)" /> is called for every matching document.
+ ///
+ /// <p/>Applications should only use this if they need <i>all</i> of the matching
+ /// documents. The high-level search API (<see cref="Searcher.Search(Query, int)" />
+ /// ) is usually more efficient, as it skips non-high-scoring hits.
+ /// <p/>Note: The <c>score</c> passed to this method is a raw score.
+ /// In other words, the score will not necessarily be a float whose value is
+ /// between 0 and 1.
+ /// </summary>
+ /// <throws> BooleanQuery.TooManyClauses </throws>
+ public virtual void Search(Query query, Collector results)
+ {
+ Search(CreateWeight(query), null, results);
+ }
+
+ /// <summary>Lower-level search API.
+ ///
+ /// <p/><see cref="Collector.Collect(int)" /> is called for every matching
+ /// document.
+ /// <br/>Collector-based access to remote indexes is discouraged.
+ ///
+ /// <p/>Applications should only use this if they need <i>all</i> of the
+ /// matching documents. The high-level search API (<see cref="Searcher.Search(Query, Filter, int)" />)
+ /// is usually more efficient, as it skips
+ /// non-high-scoring hits.
+ ///
+ /// </summary>
+ /// <param name="query">to match documents
+ /// </param>
+ /// <param name="filter">if non-null, used to permit documents to be collected.
+ /// </param>
+ /// <param name="results">to receive hits
+ /// </param>
+ /// <throws> BooleanQuery.TooManyClauses </throws>
+ public virtual void Search(Query query, Filter filter, Collector results)
+ {
+ Search(CreateWeight(query), filter, results);
+ }
+
+ /// <summary>Finds the top <c>n</c>
+ /// hits for <c>query</c>, applying <c>filter</c> if non-null.
+ ///
+ /// </summary>
+ /// <throws> BooleanQuery.TooManyClauses </throws>
+ public virtual TopDocs Search(Query query, Filter filter, int n)
+ {
+ return Search(CreateWeight(query), filter, n);
+ }
+
+ /// <summary>Finds the top <c>n</c>
+ /// hits for <c>query</c>.
+ ///
+ /// </summary>
+ /// <throws> BooleanQuery.TooManyClauses </throws>
+ public virtual TopDocs Search(Query query, int n)
+ {
+ return Search(query, null, n);
+ }
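+
+		// Usage sketch (added commentary, not part of the original source): the typical entry point is
+		//   TopDocs top = searcher.Search(new TermQuery(new Term("contents", "lucene")), 10);
+		// which delegates to Search(query, null, n) and ultimately to the abstract
+		// Search(Weight, Filter, int) implemented by concrete searchers such as IndexSearcher.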
+
+ /// <summary>Returns an Explanation that describes how <c>doc</c> scored against
+ /// <c>query</c>.
+ ///
+ /// <p/>This is intended to be used in developing Similarity implementations,
+ /// and, for good performance, should not be displayed with every hit.
+ /// Computing an explanation is as expensive as executing the query over the
+ /// entire index.
+ /// </summary>
+ public virtual Explanation Explain(Query query, int doc)
+ {
+ return Explain(CreateWeight(query), doc);
+ }
+
+ /// <summary>The Similarity implementation used by this searcher. </summary>
+ private Similarity similarity;
+
+ /// <summary>Expert: Gets or Sets the Similarity implementation used by this Searcher.
+ ///
+ /// </summary>
+ /// <seealso cref="Lucene.Net.Search.Similarity.Default">
+ /// </seealso>
+ public virtual Similarity Similarity
+ {
+ get { return this.similarity; }
+ set { this.similarity = value; }
+ }
+
+ /// <summary> creates a weight for <c>query</c></summary>
+ /// <returns> new weight
+ /// </returns>
+ public /*protected internal*/ virtual Weight CreateWeight(Query query)
+ {
+ return query.Weight(this);
+ }
+
+ // inherit javadoc
+ public virtual int[] DocFreqs(Term[] terms)
+ {
+ int[] result = new int[terms.Length];
+ for (int i = 0; i < terms.Length; i++)
+ {
+ result[i] = DocFreq(terms[i]);
+ }
+ return result;
+ }
+
+ public abstract void Search(Weight weight, Filter filter, Collector results);
+
+ [Obsolete("Use Dispose() instead")]
+ public void Close()
+ {
+ Dispose();
+ }
+
+ public void Dispose()
+ {
+ Dispose(true);
+ }
+
+ protected abstract void Dispose(bool disposing);
+
+ public abstract int DocFreq(Term term);
+ public abstract int MaxDoc { get; }
+ public abstract TopDocs Search(Weight weight, Filter filter, int n);
+ public abstract Document Doc(int i);
+ public abstract Document Doc(int docid, FieldSelector fieldSelector);
+ public abstract Query Rewrite(Query query);
+ public abstract Explanation Explain(Weight weight, int doc);
+ public abstract TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort);
+ /* End patch for GCJ bug #15411. */
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/Similarity.cs b/src/core/Search/Similarity.cs
new file mode 100644
index 0000000..18583e3
--- /dev/null
+++ b/src/core/Search/Similarity.cs
@@ -0,0 +1,697 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Documents;
+using FieldInvertState = Lucene.Net.Index.FieldInvertState;
+using Term = Lucene.Net.Index.Term;
+using SmallFloat = Lucene.Net.Util.SmallFloat;
+using IDFExplanation = Lucene.Net.Search.Explanation.IDFExplanation;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary>Expert: Scoring API.
+ /// <p/>Subclasses implement search scoring.
+ ///
+ /// <p/>The score of query <c>q</c> for document <c>d</c> correlates to the
+ /// cosine-distance or dot-product between document and query vectors in a
+ /// <a href="http://en.wikipedia.org/wiki/Vector_Space_Model">
+ /// Vector Space Model (VSM) of Information Retrieval</a>.
+ /// A document whose vector is closer to the query vector in that model is scored higher.
+ ///
+ /// The score is computed as follows:
+ ///
+ /// <p/>
+ /// <table cellpadding="1" cellspacing="0" border="1" align="center">
+ /// <tr><td>
+ /// <table cellpadding="1" cellspacing="0" border="0" align="center">
+ /// <tr>
+ /// <td valign="middle" align="right" rowspan="1">
+ /// score(q,d) &#160; = &#160;
+ /// <A HREF="#formula_coord">coord(q,d)</A> &#160;&#183;&#160;
+ /// <A HREF="#formula_queryNorm">queryNorm(q)</A> &#160;&#183;&#160;
+ /// </td>
+ /// <td valign="bottom" align="center" rowspan="1">
+ /// <big><big><big>&#8721;</big></big></big>
+ /// </td>
+ /// <td valign="middle" align="right" rowspan="1">
+ /// <big><big>(</big></big>
+ /// <A HREF="#formula_tf">tf(t in d)</A> &#160;&#183;&#160;
+ /// <A HREF="#formula_idf">idf(t)</A><sup>2</sup> &#160;&#183;&#160;
+ /// <A HREF="#formula_termBoost">t.Boost</A>&#160;&#183;&#160;
+ /// <A HREF="#formula_norm">norm(t,d)</A>
+ /// <big><big>)</big></big>
+ /// </td>
+ /// </tr>
+	/// <tr valign="top">
+ /// <td></td>
+ /// <td align="center"><small>t in q</small></td>
+ /// <td></td>
+ /// </tr>
+ /// </table>
+ /// </td></tr>
+ /// </table>
+ ///
+ /// <p/> where
+ /// <list type="bullet">
+ /// <item>
+ /// <A NAME="formula_tf"></A>
+ /// <b>tf(t in d)</b>
+ /// correlates to the term's <i>frequency</i>,
+ /// defined as the number of times term <i>t</i> appears in the currently scored document <i>d</i>.
+ /// Documents that have more occurrences of a given term receive a higher score.
+ /// The default computation for <i>tf(t in d)</i> in
+ /// <see cref="Lucene.Net.Search.DefaultSimilarity.Tf(float)">DefaultSimilarity</see> is:
+ ///
+ /// <br/>&#160;<br/>
+ /// <table cellpadding="2" cellspacing="2" border="0" align="center">
+ /// <tr>
+ /// <td valign="middle" align="right" rowspan="1">
+ /// <see cref="Lucene.Net.Search.DefaultSimilarity.Tf(float)">tf(t in d)</see> &#160; = &#160;
+ /// </td>
+ /// <td valign="top" align="center" rowspan="1">
+ /// frequency<sup><big>&#189;</big></sup>
+ /// </td>
+ /// </tr>
+ /// </table>
+ /// <br/>&#160;<br/>
+ /// </item>
+ ///
+ /// <item>
+ /// <A NAME="formula_idf"></A>
+ /// <b>idf(t)</b> stands for Inverse Document Frequency. This value
+ /// correlates to the inverse of <i>docFreq</i>
+ /// (the number of documents in which the term <i>t</i> appears).
+ /// This means rarer terms give higher contribution to the total score.
+ /// The default computation for <i>idf(t)</i> in
+ /// <see cref="Lucene.Net.Search.DefaultSimilarity.Idf(int, int)">DefaultSimilarity</see> is:
+ ///
+ /// <br/>&#160;<br/>
+ /// <table cellpadding="2" cellspacing="2" border="0" align="center">
+ /// <tr>
+ /// <td valign="middle" align="right">
+ /// <see cref="Lucene.Net.Search.DefaultSimilarity.Idf(int, int)">idf(t)</see>&#160; = &#160;
+ /// </td>
+ /// <td valign="middle" align="center">
+ /// 1 + log <big>(</big>
+ /// </td>
+ /// <td valign="middle" align="center">
+ /// <table>
+ /// <tr><td align="center"><small>numDocs</small></td></tr>
+ /// <tr><td align="center">&#8211;&#8211;&#8211;&#8211;&#8211;&#8211;&#8211;&#8211;&#8211;</td></tr>
+ /// <tr><td align="center"><small>docFreq+1</small></td></tr>
+ /// </table>
+ /// </td>
+ /// <td valign="middle" align="center">
+ /// <big>)</big>
+ /// </td>
+ /// </tr>
+ /// </table>
+ /// <br/>&#160;<br/>
+ /// </item>
+ ///
+ /// <item>
+ /// <A NAME="formula_coord"></A>
+ /// <b>coord(q,d)</b>
+ /// is a score factor based on how many of the query terms are found in the specified document.
+ /// Typically, a document that contains more of the query's terms will receive a higher score
+ /// than another document with fewer query terms.
+ /// This is a search time factor computed in
+ /// <see cref="Coord(int, int)">coord(q,d)</see>
+ /// by the Similarity in effect at search time.
+ /// <br/>&#160;<br/>
+ /// </item>
+ ///
+ /// <item><b>
+ /// <A NAME="formula_queryNorm"></A>
+ /// queryNorm(q)
+ /// </b>
+ /// is a normalizing factor used to make scores between queries comparable.
+ /// This factor does not affect document ranking (since all ranked documents are multiplied by the same factor),
+ /// but rather just attempts to make scores from different queries (or even different indexes) comparable.
+ /// This is a search time factor computed by the Similarity in effect at search time.
+ ///
+ /// The default computation in
+ /// <see cref="Lucene.Net.Search.DefaultSimilarity.QueryNorm(float)">DefaultSimilarity</see>
+ /// is:
+ /// <br/>&#160;<br/>
+ /// <table cellpadding="1" cellspacing="0" border="0" align="center">
+ /// <tr>
+ /// <td valign="middle" align="right" rowspan="1">
+ /// queryNorm(q) &#160; = &#160;
+ /// <see cref="Lucene.Net.Search.DefaultSimilarity.QueryNorm(float)">queryNorm(sumOfSquaredWeights)</see>
+ /// &#160; = &#160;
+ /// </td>
+ /// <td valign="middle" align="center" rowspan="1">
+ /// <table>
+ /// <tr><td align="center"><big>1</big></td></tr>
+ /// <tr><td align="center"><big>
+ /// &#8211;&#8211;&#8211;&#8211;&#8211;&#8211;&#8211;&#8211;&#8211;&#8211;&#8211;&#8211;&#8211;&#8211;
+ /// </big></td></tr>
+ /// <tr><td align="center">sumOfSquaredWeights<sup><big>&#189;</big></sup></td></tr>
+ /// </table>
+ /// </td>
+ /// </tr>
+ /// </table>
+ /// <br/>&#160;<br/>
+ ///
+ /// The sum of squared weights (of the query terms) is
+ /// computed by the query <see cref="Lucene.Net.Search.Weight" /> object.
+ /// For example, a <see cref="Lucene.Net.Search.BooleanQuery">boolean query</see>
+ /// computes this value as:
+ ///
+ /// <br/>&#160;<br/>
+ /// <table cellpadding="1" cellspacing="0" border="0" align="center">
+ /// <tr>
+ /// <td valign="middle" align="right" rowspan="1">
+ /// <see cref="Lucene.Net.Search.Weight.GetSumOfSquaredWeights">GetSumOfSquaredWeights</see> &#160; = &#160;
+ /// <see cref="Lucene.Net.Search.Query.Boost">q.Boost</see> <sup><big>2</big></sup>
+ /// &#160;&#183;&#160;
+ /// </td>
+ /// <td valign="bottom" align="center" rowspan="1">
+ /// <big><big><big>&#8721;</big></big></big>
+ /// </td>
+ /// <td valign="middle" align="right" rowspan="1">
+ /// <big><big>(</big></big>
+ /// <A HREF="#formula_idf">idf(t)</A> &#160;&#183;&#160;
+ /// <A HREF="#formula_termBoost">t.Boost</A>
+ /// <big><big>) <sup>2</sup> </big></big>
+ /// </td>
+ /// </tr>
+	/// <tr valign="top">
+ /// <td></td>
+ /// <td align="center"><small>t in q</small></td>
+ /// <td></td>
+ /// </tr>
+ /// </table>
+ /// <br/>&#160;<br/>
+ ///
+ /// </item>
+ ///
+ /// <item>
+ /// <A NAME="formula_termBoost"></A>
+ /// <b>t.Boost</b>
+ /// is a search time boost of term <i>t</i> in the query <i>q</i> as
+ /// specified in the query text
+ /// (see <A HREF="../../../../../../queryparsersyntax.html#Boosting a Term">query syntax</A>),
+ /// or as set by application calls to
+ /// <see cref="Lucene.Net.Search.Query.Boost" />.
+ /// Notice that there is really no direct API for accessing a boost of one term in a multi term query,
+ /// but rather multi terms are represented in a query as multi
+ /// <see cref="Lucene.Net.Search.TermQuery">TermQuery</see> objects,
+ /// and so the boost of a term in the query is accessible by calling the sub-query
+ /// <see cref="Lucene.Net.Search.Query.Boost" />.
+ /// <br/>&#160;<br/>
+ /// </item>
+ ///
+ /// <item>
+ /// <A NAME="formula_norm"></A>
+ /// <b>norm(t,d)</b> encapsulates a few (indexing time) boost and length factors:
+ ///
+ /// <list type="bullet">
+ /// <item><b>Document boost</b> - set by calling
+ /// <see cref="Lucene.Net.Documents.Document.Boost">doc.Boost</see>
+ /// before adding the document to the index.
+ /// </item>
+ /// <item><b>Field boost</b> - set by calling
+ /// <see cref="IFieldable.Boost">field.Boost</see>
+ /// before adding the field to a document.
+ /// </item>
+ /// <item><see cref="LengthNorm(String, int)">LengthNorm(field)</see> - computed
+ /// when the document is added to the index in accordance with the number of tokens
+ /// of this field in the document, so that shorter fields contribute more to the score.
+ /// LengthNorm is computed by the Similarity class in effect at indexing.
+ /// </item>
+ /// </list>
+ ///
+ /// <p/>
+ /// When a document is added to the index, all the above factors are multiplied.
+ /// If the document has multiple fields with the same name, all their boosts are multiplied together:
+ ///
+ /// <br/>&#160;<br/>
+ /// <table cellpadding="1" cellspacing="0" border="0" align="center">
+ /// <tr>
+ /// <td valign="middle" align="right" rowspan="1">
+ /// norm(t,d) &#160; = &#160;
+ /// <see cref="Lucene.Net.Documents.Document.Boost">doc.Boost</see>
+ /// &#160;&#183;&#160;
+ /// <see cref="LengthNorm(String, int)">LengthNorm(field)</see>
+ /// &#160;&#183;&#160;
+ /// </td>
+ /// <td valign="bottom" align="center" rowspan="1">
+ /// <big><big><big>&#8719;</big></big></big>
+ /// </td>
+ /// <td valign="middle" align="right" rowspan="1">
+ /// <see cref="IFieldable.Boost">field.Boost</see>
+ /// </td>
+ /// </tr>
+	/// <tr valign="top">
+ /// <td></td>
+ /// <td align="center"><small>field <i><b>f</b></i> in <i>d</i> named as <i><b>t</b></i></small></td>
+ /// <td></td>
+ /// </tr>
+ /// </table>
+ /// <br/>&#160;<br/>
+ /// However, the resulting <i>norm</i> value is <see cref="EncodeNorm(float)">encoded</see> as a single byte
+ /// before being stored.
+ /// At search time, the norm byte value is read from the index
+ /// <see cref="Lucene.Net.Store.Directory">directory</see> and
+ /// <see cref="DecodeNorm(byte)">decoded</see> back to a float <i>norm</i> value.
+ /// This encoding/decoding, while reducing index size, comes with the price of
+ /// precision loss - it is not guaranteed that decode(encode(x)) = x.
+ /// For instance, decode(encode(0.89)) = 0.75.
+ /// Also notice that search time is too late to modify this <i>norm</i> part of scoring, e.g. by
+ /// using a different <see cref="Similarity" /> for search.
+ /// <br/>&#160;<br/>
+ /// </item>
+ /// </list>
+ ///
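+ /// <p/>
+ /// A minimal sketch of a concrete subclass (illustrative only, not part of this file; the class name
+ /// is hypothetical and the formulas merely resemble those used by <see cref="DefaultSimilarity" />):
+ /// <code>
+ /// public class SketchSimilarity : Similarity
+ /// {
+ ///     public override float LengthNorm(string fieldName, int numTokens) { return (float) (1.0 / System.Math.Sqrt(numTokens)); }
+ ///     public override float QueryNorm(float sumOfSquaredWeights) { return (float) (1.0 / System.Math.Sqrt(sumOfSquaredWeights)); }
+ ///     public override float Tf(float freq) { return (float) System.Math.Sqrt(freq); }
+ ///     public override float SloppyFreq(int distance) { return 1.0f / (distance + 1); }
+ ///     public override float Idf(int docFreq, int numDocs) { return (float) (System.Math.Log(numDocs / (double) (docFreq + 1)) + 1.0); }
+ ///     public override float Coord(int overlap, int maxOverlap) { return overlap / (float) maxOverlap; }
+ /// }
+ /// </code>
+ ///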
+ /// </summary>
+ /// <seealso cref="Default">
+ /// </seealso>
+ /// <seealso cref="Lucene.Net.Index.IndexWriter.Similarity">
+ /// </seealso>
+ /// <seealso cref="Searcher.Similarity">
+ /// </seealso>
+ [Serializable]
+ public abstract class Similarity
+ {
+ protected Similarity()
+ {
+ InitBlock();
+ }
+ [Serializable]
+ private class AnonymousClassIDFExplanation1:IDFExplanation
+ {
+ public AnonymousClassIDFExplanation1(int df, int max, float idf, Similarity enclosingInstance)
+ {
+ InitBlock(df, max, idf, enclosingInstance);
+ }
+ private void InitBlock(int df, int max, float idf, Similarity enclosingInstance)
+ {
+ this.df = df;
+ this.max = max;
+ this.idf = idf;
+ this.enclosingInstance = enclosingInstance;
+ }
+ private int df;
+ private int max;
+ private float idf;
+ private Similarity enclosingInstance;
+ public Similarity Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ //@Override
+ public override System.String Explain()
+ {
+ return "idf(docFreq=" + df + ", maxDocs=" + max + ")";
+ }
+ //@Override
+
+ public override float Idf
+ {
+ get { return idf; }
+ }
+ }
+ [Serializable]
+ private class AnonymousClassIDFExplanation3:IDFExplanation
+ {
+ public AnonymousClassIDFExplanation3(float fIdf, System.Text.StringBuilder exp, Similarity enclosingInstance)
+ {
+ InitBlock(fIdf, exp, enclosingInstance);
+ }
+ private void InitBlock(float fIdf, System.Text.StringBuilder exp, Similarity enclosingInstance)
+ {
+ this.fIdf = fIdf;
+ this.exp = exp;
+ this.enclosingInstance = enclosingInstance;
+ }
+ private float fIdf;
+ private System.Text.StringBuilder exp;
+ private Similarity enclosingInstance;
+ public Similarity Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ //@Override
+
+ public override float Idf
+ {
+ get { return fIdf; }
+ }
+
+ //@Override
+ public override System.String Explain()
+ {
+ return exp.ToString();
+ }
+ }
+ private void InitBlock()
+ {
+
+ }
+
+ /// <summary>The Similarity implementation used by default.</summary>
+ private static Similarity defaultImpl = new DefaultSimilarity();
+ public const int NO_DOC_ID_PROVIDED = -1;
+
+ /// <summary>Gets or sets the default Similarity implementation
+ /// used by indexing and search code.
+ /// <p/>This is initially an instance of <see cref="DefaultSimilarity" />.
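+ /// <p/>
+ /// A brief usage sketch (illustrative only; any concrete <see cref="Similarity" /> subclass could be assigned):
+ /// <code>
+ /// Similarity current = Similarity.Default;       // read the implementation in effect
+ /// Similarity.Default = new DefaultSimilarity();  // or an application-specific subclass
+ /// </code>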
+ /// </summary>
+ /// <seealso cref="Searcher.Similarity">
+ /// </seealso>
+ /// <seealso cref="Lucene.Net.Index.IndexWriter.SetSimilarity(Similarity)">
+ /// </seealso>
+ public static Similarity Default
+ {
+ get { return defaultImpl; }
+ set { defaultImpl = value; }
+ }
+
+ /// <summary>Cache of decoded bytes. </summary>
+ private static readonly float[] NORM_TABLE = new float[256];
+
+ /// <summary>Decodes a normalization factor stored in an index.</summary>
+ /// <seealso cref="EncodeNorm(float)">
+ /// </seealso>
+ public static float DecodeNorm(byte b)
+ {
+ return NORM_TABLE[b & 0xFF]; // & 0xFF maps negative bytes to positive above 127
+ }
+
+ /// <summary>Returns a table for decoding normalization bytes.</summary>
+ /// <seealso cref="EncodeNorm(float)">
+ /// </seealso>
+ public static float[] GetNormDecoder()
+ {
+ return NORM_TABLE;
+ }
+
+ /// <summary> Compute the normalization value for a field, given the accumulated
+ /// state of term processing for this field (see <see cref="FieldInvertState" />).
+ ///
+ /// <p/>Implementations should calculate a float value based on the field
+ /// state and then return that value.
+ ///
+ /// <p/>For backward compatibility this method by default calls
+ /// <see cref="LengthNorm(String, int)" /> passing
+ /// <see cref="FieldInvertState.Length" /> as the second argument, and
+ /// then multiplies this value by <see cref="FieldInvertState.Boost" />.<p/>
+ ///
+ /// <p/><b>WARNING</b>: This API is new and experimental and may
+ /// suddenly change.<p/>
+ ///
+ /// </summary>
+ /// <param name="field">field name
+ /// </param>
+ /// <param name="state">current processing state for this field
+ /// </param>
+ /// <returns> the calculated float norm
+ /// </returns>
+ public virtual float ComputeNorm(System.String field, FieldInvertState state)
+ {
+ return (float) (state.Boost * LengthNorm(field, state.Length));
+ }
+
+ /// <summary>Computes the normalization value for a field given the total number of
+ /// terms contained in a field. These values, together with field boosts, are
+ /// stored in an index and multiplied into scores for hits on each field by the
+ /// search code.
+ ///
+ /// <p/>Matches in longer fields are less precise, so implementations of this
+ /// method usually return smaller values when <c>numTokens</c> is large,
+ /// and larger values when <c>numTokens</c> is small.
+ ///
+ /// <p/>Note that the return values are computed under
+ /// <see cref="Lucene.Net.Index.IndexWriter.AddDocument(Lucene.Net.Documents.Document)" />
+ /// and then stored using
+ /// <see cref="EncodeNorm(float)" />.
+ /// Thus they have limited precision, and documents
+ /// must be re-indexed if this method is altered.
+ ///
+ /// </summary>
+ /// <param name="fieldName">the name of the field
+ /// </param>
+ /// <param name="numTokens">the total number of tokens contained in fields named
+ /// <i>fieldName</i> of <i>doc</i>.
+ /// </param>
+ /// <returns> a normalization factor for hits on this field of this document
+ ///
+ /// </returns>
+ /// <seealso cref="Lucene.Net.Documents.AbstractField.Boost" />
+ public abstract float LengthNorm(System.String fieldName, int numTokens);
+
+ /// <summary>Computes the normalization value for a query given the sum of the squared
+ /// weights of each of the query terms. This value is then multiplied into the
+ /// weight of each query term.
+ ///
+ /// <p/>This does not affect ranking, but rather just attempts to make scores
+ /// from different queries comparable.
+ ///
+ /// </summary>
+ /// <param name="sumOfSquaredWeights">the sum of the squares of query term weights
+ /// </param>
+ /// <returns> a normalization factor for query weights
+ /// </returns>
+ public abstract float QueryNorm(float sumOfSquaredWeights);
+
+ /// <summary>Encodes a normalization factor for storage in an index.
+ ///
+ /// <p/>The encoding uses a three-bit mantissa, a five-bit exponent, and
+ /// the zero-exponent point at 15, thus
+ /// representing values from around 7x10^9 to 2x10^-9 with about one
+ /// significant decimal digit of accuracy. Zero is also represented.
+ /// Negative numbers are rounded up to zero. Values too large to represent
+ /// are rounded down to the largest representable value. Positive values too
+ /// small to represent are rounded up to the smallest positive representable
+ /// value.
+ ///
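+ /// <p/>
+ /// A small round-trip sketch (illustrative only), using the example values quoted in the class
+ /// documentation above:
+ /// <code>
+ /// byte b = Similarity.EncodeNorm(0.89f);
+ /// float f = Similarity.DecodeNorm(b); // roughly 0.75f - the encoding is lossy
+ /// </code>
+ ///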
+ /// </summary>
+ /// <seealso cref="Lucene.Net.Documents.AbstractField.Boost" />
+ /// <seealso cref="Lucene.Net.Util.SmallFloat" />
+ public static byte EncodeNorm(float f)
+ {
+ return (byte) SmallFloat.FloatToByte315(f);
+ }
+
+
+ /// <summary>Computes a score factor based on a term or phrase's frequency in a
+ /// document. This value is multiplied by the <see cref="Idf(int, int)" />
+ /// factor for each term in the query and these products are then summed to
+ /// form the initial score for a document.
+ ///
+ /// <p/>Terms and phrases repeated in a document indicate the topic of the
+ /// document, so implementations of this method usually return larger values
+ /// when <c>freq</c> is large, and smaller values when <c>freq</c>
+ /// is small.
+ ///
+ /// <p/>The default implementation calls <see cref="Tf(float)" />.
+ ///
+ /// </summary>
+ /// <param name="freq">the frequency of a term within a document
+ /// </param>
+ /// <returns> a score factor based on a term's within-document frequency
+ /// </returns>
+ public virtual float Tf(int freq)
+ {
+ return Tf((float) freq);
+ }
+
+ /// <summary>Computes the amount of a sloppy phrase match, based on an edit distance.
+ /// This value is summed for each sloppy phrase match in a document to form
+ /// the frequency that is passed to <see cref="Tf(float)" />.
+ ///
+ /// <p/>A phrase match with a small edit distance to a document passage more
+ /// closely matches the document, so implementations of this method usually
+ /// return larger values when the edit distance is small and smaller values
+ /// when it is large.
+ ///
+ /// </summary>
+ /// <seealso cref="PhraseQuery.Slop" />
+ /// <param name="distance">the edit distance of this sloppy phrase match </param>
+ /// <returns> the frequency increment for this match </returns>
+ public abstract float SloppyFreq(int distance);
+
+ /// <summary>Computes a score factor based on a term or phrase's frequency in a
+ /// document. This value is multiplied by the <see cref="Idf(int, int)" />
+ /// factor for each term in the query and these products are then summed to
+ /// form the initial score for a document.
+ ///
+ /// <p/>Terms and phrases repeated in a document indicate the topic of the
+ /// document, so implementations of this method usually return larger values
+ /// when <c>freq</c> is large, and smaller values when <c>freq</c>
+ /// is small.
+ ///
+ /// </summary>
+ /// <param name="freq">the frequency of a term within a document
+ /// </param>
+ /// <returns> a score factor based on a term's within-document frequency
+ /// </returns>
+ public abstract float Tf(float freq);
+
+ /// <summary> Computes a score factor for a simple term and returns an explanation
+ /// for that score factor.
+ ///
+ /// <p/>
+ /// The default implementation uses:
+ ///
+ /// <code>
+ /// Idf(searcher.DocFreq(term), searcher.MaxDoc);
+ /// </code>
+ ///
+ /// Note that <see cref="Searcher.MaxDoc" /> is used instead of
+ /// <see cref="Lucene.Net.Index.IndexReader.NumDocs()" /> because it is
+ /// proportional to <see cref="Searcher.DocFreq(Term)" /> , i.e., when one is
+ /// inaccurate, so is the other, and in the same direction.
+ ///
+ /// </summary>
+ /// <param name="term">the term in question
+ /// </param>
+ /// <param name="searcher">the document collection being searched
+ /// </param>
+ /// <returns> an IDFExplain object that includes both an idf score factor
+ /// and an explanation for the term.
+ /// </returns>
+ /// <throws> IOException </throws>
+ public virtual IDFExplanation IdfExplain(Term term, Searcher searcher)
+ {
+ int df = searcher.DocFreq(term);
+ int max = searcher.MaxDoc;
+ float idf2 = Idf(df, max);
+ return new AnonymousClassIDFExplanation1(df, max, idf2, this);
+ }
+
+ /// <summary> Computes a score factor for a phrase.
+ ///
+ /// <p/>
+ /// The default implementation sums the idf factor for
+ /// each term in the phrase.
+ ///
+ /// </summary>
+ /// <param name="terms">the terms in the phrase
+ /// </param>
+ /// <param name="searcher">the document collection being searched
+ /// </param>
+ /// <returns> an IDFExplain object that includes both an idf
+ /// score factor for the phrase and an explanation
+ /// for each term.
+ /// </returns>
+ /// <throws> IOException </throws>
+ public virtual IDFExplanation IdfExplain(ICollection<Term> terms, Searcher searcher)
+ {
+ int max = searcher.MaxDoc;
+ float idf2 = 0.0f;
+ System.Text.StringBuilder exp = new System.Text.StringBuilder();
+ foreach (Term term in terms)
+ {
+ int df = searcher.DocFreq(term);
+ idf2 += Idf(df, max);
+ exp.Append(" ");
+ exp.Append(term.Text);
+ exp.Append("=");
+ exp.Append(df);
+ }
+ float fIdf = idf2;
+ return new AnonymousClassIDFExplanation3(fIdf, exp, this);
+ }
+
+ /// <summary>Computes a score factor based on a term's document frequency (the number
+ /// of documents which contain the term). This value is multiplied by the
+ /// <see cref="Tf(int)" /> factor for each term in the query and these products are
+ /// then summed to form the initial score for a document.
+ ///
+ /// <p/>Terms that occur in fewer documents are better indicators of topic, so
+ /// implementations of this method usually return larger values for rare terms,
+ /// and smaller values for common terms.
+ ///
+ /// </summary>
+ /// <param name="docFreq">the number of documents which contain the term
+ /// </param>
+ /// <param name="numDocs">the total number of documents in the collection
+ /// </param>
+ /// <returns> a score factor based on the term's document frequency
+ /// </returns>
+ public abstract float Idf(int docFreq, int numDocs);
+
+ /// <summary>Computes a score factor based on the fraction of all query terms that a
+ /// document contains. This value is multiplied into scores.
+ ///
+ /// <p/>The presence of a large portion of the query terms indicates a better
+ /// match with the query, so implementations of this method usually return
+ /// larger values when the ratio between these parameters is large and smaller
+ /// values when the ratio between them is small.
+ ///
+ /// </summary>
+ /// <param name="overlap">the number of query terms matched in the document
+ /// </param>
+ /// <param name="maxOverlap">the total number of terms in the query
+ /// </param>
+ /// <returns> a score factor based on term overlap with the query
+ /// </returns>
+ public abstract float Coord(int overlap, int maxOverlap);
+
+
+ /// <summary> Calculate a scoring factor based on the data in the payload. Overriding implementations
+ /// are responsible for interpreting what is in the payload. Lucene makes no assumptions about
+ /// what is in the byte array.
+ /// <p/>
+ /// The default implementation returns 1.
+ ///
+ /// </summary>
+ /// <param name="docId">The docId currently being scored. If this value is <see cref="NO_DOC_ID_PROVIDED" />, then it should be assumed that the PayloadQuery implementation does not provide document information
+ /// </param>
+ /// <param name="fieldName">The fieldName of the term this payload belongs to
+ /// </param>
+ /// <param name="start">The start position of the payload
+ /// </param>
+ /// <param name="end">The end position of the payload
+ /// </param>
+ /// <param name="payload">The payload byte array to be scored
+ /// </param>
+ /// <param name="offset">The offset into the payload array
+ /// </param>
+ /// <param name="length">The length in the array
+ /// </param>
+ /// <returns> An implementation dependent float to be used as a scoring factor
+ ///
+ /// </returns>
+ public virtual float ScorePayload(int docId, System.String fieldName, int start, int end, byte[] payload, int offset, int length)
+ {
+ return 1;
+ }
+
+ static Similarity()
+ {
+ for (int i = 0; i < 256; i++)
+ NORM_TABLE[i] = SmallFloat.Byte315ToFloat((byte) i);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/SimilarityDelegator.cs b/src/core/Search/SimilarityDelegator.cs
new file mode 100644
index 0000000..433fc0b
--- /dev/null
+++ b/src/core/Search/SimilarityDelegator.cs
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using FieldInvertState = Lucene.Net.Index.FieldInvertState;
+
+namespace Lucene.Net.Search
+{
+ /// <summary>Expert: Delegating scoring implementation. Useful in <see cref="Query.GetSimilarity(Searcher)" />
+ /// implementations, to override only certain
+ /// methods of a Searcher's Similarity implementation.
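+ /// <p/>
+ /// A minimal sketch (illustrative only; the subclass name is hypothetical) of overriding a single
+ /// factor while delegating everything else:
+ /// <code>
+ /// class NoCoordSimilarity : SimilarityDelegator
+ /// {
+ ///     public NoCoordSimilarity(Similarity delegee) : base(delegee) { }
+ ///     public override float Coord(int overlap, int maxOverlap) { return 1.0f; } // ignore coordination
+ /// }
+ /// </code>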
+ /// </summary>
+ [Serializable]
+ public class SimilarityDelegator:Similarity
+ {
+ private Similarity delegee;
+
+ /// <summary>Construct a <see cref="Similarity" /> that delegates all methods to another.</summary>
+ /// <param name="delegee">the Similarity implementation to delegate to</param>
+ public SimilarityDelegator(Similarity delegee)
+ {
+ this.delegee = delegee;
+ }
+
+ public override float ComputeNorm(System.String fieldName, FieldInvertState state)
+ {
+ return delegee.ComputeNorm(fieldName, state);
+ }
+
+ public override float LengthNorm(System.String fieldName, int numTerms)
+ {
+ return delegee.LengthNorm(fieldName, numTerms);
+ }
+
+ public override float QueryNorm(float sumOfSquaredWeights)
+ {
+ return delegee.QueryNorm(sumOfSquaredWeights);
+ }
+
+ public override float Tf(float freq)
+ {
+ return delegee.Tf(freq);
+ }
+
+ public override float SloppyFreq(int distance)
+ {
+ return delegee.SloppyFreq(distance);
+ }
+
+ public override float Idf(int docFreq, int numDocs)
+ {
+ return delegee.Idf(docFreq, numDocs);
+ }
+
+ public override float Coord(int overlap, int maxOverlap)
+ {
+ return delegee.Coord(overlap, maxOverlap);
+ }
+
+ public override float ScorePayload(int docId, string fieldName, int start, int end, byte[] payload, int offset, int length)
+ {
+ return delegee.ScorePayload(docId, fieldName, start, end, payload, offset, length);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/SingleTermEnum.cs b/src/core/Search/SingleTermEnum.cs
new file mode 100644
index 0000000..f80b8d3
--- /dev/null
+++ b/src/core/Search/SingleTermEnum.cs
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Index;
+
+namespace Lucene.Net.Search
+{
+ /// <summary>
+ /// Subclass of FilteredTermEnum for enumerating a single term.
+ /// <p/>
+ /// This can be used by <see cref="MultiTermQuery"/>s that need only visit one term,
+ /// but want to preserve MultiTermQuery semantics such as
+ /// <see cref="RewriteMethod"/>.
+ /// </summary>
+ public class SingleTermEnum : FilteredTermEnum
+ {
+ private Term singleTerm;
+ private bool _endEnum = false;
+
+ /// <summary>
+ /// Creates a new <c>SingleTermEnum</c>.
+ /// <p/>
+ /// After calling the constructor the enumeration is already pointing to the term,
+ /// if it exists.
+ /// </summary>
+ public SingleTermEnum(IndexReader reader, Term singleTerm)
+ {
+ this.singleTerm = singleTerm;
+ SetEnum(reader.Terms(singleTerm));
+ }
+
+ public override float Difference()
+ {
+ return 1.0F;
+ }
+
+ public override bool EndEnum()
+ {
+ return _endEnum;
+ }
+
+ protected internal override bool TermCompare(Term term)
+ {
+ if (term.Equals(singleTerm))
+ {
+ return true;
+ }
+ else
+ {
+ _endEnum = true;
+ return false;
+ }
+ }
+ }
+}
diff --git a/src/core/Search/SloppyPhraseScorer.cs b/src/core/Search/SloppyPhraseScorer.cs
new file mode 100644
index 0000000..2052c2b
--- /dev/null
+++ b/src/core/Search/SloppyPhraseScorer.cs
@@ -0,0 +1,244 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Linq;
+using Lucene.Net.Support;
+using TermPositions = Lucene.Net.Index.TermPositions;
+
+namespace Lucene.Net.Search
+{
+
+ sealed class SloppyPhraseScorer:PhraseScorer
+ {
+ private int slop;
+ private PhrasePositions[] repeats;
+ private PhrasePositions[] tmpPos; // for flipping repeating pps.
+ private bool checkedRepeats;
+
+ internal SloppyPhraseScorer(Weight weight, TermPositions[] tps, int[] offsets, Similarity similarity, int slop, byte[] norms):base(weight, tps, offsets, similarity, norms)
+ {
+ this.slop = slop;
+ }
+
+ /// <summary> Score a candidate doc for all slop-valid position-combinations (matches)
+ /// encountered while traversing/hopping the PhrasePositions.
+ /// <br/> The score contribution of a match depends on the distance:
+ /// <br/> - highest score for distance=0 (exact match).
+ /// <br/> - score gets lower as distance gets higher.
+ /// <br/>Example: for query "a b"~2, a document "x a b a y" can be scored twice:
+ /// once for "a b" (distance=0), and once for "b a" (distance=2).
+ /// <br/>Possibly not all valid combinations are encountered, because for efficiency
+ /// we always propagate the least PhrasePosition. This allows the scorer to build on a
+ /// PriorityQueue and move forward faster.
+ /// As a result, for example, document "a b c b a"
+ /// would score differently for queries "a b c"~4 and "c b a"~4, although
+ /// they really are equivalent.
+ /// Similarly, for doc "a b c b a f g", query "c b"~2
+ /// would get the same score as "g f"~2, although "c b"~2 could be matched twice.
+ /// We may want to fix this in the future (currently not, for performance reasons).
+ /// </summary>
+ protected internal override float PhraseFreq()
+ {
+ int end = InitPhrasePositions();
+
+ float freq = 0.0f;
+ bool done = (end < 0);
+ while (!done)
+ {
+ PhrasePositions pp = pq.Pop();
+ int start = pp.position;
+ int next = pq.Top().position;
+
+ bool tpsDiffer = true;
+ for (int pos = start; pos <= next || !tpsDiffer; pos = pp.position)
+ {
+ if (pos <= next && tpsDiffer)
+ start = pos; // advance pp to min window
+ if (!pp.NextPosition())
+ {
+ done = true; // ran out of a term -- done
+ break;
+ }
+ PhrasePositions pp2 = null;
+ tpsDiffer = !pp.repeats || (pp2 = TermPositionsDiffer(pp)) == null;
+ if (pp2 != null && pp2 != pp)
+ {
+ pp = Flip(pp, pp2); // flip pp to pp2
+ }
+ }
+
+ int matchLength = end - start;
+ if (matchLength <= slop)
+ freq += Similarity.SloppyFreq(matchLength); // score match
+
+ if (pp.position > end)
+ end = pp.position;
+ pq.Add(pp); // restore pq
+ }
+
+ return freq;
+ }
+
+ // flip pp2 and pp in the queue: pop until finding pp2, insert back all but pp2, insert pp back.
+ // assumes: pp!=pp2, pp2 in pq, pp not in pq.
+ // called only when there are repeating pps.
+ private PhrasePositions Flip(PhrasePositions pp, PhrasePositions pp2)
+ {
+ int n = 0;
+ PhrasePositions pp3;
+ //pop until finding pp2
+ while ((pp3 = pq.Pop()) != pp2)
+ {
+ tmpPos[n++] = pp3;
+ }
+ //insert back all but pp2
+ for (n--; n >= 0; n--)
+ {
+ pq.InsertWithOverflow(tmpPos[n]);
+ }
+ //insert pp back
+ pq.Add(pp);
+ return pp2;
+ }
+
+ /// <summary> Init PhrasePositions in place.
+ /// There is a one time initialization for this scorer:
+ /// <br/>- Put in repeats[] each pp that has another pp with same position in the doc.
+ /// <br/>- Also mark each such pp by pp.repeats = true.
+ /// <br/>Later, termPositionsDiffer(pp) can consult repeats[], making that check efficient.
+ /// In particular, this allows queries with no repetitions to be scored with no overhead from this computation.
+ /// <br/>- Example 1 - query with no repetitions: "ho my"~2
+ /// <br/>- Example 2 - query with repetitions: "ho my my"~2
+ /// <br/>- Example 3 - query with repetitions: "my ho my"~2
+ /// <br/>Init per doc w/repeats in query, includes propagating some repeating pp's to avoid false phrase detection.
+ /// </summary>
+ /// <returns> end (max position), or -1 if any term ran out (i.e. done)
+ /// </returns>
+ /// <throws> IOException </throws>
+ private int InitPhrasePositions()
+ {
+ int end = 0;
+
+ // no repeats at all (most common case is also the simplest one)
+ if (checkedRepeats && repeats == null)
+ {
+ // build queue from list
+ pq.Clear();
+ for (PhrasePositions pp = first; pp != null; pp = pp.next)
+ {
+ pp.FirstPosition();
+ if (pp.position > end)
+ end = pp.position;
+ pq.Add(pp); // build pq from list
+ }
+ return end;
+ }
+
+ // position the pp's
+ for (PhrasePositions pp = first; pp != null; pp = pp.next)
+ pp.FirstPosition();
+
+ // one-time initialization for this scorer
+ if (!checkedRepeats)
+ {
+ checkedRepeats = true;
+ // check for repeats
+ HashMap<PhrasePositions, object> m = null;
+ for (PhrasePositions pp = first; pp != null; pp = pp.next)
+ {
+ int tpPos = pp.position + pp.offset;
+ for (PhrasePositions pp2 = pp.next; pp2 != null; pp2 = pp2.next)
+ {
+ int tpPos2 = pp2.position + pp2.offset;
+ if (tpPos2 == tpPos)
+ {
+ if (m == null)
+ {
+ m = new HashMap<PhrasePositions, object>();
+ }
+ pp.repeats = true;
+ pp2.repeats = true;
+ m[pp] = null;
+ m[pp2] = null;
+ }
+ }
+ }
+ if (m != null)
+ {
+ repeats = m.Keys.ToArray();
+ }
+ }
+
+ // with repeats must advance some repeating pp's so they all start with differing tp's
+ if (repeats != null)
+ {
+ for (int i = 0; i < repeats.Length; i++)
+ {
+ PhrasePositions pp = repeats[i];
+ PhrasePositions pp2;
+ while ((pp2 = TermPositionsDiffer(pp)) != null)
+ {
+ if (!pp2.NextPosition()) // out of pps that do not differ, advance the pp with higher offset
+ return -1; // ran out of a term -- done
+ }
+ }
+ }
+
+ // build queue from list
+ pq.Clear();
+ for (PhrasePositions pp = first; pp != null; pp = pp.next)
+ {
+ if (pp.position > end)
+ end = pp.position;
+ pq.Add(pp); // build pq from list
+ }
+
+ if (repeats != null)
+ {
+ tmpPos = new PhrasePositions[pq.Size()];
+ }
+ return end;
+ }
+
+ /// <summary> We disallow two pp's from having the same TermPosition, thereby ensuring that multiple
+ /// occurrences of the same word in the query must match different positions in the doc.
+ /// </summary>
+ /// <returns> null if the positions differ (i.e. valid); otherwise the higher-offset PhrasePositions
+ /// of the first two pp's found not to differ.
+ /// </returns>
+ private PhrasePositions TermPositionsDiffer(PhrasePositions pp)
+ {
+ // efficiency note: a more efficient implementation could keep a map between repeating
+ // pp's, so that if pp1a, pp1b, pp1c are repeats term1, and pp2a, pp2b are repeats
+ // of term2, pp2a would only be checked against pp2b but not against pp1a, pp1b, pp1c.
+ // However this would complicate code, for a rather rare case, so choice is to compromise here.
+ int tpPos = pp.position + pp.offset;
+ for (int i = 0; i < repeats.Length; i++)
+ {
+ PhrasePositions pp2 = repeats[i];
+ if (pp2 == pp)
+ continue;
+ int tpPos2 = pp2.position + pp2.offset;
+ if (tpPos2 == tpPos)
+ return pp.offset > pp2.offset ? pp : pp2; // do not differ: return the one with the higher offset.
+ }
+ return null;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/Sort.cs b/src/core/Search/Sort.cs
new file mode 100644
index 0000000..2a837eb
--- /dev/null
+++ b/src/core/Search/Sort.cs
@@ -0,0 +1,214 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Search
+{
+
+
+ /// <summary> Encapsulates sort criteria for returned hits.
+ ///
+ /// <p/>The fields used to determine sort order must be carefully chosen.
+ /// Documents must contain a single term in such a field,
+ /// and the value of the term should indicate the document's relative position in
+ /// a given sort order. The field must be indexed, but should not be tokenized,
+ /// and does not need to be stored (unless you happen to want it back with the
+ /// rest of your document data). In other words:
+ ///
+ /// <p/><c>document.Add(new Field("byNumber", x.ToString(), Field.Store.NO, Field.Index.NOT_ANALYZED));</c><p/>
+ ///
+ ///
+ /// <p/><h3>Valid Types of Values</h3>
+ ///
+ /// <p/>There are four possible kinds of term values which may be put into
+ /// sorting fields: Integers, Longs, Floats, or Strings. Unless
+ /// <see cref="SortField">SortField</see> objects are specified, the type of value
+ /// in the field is determined by parsing the first term in the field.
+ ///
+ /// <p/>Integer term values should contain only digits and an optional
+ /// preceding negative sign. Values must be base 10 and lie between
+ /// <c>int.MinValue</c> and <c>int.MaxValue</c> inclusive.
+ /// Documents which should appear first in the sort
+ /// should have low value integers, later documents high values
+ /// (i.e. the documents should be numbered <c>1..n</c> where
+ /// <c>1</c> is the first and <c>n</c> the last).
+ ///
+ /// <p/>Long term values should contain only digits and an optional
+ /// preceding negative sign. Values must be base 10 and lie between
+ /// <c>long.MinValue</c> and <c>long.MaxValue</c> inclusive.
+ /// Documents which should appear first in the sort
+ /// should have low value integers, later documents high values.
+ ///
+ /// <p/>Float term values should conform to values accepted by
+ /// <see cref="float.Parse(string)" /> (except that <c>NaN</c>
+ /// and <c>Infinity</c> are not supported).
+ /// Documents which should appear first in the sort
+ /// should have low values, later documents high values.
+ ///
+ /// <p/>String term values can contain any valid String, but should
+ /// not be tokenized. The values are sorted according to their
+ /// <see cref="IComparable">natural order</see>. Note that using this type
+ /// of term value has higher memory requirements than the other
+ /// two types.
+ ///
+ /// <p/><h3>Object Reuse</h3>
+ ///
+ /// <p/>One of these objects can be
+ /// used multiple times and the sort order changed between usages.
+ ///
+ /// <p/>This class is thread safe.
+ ///
+ /// <p/><h3>Memory Usage</h3>
+ ///
+ /// <p/>Sorting uses caches of term values maintained by the
+ /// internal HitQueue(s). The cache is static and contains an integer
+ /// or float array of length <c>IndexReader.MaxDoc</c> for each field
+ /// name for which a sort is performed. In other words, the size of the
+ /// cache in bytes is:
+ ///
+ /// <p/><c>4 * IndexReader.MaxDoc * (# of different fields actually used to sort)</c>
+ ///
+ /// <p/>For String fields, the cache is larger: in addition to the
+ /// above array, the value of every term in the field is kept in memory.
+ /// If there are many unique terms in the field, this could
+ /// be quite large.
+ ///
+ /// <p/>Note that the size of the cache is not affected by how many
+ /// fields are in the index and <i>might</i> be used to sort - only by
+ /// the ones actually used to sort a result set.
+ ///
+ /// <p/>Created: Feb 12, 2004 10:53:57 AM
+ ///
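+ /// <p/>
+ /// A brief construction sketch (illustrative only; the field name reuses the "byNumber" example above):
+ /// <code>
+ /// Sort byNumber = new Sort(new SortField("byNumber", SortField.INT));
+ /// Sort byNumberDescThenScore = new Sort(new SortField("byNumber", SortField.INT, true), SortField.FIELD_SCORE);
+ /// </code>
+ ///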
+ /// </summary>
+ [Serializable]
+ public class Sort
+ {
+
+ /// <summary> Represents sorting by computed relevance. Using this sort criteria returns
+ /// the same results as calling
+ /// <see cref="Searcher.Search(Query,int)" />Searcher#search()without a sort criteria,
+ /// only with slightly more overhead.
+ /// </summary>
+ public static readonly Sort RELEVANCE = new Sort();
+
+ /// <summary>Represents sorting by index order. </summary>
+ public static readonly Sort INDEXORDER;
+
+ // internal representation of the sort criteria
+ internal SortField[] fields;
+
+ /// <summary> Sorts by computed relevance. This is the same sort criteria as calling
+ /// <see cref="Searcher.Search(Query,int)" />without a sort criteria,
+ /// only with slightly more overhead.
+ /// </summary>
+ public Sort():this(SortField.FIELD_SCORE)
+ {
+ }
+
+ /// <summary>Sorts by the criteria in the given SortField. </summary>
+ public Sort(SortField field)
+ {
+ SetSort(field);
+ }
+
+ /// <summary>Sorts in succession by the criteria in each SortField. </summary>
+ public Sort(params SortField[] fields)
+ {
+ SetSort(fields);
+ }
+
+ /// <summary>Sets the sort to the given criteria. </summary>
+ public virtual void SetSort(SortField field)
+ {
+ this.fields = new SortField[]{field};
+ }
+
+ /// <summary>Sets the sort to the given criteria in succession. </summary>
+ public virtual void SetSort(params SortField[] fields)
+ {
+ this.fields = fields;
+ }
+
+ /// <summary> Representation of the sort criteria.</summary>
+ /// <returns> Array of SortField objects used in this sort criteria
+ /// </returns>
+ public virtual SortField[] GetSort()
+ {
+ return fields;
+ }
+
+ public override System.String ToString()
+ {
+ System.Text.StringBuilder buffer = new System.Text.StringBuilder();
+
+ for (int i = 0; i < fields.Length; i++)
+ {
+ buffer.Append(fields[i].ToString());
+ if ((i + 1) < fields.Length)
+ buffer.Append(',');
+ }
+
+ return buffer.ToString();
+ }
+
+ /// <summary>Returns true if <c>o</c> is equal to this. </summary>
+ public override bool Equals(System.Object o)
+ {
+ if (this == o)
+ return true;
+ if (!(o is Sort))
+ return false;
+ Sort other = (Sort) o;
+
+ bool result = false;
+ if ((this.fields == null) && (other.fields == null))
+ result = true;
+ else if ((this.fields != null) && (other.fields != null))
+ {
+ if (this.fields.Length == other.fields.Length)
+ {
+ int length = this.fields.Length;
+ result = true;
+ for (int i = 0; i < length; i++)
+ {
+ if (!(this.fields[i].Equals(other.fields[i])))
+ {
+ result = false;
+ break;
+ }
+ }
+ }
+ }
+ return result;
+ }
+
+ /// <summary>Returns a hash code value for this object. </summary>
+ public override int GetHashCode()
+ {
+ // TODO in Java 1.5: switch to Arrays.hashCode(). The
+ // Java 1.4 workaround below calculates the same hashCode
+ // as Java 1.5's new Arrays.hashCode()
+ return 0x45aaf665 + EquatableList<SortField>.GetHashCode(fields);
+ }
+ static Sort()
+ {
+ INDEXORDER = new Sort(SortField.FIELD_DOC);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/SortField.cs b/src/core/Search/SortField.cs
new file mode 100644
index 0000000..5e06b48
--- /dev/null
+++ b/src/core/Search/SortField.cs
@@ -0,0 +1,512 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Globalization;
+using Lucene.Net.Support;
+using NumericField = Lucene.Net.Documents.NumericField;
+using IndexReader = Lucene.Net.Index.IndexReader;
+using Single = Lucene.Net.Support.Single;
+using Term = Lucene.Net.Index.Term;
+using TermEnum = Lucene.Net.Index.TermEnum;
+using StringHelper = Lucene.Net.Util.StringHelper;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> Stores information about how to sort documents by terms in an individual
+ /// field. Fields must be indexed in order to sort by them.
+ ///
+ /// <p/>Created: Feb 11, 2004 1:25:29 PM
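+ ///
+ /// <p/>
+ /// A brief construction sketch (illustrative only; the field names are assumptions):
+ /// <code>
+ /// SortField byCount = new SortField("count", SortField.INT, true);  // numeric, descending
+ /// SortField byTitle = new SortField("title", SortField.STRING);     // string, ascending
+ /// </code>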
+ /// </summary>
+ /// <seealso cref="Sort"></seealso>
+ [Serializable]
+ public class SortField
+ {
+ /// <summary>Sort by document score (relevancy). Sort values are Float and higher
+ /// values are at the front.
+ /// </summary>
+ public const int SCORE = 0;
+
+ /// <summary>Sort by document number (index order). Sort values are Integer and lower
+ /// values are at the front.
+ /// </summary>
+ public const int DOC = 1;
+
+ // reserved, in Lucene 2.9, there was a constant: AUTO = 2
+
+ /// <summary>Sort using term values as Strings. Sort values are String and lower
+ /// values are at the front.
+ /// </summary>
+ public const int STRING = 3;
+
+ /// <summary>Sort using term values as encoded Integers. Sort values are Integer and
+ /// lower values are at the front.
+ /// </summary>
+ public const int INT = 4;
+
+ /// <summary>Sort using term values as encoded Floats. Sort values are Float and
+ /// lower values are at the front.
+ /// </summary>
+ public const int FLOAT = 5;
+
+ /// <summary>Sort using term values as encoded Longs. Sort values are Long and
+ /// lower values are at the front.
+ /// </summary>
+ public const int LONG = 6;
+
+ /// <summary>Sort using term values as encoded Doubles. Sort values are Double and
+ /// lower values are at the front.
+ /// </summary>
+ public const int DOUBLE = 7;
+
+ /// <summary>Sort using term values as encoded Shorts. Sort values are Short and
+ /// lower values are at the front.
+ /// </summary>
+ public const int SHORT = 8;
+
+ /// <summary>Sort using a custom Comparator. Sort values are any Comparable and
+ /// sorting is done according to natural order.
+ /// </summary>
+ public const int CUSTOM = 9;
+
+ /// <summary>Sort using term values as encoded Bytes. Sort values are Byte and
+ /// lower values are at the front.
+ /// </summary>
+ public const int BYTE = 10;
+
+ /// <summary>Sort using term values as Strings, but comparing by
+ /// value (using String.CompareTo) for all comparisons.
+ /// This is typically slower than <see cref="STRING" />, which
+ /// uses ordinals to do the sorting.
+ /// </summary>
+ public const int STRING_VAL = 11;
+
+ // IMPLEMENTATION NOTE: the FieldCache.STRING_INDEX is in the same "namespace"
+ // as the above static int values. Any new values must not have the same value
+ // as FieldCache.STRING_INDEX.
+
+ /// <summary>Represents sorting by document score (relevancy). </summary>
+ public static readonly SortField FIELD_SCORE = new SortField(null, SCORE);
+
+ /// <summary>Represents sorting by document number (index order). </summary>
+ public static readonly SortField FIELD_DOC = new SortField(null, DOC);
+
+ private System.String field;
+ private int type; // defaults to determining type dynamically
+ private System.Globalization.CultureInfo locale; // defaults to "natural order" (no Locale)
+ internal bool reverse = false; // defaults to natural order
+ private Lucene.Net.Search.Parser parser;
+
+ // Used for CUSTOM sort
+ private FieldComparatorSource comparatorSource;
+
+ /// <summary>Creates a sort by terms in the given field with the type of term
+ /// values explicitly given.
+ /// </summary>
+ /// <param name="field"> Name of field to sort by. Can be <c>null</c> if
+ /// <c>type</c> is SCORE or DOC.
+ /// </param>
+ /// <param name="type"> Type of values in the terms.
+ /// </param>
+ public SortField(System.String field, int type)
+ {
+ InitFieldType(field, type);
+ }
+
+ /// <summary>Creates a sort, possibly in reverse, by terms in the given field with the
+ /// type of term values explicitly given.
+ /// </summary>
+ /// <param name="field"> Name of field to sort by. Can be <c>null</c> if
+ /// <c>type</c> is SCORE or DOC.
+ /// </param>
+ /// <param name="type"> Type of values in the terms.
+ /// </param>
+ /// <param name="reverse">True if natural order should be reversed.
+ /// </param>
+ public SortField(System.String field, int type, bool reverse)
+ {
+ InitFieldType(field, type);
+ this.reverse = reverse;
+ }
+
+ /// <summary>Creates a sort by terms in the given field, parsed
+ /// to numeric values using a custom <see cref="Search.Parser" />.
+ /// </summary>
+ /// <param name="field"> Name of field to sort by. Must not be null.
+ /// </param>
+ /// <param name="parser">Instance of a <see cref="Search.Parser" />,
+ /// which must subclass one of the existing numeric
+ /// parsers from <see cref="FieldCache" />. Sort type is inferred
+ /// by testing which numeric parser the parser subclasses.
+ /// </param>
+ /// <throws> ArgumentException if the parser fails to subclass an existing numeric parser,
+ /// or field is null
+ /// </throws>
+ public SortField(System.String field, Lucene.Net.Search.Parser parser):this(field, parser, false)
+ {
+ }
+
+ /// <summary>Creates a sort, possibly in reverse, by terms in the given field, parsed
+ /// to numeric values using a custom <see cref="Search.Parser" />.
+ /// </summary>
+ /// <param name="field"> Name of field to sort by. Must not be null.
+ /// </param>
+ /// <param name="parser">Instance of a <see cref="Search.Parser" />,
+ /// which must subclass one of the existing numeric
+ /// parsers from <see cref="FieldCache" />. Sort type is inferred
+ /// by testing which numeric parser the parser subclasses.
+ /// </param>
+ /// <param name="reverse">True if natural order should be reversed.
+ /// </param>
+ /// <throws> ArgumentException if the parser fails to subclass an existing numeric parser,
+ /// or field is null
+ /// </throws>
+ public SortField(System.String field, Lucene.Net.Search.Parser parser, bool reverse)
+ {
+ if (parser is Lucene.Net.Search.IntParser)
+ InitFieldType(field, INT);
+ else if (parser is Lucene.Net.Search.FloatParser)
+ InitFieldType(field, FLOAT);
+ else if (parser is Lucene.Net.Search.ShortParser)
+ InitFieldType(field, SHORT);
+ else if (parser is Lucene.Net.Search.ByteParser)
+ InitFieldType(field, BYTE);
+ else if (parser is Lucene.Net.Search.LongParser)
+ InitFieldType(field, LONG);
+ else if (parser is Lucene.Net.Search.DoubleParser)
+ InitFieldType(field, DOUBLE);
+ else
+ {
+ throw new System.ArgumentException("Parser instance does not subclass existing numeric parser from FieldCache (got " + parser + ")");
+ }
+
+ this.reverse = reverse;
+ this.parser = parser;
+ }
+
+ /// <summary>Creates a sort by terms in the given field sorted
+ /// according to the given locale.
+ /// </summary>
+ /// <param name="field"> Name of field to sort by, cannot be <c>null</c>.
+ /// </param>
+ /// <param name="locale">Locale of values in the field.
+ /// </param>
+ public SortField(System.String field, System.Globalization.CultureInfo locale)
+ {
+ InitFieldType(field, STRING);
+ this.locale = locale;
+ }
+
+ /// <summary>Creates a sort, possibly in reverse, by terms in the given field sorted
+ /// according to the given locale.
+ /// </summary>
+ /// <param name="field"> Name of field to sort by, cannot be <c>null</c>.
+ /// </param>
+ /// <param name="locale">Locale of values in the field.
+ /// </param>
+ public SortField(System.String field, System.Globalization.CultureInfo locale, bool reverse)
+ {
+ InitFieldType(field, STRING);
+ this.locale = locale;
+ this.reverse = reverse;
+ }
+
+ /// <summary>Creates a sort with a custom comparison function.</summary>
+ /// <param name="field">Name of field to sort by; cannot be <c>null</c>.
+ /// </param>
+ /// <param name="comparator">Returns a comparator for sorting hits.
+ /// </param>
+ public SortField(System.String field, FieldComparatorSource comparator)
+ {
+ InitFieldType(field, CUSTOM);
+ this.comparatorSource = comparator;
+ }
+
+ /// <summary>Creates a sort, possibly in reverse, with a custom comparison function.</summary>
+ /// <param name="field">Name of field to sort by; cannot be <c>null</c>.
+ /// </param>
+ /// <param name="comparator">Returns a comparator for sorting hits.
+ /// </param>
+ /// <param name="reverse">True if natural order should be reversed.
+ /// </param>
+ public SortField(System.String field, FieldComparatorSource comparator, bool reverse)
+ {
+ InitFieldType(field, CUSTOM);
+ this.reverse = reverse;
+ this.comparatorSource = comparator;
+ }
+
+ // Sets field & type, and ensures field is not NULL unless
+ // type is SCORE or DOC
+ private void InitFieldType(System.String field, int type)
+ {
+ this.type = type;
+ if (field == null)
+ {
+ if (type != SCORE && type != DOC)
+ throw new System.ArgumentException("field can only be null when type is SCORE or DOC");
+ }
+ else
+ {
+ this.field = StringHelper.Intern(field);
+ }
+ }
+
+ /// <summary>Returns the name of the field. Could return <c>null</c>
+ /// if the sort is by SCORE or DOC.
+ /// </summary>
+ /// <value> Name of field, possibly <c>null</c>. </value>
+ public virtual string Field
+ {
+ get { return field; }
+ }
+
+ /// <summary>Returns the type of contents in the field.</summary>
+ /// <value> One of the sort type constants defined in this class (SCORE, DOC, STRING, INT, FLOAT, LONG, DOUBLE, SHORT, BYTE, STRING_VAL or CUSTOM). </value>
+ public virtual int Type
+ {
+ get { return type; }
+ }
+
+ /// <summary>Returns the Locale by which term values are interpreted.
+ /// May return <c>null</c> if no Locale was specified.
+ /// </summary>
+ /// <value> Locale, or <c>null</c>. </value>
+ public virtual CultureInfo Locale
+ {
+ get { return locale; }
+ }
+
+ /// <summary>Returns the instance of a <see cref="FieldCache" /> parser that fits the given sort type.
+ /// May return <c>null</c> if no parser was specified; in that case sorting uses the default parser.
+ /// </summary>
+ /// <value> An instance of a <see cref="FieldCache" /> parser, or <c>null</c>. </value>
+ public virtual Parser Parser
+ {
+ get { return parser; }
+ }
+
+ /// <summary>Returns whether the sort should be reversed.</summary>
+ /// <value> True if natural order should be reversed. </value>
+ public virtual bool Reverse
+ {
+ get { return reverse; }
+ }
+
+ /// <summary>
+ /// Returns the <see cref="FieldComparatorSource"/> used for
+ /// custom sorting
+ /// </summary>
+ public virtual FieldComparatorSource ComparatorSource
+ {
+ get { return comparatorSource; }
+ }
+
+ public override System.String ToString()
+ {
+ System.Text.StringBuilder buffer = new System.Text.StringBuilder();
+ switch (type)
+ {
+
+ case SCORE:
+ buffer.Append("<score>");
+ break;
+
+ case DOC:
+ buffer.Append("<doc>");
+ break;
+
+ case STRING:
+ buffer.Append("<string: \"").Append(field).Append("\">");
+ break;
+
+ case STRING_VAL:
+ buffer.Append("<string_val: \"").Append(field).Append("\">");
+ break;
+
+ case BYTE:
+ buffer.Append("<byte: \"").Append(field).Append("\">");
+ break;
+
+ case SHORT:
+ buffer.Append("<short: \"").Append(field).Append("\">");
+ break;
+
+ case INT:
+ buffer.Append("<int: \"").Append(field).Append("\">");
+ break;
+
+ case LONG:
+ buffer.Append("<long: \"").Append(field).Append("\">");
+ break;
+
+ case FLOAT:
+ buffer.Append("<float: \"").Append(field).Append("\">");
+ break;
+
+ case DOUBLE:
+ buffer.Append("<double: \"").Append(field).Append("\">");
+ break;
+
+ case CUSTOM:
+ buffer.Append("<custom:\"").Append(field).Append("\": ").Append(comparatorSource).Append('>');
+ break;
+
+ default:
+ buffer.Append("<???: \"").Append(field).Append("\">");
+ break;
+
+ }
+
+ if (locale != null)
+ buffer.Append('(').Append(locale).Append(')');
+ if (parser != null)
+ buffer.Append('(').Append(parser).Append(')');
+ if (reverse)
+ buffer.Append('!');
+
+ return buffer.ToString();
+ }
+
+ /// <summary>Returns true if <c>o</c> is equal to this. If a
+ /// <see cref="FieldComparatorSource" /> or <see cref="Search.Parser" />
+ /// was provided, it must properly
+ /// implement Equals (unless a singleton is always used).
+ /// </summary>
+ public override bool Equals(System.Object o)
+ {
+ if (this == o)
+ return true;
+ if (!(o is SortField))
+ return false;
+ SortField other = (SortField) o;
+ return ((System.Object) other.field == (System.Object) this.field && other.type == this.type &&
+ other.reverse == this.reverse &&
+ (other.locale == null ? this.locale == null : other.locale.Equals(this.locale)) &&
+ (other.comparatorSource == null
+ ? this.comparatorSource == null
+ : other.comparatorSource.Equals(this.comparatorSource)) &&
+ (other.parser == null ? this.parser == null : other.parser.Equals(this.parser)));
+ }
+
+ /// <summary>Returns a hash code value for this object. If a
+ /// <see cref="FieldComparatorSource" /> or <see cref="Search.Parser" />
+ /// was provided, it must properly
+ /// implement GetHashCode (unless a singleton is always
+ /// used).
+ /// </summary>
+ public override int GetHashCode()
+ {
+ int hash = type ^ 0x346565dd + (reverse ? Boolean.TrueString.GetHashCode() : Boolean.FalseString.GetHashCode()) ^ unchecked((int) 0xaf5998bb);
+ if (field != null)
+ hash += (field.GetHashCode() ^ unchecked((int) 0xff5685dd));
+ if (locale != null)
+ {
+ hash += (locale.GetHashCode() ^ 0x08150815);
+ }
+ if (comparatorSource != null)
+ hash += comparatorSource.GetHashCode();
+ if (parser != null)
+ hash += (parser.GetHashCode() ^ 0x3aaf56ff);
+ return hash;
+ }
+
+
+ //// field must be interned after reading from stream
+ // private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException {
+ // in.defaultReadObject();
+ // if (field != null)
+ // field = StringHelper.intern(field);
+ // }
+
+ [System.Runtime.Serialization.OnDeserialized]
+ internal void OnDeserialized(System.Runtime.Serialization.StreamingContext context)
+ {
+ field = StringHelper.Intern(field);
+ }
+
+ /// <summary>Returns the <see cref="FieldComparator" /> to use for
+ /// sorting.
+ ///
+ /// <b>NOTE:</b> This API is experimental and might change in
+ /// incompatible ways in the next release.
+ ///
+ /// </summary>
+ /// <param name="numHits">number of top hits the queue will store
+ /// </param>
+ /// <param name="sortPos">position of this SortField within <see cref="Sort" />
+ ///. The comparator is primary if sortPos==0,
+ /// secondary if sortPos==1, etc. Some comparators can
+ /// optimize themselves when they are the primary sort.
+ /// </param>
+ /// <returns> <see cref="FieldComparator" /> to use when sorting
+ /// </returns>
+ public virtual FieldComparator GetComparator(int numHits, int sortPos)
+ {
+
+ if (locale != null)
+ {
+ // TODO: it'd be nice to allow FieldCache.getStringIndex
+ // to optionally accept a Locale so sorting could then use
+ // the faster StringComparator impls
+ return new FieldComparator.StringComparatorLocale(numHits, field, locale);
+ }
+
+ switch (type)
+ {
+ case SortField.SCORE:
+ return new FieldComparator.RelevanceComparator(numHits);
+
+ case SortField.DOC:
+ return new FieldComparator.DocComparator(numHits);
+
+ case SortField.INT:
+ return new FieldComparator.IntComparator(numHits, field, parser);
+
+ case SortField.FLOAT:
+ return new FieldComparator.FloatComparator(numHits, field, parser);
+
+ case SortField.LONG:
+ return new FieldComparator.LongComparator(numHits, field, parser);
+
+ case SortField.DOUBLE:
+ return new FieldComparator.DoubleComparator(numHits, field, parser);
+
+ case SortField.BYTE:
+ return new FieldComparator.ByteComparator(numHits, field, parser);
+
+ case SortField.SHORT:
+ return new FieldComparator.ShortComparator(numHits, field, parser);
+
+ case SortField.CUSTOM:
+ System.Diagnostics.Debug.Assert(comparatorSource != null);
+ return comparatorSource.NewComparator(field, numHits, sortPos, reverse);
+
+ case SortField.STRING:
+ return new FieldComparator.StringOrdValComparator(numHits, field, sortPos, reverse);
+
+ case SortField.STRING_VAL:
+ return new FieldComparator.StringValComparator(numHits, field);
+
+ default:
+ throw new System.SystemException("Illegal sort type: " + type);
+
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/SpanFilter.cs b/src/core/Search/SpanFilter.cs
new file mode 100644
index 0000000..f522725
--- /dev/null
+++ b/src/core/Search/SpanFilter.cs
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using IndexReader = Lucene.Net.Index.IndexReader;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary>Abstract base class providing a mechanism to restrict searches to a subset
+ /// of an index while also maintaining and returning position information.
+ /// This is useful if you want to compare the positions from a SpanQuery with the positions of items in
+ /// a filter. For instance, if you had a SpanFilter that marked all the occurrences of the word "foo" in documents,
+ /// and then you entered a new SpanQuery containing bar, you could not only filter by the word foo, but you could
+ /// then compare position information for post processing.
+ /// </summary>
+ [Serializable]
+ public abstract class SpanFilter:Filter
+ {
+ /// <summary>Returns a SpanFilterResult with true for documents which should be permitted in
+ /// search results, and false for those that should not and Spans for where the true docs match.
+ /// </summary>
+ /// <param name="reader">The <see cref="Lucene.Net.Index.IndexReader" /> to load position and DocIdSet information from
+ /// </param>
+ /// <returns> A <see cref="SpanFilterResult" />
+ /// </returns>
+ /// <throws> IOException if there was an issue accessing the necessary information </throws>
+ public abstract SpanFilterResult BitSpans(IndexReader reader);
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/SpanFilterResult.cs b/src/core/Search/SpanFilterResult.cs
new file mode 100644
index 0000000..9cafe0c
--- /dev/null
+++ b/src/core/Search/SpanFilterResult.cs
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Search
+{
+ /// <summary> The results of a SpanQueryFilter. Wraps the DocIdSet and the position information from the SpanQuery.
+ ///
+ /// <p/>
+ /// NOTE: This API is still experimental and subject to change.
+ /// </summary>
+ public class SpanFilterResult
+ {
+ private DocIdSet docIdSet;
+ private IList<PositionInfo> positions; //Spans spans;
+
+ /// <summary> </summary>
+ /// <param name="docIdSet">The DocIdSet for the Filter
+ /// </param>
+ /// <param name="positions">A List of <see cref="Lucene.Net.Search.SpanFilterResult.PositionInfo" /> objects
+ /// </param>
+ public SpanFilterResult(DocIdSet docIdSet, IList<PositionInfo> positions)
+ {
+ this.docIdSet = docIdSet;
+ this.positions = positions;
+ }
+
+ /// <summary> The first entry in the list corresponds to the first "on" bit.
+ /// Entries are in increasing document order.
+ /// </summary>
+ /// <value> A List of PositionInfo objects </value>
+ public virtual IList<PositionInfo> Positions
+ {
+ get { return positions; }
+ }
+
+ /// <summary>Returns the docIdSet </summary>
+ public virtual DocIdSet DocIdSet
+ {
+ get { return docIdSet; }
+ }
+
+ public class PositionInfo
+ {
+ private int doc;
+ private IList<StartEnd> positions;
+
+
+ public PositionInfo(int doc)
+ {
+ this.doc = doc;
+ positions = new List<StartEnd>();
+ }
+
+ public virtual void AddPosition(int start, int end)
+ {
+ positions.Add(new StartEnd(start, end));
+ }
+
+ public virtual int Doc
+ {
+ get { return doc; }
+ }
+
+ /// <summary> </summary>
+ /// <value> A List of <see cref="Lucene.Net.Search.SpanFilterResult.StartEnd" /> objects </value>
+ public virtual IList<StartEnd> Positions
+ {
+ get { return positions; }
+ }
+ }
+
+ public class StartEnd
+ {
+ private int start;
+ private int end;
+
+
+ public StartEnd(int start, int end)
+ {
+ this.start = start;
+ this.end = end;
+ }
+
+ /// <summary> </summary>
+ /// <value> The end position of this match </value>
+ public virtual int End
+ {
+ get { return end; }
+ }
+
+ /// <summary> The Start position</summary>
+ /// <value> The start position of this match </value>
+ public virtual int Start
+ {
+ get { return start; }
+ }
+ }
+ }
+} \ No newline at end of file
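
A minimal sketch of how the position information carried by a SpanFilterResult can be walked; the variable result is assumed to come from a SpanFilter (for example SpanQueryFilter.BitSpans below), and the usual Lucene.Net.Search usings are assumed:

    // One PositionInfo per matching document, in increasing document order.
    foreach (SpanFilterResult.PositionInfo info in result.Positions)
    {
        foreach (SpanFilterResult.StartEnd span in info.Positions)
        {
            // Start/End are the term positions of this match inside document info.Doc.
            System.Console.WriteLine(info.Doc + ": " + span.Start + "-" + span.End);
        }
    }
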
diff --git a/src/core/Search/SpanQueryFilter.cs b/src/core/Search/SpanQueryFilter.cs
new file mode 100644
index 0000000..af3de6b
--- /dev/null
+++ b/src/core/Search/SpanQueryFilter.cs
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using IndexReader = Lucene.Net.Index.IndexReader;
+using OpenBitSet = Lucene.Net.Util.OpenBitSet;
+using SpanQuery = Lucene.Net.Search.Spans.SpanQuery;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> Constrains search results to only match those which also match a provided
+ /// query. Also provides position information about where each document matches
+ /// at the cost of extra space compared with the QueryWrapperFilter.
+ /// There is an added cost to this above what is stored in a <see cref="QueryWrapperFilter" />. Namely,
+ /// the position information for each matching document is stored.
+ /// <p/>
+ /// This filter does not cache. See the <see cref="Lucene.Net.Search.CachingSpanFilter" /> for a wrapper that
+ /// caches.
+ ///
+ ///
+ /// </summary>
+ /// <version> $Id:$
+ /// </version>
+ [Serializable]
+ public class SpanQueryFilter:SpanFilter
+ {
+ protected internal SpanQuery internalQuery;
+
+ protected internal SpanQueryFilter()
+ {
+ }
+
+ /// <summary>Constructs a filter which only matches documents matching
+ /// <c>query</c>.
+ /// </summary>
+ /// <param name="query">The <see cref="Lucene.Net.Search.Spans.SpanQuery" /> to use as the basis for the Filter.
+ /// </param>
+ public SpanQueryFilter(SpanQuery query)
+ {
+ this.internalQuery = query;
+ }
+
+ public override DocIdSet GetDocIdSet(IndexReader reader)
+ {
+ SpanFilterResult result = BitSpans(reader);
+ return result.DocIdSet;
+ }
+
+ public override SpanFilterResult BitSpans(IndexReader reader)
+ {
+
+ OpenBitSet bits = new OpenBitSet(reader.MaxDoc);
+ Lucene.Net.Search.Spans.Spans spans = internalQuery.GetSpans(reader);
+ IList<SpanFilterResult.PositionInfo> tmp = new List<SpanFilterResult.PositionInfo>(20);
+ int currentDoc = - 1;
+ SpanFilterResult.PositionInfo currentInfo = null;
+ while (spans.Next())
+ {
+ int doc = spans.Doc();
+ bits.Set(doc);
+ if (currentDoc != doc)
+ {
+ currentInfo = new SpanFilterResult.PositionInfo(doc);
+ tmp.Add(currentInfo);
+ currentDoc = doc;
+ }
+ currentInfo.AddPosition(spans.Start(), spans.End());
+ }
+ return new SpanFilterResult(bits, tmp);
+ }
+
+
+ public virtual SpanQuery Query
+ {
+ get { return internalQuery; }
+ }
+
+ public override System.String ToString()
+ {
+ return "SpanQueryFilter(" + internalQuery + ")";
+ }
+
+ public override bool Equals(System.Object o)
+ {
+ return o is SpanQueryFilter && this.internalQuery.Equals(((SpanQueryFilter) o).internalQuery);
+ }
+
+ public override int GetHashCode()
+ {
+ return internalQuery.GetHashCode() ^ unchecked((int) 0x923F64B9);
+ }
+ }
+} \ No newline at end of file
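
A sketch of building a SpanQueryFilter and reading both the DocIdSet and the recorded spans; the field name "body", the already-open IndexReader named reader, and the usual Lucene.Net.Index / Lucene.Net.Search.Spans usings are assumptions for illustration:

    SpanQuery spanQuery = new SpanTermQuery(new Term("body", "foo"));
    SpanQueryFilter filter = new SpanQueryFilter(spanQuery);

    SpanFilterResult result = filter.BitSpans(reader); // positions and DocIdSet in one pass
    DocIdSet docs = result.DocIdSet;                   // the same set GetDocIdSet(reader) returns
    foreach (SpanFilterResult.PositionInfo info in result.Positions)
    {
        System.Console.WriteLine("doc " + info.Doc + " has " + info.Positions.Count + " matching spans");
    }
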
diff --git a/src/core/Search/Spans/FieldMaskingSpanQuery.cs b/src/core/Search/Spans/FieldMaskingSpanQuery.cs
new file mode 100644
index 0000000..b69a58d
--- /dev/null
+++ b/src/core/Search/Spans/FieldMaskingSpanQuery.cs
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Index;
+using IndexReader = Lucene.Net.Index.IndexReader;
+using ToStringUtils = Lucene.Net.Util.ToStringUtils;
+using Query = Lucene.Net.Search.Query;
+using Searcher = Lucene.Net.Search.Searcher;
+using Similarity = Lucene.Net.Search.Similarity;
+using Weight = Lucene.Net.Search.Weight;
+
+namespace Lucene.Net.Search.Spans
+{
+
+ /// <summary> <p/>Wrapper to allow <see cref="SpanQuery" /> objects to participate in composite
+ /// single-field SpanQueries by 'lying' about their search field. That is,
+ /// the masked SpanQuery will function as normal,
+ /// but <see cref="SpanQuery.Field" /> simply hands back the value supplied
+ /// in this class's constructor.<p/>
+ ///
+ /// <p/>This can be used to support Queries like <see cref="SpanNearQuery" /> or
+ /// <see cref="SpanOrQuery" /> across different fields, which is not ordinarily
+ /// permitted.<p/>
+ ///
+ /// <p/>This can be useful for denormalized relational data: for example, when
+ /// indexing a document with conceptually many 'children': <p/>
+ ///
+ /// <pre>
+ /// teacherid: 1
+ /// studentfirstname: james
+ /// studentsurname: jones
+ ///
+ /// teacherid: 2
+ /// studentfirstname: james
+ /// studentsurname: smith
+ /// studentfirstname: sally
+ /// studentsurname: jones
+ /// </pre>
+ ///
+ /// <p/>a SpanNearQuery with a slop of 0 can be applied across two
+ /// <see cref="SpanTermQuery" /> objects as follows:
+ /// <code>
+ /// SpanQuery q1 = new SpanTermQuery(new Term("studentfirstname", "james"));
+ /// SpanQuery q2 = new SpanTermQuery(new Term("studentsurname", "jones"));
+ /// SpanQuery q2m = new FieldMaskingSpanQuery(q2, "studentfirstname");
+ /// Query q = new SpanNearQuery(new SpanQuery[]{q1, q2m}, -1, false);
+ /// </code>
+ /// to search for 'studentfirstname:james studentsurname:jones' and find
+ /// teacherid 1 without matching teacherid 2 (which has a 'james' in position 0
+ /// and 'jones' in position 1). <p/>
+ ///
+ /// <p/>Note: as <see cref="Field" /> returns the masked field, scoring will be
+ /// done using the norms of the field name supplied. This may lead to unexpected
+ /// scoring behaviour.<p/>
+ /// </summary>
+ [Serializable]
+ public class FieldMaskingSpanQuery:SpanQuery
+ {
+ private SpanQuery maskedQuery;
+ private System.String field;
+
+ public FieldMaskingSpanQuery(SpanQuery maskedQuery, System.String maskedField)
+ {
+ this.maskedQuery = maskedQuery;
+ this.field = maskedField;
+ }
+
+ public override string Field
+ {
+ get { return field; }
+ }
+
+ public virtual SpanQuery MaskedQuery
+ {
+ get { return maskedQuery; }
+ }
+
+ // :NOTE: getBoost and setBoost are not proxied to the maskedQuery
+ // ...this is done to be more consistent with things like SpanFirstQuery
+
+ public override Spans GetSpans(IndexReader reader)
+ {
+ return maskedQuery.GetSpans(reader);
+ }
+
+ public override void ExtractTerms(System.Collections.Generic.ISet<Term> terms)
+ {
+ maskedQuery.ExtractTerms(terms);
+ }
+
+ public override Weight CreateWeight(Searcher searcher)
+ {
+ return maskedQuery.CreateWeight(searcher);
+ }
+
+ public override Similarity GetSimilarity(Searcher searcher)
+ {
+ return maskedQuery.GetSimilarity(searcher);
+ }
+
+ public override Query Rewrite(IndexReader reader)
+ {
+ FieldMaskingSpanQuery clone = null;
+
+ SpanQuery rewritten = (SpanQuery) maskedQuery.Rewrite(reader);
+ if (rewritten != maskedQuery)
+ {
+ clone = (FieldMaskingSpanQuery) this.Clone();
+ clone.maskedQuery = rewritten;
+ }
+
+ if (clone != null)
+ {
+ return clone;
+ }
+ else
+ {
+ return this;
+ }
+ }
+
+ public override System.String ToString(System.String field)
+ {
+ System.Text.StringBuilder buffer = new System.Text.StringBuilder();
+ buffer.Append("mask(");
+ buffer.Append(maskedQuery.ToString(field));
+ buffer.Append(")");
+ buffer.Append(ToStringUtils.Boost(Boost));
+ buffer.Append(" as ");
+ buffer.Append(this.field);
+ return buffer.ToString();
+ }
+
+ public override bool Equals(System.Object o)
+ {
+ if (!(o is FieldMaskingSpanQuery))
+ return false;
+ FieldMaskingSpanQuery other = (FieldMaskingSpanQuery) o;
+ return (this.Field.Equals(other.Field) && (this.Boost == other.Boost) && this.MaskedQuery.Equals(other.MaskedQuery));
+ }
+
+ public override int GetHashCode()
+ {
+ return MaskedQuery.GetHashCode() ^ Field.GetHashCode() ^ System.Convert.ToInt32(Boost);
+ }
+ }
+} \ No newline at end of file
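
Written out as a compilable sketch, the teacher/student example from the comment above looks like this (it assumes documents indexed with the fields shown there and the usual Lucene.Net usings):

    // Mask the surname query so it reports "studentfirstname" as its field,
    // letting a single-field SpanNearQuery span the two real fields.
    SpanQuery q1  = new SpanTermQuery(new Term("studentfirstname", "james"));
    SpanQuery q2  = new SpanTermQuery(new Term("studentsurname", "jones"));
    SpanQuery q2m = new FieldMaskingSpanQuery(q2, "studentfirstname");
    Query q = new SpanNearQuery(new SpanQuery[] { q1, q2m }, -1, false);
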
diff --git a/src/core/Search/Spans/NearSpansOrdered.cs b/src/core/Search/Spans/NearSpansOrdered.cs
new file mode 100644
index 0000000..86ffae8
--- /dev/null
+++ b/src/core/Search/Spans/NearSpansOrdered.cs
@@ -0,0 +1,436 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using IndexReader = Lucene.Net.Index.IndexReader;
+
+namespace Lucene.Net.Search.Spans
+{
+
+ /// <summary>A Spans that is formed from the ordered subspans of a SpanNearQuery
+ /// where the subspans do not overlap and have a maximum slop between them.
+ /// <p/>
+ /// The formed spans only contains minimum slop matches.<br/>
+ /// The matching slop is computed from the distance(s) between
+ /// the non overlapping matching Spans.<br/>
+ /// Successive matches are always formed from the successive Spans
+ /// of the SpanNearQuery.
+ /// <p/>
+ /// The formed spans may contain overlaps when the slop is at least 1.
+ /// For example, when querying using
+ /// <c>t1 t2 t3</c>
+ /// with slop at least 1, the fragment:
+ /// <c>t1 t2 t1 t3 t2 t3</c>
+ /// matches twice:
+ /// <c>t1 t2 .. t3 </c>
+ /// <c> t1 .. t2 t3</c>
+ ///
+ ///
+ /// Expert:
+ /// Only public for subclassing. Most implementations should not need this class
+ /// </summary>
+ public class NearSpansOrdered:Spans
+ {
+ internal class AnonymousClassComparator : System.Collections.IComparer
+ {
+ public AnonymousClassComparator(NearSpansOrdered enclosingInstance)
+ {
+ InitBlock(enclosingInstance);
+ }
+ private void InitBlock(NearSpansOrdered enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private NearSpansOrdered enclosingInstance;
+ public NearSpansOrdered Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ public virtual int Compare(System.Object o1, System.Object o2)
+ {
+ return ((Spans) o1).Doc() - ((Spans) o2).Doc();
+ }
+ }
+ private void InitBlock()
+ {
+ spanDocComparator = new AnonymousClassComparator(this);
+ }
+ private int allowedSlop;
+ private bool firstTime = true;
+ private bool more = false;
+
+ /// <summary>The spans in the same order as the SpanNearQuery </summary>
+ private Spans[] subSpans;
+
+ /// <summary>Indicates that all subSpans have same doc() </summary>
+ private bool inSameDoc = false;
+
+ private int matchDoc = - 1;
+ private int matchStart = - 1;
+ private int matchEnd = - 1;
+ private System.Collections.Generic.List<byte[]> matchPayload;
+
+ private Spans[] subSpansByDoc;
+ private System.Collections.IComparer spanDocComparator;
+
+ private SpanNearQuery query;
+ private bool collectPayloads = true;
+
+ public NearSpansOrdered(SpanNearQuery spanNearQuery, IndexReader reader):this(spanNearQuery, reader, true)
+ {
+ }
+
+ public NearSpansOrdered(SpanNearQuery spanNearQuery, IndexReader reader, bool collectPayloads)
+ {
+ InitBlock();
+ if (spanNearQuery.GetClauses().Length < 2)
+ {
+ throw new System.ArgumentException("Less than 2 clauses: " + spanNearQuery);
+ }
+ this.collectPayloads = collectPayloads;
+ allowedSlop = spanNearQuery.Slop;
+ SpanQuery[] clauses = spanNearQuery.GetClauses();
+ subSpans = new Spans[clauses.Length];
+ matchPayload = new System.Collections.Generic.List<byte[]>();
+ subSpansByDoc = new Spans[clauses.Length];
+ for (int i = 0; i < clauses.Length; i++)
+ {
+ subSpans[i] = clauses[i].GetSpans(reader);
+ subSpansByDoc[i] = subSpans[i]; // used in toSameDoc()
+ }
+ query = spanNearQuery; // kept for toString() only.
+ }
+
+ // inherit javadocs
+ public override int Doc()
+ {
+ return matchDoc;
+ }
+
+ // inherit javadocs
+ public override int Start()
+ {
+ return matchStart;
+ }
+
+ // inherit javadocs
+ public override int End()
+ {
+ return matchEnd;
+ }
+
+ public virtual Spans[] GetSubSpans()
+ {
+ return subSpans;
+ }
+
+ // TODO: Remove warning after API has been finalized
+ // TODO: Would be nice to be able to lazy load payloads
+
+ public override ICollection<byte[]> GetPayload()
+ {
+ return matchPayload;
+ }
+
+ // TODO: Remove warning after API has been finalized
+
+ public override bool IsPayloadAvailable()
+ {
+ return matchPayload.Count > 0;
+ }
+
+ // inherit javadocs
+ public override bool Next()
+ {
+ if (firstTime)
+ {
+ firstTime = false;
+ for (int i = 0; i < subSpans.Length; i++)
+ {
+ if (!subSpans[i].Next())
+ {
+ more = false;
+ return false;
+ }
+ }
+ more = true;
+ }
+ if (collectPayloads)
+ {
+ matchPayload.Clear();
+ }
+ return AdvanceAfterOrdered();
+ }
+
+ // inherit javadocs
+ public override bool SkipTo(int target)
+ {
+ if (firstTime)
+ {
+ firstTime = false;
+ for (int i = 0; i < subSpans.Length; i++)
+ {
+ if (!subSpans[i].SkipTo(target))
+ {
+ more = false;
+ return false;
+ }
+ }
+ more = true;
+ }
+ else if (more && (subSpans[0].Doc() < target))
+ {
+ if (subSpans[0].SkipTo(target))
+ {
+ inSameDoc = false;
+ }
+ else
+ {
+ more = false;
+ return false;
+ }
+ }
+ if (collectPayloads)
+ {
+ matchPayload.Clear();
+ }
+ return AdvanceAfterOrdered();
+ }
+
+ /// <summary>Advances the subSpans to just after an ordered match with a minimum slop
+ /// that is smaller than the slop allowed by the SpanNearQuery.
+ /// </summary>
+ /// <returns> true iff there is such a match.
+ /// </returns>
+ private bool AdvanceAfterOrdered()
+ {
+ while (more && (inSameDoc || ToSameDoc()))
+ {
+ if (StretchToOrder() && ShrinkToAfterShortestMatch())
+ {
+ return true;
+ }
+ }
+ return false; // no more matches
+ }
+
+
+ /// <summary>Advance the subSpans to the same document </summary>
+ private bool ToSameDoc()
+ {
+ System.Array.Sort(subSpansByDoc, spanDocComparator);
+ int firstIndex = 0;
+ int maxDoc = subSpansByDoc[subSpansByDoc.Length - 1].Doc();
+ while (subSpansByDoc[firstIndex].Doc() != maxDoc)
+ {
+ if (!subSpansByDoc[firstIndex].SkipTo(maxDoc))
+ {
+ more = false;
+ inSameDoc = false;
+ return false;
+ }
+ maxDoc = subSpansByDoc[firstIndex].Doc();
+ if (++firstIndex == subSpansByDoc.Length)
+ {
+ firstIndex = 0;
+ }
+ }
+ for (int i = 0; i < subSpansByDoc.Length; i++)
+ {
+ System.Diagnostics.Debug.Assert((subSpansByDoc [i].Doc() == maxDoc)
+ , "NearSpansOrdered.toSameDoc() spans " + subSpansByDoc [0]
+ + "\n at doc " + subSpansByDoc [i].Doc()
+ + ", but should be at " + maxDoc);
+ }
+ inSameDoc = true;
+ return true;
+ }
+
+ /// <summary>Check whether two Spans in the same document are ordered.</summary>
+ /// <param name="spans1">
+ /// </param>
+ /// <param name="spans2">
+ /// </param>
+ /// <returns> true iff spans1 starts before spans2
+ /// or the spans start at the same position,
+ /// and spans1 ends before spans2.
+ /// </returns>
+ internal static bool DocSpansOrdered(Spans spans1, Spans spans2)
+ {
+ System.Diagnostics.Debug.Assert(spans1.Doc() == spans2.Doc(), "doc1 " + spans1.Doc() + " != doc2 " + spans2.Doc());
+ int start1 = spans1.Start();
+ int start2 = spans2.Start();
+ /* Do not call docSpansOrdered(int,int,int,int) to avoid invoking .end() : */
+ return (start1 == start2)?(spans1.End() < spans2.End()):(start1 < start2);
+ }
+
+ /// <summary>Like <see cref="DocSpansOrdered(Spans,Spans)" />, but use the spans
+ /// starts and ends as parameters.
+ /// </summary>
+ private static bool DocSpansOrdered(int start1, int end1, int start2, int end2)
+ {
+ return (start1 == start2)?(end1 < end2):(start1 < start2);
+ }
+
+ /// <summary>Order the subSpans within the same document by advancing all later spans
+ /// after the previous one.
+ /// </summary>
+ private bool StretchToOrder()
+ {
+ matchDoc = subSpans[0].Doc();
+ for (int i = 1; inSameDoc && (i < subSpans.Length); i++)
+ {
+ while (!DocSpansOrdered(subSpans[i - 1], subSpans[i]))
+ {
+ if (!subSpans[i].Next())
+ {
+ inSameDoc = false;
+ more = false;
+ break;
+ }
+ else if (matchDoc != subSpans[i].Doc())
+ {
+ inSameDoc = false;
+ break;
+ }
+ }
+ }
+ return inSameDoc;
+ }
+
+ /// <summary>The subSpans are ordered in the same doc, so there is a possible match.
+ /// Compute the slop while making the match as short as possible by advancing
+ /// all subSpans except the last one in reverse order.
+ /// </summary>
+ private bool ShrinkToAfterShortestMatch()
+ {
+ matchStart = subSpans[subSpans.Length - 1].Start();
+ matchEnd = subSpans[subSpans.Length - 1].End();
+ System.Collections.Generic.Dictionary<byte[], byte[]> possibleMatchPayloads = new System.Collections.Generic.Dictionary<byte[], byte[]>();
+ if (subSpans[subSpans.Length - 1].IsPayloadAvailable())
+ {
+ System.Collections.Generic.ICollection<byte[]> payload = subSpans[subSpans.Length - 1].GetPayload();
+ foreach(byte[] pl in payload)
+ {
+ if (!possibleMatchPayloads.ContainsKey(pl))
+ {
+ possibleMatchPayloads.Add(pl, pl);
+ }
+ }
+ }
+
+ System.Collections.Generic.List<byte[]> possiblePayload = null;
+
+ int matchSlop = 0;
+ int lastStart = matchStart;
+ int lastEnd = matchEnd;
+ for (int i = subSpans.Length - 2; i >= 0; i--)
+ {
+ Spans prevSpans = subSpans[i];
+ if (collectPayloads && prevSpans.IsPayloadAvailable())
+ {
+ System.Collections.Generic.ICollection<byte[]> payload = prevSpans.GetPayload();
+ possiblePayload = new System.Collections.Generic.List<byte[]>(payload.Count);
+ possiblePayload.AddRange(payload);
+ }
+
+ int prevStart = prevSpans.Start();
+ int prevEnd = prevSpans.End();
+ while (true)
+ {
+ // Advance prevSpans until after (lastStart, lastEnd)
+ if (!prevSpans.Next())
+ {
+ inSameDoc = false;
+ more = false;
+ break; // Check remaining subSpans for final match.
+ }
+ else if (matchDoc != prevSpans.Doc())
+ {
+ inSameDoc = false; // The last subSpans is not advanced here.
+ break; // Check remaining subSpans for last match in this document.
+ }
+ else
+ {
+ int ppStart = prevSpans.Start();
+ int ppEnd = prevSpans.End(); // Cannot avoid invoking .end()
+ if (!DocSpansOrdered(ppStart, ppEnd, lastStart, lastEnd))
+ {
+ break; // Check remaining subSpans.
+ }
+ else
+ {
+ // prevSpans still before (lastStart, lastEnd)
+ prevStart = ppStart;
+ prevEnd = ppEnd;
+ if (collectPayloads && prevSpans.IsPayloadAvailable())
+ {
+ System.Collections.Generic.ICollection<byte[]> payload = prevSpans.GetPayload();
+ possiblePayload = new System.Collections.Generic.List<byte[]>(payload.Count);
+ possiblePayload.AddRange(payload);
+ }
+ }
+ }
+ }
+
+ if (collectPayloads && possiblePayload != null)
+ {
+ foreach (byte[] pl in possiblePayload)
+ {
+ if (!possibleMatchPayloads.ContainsKey(pl))
+ {
+ possibleMatchPayloads.Add(pl, pl);
+ }
+ }
+ }
+
+ System.Diagnostics.Debug.Assert(prevStart <= matchStart);
+ if (matchStart > prevEnd)
+ {
+ // Only non overlapping spans add to slop.
+ matchSlop += (matchStart - prevEnd);
+ }
+
+ /* Do not break on (matchSlop > allowedSlop) here to make sure
+ * that subSpans[0] is advanced after the match, if any.
+ */
+ matchStart = prevStart;
+ lastStart = prevStart;
+ lastEnd = prevEnd;
+ }
+
+ bool match = matchSlop <= allowedSlop;
+
+ if (collectPayloads && match && possibleMatchPayloads.Count > 0)
+ {
+ matchPayload.AddRange(possibleMatchPayloads.Keys);
+ }
+
+ return match; // ordered and allowed slop
+ }
+
+ public override System.String ToString()
+ {
+ return GetType().FullName + "(" + query.ToString() + ")@" + (firstTime?"START":(more?(Doc() + ":" + Start() + "-" + End()):"END"));
+ }
+ }
+} \ No newline at end of file
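
For reference, a NearSpansOrdered is what an in-order SpanNearQuery hands back from GetSpans; a sketch of enumerating it, assuming an open IndexReader named reader and an illustrative "body" field:

    SpanQuery[] clauses =
    {
        new SpanTermQuery(new Term("body", "t1")),
        new SpanTermQuery(new Term("body", "t2")),
        new SpanTermQuery(new Term("body", "t3"))
    };
    SpanNearQuery near = new SpanNearQuery(clauses, 1, true); // slop 1, in order

    Spans spans = near.GetSpans(reader); // a NearSpansOrdered under the covers
    while (spans.Next())
    {
        // Against "t1 t2 t1 t3 t2 t3" this reports both overlapping matches described above.
        System.Console.WriteLine(spans.Doc() + ": " + spans.Start() + "-" + spans.End());
    }
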
diff --git a/src/core/Search/Spans/NearSpansUnordered.cs b/src/core/Search/Spans/NearSpansUnordered.cs
new file mode 100644
index 0000000..e926827
--- /dev/null
+++ b/src/core/Search/Spans/NearSpansUnordered.cs
@@ -0,0 +1,415 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Lucene.Net.Util;
+using IndexReader = Lucene.Net.Index.IndexReader;
+
+namespace Lucene.Net.Search.Spans
+{
+
+ /// <summary> Similar to <see cref="NearSpansOrdered" />, but for the unordered case.
+ ///
+ /// Expert:
+ /// Only public for subclassing. Most implementations should not need this class
+ /// </summary>
+ public class NearSpansUnordered : Spans
+ {
+ private SpanNearQuery query;
+
+ private System.Collections.Generic.IList<SpansCell> ordered = new System.Collections.Generic.List<SpansCell>(); // spans in query order
+ private Spans[] subSpans;
+ private int slop; // from query
+
+ private SpansCell first; // linked list of spans
+ private SpansCell last; // sorted by doc only
+
+ private int totalLength; // sum of current lengths
+
+ private CellQueue queue; // sorted queue of spans
+ private SpansCell max; // max element in queue
+
+ private bool more = true; // true iff not done
+ private bool firstTime = true; // true before first next()
+
+ private class CellQueue : PriorityQueue<SpansCell>
+ {
+ private void InitBlock(NearSpansUnordered enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private NearSpansUnordered enclosingInstance;
+ public NearSpansUnordered Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ public CellQueue(NearSpansUnordered enclosingInstance, int size)
+ {
+ InitBlock(enclosingInstance);
+ Initialize(size);
+ }
+
+ public override bool LessThan(SpansCell spans1, SpansCell spans2)
+ {
+ if (spans1.Doc() == spans2.Doc())
+ {
+ return NearSpansOrdered.DocSpansOrdered(spans1, spans2);
+ }
+ else
+ {
+ return spans1.Doc() < spans2.Doc();
+ }
+ }
+ }
+
+
+ /// <summary>Wraps a Spans, and can be used to form a linked list. </summary>
+ private class SpansCell:Spans
+ {
+ private void InitBlock(NearSpansUnordered enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private NearSpansUnordered enclosingInstance;
+ public NearSpansUnordered Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ internal /*private*/ Spans spans;
+ internal /*private*/ SpansCell next;
+ private int length = - 1;
+ private int index;
+
+ public SpansCell(NearSpansUnordered enclosingInstance, Spans spans, int index)
+ {
+ InitBlock(enclosingInstance);
+ this.spans = spans;
+ this.index = index;
+ }
+
+ public override bool Next()
+ {
+ return Adjust(spans.Next());
+ }
+
+ public override bool SkipTo(int target)
+ {
+ return Adjust(spans.SkipTo(target));
+ }
+
+ private bool Adjust(bool condition)
+ {
+ if (length != - 1)
+ {
+ Enclosing_Instance.totalLength -= length; // subtract old length
+ }
+ if (condition)
+ {
+ length = End() - Start();
+ Enclosing_Instance.totalLength += length; // add new length
+
+ if (Enclosing_Instance.max == null || Doc() > Enclosing_Instance.max.Doc() || (Doc() == Enclosing_Instance.max.Doc()) && (End() > Enclosing_Instance.max.End()))
+ {
+ Enclosing_Instance.max = this;
+ }
+ }
+ Enclosing_Instance.more = condition;
+ return condition;
+ }
+
+ public override int Doc()
+ {
+ return spans.Doc();
+ }
+ public override int Start()
+ {
+ return spans.Start();
+ }
+ public override int End()
+ {
+ return spans.End();
+ }
+ // TODO: Remove warning after API has been finalized
+
+ public override ICollection<byte[]> GetPayload()
+ {
+ return spans.GetPayload().ToArray();
+ }
+
+ // TODO: Remove warning after API has been finalized
+
+ public override bool IsPayloadAvailable()
+ {
+ return spans.IsPayloadAvailable();
+ }
+
+ public override System.String ToString()
+ {
+ return spans.ToString() + "#" + index;
+ }
+ }
+
+
+ public NearSpansUnordered(SpanNearQuery query, IndexReader reader)
+ {
+ this.query = query;
+ this.slop = query.Slop;
+
+ SpanQuery[] clauses = query.GetClauses();
+ queue = new CellQueue(this, clauses.Length);
+ subSpans = new Spans[clauses.Length];
+ for (int i = 0; i < clauses.Length; i++)
+ {
+ SpansCell cell = new SpansCell(this, clauses[i].GetSpans(reader), i);
+ ordered.Add(cell);
+ subSpans[i] = cell.spans;
+ }
+ }
+ public virtual Spans[] GetSubSpans()
+ {
+ return subSpans;
+ }
+ public override bool Next()
+ {
+ if (firstTime)
+ {
+ InitList(true);
+ ListToQueue(); // initialize queue
+ firstTime = false;
+ }
+ else if (more)
+ {
+ if (Min().Next())
+ {
+ // trigger further scanning
+ queue.UpdateTop(); // maintain queue
+ }
+ else
+ {
+ more = false;
+ }
+ }
+
+ while (more)
+ {
+
+ bool queueStale = false;
+
+ if (Min().Doc() != max.Doc())
+ {
+ // maintain list
+ QueueToList();
+ queueStale = true;
+ }
+
+ // skip to doc w/ all clauses
+
+ while (more && first.Doc() < last.Doc())
+ {
+ more = first.SkipTo(last.Doc()); // skip first upto last
+ FirstToLast(); // and move it to the end
+ queueStale = true;
+ }
+
+ if (!more)
+ return false;
+
+ // found doc w/ all clauses
+
+ if (queueStale)
+ {
+ // maintain the queue
+ ListToQueue();
+ queueStale = false;
+ }
+
+ if (AtMatch())
+ {
+ return true;
+ }
+
+ more = Min().Next();
+ if (more)
+ {
+ queue.UpdateTop(); // maintain queue
+ }
+ }
+ return false; // no more matches
+ }
+
+ public override bool SkipTo(int target)
+ {
+ if (firstTime)
+ {
+ // initialize
+ InitList(false);
+ for (SpansCell cell = first; more && cell != null; cell = cell.next)
+ {
+ more = cell.SkipTo(target); // skip all
+ }
+ if (more)
+ {
+ ListToQueue();
+ }
+ firstTime = false;
+ }
+ else
+ {
+ // normal case
+ while (more && Min().Doc() < target)
+ {
+ // skip as needed
+ if (Min().SkipTo(target))
+ {
+ queue.UpdateTop();
+ }
+ else
+ {
+ more = false;
+ }
+ }
+ }
+ return more && (AtMatch() || Next());
+ }
+
+ private SpansCell Min()
+ {
+ return queue.Top();
+ }
+
+ public override int Doc()
+ {
+ return Min().Doc();
+ }
+ public override int Start()
+ {
+ return Min().Start();
+ }
+ public override int End()
+ {
+ return max.End();
+ }
+
+ // TODO: Remove warning after API has been finalized
+
+ /// <summary> WARNING: The List is not necessarily in order of the positions.</summary>
+ /// <returns> Collection of <c>byte[]</c> payloads </returns>
+ /// <throws> IOException </throws>
+ public override ICollection<byte[]> GetPayload()
+ {
+ System.Collections.Generic.ISet<byte[]> matchPayload = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet<byte[]>();
+ for (SpansCell cell = first; cell != null; cell = cell.next)
+ {
+ if (cell.IsPayloadAvailable())
+ {
+ matchPayload.UnionWith(cell.GetPayload());
+ }
+ }
+ return matchPayload;
+ }
+
+ // TODO: Remove warning after API has been finalized
+
+ public override bool IsPayloadAvailable()
+ {
+ SpansCell pointer = Min();
+ while (pointer != null)
+ {
+ if (pointer.IsPayloadAvailable())
+ {
+ return true;
+ }
+ pointer = pointer.next;
+ }
+
+ return false;
+ }
+
+ public override System.String ToString()
+ {
+ return GetType().FullName + "(" + query.ToString() + ")@" + (firstTime?"START":(more?(Doc() + ":" + Start() + "-" + End()):"END"));
+ }
+
+ private void InitList(bool next)
+ {
+ for (int i = 0; more && i < ordered.Count; i++)
+ {
+ SpansCell cell = ordered[i];
+ if (next)
+ more = cell.Next(); // move to first entry
+ if (more)
+ {
+ AddToList(cell); // add to list
+ }
+ }
+ }
+
+ private void AddToList(SpansCell cell)
+ {
+ if (last != null)
+ {
+ // add next to end of list
+ last.next = cell;
+ }
+ else
+ first = cell;
+ last = cell;
+ cell.next = null;
+ }
+
+ private void FirstToLast()
+ {
+ last.next = first; // move first to end of list
+ last = first;
+ first = first.next;
+ last.next = null;
+ }
+
+ private void QueueToList()
+ {
+ last = first = null;
+ while (queue.Top() != null)
+ {
+ AddToList(queue.Pop());
+ }
+ }
+
+ private void ListToQueue()
+ {
+ queue.Clear(); // rebuild queue
+ for (SpansCell cell = first; cell != null; cell = cell.next)
+ {
+ queue.Add(cell); // add to queue from list
+ }
+ }
+
+ private bool AtMatch()
+ {
+ return (Min().Doc() == max.Doc()) && ((max.End() - Min().Start() - totalLength) <= slop);
+ }
+ }
+} \ No newline at end of file
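
The unordered variant is selected simply by passing inOrder = false to SpanNearQuery; the slop is then measured over the smallest window covering all clauses, matching the AtMatch() test above. A brief sketch with the same assumptions as the previous example:

    SpanQuery[] clauses =
    {
        new SpanTermQuery(new Term("body", "quick")),
        new SpanTermQuery(new Term("body", "fox"))
    };
    // Matches "quick ... fox" as well as "fox ... quick", provided
    // max.End() - min.Start() - totalLength stays within the slop of 2.
    SpanNearQuery near = new SpanNearQuery(clauses, 2, false);
    Spans spans = near.GetSpans(reader); // a NearSpansUnordered instance
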
diff --git a/src/core/Search/Spans/SpanFirstQuery.cs b/src/core/Search/Spans/SpanFirstQuery.cs
new file mode 100644
index 0000000..ff39b28
--- /dev/null
+++ b/src/core/Search/Spans/SpanFirstQuery.cs
@@ -0,0 +1,211 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Index;
+using Lucene.Net.Support;
+using IndexReader = Lucene.Net.Index.IndexReader;
+using ToStringUtils = Lucene.Net.Util.ToStringUtils;
+using Query = Lucene.Net.Search.Query;
+
+namespace Lucene.Net.Search.Spans
+{
+
+ /// <summary>Matches spans near the beginning of a field. </summary>
+ [Serializable]
+ public class SpanFirstQuery : SpanQuery, System.ICloneable
+ {
+ private class AnonymousClassSpans : Spans
+ {
+ public AnonymousClassSpans(Lucene.Net.Index.IndexReader reader, SpanFirstQuery enclosingInstance)
+ {
+ InitBlock(reader, enclosingInstance);
+ }
+ private void InitBlock(Lucene.Net.Index.IndexReader reader, SpanFirstQuery enclosingInstance)
+ {
+ this.reader = reader;
+ this.enclosingInstance = enclosingInstance;
+ spans = Enclosing_Instance.match.GetSpans(reader);
+ }
+ private Lucene.Net.Index.IndexReader reader;
+ private SpanFirstQuery enclosingInstance;
+ public SpanFirstQuery Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ private Spans spans;
+
+ public override bool Next()
+ {
+ while (spans.Next())
+ {
+ // scan to next match
+ if (End() <= Enclosing_Instance.end)
+ return true;
+ }
+ return false;
+ }
+
+ public override bool SkipTo(int target)
+ {
+ if (!spans.SkipTo(target))
+ return false;
+
+ return spans.End() <= Enclosing_Instance.end || Next();
+ }
+
+ public override int Doc()
+ {
+ return spans.Doc();
+ }
+ public override int Start()
+ {
+ return spans.Start();
+ }
+ public override int End()
+ {
+ return spans.End();
+ }
+
+ // TODO: Remove warning after API has been finalized
+
+ public override ICollection<byte[]> GetPayload()
+ {
+ System.Collections.Generic.ICollection<byte[]> result = null;
+ if (spans.IsPayloadAvailable())
+ {
+ result = spans.GetPayload();
+ }
+ return result; //TODO: any way to avoid the new construction?
+ }
+
+ // TODO: Remove warning after API has been finalized
+
+ public override bool IsPayloadAvailable()
+ {
+ return spans.IsPayloadAvailable();
+ }
+
+ public override System.String ToString()
+ {
+ return "spans(" + Enclosing_Instance.ToString() + ")";
+ }
+ }
+ private SpanQuery match;
+ private int end;
+
+ /// <summary>Construct a SpanFirstQuery matching spans in <c>match</c> whose end
+ /// position is less than or equal to <c>end</c>.
+ /// </summary>
+ public SpanFirstQuery(SpanQuery match, int end)
+ {
+ this.match = match;
+ this.end = end;
+ }
+
+ /// <summary>Return the SpanQuery whose matches are filtered. </summary>
+ public virtual SpanQuery Match
+ {
+ get { return match; }
+ }
+
+ /// <summary>Return the maximum end position permitted in a match. </summary>
+ public virtual int End
+ {
+ get { return end; }
+ }
+
+ public override string Field
+ {
+ get { return match.Field; }
+ }
+
+ public override System.String ToString(System.String field)
+ {
+ System.Text.StringBuilder buffer = new System.Text.StringBuilder();
+ buffer.Append("spanFirst(");
+ buffer.Append(match.ToString(field));
+ buffer.Append(", ");
+ buffer.Append(end);
+ buffer.Append(")");
+ buffer.Append(ToStringUtils.Boost(Boost));
+ return buffer.ToString();
+ }
+
+ public override System.Object Clone()
+ {
+ SpanFirstQuery spanFirstQuery = new SpanFirstQuery((SpanQuery) match.Clone(), end);
+ spanFirstQuery.Boost = Boost;
+ return spanFirstQuery;
+ }
+
+ public override void ExtractTerms(System.Collections.Generic.ISet<Term> terms)
+ {
+ match.ExtractTerms(terms);
+ }
+
+ public override Spans GetSpans(IndexReader reader)
+ {
+ return new AnonymousClassSpans(reader, this);
+ }
+
+ public override Query Rewrite(IndexReader reader)
+ {
+ SpanFirstQuery clone = null;
+
+ SpanQuery rewritten = (SpanQuery) match.Rewrite(reader);
+ if (rewritten != match)
+ {
+ clone = (SpanFirstQuery) this.Clone();
+ clone.match = rewritten;
+ }
+
+ if (clone != null)
+ {
+ return clone; // some clauses rewrote
+ }
+ else
+ {
+ return this; // no clauses rewrote
+ }
+ }
+
+ public override bool Equals(System.Object o)
+ {
+ if (this == o)
+ return true;
+ if (!(o is SpanFirstQuery))
+ return false;
+
+ SpanFirstQuery other = (SpanFirstQuery) o;
+ return this.end == other.end && this.match.Equals(other.match) && this.Boost == other.Boost;
+ }
+
+ public override int GetHashCode()
+ {
+ int h = match.GetHashCode();
+ h ^= ((h << 8) | (Number.URShift(h, 25))); // reversible
+ h ^= System.Convert.ToInt32(Boost) ^ end;
+ return h;
+ }
+ }
+} \ No newline at end of file
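
A sketch of restricting matches to the start of a field with SpanFirstQuery; the "title" field name is illustrative:

    SpanQuery term = new SpanTermQuery(new Term("title", "lucene"));
    // A match is kept only if its end position is <= 3, i.e. "lucene"
    // occurs within the first three positions of the title field.
    SpanFirstQuery first = new SpanFirstQuery(term, 3);
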
diff --git a/src/core/Search/Spans/SpanNearQuery.cs b/src/core/Search/Spans/SpanNearQuery.cs
new file mode 100644
index 0000000..ddcac4f
--- /dev/null
+++ b/src/core/Search/Spans/SpanNearQuery.cs
@@ -0,0 +1,230 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Linq;
+using Lucene.Net.Index;
+using Lucene.Net.Support;
+using IndexReader = Lucene.Net.Index.IndexReader;
+using ToStringUtils = Lucene.Net.Util.ToStringUtils;
+using Query = Lucene.Net.Search.Query;
+
+namespace Lucene.Net.Search.Spans
+{
+
+ /// <summary>Matches spans which are near one another. One can specify <i>slop</i>, the
+ /// maximum number of intervening unmatched positions, as well as whether
+ /// matches are required to be in-order.
+ /// </summary>
+ [Serializable]
+ public class SpanNearQuery : SpanQuery, System.ICloneable
+ {
+ protected internal System.Collections.Generic.IList<SpanQuery> clauses;
+ protected internal int internalSlop;
+ protected internal bool inOrder;
+
+ protected internal System.String internalField;
+ private readonly bool collectPayloads;
+
+ /// <summary>Construct a SpanNearQuery. Matches spans matching a span from each
+ /// clause, with up to <c>slop</c> total unmatched positions between
+ /// them. When <c>inOrder</c> is true, the spans from each clause
+ /// must be ordered as in <c>clauses</c>.
+ /// </summary>
+ public SpanNearQuery(SpanQuery[] clauses, int slop, bool inOrder):this(clauses, slop, inOrder, true)
+ {
+ }
+
+ public SpanNearQuery(SpanQuery[] clauses, int slop, bool inOrder, bool collectPayloads)
+ {
+
+ // copy clauses array into an ArrayList
+ this.clauses = new System.Collections.Generic.List<SpanQuery>(clauses.Length);
+ for (int i = 0; i < clauses.Length; i++)
+ {
+ SpanQuery clause = clauses[i];
+ if (i == 0)
+ {
+ // check field
+ internalField = clause.Field;
+ }
+ else if (!clause.Field.Equals(internalField))
+ {
+ throw new System.ArgumentException("Clauses must have same field.");
+ }
+ this.clauses.Add(clause);
+ }
+ this.collectPayloads = collectPayloads;
+ this.internalSlop = slop;
+ this.inOrder = inOrder;
+ }
+
+ /// <summary>Return the clauses whose spans are matched. </summary>
+ public virtual SpanQuery[] GetClauses()
+ {
+ // Return a copy
+ return clauses.ToArray();
+ }
+
+ /// <summary>Return the maximum number of intervening unmatched positions permitted.</summary>
+ public virtual int Slop
+ {
+ get { return internalSlop; }
+ }
+
+ /// <summary>Return true if matches are required to be in-order.</summary>
+ public virtual bool IsInOrder
+ {
+ get { return inOrder; }
+ }
+
+ public override string Field
+ {
+ get { return internalField; }
+ }
+
+ public override void ExtractTerms(System.Collections.Generic.ISet<Term> terms)
+ {
+ foreach (SpanQuery clause in clauses)
+ {
+ clause.ExtractTerms(terms);
+ }
+ }
+
+ public override System.String ToString(System.String field)
+ {
+ System.Text.StringBuilder buffer = new System.Text.StringBuilder();
+ buffer.Append("spanNear([");
+ System.Collections.Generic.IEnumerator<SpanQuery> i = clauses.GetEnumerator();
+ while (i.MoveNext())
+ {
+ SpanQuery clause = i.Current;
+ buffer.Append(clause.ToString(field));
+ buffer.Append(", ");
+ }
+ if (clauses.Count > 0) buffer.Length -= 2;
+ buffer.Append("], ");
+ buffer.Append(internalSlop);
+ buffer.Append(", ");
+ buffer.Append(inOrder);
+ buffer.Append(")");
+ buffer.Append(ToStringUtils.Boost(Boost));
+ return buffer.ToString();
+ }
+
+ public override Spans GetSpans(IndexReader reader)
+ {
+ if (clauses.Count == 0)
+ // optimize 0-clause case
+ return new SpanOrQuery(GetClauses()).GetSpans(reader);
+
+ if (clauses.Count == 1)
+ // optimize 1-clause case
+ return clauses[0].GetSpans(reader);
+
+ return inOrder?(Spans) new NearSpansOrdered(this, reader, collectPayloads):(Spans) new NearSpansUnordered(this, reader);
+ }
+
+ public override Query Rewrite(IndexReader reader)
+ {
+ SpanNearQuery clone = null;
+ for (int i = 0; i < clauses.Count; i++)
+ {
+ SpanQuery c = clauses[i];
+ SpanQuery query = (SpanQuery) c.Rewrite(reader);
+ if (query != c)
+ {
+ // clause rewrote: must clone
+ if (clone == null)
+ clone = (SpanNearQuery) this.Clone();
+ clone.clauses[i] = query;
+ }
+ }
+ if (clone != null)
+ {
+ return clone; // some clauses rewrote
+ }
+ else
+ {
+ return this; // no clauses rewrote
+ }
+ }
+
+ public override System.Object Clone()
+ {
+ int sz = clauses.Count;
+ SpanQuery[] newClauses = new SpanQuery[sz];
+
+ for (int i = 0; i < sz; i++)
+ {
+ SpanQuery clause = clauses[i];
+ newClauses[i] = (SpanQuery) clause.Clone();
+ }
+ SpanNearQuery spanNearQuery = new SpanNearQuery(newClauses, internalSlop, inOrder);
+ spanNearQuery.Boost = Boost;
+ return spanNearQuery;
+ }
+
+ /// <summary>Returns true iff <c>o</c> is equal to this. </summary>
+ public override bool Equals(System.Object o)
+ {
+ if (this == o)
+ return true;
+ if (!(o is SpanNearQuery))
+ return false;
+
+ SpanNearQuery spanNearQuery = (SpanNearQuery) o;
+
+ if (inOrder != spanNearQuery.inOrder)
+ return false;
+ if (internalSlop != spanNearQuery.internalSlop)
+ return false;
+ if (clauses.Count != spanNearQuery.clauses.Count)
+ return false;
+ System.Collections.IEnumerator iter1 = clauses.GetEnumerator();
+ System.Collections.IEnumerator iter2 = spanNearQuery.clauses.GetEnumerator();
+ while (iter1.MoveNext() && iter2.MoveNext())
+ {
+ SpanQuery item1 = (SpanQuery)iter1.Current;
+ SpanQuery item2 = (SpanQuery)iter2.Current;
+ if (!item1.Equals(item2))
+ return false;
+ }
+
+ return Boost == spanNearQuery.Boost;
+ }
+
+ public override int GetHashCode()
+ {
+ long result = 0;
+ // mgarski: .NET uses the list's identity, not its contents, to calculate the hash,
+ // so start with result being the hash of the contents.
+ foreach (SpanQuery sq in clauses)
+ {
+ result += sq.GetHashCode();
+ }
+ // Mix bits before folding in things like boost, since it could cancel the
+ // last element of clauses. This particular mix also serves to
+ // differentiate SpanNearQuery hashcodes from others.
+ result ^= ((result << 14) | (Number.URShift(result, 19))); // reversible
+ result += System.Convert.ToInt32(Boost);
+ result += internalSlop;
+ result ^= (inOrder ? (long) 0x99AFD3BD : 0);
+ return (int) result;
+ }
+ }
+} \ No newline at end of file
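
When payloads are not needed, the four-argument constructor above lets the ordered span enumeration skip collecting them; a sketch under the same assumptions as the earlier examples:

    SpanQuery[] clauses =
    {
        new SpanTermQuery(new Term("body", "barack")),
        new SpanTermQuery(new Term("body", "obama"))
    };
    // slop 0, in order, collectPayloads = false: positions are still reported,
    // but GetPayload() on the resulting spans stays empty.
    SpanNearQuery exactPhrase = new SpanNearQuery(clauses, 0, true, false);
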
diff --git a/src/core/Search/Spans/SpanNotQuery.cs b/src/core/Search/Spans/SpanNotQuery.cs
new file mode 100644
index 0000000..3f69080
--- /dev/null
+++ b/src/core/Search/Spans/SpanNotQuery.cs
@@ -0,0 +1,260 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Index;
+using Lucene.Net.Support;
+using IndexReader = Lucene.Net.Index.IndexReader;
+using ToStringUtils = Lucene.Net.Util.ToStringUtils;
+using Query = Lucene.Net.Search.Query;
+
+namespace Lucene.Net.Search.Spans
+{
+
+ /// <summary>Removes matches which overlap with another SpanQuery. </summary>
+ [Serializable]
+ public class SpanNotQuery:SpanQuery, System.ICloneable
+ {
+ private class AnonymousClassSpans : Spans
+ {
+ public AnonymousClassSpans(Lucene.Net.Index.IndexReader reader, SpanNotQuery enclosingInstance)
+ {
+ InitBlock(reader, enclosingInstance);
+ }
+ private void InitBlock(Lucene.Net.Index.IndexReader reader, SpanNotQuery enclosingInstance)
+ {
+ this.reader = reader;
+ this.enclosingInstance = enclosingInstance;
+ includeSpans = Enclosing_Instance.include.GetSpans(reader);
+ excludeSpans = Enclosing_Instance.exclude.GetSpans(reader);
+ moreExclude = excludeSpans.Next();
+ }
+ private Lucene.Net.Index.IndexReader reader;
+ private SpanNotQuery enclosingInstance;
+ public SpanNotQuery Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ private Spans includeSpans;
+ private bool moreInclude = true;
+
+ private Spans excludeSpans;
+ private bool moreExclude;
+
+ public override bool Next()
+ {
+ if (moreInclude)
+ // move to next include
+ moreInclude = includeSpans.Next();
+
+ while (moreInclude && moreExclude)
+ {
+
+ if (includeSpans.Doc() > excludeSpans.Doc())
+ // skip exclude
+ moreExclude = excludeSpans.SkipTo(includeSpans.Doc());
+
+ while (moreExclude && includeSpans.Doc() == excludeSpans.Doc() && excludeSpans.End() <= includeSpans.Start())
+ {
+ moreExclude = excludeSpans.Next(); // increment exclude
+ }
+
+ if (!moreExclude || includeSpans.Doc() != excludeSpans.Doc() || includeSpans.End() <= excludeSpans.Start())
+ break; // we found a match
+
+ moreInclude = includeSpans.Next(); // intersected: keep scanning
+ }
+ return moreInclude;
+ }
+
+ public override bool SkipTo(int target)
+ {
+ if (moreInclude)
+ // skip include
+ moreInclude = includeSpans.SkipTo(target);
+
+ if (!moreInclude)
+ return false;
+
+ if (moreExclude && includeSpans.Doc() > excludeSpans.Doc())
+ moreExclude = excludeSpans.SkipTo(includeSpans.Doc());
+
+ while (moreExclude && includeSpans.Doc() == excludeSpans.Doc() && excludeSpans.End() <= includeSpans.Start())
+ {
+ moreExclude = excludeSpans.Next(); // increment exclude
+ }
+
+ if (!moreExclude || includeSpans.Doc() != excludeSpans.Doc() || includeSpans.End() <= excludeSpans.Start())
+ return true; // we found a match
+
+ return Next(); // scan to next match
+ }
+
+ public override int Doc()
+ {
+ return includeSpans.Doc();
+ }
+ public override int Start()
+ {
+ return includeSpans.Start();
+ }
+ public override int End()
+ {
+ return includeSpans.End();
+ }
+
+ // TODO: Remove warning after API has been finalized
+
+ public override ICollection<byte[]> GetPayload()
+ {
+ System.Collections.Generic.ICollection<byte[]> result = null;
+ if (includeSpans.IsPayloadAvailable())
+ {
+ result = includeSpans.GetPayload();
+ }
+ return result;
+ }
+
+ // TODO: Remove warning after API has been finalized
+
+ public override bool IsPayloadAvailable()
+ {
+ return includeSpans.IsPayloadAvailable();
+ }
+
+ public override System.String ToString()
+ {
+ return "spans(" + Enclosing_Instance.ToString() + ")";
+ }
+ }
+ private SpanQuery include;
+ private SpanQuery exclude;
+
+ /// <summary>Construct a SpanNotQuery matching spans from <c>include</c> which
+ /// have no overlap with spans from <c>exclude</c>.
+ /// </summary>
+ public SpanNotQuery(SpanQuery include, SpanQuery exclude)
+ {
+ this.include = include;
+ this.exclude = exclude;
+
+ if (!include.Field.Equals(exclude.Field))
+ throw new System.ArgumentException("Clauses must have same field.");
+ }
+
+ /// <summary>Return the SpanQuery whose matches are filtered. </summary>
+ public virtual SpanQuery Include
+ {
+ get { return include; }
+ }
+
+ /// <summary>Return the SpanQuery whose matches must not overlap those returned. </summary>
+ public virtual SpanQuery Exclude
+ {
+ get { return exclude; }
+ }
+
+ public override string Field
+ {
+ get { return include.Field; }
+ }
+
+ public override void ExtractTerms(System.Collections.Generic.ISet<Term> terms)
+ {
+ include.ExtractTerms(terms);
+ }
+
+ public override System.String ToString(System.String field)
+ {
+ System.Text.StringBuilder buffer = new System.Text.StringBuilder();
+ buffer.Append("spanNot(");
+ buffer.Append(include.ToString(field));
+ buffer.Append(", ");
+ buffer.Append(exclude.ToString(field));
+ buffer.Append(")");
+ buffer.Append(ToStringUtils.Boost(Boost));
+ return buffer.ToString();
+ }
+
+ public override System.Object Clone()
+ {
+ SpanNotQuery spanNotQuery = new SpanNotQuery((SpanQuery) include.Clone(), (SpanQuery) exclude.Clone());
+ spanNotQuery.Boost = Boost;
+ return spanNotQuery;
+ }
+
+ public override Spans GetSpans(IndexReader reader)
+ {
+ return new AnonymousClassSpans(reader, this);
+ }
+
+ public override Query Rewrite(IndexReader reader)
+ {
+ SpanNotQuery clone = null;
+
+ SpanQuery rewrittenInclude = (SpanQuery) include.Rewrite(reader);
+ if (rewrittenInclude != include)
+ {
+ clone = (SpanNotQuery) this.Clone();
+ clone.include = rewrittenInclude;
+ }
+ SpanQuery rewrittenExclude = (SpanQuery) exclude.Rewrite(reader);
+ if (rewrittenExclude != exclude)
+ {
+ if (clone == null)
+ clone = (SpanNotQuery) this.Clone();
+ clone.exclude = rewrittenExclude;
+ }
+
+ if (clone != null)
+ {
+ return clone; // some clauses rewrote
+ }
+ else
+ {
+ return this; // no clauses rewrote
+ }
+ }
+
+ /// <summary>Returns true iff <c>o</c> is equal to this. </summary>
+ public override bool Equals(System.Object o)
+ {
+ if (this == o)
+ return true;
+ if (!(o is SpanNotQuery))
+ return false;
+
+ SpanNotQuery other = (SpanNotQuery) o;
+ return this.include.Equals(other.include) && this.exclude.Equals(other.exclude) && this.Boost == other.Boost;
+ }
+
+ public override int GetHashCode()
+ {
+ int h = include.GetHashCode();
+ h = (h << 1) | (Number.URShift(h, 31)); // rotate left
+ h ^= exclude.GetHashCode();
+ h = (h << 1) | (Number.URShift(h, 31)); // rotate left
+ h ^= System.Convert.ToInt32(Boost);
+ return h;
+ }
+ }
+} \ No newline at end of file
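
A sketch of SpanNotQuery: matches of the include query are kept only where no span of the exclude query overlaps them (field and term choices are illustrative):

    SpanQuery include = new SpanNearQuery(
        new SpanQuery[]
        {
            new SpanTermQuery(new Term("body", "galileo")),
            new SpanTermQuery(new Term("body", "telescope"))
        }, 10, true);
    // Drop any "galileo ... telescope" match whose window also contains "moon",
    // since that term's span would overlap the include span.
    SpanNotQuery query = new SpanNotQuery(include, new SpanTermQuery(new Term("body", "moon")));
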
diff --git a/src/core/Search/Spans/SpanOrQuery.cs b/src/core/Search/Spans/SpanOrQuery.cs
new file mode 100644
index 0000000..cf99b8e
--- /dev/null
+++ b/src/core/Search/Spans/SpanOrQuery.cs
@@ -0,0 +1,345 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Index;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using IndexReader = Lucene.Net.Index.IndexReader;
+using ToStringUtils = Lucene.Net.Util.ToStringUtils;
+using Query = Lucene.Net.Search.Query;
+
+namespace Lucene.Net.Search.Spans
+{
+
+ /// <summary>Matches the union of its clauses.</summary>
+ [Serializable]
+ public class SpanOrQuery : SpanQuery, System.ICloneable
+ {
+ private class AnonymousClassSpans : Spans
+ {
+ public AnonymousClassSpans(Lucene.Net.Index.IndexReader reader, SpanOrQuery enclosingInstance)
+ {
+ InitBlock(reader, enclosingInstance);
+ }
+ private void InitBlock(Lucene.Net.Index.IndexReader reader, SpanOrQuery enclosingInstance)
+ {
+ this.reader = reader;
+ this.enclosingInstance = enclosingInstance;
+ }
+ private Lucene.Net.Index.IndexReader reader;
+ private SpanOrQuery enclosingInstance;
+ public SpanOrQuery Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ private SpanQueue queue = null;
+
+ private bool InitSpanQueue(int target)
+ {
+ queue = new SpanQueue(enclosingInstance, Enclosing_Instance.clauses.Count);
+ System.Collections.Generic.IEnumerator<SpanQuery> i = Enclosing_Instance.clauses.GetEnumerator();
+ while (i.MoveNext())
+ {
+ Spans spans = i.Current.GetSpans(reader);
+ if (((target == - 1) && spans.Next()) || ((target != - 1) && spans.SkipTo(target)))
+ {
+ queue.Add(spans);
+ }
+ }
+ return queue.Size() != 0;
+ }
+
+ public override bool Next()
+ {
+ if (queue == null)
+ {
+ return InitSpanQueue(- 1);
+ }
+
+ if (queue.Size() == 0)
+ {
+ // all done
+ return false;
+ }
+
+ if (Top().Next())
+ {
+ // move to next
+ queue.UpdateTop();
+ return true;
+ }
+
+ queue.Pop(); // exhausted a clause
+ return queue.Size() != 0;
+ }
+
+ private Spans Top()
+ {
+ return queue.Top();
+ }
+
+ public override bool SkipTo(int target)
+ {
+ if (queue == null)
+ {
+ return InitSpanQueue(target);
+ }
+
+ bool skipCalled = false;
+ while (queue.Size() != 0 && Top().Doc() < target)
+ {
+ if (Top().SkipTo(target))
+ {
+ queue.UpdateTop();
+ }
+ else
+ {
+ queue.Pop();
+ }
+ skipCalled = true;
+ }
+
+ if (skipCalled)
+ {
+ return queue.Size() != 0;
+ }
+ return Next();
+ }
+
+ public override int Doc()
+ {
+ return Top().Doc();
+ }
+ public override int Start()
+ {
+ return Top().Start();
+ }
+ public override int End()
+ {
+ return Top().End();
+ }
+
+ public override ICollection<byte[]> GetPayload()
+ {
+ System.Collections.Generic.ICollection<byte[]> result = null;
+ Spans theTop = Top();
+ if (theTop != null && theTop.IsPayloadAvailable())
+ {
+ result = theTop.GetPayload();
+ }
+ return result;
+ }
+
+ public override bool IsPayloadAvailable()
+ {
+ Spans top = Top();
+ return top != null && top.IsPayloadAvailable();
+ }
+
+ public override System.String ToString()
+ {
+ return "spans(" + Enclosing_Instance + ")@" + ((queue == null)?"START":(queue.Size() > 0?(Doc() + ":" + Start() + "-" + End()):"END"));
+ }
+ }
+
+ private EquatableList<SpanQuery> clauses;
+ private System.String field;
+
+ /// <summary>Construct a SpanOrQuery merging the provided clauses. </summary>
+ public SpanOrQuery(params SpanQuery[] clauses)
+ {
+
+ // copy clauses array into an ArrayList
+ this.clauses = new EquatableList<SpanQuery>(clauses.Length);
+ for (int i = 0; i < clauses.Length; i++)
+ {
+ SpanQuery clause = clauses[i];
+ if (i == 0)
+ {
+ // check field
+ field = clause.Field;
+ }
+ else if (!clause.Field.Equals(field))
+ {
+ throw new System.ArgumentException("Clauses must have same field.");
+ }
+ this.clauses.Add(clause);
+ }
+ }
+
+ /// <summary>Return the clauses whose spans are matched. </summary>
+ public virtual SpanQuery[] GetClauses()
+ {
+ return clauses.ToArray();
+ }
+
+ public override string Field
+ {
+ get { return field; }
+ }
+
+ public override void ExtractTerms(System.Collections.Generic.ISet<Term> terms)
+ {
+ foreach(SpanQuery clause in clauses)
+ {
+ clause.ExtractTerms(terms);
+ }
+ }
+
+ public override System.Object Clone()
+ {
+ int sz = clauses.Count;
+ SpanQuery[] newClauses = new SpanQuery[sz];
+
+ for (int i = 0; i < sz; i++)
+ {
+ newClauses[i] = (SpanQuery) clauses[i].Clone();
+ }
+ SpanOrQuery soq = new SpanOrQuery(newClauses);
+ soq.Boost = Boost;
+ return soq;
+ }
+
+ public override Query Rewrite(IndexReader reader)
+ {
+ SpanOrQuery clone = null;
+ for (int i = 0; i < clauses.Count; i++)
+ {
+ SpanQuery c = clauses[i];
+ SpanQuery query = (SpanQuery) c.Rewrite(reader);
+ if (query != c)
+ {
+ // clause rewrote: must clone
+ if (clone == null)
+ clone = (SpanOrQuery) this.Clone();
+ clone.clauses[i] = query;
+ }
+ }
+ if (clone != null)
+ {
+ return clone; // some clauses rewrote
+ }
+ else
+ {
+ return this; // no clauses rewrote
+ }
+ }
+
+ public override System.String ToString(System.String field)
+ {
+ System.Text.StringBuilder buffer = new System.Text.StringBuilder();
+ buffer.Append("spanOr([");
+ System.Collections.Generic.IEnumerator<SpanQuery> i = clauses.GetEnumerator();
+ int j = 0;
+ while (i.MoveNext())
+ {
+ j++;
+ SpanQuery clause = i.Current;
+ buffer.Append(clause.ToString(field));
+ if (j < clauses.Count)
+ {
+ buffer.Append(", ");
+ }
+ }
+ buffer.Append("])");
+ buffer.Append(ToStringUtils.Boost(Boost));
+ return buffer.ToString();
+ }
+
+ public override bool Equals(System.Object o)
+ {
+ if (this == o)
+ return true;
+ if (o == null || GetType() != o.GetType())
+ return false;
+
+ SpanOrQuery that = (SpanOrQuery) o;
+
+ if (!clauses.Equals(that.clauses))
+ return false;
+			if (clauses.Count != 0 && !field.Equals(that.field))
+ return false;
+
+ return Boost == that.Boost;
+ }
+
+ public override int GetHashCode()
+ {
+ int h = clauses.GetHashCode();
+ h ^= ((h << 10) | (Number.URShift(h, 23)));
+ h ^= System.Convert.ToInt32(Boost);
+ return h;
+ }
+
+
+ private class SpanQueue : PriorityQueue<Spans>
+ {
+ private void InitBlock(SpanOrQuery enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private SpanOrQuery enclosingInstance;
+ public SpanOrQuery Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ public SpanQueue(SpanOrQuery enclosingInstance, int size)
+ {
+ InitBlock(enclosingInstance);
+ Initialize(size);
+ }
+
+ public override bool LessThan(Spans spans1, Spans spans2)
+ {
+ if (spans1.Doc() == spans2.Doc())
+ {
+ if (spans1.Start() == spans2.Start())
+ {
+ return spans1.End() < spans2.End();
+ }
+ else
+ {
+ return spans1.Start() < spans2.Start();
+ }
+ }
+ else
+ {
+ return spans1.Doc() < spans2.Doc();
+ }
+ }
+ }
+
+ public override Spans GetSpans(IndexReader reader)
+ {
+ if (clauses.Count == 1)
+ // optimize 1-clause case
+ return (clauses[0]).GetSpans(reader);
+
+ return new AnonymousClassSpans(reader, this);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/Spans/SpanQuery.cs b/src/core/Search/Spans/SpanQuery.cs
new file mode 100644
index 0000000..03a615a
--- /dev/null
+++ b/src/core/Search/Spans/SpanQuery.cs
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using IndexReader = Lucene.Net.Index.IndexReader;
+using Query = Lucene.Net.Search.Query;
+using Searcher = Lucene.Net.Search.Searcher;
+using Weight = Lucene.Net.Search.Weight;
+
+namespace Lucene.Net.Search.Spans
+{
+
+ /// <summary>Base class for span-based queries. </summary>
+ [Serializable]
+ public abstract class SpanQuery:Query
+ {
+ /// <summary>Expert: Returns the matches for this query in an index. Used internally
+ /// to search for spans.
+ /// </summary>
+ public abstract Spans GetSpans(IndexReader reader);
+
+ /// <summary>Returns the name of the field matched by this query.</summary>
+ public abstract string Field { get; }
+
+ public override Weight CreateWeight(Searcher searcher)
+ {
+ return new SpanWeight(this, searcher);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/Spans/SpanScorer.cs b/src/core/Search/Spans/SpanScorer.cs
new file mode 100644
index 0000000..a44ce08
--- /dev/null
+++ b/src/core/Search/Spans/SpanScorer.cs
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using Explanation = Lucene.Net.Search.Explanation;
+using Scorer = Lucene.Net.Search.Scorer;
+using Similarity = Lucene.Net.Search.Similarity;
+using Weight = Lucene.Net.Search.Weight;
+
+namespace Lucene.Net.Search.Spans
+{
+ /// <summary> Public for extension only.</summary>
+ public class SpanScorer:Scorer
+ {
+ protected internal Spans spans;
+ protected internal Weight weight;
+ protected internal byte[] norms;
+ protected internal float value_Renamed;
+
+ protected internal bool more = true;
+
+ protected internal int doc;
+ protected internal float freq;
+
+ protected internal SpanScorer(Spans spans, Weight weight, Similarity similarity, byte[] norms):base(similarity)
+ {
+ this.spans = spans;
+ this.norms = norms;
+ this.weight = weight;
+ this.value_Renamed = weight.Value;
+ if (this.spans.Next())
+ {
+ doc = - 1;
+ }
+ else
+ {
+ doc = NO_MORE_DOCS;
+ more = false;
+ }
+ }
+
+ public override int NextDoc()
+ {
+ if (!SetFreqCurrentDoc())
+ {
+ doc = NO_MORE_DOCS;
+ }
+ return doc;
+ }
+
+ public override int Advance(int target)
+ {
+ if (!more)
+ {
+ return doc = NO_MORE_DOCS;
+ }
+ if (spans.Doc() < target)
+ {
+ // setFreqCurrentDoc() leaves spans.doc() ahead
+ more = spans.SkipTo(target);
+ }
+ if (!SetFreqCurrentDoc())
+ {
+ doc = NO_MORE_DOCS;
+ }
+ return doc;
+ }
+
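+		// SetFreqCurrentDoc consumes every span match in the current document, summing
+		// Similarity.SloppyFreq(end - start) into 'freq'; when it returns, 'spans' is already
+		// positioned on the first match of a later document (or exhausted, with more == false).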
+ public /*protected internal*/ virtual bool SetFreqCurrentDoc()
+ {
+ if (!more)
+ {
+ return false;
+ }
+ doc = spans.Doc();
+ freq = 0.0f;
+ do
+ {
+ int matchLength = spans.End() - spans.Start();
+ freq += Similarity.SloppyFreq(matchLength);
+ more = spans.Next();
+ }
+ while (more && (doc == spans.Doc()));
+ return true;
+ }
+
+ public override int DocID()
+ {
+ return doc;
+ }
+
+ public override float Score()
+ {
+ float raw = Similarity.Tf(freq) * value_Renamed; // raw score
+ return norms == null?raw:raw * Similarity.DecodeNorm(norms[doc]); // normalize
+ }
+
+ /// <summary>
+ /// This method is no longer an official member of <see cref="Scorer"/>
+ /// but it is needed by SpanWeight to build an explanation.
+ /// </summary>
+ protected internal virtual Explanation Explain(int doc)
+ {
+ Explanation tfExplanation = new Explanation();
+
+ int expDoc = Advance(doc);
+
+ float phraseFreq = (expDoc == doc)?freq:0.0f;
+ tfExplanation.Value = Similarity.Tf(phraseFreq);
+ tfExplanation.Description = "tf(phraseFreq=" + phraseFreq + ")";
+
+ return tfExplanation;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/Spans/SpanTermQuery.cs b/src/core/Search/Spans/SpanTermQuery.cs
new file mode 100644
index 0000000..d6fa721
--- /dev/null
+++ b/src/core/Search/Spans/SpanTermQuery.cs
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using IndexReader = Lucene.Net.Index.IndexReader;
+using Term = Lucene.Net.Index.Term;
+using ToStringUtils = Lucene.Net.Util.ToStringUtils;
+
+namespace Lucene.Net.Search.Spans
+{
+
+ /// <summary>Matches spans containing a term. </summary>
+ [Serializable]
+ public class SpanTermQuery:SpanQuery
+ {
+ protected internal Term internalTerm;
+
+ /// <summary>Construct a SpanTermQuery matching the named term's spans. </summary>
+ public SpanTermQuery(Term term)
+ {
+ this.internalTerm = term;
+ }
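+
+		// Usage sketch (illustrative only; field and term text are invented, and
+		// "reader" is an assumed open IndexReader):
+		//
+		//   var query = new SpanTermQuery(new Term("body", "lucene"));
+		//   Spans spans = query.GetSpans(reader);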
+
+ /// <summary>Return the term whose spans are matched. </summary>
+ public virtual Term Term
+ {
+ get { return internalTerm; }
+ }
+
+ public override string Field
+ {
+ get { return internalTerm.Field; }
+ }
+
+ public override void ExtractTerms(System.Collections.Generic.ISet<Term> terms)
+ {
+ terms.Add(internalTerm);
+ }
+
+ public override System.String ToString(System.String field)
+ {
+ System.Text.StringBuilder buffer = new System.Text.StringBuilder();
+ if (internalTerm.Field.Equals(field))
+ buffer.Append(internalTerm.Text);
+ else
+ {
+ buffer.Append(internalTerm.ToString());
+ }
+ buffer.Append(ToStringUtils.Boost(Boost));
+ return buffer.ToString();
+ }
+
+ public override int GetHashCode()
+ {
+ int prime = 31;
+ int result = base.GetHashCode();
+ result = prime * result + ((internalTerm == null)?0:internalTerm.GetHashCode());
+ return result;
+ }
+
+ public override bool Equals(System.Object obj)
+ {
+ if (this == obj)
+ return true;
+ if (!base.Equals(obj))
+ return false;
+ if (GetType() != obj.GetType())
+ return false;
+ SpanTermQuery other = (SpanTermQuery) obj;
+ if (internalTerm == null)
+ {
+ if (other.internalTerm != null)
+ return false;
+ }
+ else if (!internalTerm.Equals(other.internalTerm))
+ return false;
+ return true;
+ }
+
+ public override Spans GetSpans(IndexReader reader)
+ {
+ return new TermSpans(reader.TermPositions(internalTerm), internalTerm);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/Spans/SpanWeight.cs b/src/core/Search/Spans/SpanWeight.cs
new file mode 100644
index 0000000..3590049
--- /dev/null
+++ b/src/core/Search/Spans/SpanWeight.cs
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Index;
+using IndexReader = Lucene.Net.Index.IndexReader;
+using Lucene.Net.Search;
+using IDFExplanation = Lucene.Net.Search.Explanation.IDFExplanation;
+
+namespace Lucene.Net.Search.Spans
+{
+
+ /// <summary> Expert-only. Public for use by other weight implementations</summary>
+ [Serializable]
+ public class SpanWeight:Weight
+ {
+ protected internal Similarity similarity;
+ protected internal float value_Renamed;
+ protected internal float idf;
+ protected internal float queryNorm;
+ protected internal float queryWeight;
+
+ protected internal ISet<Term> terms;
+ protected internal SpanQuery internalQuery;
+ private IDFExplanation idfExp;
+
+ public SpanWeight(SpanQuery query, Searcher searcher)
+ {
+ this.similarity = query.GetSimilarity(searcher);
+ this.internalQuery = query;
+
+ terms = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet<Term>();
+ query.ExtractTerms(terms);
+
+ idfExp = similarity.IdfExplain(terms, searcher);
+ idf = idfExp.Idf;
+ }
+
+ public override Query Query
+ {
+ get { return internalQuery; }
+ }
+
+ public override float Value
+ {
+ get { return value_Renamed; }
+ }
+
+ public override float GetSumOfSquaredWeights()
+ {
+ queryWeight = idf*internalQuery.Boost; // compute query weight
+ return queryWeight*queryWeight; // square it
+ }
+
+ public override void Normalize(float queryNorm)
+ {
+ this.queryNorm = queryNorm;
+ queryWeight *= queryNorm; // normalize query weight
+ value_Renamed = queryWeight * idf; // idf for document
+ }
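+
+		// Net effect of GetSumOfSquaredWeights followed by Normalize above:
+		// value_Renamed = Boost * idf * idf * queryNorm, the query-side weight that
+		// SpanScorer.Score() later multiplies by tf(freq) and the decoded field norm.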
+
+ public override Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer)
+ {
+ return new SpanScorer(internalQuery.GetSpans(reader), this, similarity, reader.Norms(internalQuery.Field));
+ }
+
+ public override Explanation Explain(IndexReader reader, int doc)
+ {
+
+ ComplexExplanation result = new ComplexExplanation();
+ result.Description = "weight(" + Query + " in " + doc + "), product of:";
+ System.String field = ((SpanQuery) Query).Field;
+
+ Explanation idfExpl = new Explanation(idf, "idf(" + field + ": " + idfExp.Explain() + ")");
+
+ // explain query weight
+ Explanation queryExpl = new Explanation();
+ queryExpl.Description = "queryWeight(" + Query + "), product of:";
+
+ Explanation boostExpl = new Explanation(Query.Boost, "boost");
+ if (Query.Boost != 1.0f)
+ queryExpl.AddDetail(boostExpl);
+ queryExpl.AddDetail(idfExpl);
+
+ Explanation queryNormExpl = new Explanation(queryNorm, "queryNorm");
+ queryExpl.AddDetail(queryNormExpl);
+
+ queryExpl.Value = boostExpl.Value * idfExpl.Value * queryNormExpl.Value;
+
+ result.AddDetail(queryExpl);
+
+ // explain field weight
+ ComplexExplanation fieldExpl = new ComplexExplanation();
+ fieldExpl.Description = "fieldWeight(" + field + ":" + internalQuery.ToString(field) + " in " + doc + "), product of:";
+
+ Explanation tfExpl = ((SpanScorer)Scorer(reader, true, false)).Explain(doc);
+ fieldExpl.AddDetail(tfExpl);
+ fieldExpl.AddDetail(idfExpl);
+
+ Explanation fieldNormExpl = new Explanation();
+ byte[] fieldNorms = reader.Norms(field);
+ float fieldNorm = fieldNorms != null?Similarity.DecodeNorm(fieldNorms[doc]):1.0f;
+ fieldNormExpl.Value = fieldNorm;
+ fieldNormExpl.Description = "fieldNorm(field=" + field + ", doc=" + doc + ")";
+ fieldExpl.AddDetail(fieldNormExpl);
+
+ fieldExpl.Match = tfExpl.IsMatch;
+ fieldExpl.Value = tfExpl.Value * idfExpl.Value * fieldNormExpl.Value;
+
+ result.AddDetail(fieldExpl);
+ System.Boolean? tempAux = fieldExpl.Match;
+ result.Match = tempAux;
+
+ // combine them
+ result.Value = queryExpl.Value * fieldExpl.Value;
+
+ if (queryExpl.Value == 1.0f)
+ return fieldExpl;
+
+ return result;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/Spans/Spans.cs b/src/core/Search/Spans/Spans.cs
new file mode 100644
index 0000000..dad803f
--- /dev/null
+++ b/src/core/Search/Spans/Spans.cs
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Search.Spans
+{
+
+ /// <summary>Expert: an enumeration of span matches. Used to implement span searching.
+ /// Each span represents a range of term positions within a document. Matches
+ /// are enumerated in order, by increasing document number, within that by
+ /// increasing start position and finally by increasing end position.
+ /// </summary>
+ public abstract class Spans
+ {
+ /// <summary>Move to the next match, returning true iff any such exists. </summary>
+ public abstract bool Next();
+
+ /// <summary>Skips to the first match beyond the current, whose document number is
+ /// greater than or equal to <i>target</i>. <p/>Returns true iff there is such
+ /// a match. <p/>Behaves as if written: <code>
+		/// bool SkipTo(int target) {
+		///     do {
+		///         if (!Next())
+		///             return false;
+		///     } while (target > Doc());
+		///     return true;
+		/// }
+ /// </code>
+ /// Most implementations are considerably more efficient than that.
+ /// </summary>
+ public abstract bool SkipTo(int target);
+
+ /// <summary>Returns the document number of the current match. Initially invalid. </summary>
+ public abstract int Doc();
+
+ /// <summary>Returns the start position of the current match. Initially invalid. </summary>
+ public abstract int Start();
+
+ /// <summary>Returns the end position of the current match. Initially invalid. </summary>
+ public abstract int End();
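+
+		// Enumeration sketch (illustrative; "spans" would typically come from SpanQuery.GetSpans):
+		//
+		//   while (spans.Next())
+		//   {
+		//       int doc = spans.Doc();      // document containing the current match
+		//       int start = spans.Start();  // first position of the match
+		//       int end = spans.End();      // one past the last position of the match
+		//   }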
+
+ /// <summary> Returns the payload data for the current span.
+ /// This is invalid until <see cref="Next()" /> is called for
+ /// the first time.
+ /// This method must not be called more than once after each call
+ /// of <see cref="Next()" />. However, most payloads are loaded lazily,
+ /// so if the payload data for the current position is not needed,
+ /// this method may not be called at all for performance reasons. An ordered
+ /// SpanQuery does not lazy load, so if you have payloads in your index and
+ /// you do not want ordered SpanNearQuerys to collect payloads, you can
+ /// disable collection with a constructor option.<br/>
+ ///
+ /// Note that the return type is a collection, thus the ordering should not be relied upon.
+ /// <br/>
+ /// <p/><font color="#FF0000">
+ /// WARNING: The status of the <b>Payloads</b> feature is experimental.
+ /// The APIs introduced here might change in the future and will not be
+ /// supported anymore in such a case.</font><p/>
+ ///
+ /// </summary>
+		/// <returns> a collection of byte arrays containing the data of this payload, or null if <see cref="IsPayloadAvailable()" /> is false </returns>
+		/// <throws> System.IO.IOException </throws>
+ // TODO: Remove warning after API has been finalized
+ [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
+ public abstract ICollection<byte[]> GetPayload();
+
+ /// <summary> Checks if a payload can be loaded at this position.
+ /// <p/>
+ /// Payloads can only be loaded once per call to
+ /// <see cref="Next()" />.
+ ///
+ /// </summary>
+ /// <returns> true if there is a payload available at this position that can be loaded </returns>
+ public abstract bool IsPayloadAvailable();
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/Spans/TermSpans.cs b/src/core/Search/Spans/TermSpans.cs
new file mode 100644
index 0000000..3e9a3bb
--- /dev/null
+++ b/src/core/Search/Spans/TermSpans.cs
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using Term = Lucene.Net.Index.Term;
+using TermPositions = Lucene.Net.Index.TermPositions;
+
+namespace Lucene.Net.Search.Spans
+{
+
+ /// <summary> Expert:
+ /// Public for extension only
+ /// </summary>
+ public class TermSpans:Spans
+ {
+ protected internal TermPositions internalPositions;
+ protected internal Term term;
+ protected internal int internalDoc;
+ protected internal int freq;
+ protected internal int count;
+ protected internal int position;
+
+
+ public TermSpans(TermPositions positions, Term term)
+ {
+
+ this.internalPositions = positions;
+ this.term = term;
+ internalDoc = - 1;
+ }
+
+ public override bool Next()
+ {
+ if (count == freq)
+ {
+ if (!internalPositions.Next())
+ {
+ internalDoc = int.MaxValue;
+ return false;
+ }
+ internalDoc = internalPositions.Doc;
+ freq = internalPositions.Freq;
+ count = 0;
+ }
+ position = internalPositions.NextPosition();
+ count++;
+ return true;
+ }
+
+ public override bool SkipTo(int target)
+ {
+ if (!internalPositions.SkipTo(target))
+ {
+ internalDoc = int.MaxValue;
+ return false;
+ }
+
+ internalDoc = internalPositions.Doc;
+ freq = internalPositions.Freq;
+ count = 0;
+
+ position = internalPositions.NextPosition();
+ count++;
+
+ return true;
+ }
+
+ public override int Doc()
+ {
+ return internalDoc;
+ }
+
+ public override int Start()
+ {
+ return position;
+ }
+
+ public override int End()
+ {
+ return position + 1;
+ }
+
+ // TODO: Remove warning after API has been finalized
+
+ public override ICollection<byte[]> GetPayload()
+ {
+ byte[] bytes = new byte[internalPositions.PayloadLength];
+ bytes = internalPositions.GetPayload(bytes, 0);
+ var val = new System.Collections.Generic.List<byte[]>();
+ val.Add(bytes);
+ return val;
+ }
+
+ // TODO: Remove warning after API has been finalized
+
+ public override bool IsPayloadAvailable()
+ {
+ return internalPositions.IsPayloadAvailable;
+ }
+
+ public override System.String ToString()
+ {
+ return "spans(" + term.ToString() + ")@" + (internalDoc == - 1?"START":((internalDoc == System.Int32.MaxValue)?"END":internalDoc + "-" + position));
+ }
+
+ public virtual TermPositions Positions
+ {
+ get { return internalPositions; }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/TermQuery.cs b/src/core/Search/TermQuery.cs
new file mode 100644
index 0000000..a04ec7a
--- /dev/null
+++ b/src/core/Search/TermQuery.cs
@@ -0,0 +1,237 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using IndexReader = Lucene.Net.Index.IndexReader;
+using Term = Lucene.Net.Index.Term;
+using TermDocs = Lucene.Net.Index.TermDocs;
+using ToStringUtils = Lucene.Net.Util.ToStringUtils;
+using IDFExplanation = Lucene.Net.Search.Explanation.IDFExplanation;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary>A Query that matches documents containing a term.
+ /// This may be combined with other terms with a <see cref="BooleanQuery" />.
+ /// </summary>
+ [Serializable]
+ public class TermQuery:Query
+ {
+ private Term term;
+
+ [Serializable]
+ private class TermWeight:Weight
+ {
+ private void InitBlock(TermQuery enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private TermQuery enclosingInstance;
+ public TermQuery Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ private Similarity similarity;
+ private float value_Renamed;
+ private float idf;
+ private float queryNorm;
+ private float queryWeight;
+ private IDFExplanation idfExp;
+
+ public TermWeight(TermQuery enclosingInstance, Searcher searcher)
+ {
+ InitBlock(enclosingInstance);
+ this.similarity = Enclosing_Instance.GetSimilarity(searcher);
+ idfExp = similarity.IdfExplain(Enclosing_Instance.term, searcher);
+ idf = idfExp.Idf;
+ }
+
+ public override System.String ToString()
+ {
+ return "weight(" + Enclosing_Instance + ")";
+ }
+
+ public override Query Query
+ {
+ get { return Enclosing_Instance; }
+ }
+
+ public override float Value
+ {
+ get { return value_Renamed; }
+ }
+
+ public override float GetSumOfSquaredWeights()
+ {
+ queryWeight = idf*Enclosing_Instance.Boost; // compute query weight
+ return queryWeight*queryWeight; // square it
+ }
+
+ public override void Normalize(float queryNorm)
+ {
+ this.queryNorm = queryNorm;
+ queryWeight *= queryNorm; // normalize query weight
+ value_Renamed = queryWeight * idf; // idf for document
+ }
+
+ public override Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer)
+ {
+ TermDocs termDocs = reader.TermDocs(Enclosing_Instance.term);
+
+ if (termDocs == null)
+ return null;
+
+ return new TermScorer(this, termDocs, similarity, reader.Norms(Enclosing_Instance.term.Field));
+ }
+
+ public override Explanation Explain(IndexReader reader, int doc)
+ {
+
+ ComplexExplanation result = new ComplexExplanation();
+ result.Description = "weight(" + Query + " in " + doc + "), product of:";
+
+ Explanation expl = new Explanation(idf, idfExp.Explain());
+
+ // explain query weight
+ Explanation queryExpl = new Explanation();
+ queryExpl.Description = "queryWeight(" + Query + "), product of:";
+
+ Explanation boostExpl = new Explanation(Enclosing_Instance.Boost, "boost");
+ if (Enclosing_Instance.Boost != 1.0f)
+ queryExpl.AddDetail(boostExpl);
+ queryExpl.AddDetail(expl);
+
+ Explanation queryNormExpl = new Explanation(queryNorm, "queryNorm");
+ queryExpl.AddDetail(queryNormExpl);
+
+ queryExpl.Value = boostExpl.Value * expl.Value * queryNormExpl.Value;
+
+ result.AddDetail(queryExpl);
+
+ // explain field weight
+ System.String field = Enclosing_Instance.term.Field;
+ ComplexExplanation fieldExpl = new ComplexExplanation();
+ fieldExpl.Description = "fieldWeight(" + Enclosing_Instance.term + " in " + doc + "), product of:";
+
+ Explanation tfExplanation = new Explanation();
+ int tf = 0;
+ TermDocs termDocs = reader.TermDocs(enclosingInstance.term);
+ if (termDocs != null)
+ {
+ try
+ {
+ if (termDocs.SkipTo(doc) && termDocs.Doc == doc)
+ {
+ tf = termDocs.Freq;
+ }
+ }
+ finally
+ {
+ termDocs.Close();
+ }
+ tfExplanation.Value = similarity.Tf(tf);
+ tfExplanation.Description = "tf(termFreq(" + enclosingInstance.term + ")=" + tf + ")";
+ }
+ else
+ {
+ tfExplanation.Value = 0.0f;
+ tfExplanation.Description = "no matching term";
+ }
+ fieldExpl.AddDetail(tfExplanation);
+ fieldExpl.AddDetail(expl);
+
+ Explanation fieldNormExpl = new Explanation();
+ byte[] fieldNorms = reader.Norms(field);
+ float fieldNorm = fieldNorms != null?Similarity.DecodeNorm(fieldNorms[doc]):1.0f;
+ fieldNormExpl.Value = fieldNorm;
+ fieldNormExpl.Description = "fieldNorm(field=" + field + ", doc=" + doc + ")";
+ fieldExpl.AddDetail(fieldNormExpl);
+
+ fieldExpl.Match = tfExplanation.IsMatch;
+ fieldExpl.Value = tfExplanation.Value * expl.Value * fieldNormExpl.Value;
+
+ result.AddDetail(fieldExpl);
+ System.Boolean? tempAux = fieldExpl.Match;
+ result.Match = tempAux;
+
+ // combine them
+ result.Value = queryExpl.Value * fieldExpl.Value;
+
+ if (queryExpl.Value == 1.0f)
+ return fieldExpl;
+
+ return result;
+ }
+ }
+
+ /// <summary>Constructs a query for the term <c>t</c>. </summary>
+ public TermQuery(Term t)
+ {
+ term = t;
+ }
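+
+		// Usage sketch (illustrative only; the field, term text and "searcher" instance are assumptions):
+		//
+		//   Query q = new TermQuery(new Term("title", "lucene"));
+		//   TopDocs hits = searcher.Search(q, 10);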
+
+ /// <summary>Returns the term of this query. </summary>
+ public virtual Term Term
+ {
+ get { return term; }
+ }
+
+ public override Weight CreateWeight(Searcher searcher)
+ {
+ return new TermWeight(this, searcher);
+ }
+
+ public override void ExtractTerms(System.Collections.Generic.ISet<Term> terms)
+ {
+ terms.Add(Term);
+ }
+
+ /// <summary>Prints a user-readable version of this query. </summary>
+ public override System.String ToString(System.String field)
+ {
+ System.Text.StringBuilder buffer = new System.Text.StringBuilder();
+ if (!term.Field.Equals(field))
+ {
+ buffer.Append(term.Field);
+ buffer.Append(":");
+ }
+ buffer.Append(term.Text);
+ buffer.Append(ToStringUtils.Boost(Boost));
+ return buffer.ToString();
+ }
+
+ /// <summary>Returns true iff <c>o</c> is equal to this. </summary>
+ public override bool Equals(System.Object o)
+ {
+ if (!(o is TermQuery))
+ return false;
+ TermQuery other = (TermQuery) o;
+ return (this.Boost == other.Boost) && this.term.Equals(other.term);
+ }
+
+ /// <summary>Returns a hash code value for this object.</summary>
+ public override int GetHashCode()
+ {
+ return BitConverter.ToInt32(BitConverter.GetBytes(Boost), 0) ^ term.GetHashCode();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/TermRangeFilter.cs b/src/core/Search/TermRangeFilter.cs
new file mode 100644
index 0000000..ed157bd
--- /dev/null
+++ b/src/core/Search/TermRangeFilter.cs
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Globalization;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> A Filter that restricts search results to a range of values in a given
+ /// field.
+ ///
+ /// <p/>This filter matches the documents looking for terms that fall into the
+ /// supplied range according to <see cref="String.CompareTo(String)" />. It is not intended
+ /// for numerical ranges, use <see cref="NumericRangeFilter{T}" /> instead.
+ ///
+ /// <p/>If you construct a large number of range filters with different ranges but on the
+ /// same field, <see cref="FieldCacheRangeFilter" /> may have significantly better performance.
+ /// </summary>
+ /// <since> 2.9
+ /// </since>
+ [Serializable]
+ public class TermRangeFilter:MultiTermQueryWrapperFilter<TermRangeQuery>
+ {
+
+ /// <param name="fieldName">The field this range applies to
+ /// </param>
+ /// <param name="lowerTerm">The lower bound on this range
+ /// </param>
+ /// <param name="upperTerm">The upper bound on this range
+ /// </param>
+ /// <param name="includeLower">Does this range include the lower bound?
+ /// </param>
+ /// <param name="includeUpper">Does this range include the upper bound?
+ /// </param>
+		/// <throws> IllegalArgumentException if both terms are null or if
+		/// lowerTerm is null and includeLower is true (similar for upperTerm
+		/// and includeUpper)
+		/// </throws>
+ public TermRangeFilter(System.String fieldName, System.String lowerTerm, System.String upperTerm, bool includeLower, bool includeUpper):base(new TermRangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper))
+ {
+ }
+
+ /// <summary> <strong>WARNING:</strong> Using this constructor and supplying a non-null
+ /// value in the <c>collator</c> parameter will cause every single
+ /// index Term in the Field referenced by lowerTerm and/or upperTerm to be
+ /// examined. Depending on the number of index Terms in this Field, the
+ /// operation could be very slow.
+ ///
+ /// </summary>
+ /// <param name="fieldName"></param>
+ /// <param name="lowerTerm">The lower bound on this range
+ /// </param>
+ /// <param name="upperTerm">The upper bound on this range
+ /// </param>
+ /// <param name="includeLower">Does this range include the lower bound?
+ /// </param>
+ /// <param name="includeUpper">Does this range include the upper bound?
+ /// </param>
+ /// <param name="collator">The collator to use when determining range inclusion; set
+ /// to null to use Unicode code point ordering instead of collation.
+ /// </param>
+		/// <throws> IllegalArgumentException if both terms are null or if
+		/// lowerTerm is null and includeLower is true (similar for upperTerm
+		/// and includeUpper)
+		/// </throws>
+ public TermRangeFilter(System.String fieldName, System.String lowerTerm, System.String upperTerm, bool includeLower, bool includeUpper, System.Globalization.CompareInfo collator):base(new TermRangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper, collator))
+ {
+ }
+
+ /// <summary> Constructs a filter for field <c>fieldName</c> matching
+ /// less than or equal to <c>upperTerm</c>.
+ /// </summary>
+ public static TermRangeFilter Less(System.String fieldName, System.String upperTerm)
+ {
+ return new TermRangeFilter(fieldName, null, upperTerm, false, true);
+ }
+
+ /// <summary> Constructs a filter for field <c>fieldName</c> matching
+ /// greater than or equal to <c>lowerTerm</c>.
+ /// </summary>
+ public static TermRangeFilter More(System.String fieldName, System.String lowerTerm)
+ {
+ return new TermRangeFilter(fieldName, lowerTerm, null, true, false);
+ }
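+
+		// Usage sketch (illustrative field and bounds): keep documents whose terms in "author"
+		// fall between "a" (inclusive) and "m" (exclusive), by Unicode code point order.
+		//
+		//   Filter f = new TermRangeFilter("author", "a", "m", true, false);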
+
+ /// <summary>Returns the field name for this filter </summary>
+ public virtual string Field
+ {
+ get { return query.Field; }
+ }
+
+ /// <summary>Returns the lower value of this range filter </summary>
+ public virtual string LowerTerm
+ {
+ get { return query.LowerTerm; }
+ }
+
+ /// <summary>Returns the upper value of this range filter </summary>
+ public virtual string UpperTerm
+ {
+ get { return query.UpperTerm; }
+ }
+
+ /// <summary>Returns <c>true</c> if the lower endpoint is inclusive </summary>
+ public virtual bool IncludesLower
+ {
+ get { return query.IncludesLower; }
+ }
+
+ /// <summary>Returns <c>true</c> if the upper endpoint is inclusive </summary>
+ public virtual bool IncludesUpper
+ {
+ get { return query.IncludesUpper; }
+ }
+
+ /// <summary>Returns the collator used to determine range inclusion, if any. </summary>
+ public virtual CompareInfo Collator
+ {
+ get { return query.Collator; }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/TermRangeQuery.cs b/src/core/Search/TermRangeQuery.cs
new file mode 100644
index 0000000..a27b18e
--- /dev/null
+++ b/src/core/Search/TermRangeQuery.cs
@@ -0,0 +1,238 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Globalization;
+using IndexReader = Lucene.Net.Index.IndexReader;
+using ToStringUtils = Lucene.Net.Util.ToStringUtils;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> A Query that matches documents within an exclusive range of terms.
+ ///
+ /// <p/>This query matches the documents looking for terms that fall into the
+ /// supplied range according to <see cref="String.CompareTo(String)" />. It is not intended
+ /// for numerical ranges, use <see cref="NumericRangeQuery{T}" /> instead.
+ ///
+	/// <p/>This query uses the <see cref="MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT" /> rewrite method.
+ /// </summary>
+ /// <since> 2.9
+ /// </since>
+
+ [Serializable]
+ public class TermRangeQuery:MultiTermQuery
+ {
+ private System.String lowerTerm;
+ private System.String upperTerm;
+ private System.Globalization.CompareInfo collator;
+ private System.String field;
+ private bool includeLower;
+ private bool includeUpper;
+
+
+	/// <summary> Constructs a query selecting all terms greater than or equal to <c>lowerTerm</c>
+	/// but less than or equal to <c>upperTerm</c>.
+ ///
+ /// <p/>
+ /// If an endpoint is null, it is said
+ /// to be "open". Either or both endpoints may be open. Open endpoints may not
+ /// be exclusive (you can't select all but the first or last term without
+ /// explicitly specifying the term to exclude.)
+ ///
+ /// </summary>
+ /// <param name="field">The field that holds both lower and upper terms.
+ /// </param>
+ /// <param name="lowerTerm">The term text at the lower end of the range
+ /// </param>
+ /// <param name="upperTerm">The term text at the upper end of the range
+ /// </param>
+ /// <param name="includeLower">If true, the <c>lowerTerm</c> is
+ /// included in the range.
+ /// </param>
+ /// <param name="includeUpper">If true, the <c>upperTerm</c> is
+ /// included in the range.
+ /// </param>
+ public TermRangeQuery(System.String field, System.String lowerTerm, System.String upperTerm, bool includeLower, bool includeUpper):this(field, lowerTerm, upperTerm, includeLower, includeUpper, null)
+ {
+ }
+
+	/// <summary>Constructs a query selecting all terms greater than or equal to
+	/// <c>lowerTerm</c> but less than or equal to <c>upperTerm</c>.
+ /// <p/>
+ /// If an endpoint is null, it is said
+ /// to be "open". Either or both endpoints may be open. Open endpoints may not
+ /// be exclusive (you can't select all but the first or last term without
+ /// explicitly specifying the term to exclude.)
+ /// <p/>
+ /// If <c>collator</c> is not null, it will be used to decide whether
+ /// index terms are within the given range, rather than using the Unicode code
+ /// point order in which index terms are stored.
+ /// <p/>
+ /// <strong>WARNING:</strong> Using this constructor and supplying a non-null
+ /// value in the <c>collator</c> parameter will cause every single
+ /// index Term in the Field referenced by lowerTerm and/or upperTerm to be
+ /// examined. Depending on the number of index Terms in this Field, the
+ /// operation could be very slow.
+ ///
+ /// </summary>
+ /// <param name="field"></param>
+ /// <param name="lowerTerm">The Term text at the lower end of the range
+ /// </param>
+ /// <param name="upperTerm">The Term text at the upper end of the range
+ /// </param>
+ /// <param name="includeLower">If true, the <c>lowerTerm</c> is
+ /// included in the range.
+ /// </param>
+ /// <param name="includeUpper">If true, the <c>upperTerm</c> is
+ /// included in the range.
+ /// </param>
+ /// <param name="collator">The collator to use to collate index Terms, to determine
+ /// their membership in the range bounded by <c>lowerTerm</c> and
+ /// <c>upperTerm</c>.
+ /// </param>
+ public TermRangeQuery(System.String field, System.String lowerTerm, System.String upperTerm, bool includeLower, bool includeUpper, System.Globalization.CompareInfo collator)
+ {
+ this.field = field;
+ this.lowerTerm = lowerTerm;
+ this.upperTerm = upperTerm;
+ this.includeLower = includeLower;
+ this.includeUpper = includeUpper;
+ this.collator = collator;
+ }
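+
+		// Usage sketch (illustrative field and bounds): all terms from "apple" (inclusive)
+		// up to "banana" (exclusive), compared by Unicode code point order (collator == null).
+		//
+		//   var q = new TermRangeQuery("title", "apple", "banana", true, false);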
+
+ /// <summary>Returns the field name for this query </summary>
+ public virtual string Field
+ {
+ get { return field; }
+ }
+
+ /// <summary>Returns the lower value of this range query </summary>
+ public virtual string LowerTerm
+ {
+ get { return lowerTerm; }
+ }
+
+ /// <summary>Returns the upper value of this range query </summary>
+ public virtual string UpperTerm
+ {
+ get { return upperTerm; }
+ }
+
+ /// <summary>Returns <c>true</c> if the lower endpoint is inclusive </summary>
+ public virtual bool IncludesLower
+ {
+ get { return includeLower; }
+ }
+
+ /// <summary>Returns <c>true</c> if the upper endpoint is inclusive </summary>
+ public virtual bool IncludesUpper
+ {
+ get { return includeUpper; }
+ }
+
+ /// <summary>Returns the collator used to determine range inclusion, if any. </summary>
+ public virtual CompareInfo Collator
+ {
+ get { return collator; }
+ }
+
+ protected internal override FilteredTermEnum GetEnum(IndexReader reader)
+ {
+ return new TermRangeTermEnum(reader, field, lowerTerm, upperTerm, includeLower, includeUpper, collator);
+ }
+
+ /// <summary>Prints a user-readable version of this query. </summary>
+ public override System.String ToString(System.String field)
+ {
+ System.Text.StringBuilder buffer = new System.Text.StringBuilder();
+ if (!Field.Equals(field))
+ {
+ buffer.Append(Field);
+ buffer.Append(":");
+ }
+ buffer.Append(includeLower?'[':'{');
+ buffer.Append(lowerTerm != null?lowerTerm:"*");
+ buffer.Append(" TO ");
+ buffer.Append(upperTerm != null?upperTerm:"*");
+ buffer.Append(includeUpper?']':'}');
+ buffer.Append(ToStringUtils.Boost(Boost));
+ return buffer.ToString();
+ }
+
+ //@Override
+ public override int GetHashCode()
+ {
+ int prime = 31;
+ int result = base.GetHashCode();
+ result = prime * result + ((collator == null)?0:collator.GetHashCode());
+ result = prime * result + ((field == null)?0:field.GetHashCode());
+ result = prime * result + (includeLower?1231:1237);
+ result = prime * result + (includeUpper?1231:1237);
+ result = prime * result + ((lowerTerm == null)?0:lowerTerm.GetHashCode());
+ result = prime * result + ((upperTerm == null)?0:upperTerm.GetHashCode());
+ return result;
+ }
+
+ //@Override
+ public override bool Equals(System.Object obj)
+ {
+ if (this == obj)
+ return true;
+ if (!base.Equals(obj))
+ return false;
+ if (GetType() != obj.GetType())
+ return false;
+ TermRangeQuery other = (TermRangeQuery) obj;
+ if (collator == null)
+ {
+ if (other.collator != null)
+ return false;
+ }
+ else if (!collator.Equals(other.collator))
+ return false;
+ if (field == null)
+ {
+ if (other.field != null)
+ return false;
+ }
+ else if (!field.Equals(other.field))
+ return false;
+ if (includeLower != other.includeLower)
+ return false;
+ if (includeUpper != other.includeUpper)
+ return false;
+ if (lowerTerm == null)
+ {
+ if (other.lowerTerm != null)
+ return false;
+ }
+ else if (!lowerTerm.Equals(other.lowerTerm))
+ return false;
+ if (upperTerm == null)
+ {
+ if (other.upperTerm != null)
+ return false;
+ }
+ else if (!upperTerm.Equals(other.upperTerm))
+ return false;
+ return true;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/TermRangeTermEnum.cs b/src/core/Search/TermRangeTermEnum.cs
new file mode 100644
index 0000000..fa03ff2
--- /dev/null
+++ b/src/core/Search/TermRangeTermEnum.cs
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using IndexReader = Lucene.Net.Index.IndexReader;
+using Term = Lucene.Net.Index.Term;
+using StringHelper = Lucene.Net.Util.StringHelper;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> Subclass of FilteredTermEnum for enumerating all terms that match the
+ /// specified range parameters.
+ /// <p/>
+ /// Term enumerations are always ordered by Term.compareTo(). Each term in
+ /// the enumeration is greater than all that precede it.
+ /// </summary>
+ /// <since> 2.9
+ /// </since>
+ public class TermRangeTermEnum:FilteredTermEnum
+ {
+
+ private System.Globalization.CompareInfo collator = null;
+ private bool endEnum = false;
+ private System.String field;
+ private System.String upperTermText;
+ private System.String lowerTermText;
+ private bool includeLower;
+ private bool includeUpper;
+
+	/// <summary> Enumerates all terms greater than or equal to <c>lowerTerm</c>
+	/// but less than or equal to <c>upperTerm</c>.
+ ///
+ /// If an endpoint is null, it is said to be "open". Either or both
+ /// endpoints may be open. Open endpoints may not be exclusive
+ /// (you can't select all but the first or last term without
+ /// explicitly specifying the term to exclude.)
+ ///
+ /// </summary>
+ /// <param name="reader">
+ /// </param>
+ /// <param name="field">An interned field that holds both lower and upper terms.
+ /// </param>
+ /// <param name="lowerTermText">The term text at the lower end of the range
+ /// </param>
+ /// <param name="upperTermText">The term text at the upper end of the range
+ /// </param>
+ /// <param name="includeLower">If true, the <c>lowerTerm</c> is included in the range.
+ /// </param>
+ /// <param name="includeUpper">If true, the <c>upperTerm</c> is included in the range.
+ /// </param>
+ /// <param name="collator">The collator to use to collate index Terms, to determine their
+ /// membership in the range bounded by <c>lowerTerm</c> and
+ /// <c>upperTerm</c>.
+ ///
+ /// </param>
+ /// <throws> IOException </throws>
+ public TermRangeTermEnum(IndexReader reader, System.String field, System.String lowerTermText, System.String upperTermText, bool includeLower, bool includeUpper, System.Globalization.CompareInfo collator)
+ {
+ this.collator = collator;
+ this.upperTermText = upperTermText;
+ this.lowerTermText = lowerTermText;
+ this.includeLower = includeLower;
+ this.includeUpper = includeUpper;
+ this.field = StringHelper.Intern(field);
+
+ // do a little bit of normalization...
+ // open ended range queries should always be inclusive.
+ if (this.lowerTermText == null)
+ {
+ this.lowerTermText = "";
+ this.includeLower = true;
+ }
+
+ if (this.upperTermText == null)
+ {
+ this.includeUpper = true;
+ }
+
+ System.String startTermText = collator == null?this.lowerTermText:"";
+ SetEnum(reader.Terms(new Term(this.field, startTermText)));
+ }
+
+ public override float Difference()
+ {
+ return 1.0f;
+ }
+
+ public override bool EndEnum()
+ {
+ return endEnum;
+ }
+
+ protected internal override bool TermCompare(Term term)
+ {
+ if (collator == null)
+ {
+ // Use Unicode code point ordering
+ bool checkLower = !includeLower;
+ if (term != null && (System.Object) term.Field == (System.Object) field)
+ {
+ // interned comparison
+ if (!checkLower || null == lowerTermText || String.CompareOrdinal(term.Text, lowerTermText) > 0)
+ {
+ checkLower = false;
+ if (upperTermText != null)
+ {
+ int compare = String.CompareOrdinal(upperTermText, term.Text);
+ /*
+ * if beyond the upper term, or is exclusive and this is equal to
+ * the upper term, break out
+ */
+ if ((compare < 0) || (!includeUpper && compare == 0))
+ {
+ endEnum = true;
+ return false;
+ }
+ }
+ return true;
+ }
+ }
+ else
+ {
+ // break
+ endEnum = true;
+ return false;
+ }
+ return false;
+ }
+ else
+ {
+ if (term != null && (System.Object) term.Field == (System.Object) field)
+ {
+ // interned comparison
+					if ((lowerTermText == null || (includeLower
+							? collator.Compare(term.Text, lowerTermText) >= 0
+							: collator.Compare(term.Text, lowerTermText) > 0))
+						&& (upperTermText == null || (includeUpper
+							? collator.Compare(term.Text, upperTermText) <= 0
+							: collator.Compare(term.Text, upperTermText) < 0)))
+ {
+ return true;
+ }
+ return false;
+ }
+ endEnum = true;
+ return false;
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/TermScorer.cs b/src/core/Search/TermScorer.cs
new file mode 100644
index 0000000..88863bb
--- /dev/null
+++ b/src/core/Search/TermScorer.cs
@@ -0,0 +1,188 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using TermDocs = Lucene.Net.Index.TermDocs;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary>Expert: A <c>Scorer</c> for documents matching a <c>Term</c>.</summary>
+ public sealed class TermScorer:Scorer
+ {
+
+ private static readonly float[] SIM_NORM_DECODER;
+
+ private Weight weight;
+ private TermDocs termDocs;
+ private byte[] norms;
+ private float weightValue;
+ private int doc = - 1;
+
+ private int[] docs = new int[32]; // buffered doc numbers
+ private int[] freqs = new int[32]; // buffered term freqs
+ private int pointer;
+ private int pointerMax;
+
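+		// scoreCache[f] holds Similarity.Tf(f) * weightValue for f < SCORE_CACHE_SIZE
+		// (filled in the constructor), so Score() avoids recomputing tf() for small term frequencies.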
+ private const int SCORE_CACHE_SIZE = 32;
+ private float[] scoreCache = new float[SCORE_CACHE_SIZE];
+
+ /// <summary> Construct a <c>TermScorer</c>.
+ ///
+ /// </summary>
+ /// <param name="weight">The weight of the <c>Term</c> in the query.
+ /// </param>
+ /// <param name="td">An iterator over the documents matching the <c>Term</c>.
+ /// </param>
+ /// <param name="similarity">The <c>Similarity</c> implementation to be used for score
+ /// computations.
+ /// </param>
+ /// <param name="norms">The field norms of the document fields for the <c>Term</c>.
+ /// </param>
+ public /*internal*/ TermScorer(Weight weight, TermDocs td, Similarity similarity, byte[] norms):base(similarity)
+ {
+ this.weight = weight;
+ this.termDocs = td;
+ this.norms = norms;
+ this.weightValue = weight.Value;
+
+ for (int i = 0; i < SCORE_CACHE_SIZE; i++)
+ scoreCache[i] = Similarity.Tf(i) * weightValue;
+ }
+
+ public override void Score(Collector c)
+ {
+ Score(c, System.Int32.MaxValue, NextDoc());
+ }
+
+ // firstDocID is ignored since nextDoc() sets 'doc'
+ public /*protected internal*/ override bool Score(Collector c, int end, int firstDocID)
+ {
+ c.SetScorer(this);
+ while (doc < end)
+ {
+ // for docs in window
+ c.Collect(doc); // collect score
+
+ if (++pointer >= pointerMax)
+ {
+ pointerMax = termDocs.Read(docs, freqs); // refill buffers
+ if (pointerMax != 0)
+ {
+ pointer = 0;
+ }
+ else
+ {
+ termDocs.Close(); // close stream
+ doc = System.Int32.MaxValue; // set to sentinel value
+ return false;
+ }
+ }
+ doc = docs[pointer];
+ }
+ return true;
+ }
+
+ public override int DocID()
+ {
+ return doc;
+ }
+
+ /// <summary> Advances to the next document matching the query. <br/>
+ /// The iterator over the matching documents is buffered using
+ /// <see cref="TermDocs.Read(int[],int[])" />.
+ ///
+ /// </summary>
+ /// <returns> the document matching the query or -1 if there are no more documents.
+ /// </returns>
+ public override int NextDoc()
+ {
+ pointer++;
+ if (pointer >= pointerMax)
+ {
+ pointerMax = termDocs.Read(docs, freqs); // refill buffer
+ if (pointerMax != 0)
+ {
+ pointer = 0;
+ }
+ else
+ {
+ termDocs.Close(); // close stream
+ return doc = NO_MORE_DOCS;
+ }
+ }
+ doc = docs[pointer];
+ return doc;
+ }
+
+ public override float Score()
+ {
+ System.Diagnostics.Debug.Assert(doc != - 1);
+ int f = freqs[pointer];
+ float raw = f < SCORE_CACHE_SIZE?scoreCache[f]:Similarity.Tf(f) * weightValue; // cache miss
+
+ return norms == null?raw:raw * SIM_NORM_DECODER[norms[doc] & 0xFF]; // normalize for field
+ }
+
+ /// <summary> Advances to the first match beyond the current whose document number is
+ /// greater than or equal to a given target. <br/>
+ /// The implementation uses <see cref="TermDocs.SkipTo(int)" />.
+ ///
+ /// </summary>
+ /// <param name="target">The target document number.
+ /// </param>
+ /// <returns> the matching document or -1 if none exist.
+ /// </returns>
+ public override int Advance(int target)
+ {
+ // first scan in cache
+ for (pointer++; pointer < pointerMax; pointer++)
+ {
+ if (docs[pointer] >= target)
+ {
+ return doc = docs[pointer];
+ }
+ }
+
+ // not found in cache, seek underlying stream
+ bool result = termDocs.SkipTo(target);
+ if (result)
+ {
+ pointerMax = 1;
+ pointer = 0;
+ docs[pointer] = doc = termDocs.Doc;
+ freqs[pointer] = termDocs.Freq;
+ }
+ else
+ {
+ doc = NO_MORE_DOCS;
+ }
+ return doc;
+ }
+
+ /// <summary>Returns a string representation of this <c>TermScorer</c>. </summary>
+ public override System.String ToString()
+ {
+ return "scorer(" + weight + ")";
+ }
+ static TermScorer()
+ {
+ SIM_NORM_DECODER = Search.Similarity.GetNormDecoder();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/TimeLimitingCollector.cs b/src/core/Search/TimeLimitingCollector.cs
new file mode 100644
index 0000000..2917df3
--- /dev/null
+++ b/src/core/Search/TimeLimitingCollector.cs
@@ -0,0 +1,234 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Support;
+using IndexReader = Lucene.Net.Index.IndexReader;
+
+namespace Lucene.Net.Search
+{
+
+	/// <summary> The <see cref="TimeLimitingCollector" /> is used to time out search requests that
+ /// take longer than the maximum allowed search time limit. After this time is
+ /// exceeded, the search thread is stopped by throwing a
+ /// <see cref="TimeExceededException" />.
+ /// </summary>
+ public class TimeLimitingCollector:Collector
+ {
+ private void InitBlock()
+ {
+ greedy = DEFAULT_GREEDY;
+ }
+
+ /// <summary> Default timer resolution.</summary>
+ /// <seealso cref="Resolution">
+ /// </seealso>
+ public const int DEFAULT_RESOLUTION = 20;
+
+ /// <summary> Default for <see cref="IsGreedy()" />.</summary>
+ /// <seealso cref="IsGreedy()">
+ /// </seealso>
+ public bool DEFAULT_GREEDY = false;
+
+ private static uint resolution = DEFAULT_RESOLUTION;
+
+ private bool greedy;
+
+ private sealed class TimerThread:ThreadClass
+ {
+
+ // NOTE: we can avoid explicit synchronization here for several reasons:
+ // * updates to volatile long variables are atomic
+ // * only single thread modifies this value
+ // * use of volatile keyword ensures that it does not reside in
+ // a register, but in main memory (so that changes are visible to
+ // other threads).
+ // * visibility of changes does not need to be instantaneous, we can
+ // afford losing a tick or two.
+ //
+ // See section 17 of the Java Language Specification for details.
+ private volatile uint time = 0;
+
+ /// <summary> TimerThread provides a pseudo-clock service to all searching
+ /// threads, so that they can count elapsed time with less overhead
+ /// than repeatedly calling System.currentTimeMillis. A single
+ /// thread should be created to be used for all searches.
+ /// </summary>
+ internal TimerThread():base("TimeLimitedCollector timer thread")
+ {
+ this.IsBackground = true;
+ }
+
+ public override void Run()
+ {
+ while (true)
+ {
+ // TODO: Use System.nanoTime() when Lucene moves to Java SE 5.
+ time += Lucene.Net.Search.TimeLimitingCollector.resolution;
+ System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * Lucene.Net.Search.TimeLimitingCollector.resolution));
+
+ }
+ }
+
+ /// <summary> Get the timer value in milliseconds.</summary>
+ public long Milliseconds
+ {
+ get { return time; }
+ }
+ }
+
+ /// <summary>Thrown when elapsed search time exceeds allowed search time. </summary>
+ [Serializable]
+ public class TimeExceededException:System.SystemException
+ {
+ private long timeAllowed;
+ private long timeElapsed;
+ private int lastDocCollected;
+ internal TimeExceededException(long timeAllowed, long timeElapsed, int lastDocCollected):base("Elapsed time: " + timeElapsed + " ms. Exceeded allowed search time: " + timeAllowed + " ms.")
+ {
+ this.timeAllowed = timeAllowed;
+ this.timeElapsed = timeElapsed;
+ this.lastDocCollected = lastDocCollected;
+ }
+
+ /// <summary>Returns allowed time (milliseconds). </summary>
+ public virtual long TimeAllowed
+ {
+ get { return timeAllowed; }
+ }
+
+ /// <summary>Returns elapsed time (milliseconds). </summary>
+ public virtual long TimeElapsed
+ {
+ get { return timeElapsed; }
+ }
+
+ /// <summary>Returns last doc(absolute doc id) that was collected when the search time exceeded. </summary>
+ public virtual int LastDocCollected
+ {
+ get { return lastDocCollected; }
+ }
+ }
+
+ // Declare and initialize a single static timer thread to be used by
+ // all TimeLimitingCollector instances. The runtime ensures that
+ // this only happens once.
+ private static readonly TimerThread TIMER_THREAD = new TimerThread();
+
+ private long t0;
+ private long timeout;
+ private Collector collector;
+
+ private int docBase;
+
+ /// <summary> Creates a TimeLimitingCollector wrapper over another <see cref="Collector" /> with a specified timeout.</summary>
+ /// <param name="collector">the wrapped <see cref="Collector" />
+ /// </param>
+ /// <param name="timeAllowed">max time allowed for collecting hits after which <see cref="TimeExceededException" /> is thrown
+ /// </param>
+ public TimeLimitingCollector(Collector collector, long timeAllowed)
+ {
+ InitBlock();
+ this.collector = collector;
+ t0 = TIMER_THREAD.Milliseconds;
+ this.timeout = t0 + timeAllowed;
+ }
+
+ /// <summary>
+ /// Gets or sets the timer resolution.
+ /// The default timer resolution is 20 milliseconds.
+ /// This means that a search required to take no longer than
+ /// 800 milliseconds may be stopped after 780 to 820 milliseconds.
+ /// <br/>Note that:
+ /// <list type="bullet">
+ /// <item>Finer (smaller) resolution is more accurate but less efficient.</item>
+ /// <item>Setting resolution to less than 5 milliseconds will be silently modified to 5 milliseconds.</item>
+ /// <item>Setting the resolution smaller than the current resolution might take effect only after the current
+ /// resolution interval has elapsed. (For example, if the current resolution of 20 milliseconds is changed to 5 milliseconds,
+ /// it can take up to 20 milliseconds for the change to take effect.)</item>
+ /// </list>
+ /// </summary>
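+ /// <example>
+ /// Illustrative only: lower the shared timer resolution to 10 milliseconds
+ /// before issuing time-limited searches.
+ /// <code>
+ /// TimeLimitingCollector.Resolution = 10;
+ /// </code>
+ /// </example>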
+ public static long Resolution
+ {
+ get { return resolution; }
+ set
+ {
+ // 5 milliseconds is about the minimum reasonable time for an Object.wait(long) call.
+ resolution = (uint)System.Math.Max(value, 5);
+ }
+ }
+
+ /// <summary> Gets or sets whether this time-limiting collector is greedy in collecting the last hit.
+ /// A non-greedy collector, upon a timeout, would throw a <see cref="TimeExceededException" />
+ /// without allowing the wrapped collector to collect current doc. A greedy one would
+ /// first allow the wrapped hit collector to collect current doc and only then
+ /// throw a <see cref="TimeExceededException" />.
+ /// </summary>
+ public virtual bool IsGreedy
+ {
+ get { return greedy; }
+ set { this.greedy = value; }
+ }
+
+ /// <summary> Calls <see cref="Collector.Collect(int)" /> on the decorated <see cref="Collector" />
+ /// unless the allowed time has passed, in which case it throws an exception.
+ ///
+ /// </summary>
+ /// <throws> TimeExceededException if the allowed time has been exceeded. </throws>
+ public override void Collect(int doc)
+ {
+ long time = TIMER_THREAD.Milliseconds;
+ if (timeout < time)
+ {
+ if (greedy)
+ {
+ //System.out.println(this+" greedy: before failing, collecting doc: "+doc+" "+(time-t0));
+ collector.Collect(doc);
+ }
+ //System.out.println(this+" failing on: "+doc+" "+(time-t0));
+ throw new TimeExceededException(timeout - t0, time - t0, docBase + doc);
+ }
+ //System.out.println(this+" collecting: "+doc+" "+(time-t0));
+ collector.Collect(doc);
+ }
+
+ public override void SetNextReader(IndexReader reader, int base_Renamed)
+ {
+ collector.SetNextReader(reader, base_Renamed);
+ this.docBase = base_Renamed;
+ }
+
+ public override void SetScorer(Scorer scorer)
+ {
+ collector.SetScorer(scorer);
+ }
+
+ public override bool AcceptsDocsOutOfOrder
+ {
+ get { return collector.AcceptsDocsOutOfOrder; }
+ }
+
+ static TimeLimitingCollector()
+ {
+ TIMER_THREAD.Start();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/TopDocs.cs b/src/core/Search/TopDocs.cs
new file mode 100644
index 0000000..142e703
--- /dev/null
+++ b/src/core/Search/TopDocs.cs
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> Represents hits returned by <see cref="Searcher.Search(Query,Filter,int)" />
+ /// and <see cref="Searcher.Search(Query,int)" />
+ /// </summary>
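+ /// <example>
+ /// An illustrative sketch (assumes an existing <c>searcher</c> and <c>query</c>):
+ /// <code>
+ /// TopDocs top = searcher.Search(query, 10);
+ /// foreach (ScoreDoc sd in top.ScoreDocs)
+ /// {
+ ///     Document hit = searcher.Doc(sd.Doc);
+ ///     // use hit and sd.Score
+ /// }
+ /// </code>
+ /// </example>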
+ [Serializable]
+ public class TopDocs
+ {
+ private int _totalHits;
+ private ScoreDoc[] _scoreDocs;
+ private float _maxScore;
+
+ /// <summary>The total number of hits for the query.</summary>
+ public int TotalHits
+ {
+ get { return _totalHits; }
+ set { _totalHits = value; }
+ }
+
+ /// <summary>The top hits for the query. </summary>
+ [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Performance", "CA1819:PropertiesShouldNotReturnArrays")]
+ public ScoreDoc[] ScoreDocs
+ {
+ get { return _scoreDocs; }
+ set { _scoreDocs = value; }
+ }
+
+ /// <summary>
+ /// Gets or sets the maximum score value encountered, needed for normalizing.
+ /// Note that in case scores are not tracked, this returns <see cref="float.NaN" />.
+ /// </summary>
+ public float MaxScore
+ {
+ get { return _maxScore; }
+ set { _maxScore = value; }
+ }
+
+ /// <summary>Constructs a TopDocs with a default maxScore=Float.NaN. </summary>
+ internal TopDocs(int totalHits, ScoreDoc[] scoreDocs):this(totalHits, scoreDocs, float.NaN)
+ {
+ }
+
+ /// <summary>Constructs a TopDocs with the given total hits, score docs, and maximum score.</summary>
+ public TopDocs(int totalHits, ScoreDoc[] scoreDocs, float maxScore)
+ {
+ this.TotalHits = totalHits;
+ this.ScoreDocs = scoreDocs;
+ this.MaxScore = maxScore;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/TopDocsCollector.cs b/src/core/Search/TopDocsCollector.cs
new file mode 100644
index 0000000..f947e5c
--- /dev/null
+++ b/src/core/Search/TopDocsCollector.cs
@@ -0,0 +1,155 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Search
+{
+ /// <summary> A base class for all collectors that return a <see cref="Lucene.Net.Search.TopDocs" /> output. This
+ /// collector allows easy extension by providing a single constructor which
+ /// accepts a <see cref="PriorityQueue{T}" /> as well as protected members for that
+ /// priority queue and a counter of the number of total hits.<br/>
+ /// Extending classes can override <see cref="TopDocs(int, int)" /> and
+ /// <see cref="TotalHits" /> in order to provide their own implementation.
+ /// </summary>
+ public abstract class TopDocsCollector<T> : Collector where T : ScoreDoc
+ {
+
+ // This is used in case topDocs() is called with illegal parameters, or there
+ // simply aren't (enough) results.
+ protected internal static readonly TopDocs EMPTY_TOPDOCS = new TopDocs(0, new ScoreDoc[0], System.Single.NaN);
+
+ /// <summary> The priority queue which holds the top documents. Note that different
+ /// implementations of PriorityQueue give different meaning to 'top documents'.
+ /// HitQueue for example aggregates the top scoring documents, while other PQ
+ /// implementations may hold documents sorted by other criteria.
+ /// </summary>
+ protected internal PriorityQueue<T> pq;
+
+ /// <summary>The total number of documents that the collector encountered. </summary>
+ protected internal int internalTotalHits;
+
+ protected internal TopDocsCollector(PriorityQueue<T> pq)
+ {
+ this.pq = pq;
+ }
+
+ /// <summary> Populates the results array with the ScoreDoc instances. This can be
+ /// overridden in case a different ScoreDoc type should be returned.
+ /// </summary>
+ protected internal virtual void PopulateResults(ScoreDoc[] results, int howMany)
+ {
+ for (int i = howMany - 1; i >= 0; i--)
+ {
+ results[i] = pq.Pop();
+ }
+ }
+
+ /// <summary> Returns a <see cref="Lucene.Net.Search.TopDocs" /> instance containing the given results. If
+ /// <c>results</c> is null it means there are no results to return,
+ /// either because there were 0 calls to collect() or because the arguments to
+ /// topDocs were invalid.
+ /// </summary>
+ public /*protected internal*/ virtual TopDocs NewTopDocs(ScoreDoc[] results, int start)
+ {
+ return results == null ? EMPTY_TOPDOCS : new TopDocs(internalTotalHits, results);
+ }
+
+ /// <summary>The total number of documents that matched this query. </summary>
+ public virtual int TotalHits
+ {
+ get { return internalTotalHits; }
+ }
+
+ /// <summary>Returns the top docs that were collected by this collector. </summary>
+ public TopDocs TopDocs()
+ {
+ // In case pq was populated with sentinel values, there might be less
+ // results than pq.size(). Therefore return all results until either
+ // pq.size() or totalHits.
+ return TopDocs(0, internalTotalHits < pq.Size() ? internalTotalHits : pq.Size());
+ }
+
+ /// <summary> Returns the documents in the range [start .. pq.size()) that were collected
+ /// by this collector. Note that if start >= pq.size(), an empty TopDocs is
+ /// returned.<br/>
+ /// This method is convenient to call if the application always asks for the
+ /// last results, starting from the last 'page'.<br/>
+ /// <b>NOTE:</b> you cannot call this method more than once for each search
+ /// execution. If you need to call it more than once, passing each time a
+ /// different <c>start</c>, you should call <see cref="TopDocs()" /> and work
+ /// with the returned <see cref="Lucene.Net.Search.TopDocs" /> object, which will contain all the
+ /// results this search execution collected.
+ /// </summary>
+ public TopDocs TopDocs(int start)
+ {
+ // In case pq was populated with sentinel values, there might be less
+ // results than pq.size(). Therefore return all results until either
+ // pq.size() or totalHits.
+ return TopDocs(start, internalTotalHits < pq.Size() ? internalTotalHits : pq.Size());
+ }
+
+ /// <summary> Returns the documents in the range [start .. start+howMany) that were
+ /// collected by this collector. Note that if start >= pq.size(), an empty
+ /// TopDocs is returned, and if pq.size() - start &lt; howMany, then only the
+ /// available documents in [start .. pq.size()) are returned.<br/>
+ /// This method is useful to call in case pagination of search results is
+ /// allowed by the search application; it also attempts to minimize the
+ /// memory used by allocating only as much as requested by howMany.<br/>
+ /// <b>NOTE:</b> you cannot call this method more than once for each search
+ /// execution. If you need to call it more than once, passing each time a
+ /// different range, you should call <see cref="TopDocs()" /> and work with the
+ /// returned <see cref="Lucene.Net.Search.TopDocs" /> object, which will contain all the results this
+ /// search execution collected.
+ /// </summary>
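+ /// <example>
+ /// An illustrative paging sketch (assumes this collector has already been used
+ /// in a search, with a page size of 10):
+ /// <code>
+ /// // hits 10..19, i.e. the second page
+ /// TopDocs page2 = collector.TopDocs(10, 10);
+ /// </code>
+ /// </example>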
+ public TopDocs TopDocs(int start, int howMany)
+ {
+
+ // In case pq was populated with sentinel values, there might be less
+ // results than pq.size(). Therefore return all results until either
+ // pq.size() or totalHits.
+ int size = internalTotalHits < pq.Size() ? internalTotalHits : pq.Size();
+
+ // Don't bother to throw an exception, just return an empty TopDocs in case
+ // the parameters are invalid or out of range.
+ if (start < 0 || start >= size || howMany <= 0)
+ {
+ return NewTopDocs(null, start);
+ }
+
+ // We know that start < pq.Size(), so just fix howMany.
+ howMany = System.Math.Min(size - start, howMany);
+ ScoreDoc[] results = new ScoreDoc[howMany];
+
+ // pq's pop() returns the 'least' element in the queue, therefore need
+ // to discard the first ones, until we reach the requested range.
+ // Note that this loop will usually not be executed, since the common usage
+ // should be that the caller asks for the last howMany results. However it's
+ // needed here for completeness.
+ for (int i = pq.Size() - start - howMany; i > 0; i--)
+ {
+ pq.Pop();
+ }
+
+ // Get the requested results from pq.
+ PopulateResults(results, howMany);
+
+ return NewTopDocs(results, start);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/TopFieldCollector.cs b/src/core/Search/TopFieldCollector.cs
new file mode 100644
index 0000000..1cfc4d3
--- /dev/null
+++ b/src/core/Search/TopFieldCollector.cs
@@ -0,0 +1,1137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Util;
+using IndexReader = Lucene.Net.Index.IndexReader;
+using Entry = Lucene.Net.Search.FieldValueHitQueue.Entry;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> A <see cref="Collector" /> that sorts by <see cref="SortField" /> using
+ /// <see cref="FieldComparator" />s.
+ /// <p/>
+ /// See the <see cref="Create" /> method
+ /// for instantiating a TopFieldCollector.
+ ///
+ /// <p/><b>NOTE:</b> This API is experimental and might change in
+ /// incompatible ways in the next release.<p/>
+ /// </summary>
+ public abstract class TopFieldCollector : TopDocsCollector<Entry>
+ {
+ // TODO: one optimization we could do is to pre-fill
+ // the queue with sentinel values that are guaranteed to
+ // always compare lower than a real hit; this would
+ // save having to check queueFull on each insert
+
+ //
+ // Implements a TopFieldCollector over one SortField criteria, without
+ // tracking document scores and maxScore.
+ //
+ private class OneComparatorNonScoringCollector : TopFieldCollector
+ {
+ internal FieldComparator comparator;
+ internal int reverseMul;
+
+ public OneComparatorNonScoringCollector(FieldValueHitQueue queue, int numHits, bool fillFields):base(queue, numHits, fillFields)
+ {
+ comparator = queue.GetComparators()[0];
+ reverseMul = queue.GetReverseMul()[0];
+ }
+
+ internal void UpdateBottom(int doc)
+ {
+ // bottom.score is already set to Float.NaN in add().
+ bottom.Doc = docBase + doc;
+ bottom = pq.UpdateTop();
+ }
+
+ public override void Collect(int doc)
+ {
+ ++internalTotalHits;
+ if (queueFull)
+ {
+ if ((reverseMul * comparator.CompareBottom(doc)) <= 0)
+ {
+ // since docs are visited in doc Id order, if compare is 0, it means
+ // this document is larger than anything else in the queue, and
+ // therefore not competitive.
+ return ;
+ }
+
+ // This hit is competitive - replace bottom element in queue & adjustTop
+ comparator.Copy(bottom.slot, doc);
+ UpdateBottom(doc);
+ comparator.SetBottom(bottom.slot);
+ }
+ else
+ {
+ // Startup transient: queue hasn't gathered numHits yet
+ int slot = internalTotalHits - 1;
+ // Copy hit into queue
+ comparator.Copy(slot, doc);
+ Add(slot, doc, System.Single.NaN);
+ if (queueFull)
+ {
+ comparator.SetBottom(bottom.slot);
+ }
+ }
+ }
+
+ public override void SetNextReader(IndexReader reader, int docBase)
+ {
+ this.docBase = docBase;
+ comparator.SetNextReader(reader, docBase);
+ }
+
+ public override void SetScorer(Scorer scorer)
+ {
+ comparator.SetScorer(scorer);
+ }
+ }
+
+ //
+ // Implements a TopFieldCollector over one SortField criteria, without
+ // tracking document scores and maxScore, and assumes out-of-order doc Id
+ // collection.
+ //
+ private class OutOfOrderOneComparatorNonScoringCollector:OneComparatorNonScoringCollector
+ {
+
+ public OutOfOrderOneComparatorNonScoringCollector(FieldValueHitQueue queue, int numHits, bool fillFields):base(queue, numHits, fillFields)
+ {
+ }
+
+ public override void Collect(int doc)
+ {
+ ++internalTotalHits;
+ if (queueFull)
+ {
+ // Fastmatch: return if this hit is not competitive
+ int cmp = reverseMul * comparator.CompareBottom(doc);
+ if (cmp < 0 || (cmp == 0 && doc + docBase > bottom.Doc))
+ {
+ return ;
+ }
+
+ // This hit is competitive - replace bottom element in queue & adjustTop
+ comparator.Copy(bottom.slot, doc);
+ UpdateBottom(doc);
+ comparator.SetBottom(bottom.slot);
+ }
+ else
+ {
+ // Startup transient: queue hasn't gathered numHits yet
+ int slot = internalTotalHits - 1;
+ // Copy hit into queue
+ comparator.Copy(slot, doc);
+ Add(slot, doc, System.Single.NaN);
+ if (queueFull)
+ {
+ comparator.SetBottom(bottom.slot);
+ }
+ }
+ }
+
+ public override bool AcceptsDocsOutOfOrder
+ {
+ get { return true; }
+ }
+ }
+
+ /*
+ * Implements a TopFieldCollector over one SortField criteria, while tracking
+ * document scores but no maxScore.
+ */
+ private class OneComparatorScoringNoMaxScoreCollector : OneComparatorNonScoringCollector
+ {
+
+ internal Scorer scorer;
+
+ public OneComparatorScoringNoMaxScoreCollector(FieldValueHitQueue queue, int numHits, bool fillFields):base(queue, numHits, fillFields)
+ {
+ }
+
+ internal void UpdateBottom(int doc, float score)
+ {
+ bottom.Doc = docBase + doc;
+ bottom.Score = score;
+ bottom = pq.UpdateTop();
+ }
+
+ public override void Collect(int doc)
+ {
+ ++internalTotalHits;
+ if (queueFull)
+ {
+ if ((reverseMul * comparator.CompareBottom(doc)) <= 0)
+ {
+ // since docs are visited in doc Id order, if compare is 0, it means
+ // this document is larger than anything else in the queue, and
+ // therefore not competitive.
+ return ;
+ }
+
+ // Compute the score only if the hit is competitive.
+ float score = scorer.Score();
+
+ // This hit is competitive - replace bottom element in queue & adjustTop
+ comparator.Copy(bottom.slot, doc);
+ UpdateBottom(doc, score);
+ comparator.SetBottom(bottom.slot);
+ }
+ else
+ {
+ // Compute the score only if the hit is competitive.
+ float score = scorer.Score();
+
+ // Startup transient: queue hasn't gathered numHits yet
+ int slot = internalTotalHits - 1;
+ // Copy hit into queue
+ comparator.Copy(slot, doc);
+ Add(slot, doc, score);
+ if (queueFull)
+ {
+ comparator.SetBottom(bottom.slot);
+ }
+ }
+ }
+
+ public override void SetScorer(Scorer scorer)
+ {
+ this.scorer = scorer;
+ comparator.SetScorer(scorer);
+ }
+ }
+
+ /*
+ * Implements a TopFieldCollector over one SortField criteria, while tracking
+ * document scores but no maxScore, and assumes out-of-order doc Id
+ * collection.
+ */
+ private class OutOfOrderOneComparatorScoringNoMaxScoreCollector : OneComparatorScoringNoMaxScoreCollector
+ {
+
+ public OutOfOrderOneComparatorScoringNoMaxScoreCollector(FieldValueHitQueue queue, int numHits, bool fillFields):base(queue, numHits, fillFields)
+ {
+ }
+
+ public override void Collect(int doc)
+ {
+ ++internalTotalHits;
+ if (queueFull)
+ {
+ // Fastmatch: return if this hit is not competitive
+ int cmp = reverseMul * comparator.CompareBottom(doc);
+ if (cmp < 0 || (cmp == 0 && doc + docBase > bottom.Doc))
+ {
+ return ;
+ }
+
+ // Compute the score only if the hit is competitive.
+ float score = scorer.Score();
+
+ // This hit is competitive - replace bottom element in queue & adjustTop
+ comparator.Copy(bottom.slot, doc);
+ UpdateBottom(doc, score);
+ comparator.SetBottom(bottom.slot);
+ }
+ else
+ {
+ // Compute the score only if the hit is competitive.
+ float score = scorer.Score();
+
+ // Startup transient: queue hasn't gathered numHits yet
+ int slot = internalTotalHits - 1;
+ // Copy hit into queue
+ comparator.Copy(slot, doc);
+ Add(slot, doc, score);
+ if (queueFull)
+ {
+ comparator.SetBottom(bottom.slot);
+ }
+ }
+ }
+
+ public override bool AcceptsDocsOutOfOrder
+ {
+ get { return true; }
+ }
+ }
+
+ //
+ // Implements a TopFieldCollector over one SortField criteria, while tracking
+ // document scores and maxScore.
+ //
+ private class OneComparatorScoringMaxScoreCollector:OneComparatorNonScoringCollector
+ {
+
+ internal Scorer scorer;
+
+ public OneComparatorScoringMaxScoreCollector(FieldValueHitQueue queue, int numHits, bool fillFields):base(queue, numHits, fillFields)
+ {
+ // Must set maxScore to NEG_INF, or otherwise Math.max always returns NaN.
+ maxScore = System.Single.NegativeInfinity;
+ }
+
+ internal void UpdateBottom(int doc, float score)
+ {
+ bottom.Doc = docBase + doc;
+ bottom.Score = score;
+ bottom = pq.UpdateTop();
+ }
+
+ public override void Collect(int doc)
+ {
+ float score = scorer.Score();
+ if (score > maxScore)
+ {
+ maxScore = score;
+ }
+ ++internalTotalHits;
+ if (queueFull)
+ {
+ if ((reverseMul * comparator.CompareBottom(doc)) <= 0)
+ {
+ // since docs are visited in doc Id order, if compare is 0, it means
+ // this document is larger than anything else in the queue, and
+ // therefore not competitive.
+ return ;
+ }
+
+ // This hit is competitive - replace bottom element in queue & adjustTop
+ comparator.Copy(bottom.slot, doc);
+ UpdateBottom(doc, score);
+ comparator.SetBottom(bottom.slot);
+ }
+ else
+ {
+ // Startup transient: queue hasn't gathered numHits yet
+ int slot = internalTotalHits - 1;
+ // Copy hit into queue
+ comparator.Copy(slot, doc);
+ Add(slot, doc, score);
+ if (queueFull)
+ {
+ comparator.SetBottom(bottom.slot);
+ }
+ }
+ }
+
+ public override void SetScorer(Scorer scorer)
+ {
+ this.scorer = scorer;
+ base.SetScorer(scorer);
+ }
+ }
+
+ //
+ // Implements a TopFieldCollector over one SortField criteria, while tracking
+ // document scores and maxScore, and assumes out-of-order doc Id
+ // collection.
+ //
+ private class OutOfOrderOneComparatorScoringMaxScoreCollector : OneComparatorScoringMaxScoreCollector
+ {
+
+ public OutOfOrderOneComparatorScoringMaxScoreCollector(FieldValueHitQueue queue, int numHits, bool fillFields):base(queue, numHits, fillFields)
+ {
+ }
+
+ public override void Collect(int doc)
+ {
+ float score = scorer.Score();
+ if (score > maxScore)
+ {
+ maxScore = score;
+ }
+ ++internalTotalHits;
+ if (queueFull)
+ {
+ // Fastmatch: return if this hit is not competitive
+ int cmp = reverseMul * comparator.CompareBottom(doc);
+ if (cmp < 0 || (cmp == 0 && doc + docBase > bottom.Doc))
+ {
+ return ;
+ }
+
+ // This hit is competitive - replace bottom element in queue & adjustTop
+ comparator.Copy(bottom.slot, doc);
+ UpdateBottom(doc, score);
+ comparator.SetBottom(bottom.slot);
+ }
+ else
+ {
+ // Startup transient: queue hasn't gathered numHits yet
+ int slot = internalTotalHits - 1;
+ // Copy hit into queue
+ comparator.Copy(slot, doc);
+ Add(slot, doc, score);
+ if (queueFull)
+ {
+ comparator.SetBottom(bottom.slot);
+ }
+ }
+ }
+
+ public override bool AcceptsDocsOutOfOrder
+ {
+ get { return true; }
+ }
+ }
+
+ /*
+ * Implements a TopFieldCollector over multiple SortField criteria, without
+ * tracking document scores and maxScore.
+ */
+ private class MultiComparatorNonScoringCollector:TopFieldCollector
+ {
+ internal FieldComparator[] comparators;
+ internal int[] reverseMul;
+
+ public MultiComparatorNonScoringCollector(FieldValueHitQueue queue, int numHits, bool fillFields):base(queue, numHits, fillFields)
+ {
+ comparators = queue.GetComparators();
+ reverseMul = queue.GetReverseMul();
+ }
+
+ internal void UpdateBottom(int doc)
+ {
+ // bottom.score is already set to Float.NaN in add().
+ bottom.Doc = docBase + doc;
+ bottom = pq.UpdateTop();
+ }
+
+ public override void Collect(int doc)
+ {
+ ++internalTotalHits;
+ if (queueFull)
+ {
+ // Fastmatch: return if this hit is not competitive
+ for (int i = 0; ; i++)
+ {
+ int c = reverseMul[i] * comparators[i].CompareBottom(doc);
+ if (c < 0)
+ {
+ // Definitely not competitive.
+ return ;
+ }
+ else if (c > 0)
+ {
+ // Definitely competitive.
+ break;
+ }
+ else if (i == comparators.Length - 1)
+ {
+ // Here c=0. If we're at the last comparator, this doc is not
+ // competitive, since docs are visited in doc Id order, which means
+ // this doc cannot compete with any other document in the queue.
+ return ;
+ }
+ }
+
+ // This hit is competitive - replace bottom element in queue & adjustTop
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].Copy(bottom.slot, doc);
+ }
+
+ UpdateBottom(doc);
+
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].SetBottom(bottom.slot);
+ }
+ }
+ else
+ {
+ // Startup transient: queue hasn't gathered numHits yet
+ int slot = internalTotalHits - 1;
+ // Copy hit into queue
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].Copy(slot, doc);
+ }
+ Add(slot, doc, System.Single.NaN);
+ if (queueFull)
+ {
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].SetBottom(bottom.slot);
+ }
+ }
+ }
+ }
+
+ public override void SetNextReader(IndexReader reader, int docBase)
+ {
+ this.docBase = docBase;
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].SetNextReader(reader, docBase);
+ }
+ }
+
+ public override void SetScorer(Scorer scorer)
+ {
+ // set the scorer on all comparators
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].SetScorer(scorer);
+ }
+ }
+ }
+
+ /*
+ * Implements a TopFieldCollector over multiple SortField criteria, without
+ * tracking document scores and maxScore, and assumes out-of-order doc Id
+ * collection.
+ */
+ private class OutOfOrderMultiComparatorNonScoringCollector:MultiComparatorNonScoringCollector
+ {
+
+ public OutOfOrderMultiComparatorNonScoringCollector(FieldValueHitQueue queue, int numHits, bool fillFields):base(queue, numHits, fillFields)
+ {
+ }
+
+ public override void Collect(int doc)
+ {
+ ++internalTotalHits;
+ if (queueFull)
+ {
+ // Fastmatch: return if this hit is not competitive
+ for (int i = 0; ; i++)
+ {
+ int c = reverseMul[i] * comparators[i].CompareBottom(doc);
+ if (c < 0)
+ {
+ // Definitely not competitive.
+ return ;
+ }
+ else if (c > 0)
+ {
+ // Definitely competitive.
+ break;
+ }
+ else if (i == comparators.Length - 1)
+ {
+ // This is the equals case.
+ if (doc + docBase > bottom.Doc)
+ {
+ // Definitely not competitive
+ return ;
+ }
+ break;
+ }
+ }
+
+ // This hit is competitive - replace bottom element in queue & adjustTop
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].Copy(bottom.slot, doc);
+ }
+
+ UpdateBottom(doc);
+
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].SetBottom(bottom.slot);
+ }
+ }
+ else
+ {
+ // Startup transient: queue hasn't gathered numHits yet
+ int slot = internalTotalHits - 1;
+ // Copy hit into queue
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].Copy(slot, doc);
+ }
+ Add(slot, doc, System.Single.NaN);
+ if (queueFull)
+ {
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].SetBottom(bottom.slot);
+ }
+ }
+ }
+ }
+
+ public override bool AcceptsDocsOutOfOrder
+ {
+ get { return true; }
+ }
+ }
+
+ /*
+ * Implements a TopFieldCollector over multiple SortField criteria, while
+ * tracking document scores and maxScore.
+ */
+ private class MultiComparatorScoringMaxScoreCollector : MultiComparatorNonScoringCollector
+ {
+
+ internal Scorer scorer;
+
+ public MultiComparatorScoringMaxScoreCollector(FieldValueHitQueue queue, int numHits, bool fillFields):base(queue, numHits, fillFields)
+ {
+ // Must set maxScore to NEG_INF, or otherwise Math.max always returns NaN.
+ maxScore = System.Single.NegativeInfinity;
+ }
+
+ internal void UpdateBottom(int doc, float score)
+ {
+ bottom.Doc = docBase + doc;
+ bottom.Score = score;
+ bottom = pq.UpdateTop();
+ }
+
+ public override void Collect(int doc)
+ {
+ float score = scorer.Score();
+ if (score > maxScore)
+ {
+ maxScore = score;
+ }
+ ++internalTotalHits;
+ if (queueFull)
+ {
+ // Fastmatch: return if this hit is not competitive
+ for (int i = 0; ; i++)
+ {
+ int c = reverseMul[i] * comparators[i].CompareBottom(doc);
+ if (c < 0)
+ {
+ // Definitely not competitive.
+ return ;
+ }
+ else if (c > 0)
+ {
+ // Definitely competitive.
+ break;
+ }
+ else if (i == comparators.Length - 1)
+ {
+ // Here c=0. If we're at the last comparator, this doc is not
+ // competitive, since docs are visited in doc Id order, which means
+ // this doc cannot compete with any other document in the queue.
+ return ;
+ }
+ }
+
+ // This hit is competitive - replace bottom element in queue & adjustTop
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].Copy(bottom.slot, doc);
+ }
+
+ UpdateBottom(doc, score);
+
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].SetBottom(bottom.slot);
+ }
+ }
+ else
+ {
+ // Startup transient: queue hasn't gathered numHits yet
+ int slot = internalTotalHits - 1;
+ // Copy hit into queue
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].Copy(slot, doc);
+ }
+ Add(slot, doc, score);
+ if (queueFull)
+ {
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].SetBottom(bottom.slot);
+ }
+ }
+ }
+ }
+
+ public override void SetScorer(Scorer scorer)
+ {
+ this.scorer = scorer;
+ base.SetScorer(scorer);
+ }
+ }
+
+ /*
+ * Implements a TopFieldCollector over multiple SortField criteria, while
+ * tracking document scores and maxScore, and assumes out-of-order doc Id
+ * collection.
+ */
+ private sealed class OutOfOrderMultiComparatorScoringMaxScoreCollector:MultiComparatorScoringMaxScoreCollector
+ {
+
+ public OutOfOrderMultiComparatorScoringMaxScoreCollector(FieldValueHitQueue queue, int numHits, bool fillFields):base(queue, numHits, fillFields)
+ {
+ }
+
+ public override void Collect(int doc)
+ {
+ float score = scorer.Score();
+ if (score > maxScore)
+ {
+ maxScore = score;
+ }
+ ++internalTotalHits;
+ if (queueFull)
+ {
+ // Fastmatch: return if this hit is not competitive
+ for (int i = 0; ; i++)
+ {
+ int c = reverseMul[i] * comparators[i].CompareBottom(doc);
+ if (c < 0)
+ {
+ // Definitely not competitive.
+ return ;
+ }
+ else if (c > 0)
+ {
+ // Definitely competitive.
+ break;
+ }
+ else if (i == comparators.Length - 1)
+ {
+ // This is the equals case.
+ if (doc + docBase > bottom.Doc)
+ {
+ // Definitely not competitive
+ return ;
+ }
+ break;
+ }
+ }
+
+ // This hit is competitive - replace bottom element in queue & adjustTop
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].Copy(bottom.slot, doc);
+ }
+
+ UpdateBottom(doc, score);
+
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].SetBottom(bottom.slot);
+ }
+ }
+ else
+ {
+ // Startup transient: queue hasn't gathered numHits yet
+ int slot = internalTotalHits - 1;
+ // Copy hit into queue
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].Copy(slot, doc);
+ }
+ Add(slot, doc, score);
+ if (queueFull)
+ {
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].SetBottom(bottom.slot);
+ }
+ }
+ }
+ }
+
+ public override bool AcceptsDocsOutOfOrder
+ {
+ get { return true; }
+ }
+ }
+
+ /*
+ * Implements a TopFieldCollector over multiple SortField criteria, while
+ * tracking document scores but no maxScore.
+ */
+ private class MultiComparatorScoringNoMaxScoreCollector:MultiComparatorNonScoringCollector
+ {
+
+ internal Scorer scorer;
+
+ public MultiComparatorScoringNoMaxScoreCollector(FieldValueHitQueue queue, int numHits, bool fillFields):base(queue, numHits, fillFields)
+ {
+ }
+
+ internal void UpdateBottom(int doc, float score)
+ {
+ bottom.Doc = docBase + doc;
+ bottom.Score = score;
+ bottom = pq.UpdateTop();
+ }
+
+ public override void Collect(int doc)
+ {
+ ++internalTotalHits;
+ if (queueFull)
+ {
+ // Fastmatch: return if this hit is not competitive
+ for (int i = 0; ; i++)
+ {
+ int c = reverseMul[i] * comparators[i].CompareBottom(doc);
+ if (c < 0)
+ {
+ // Definitely not competitive.
+ return ;
+ }
+ else if (c > 0)
+ {
+ // Definitely competitive.
+ break;
+ }
+ else if (i == comparators.Length - 1)
+ {
+ // Here c=0. If we're at the last comparator, this doc is not
+ // competitive, since docs are visited in doc Id order, which means
+ // this doc cannot compete with any other document in the queue.
+ return ;
+ }
+ }
+
+ // This hit is competitive - replace bottom element in queue & adjustTop
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].Copy(bottom.slot, doc);
+ }
+
+ // Compute score only if it is competitive.
+ float score = scorer.Score();
+ UpdateBottom(doc, score);
+
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].SetBottom(bottom.slot);
+ }
+ }
+ else
+ {
+ // Startup transient: queue hasn't gathered numHits yet
+ int slot = internalTotalHits - 1;
+ // Copy hit into queue
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].Copy(slot, doc);
+ }
+
+ // Compute score only if it is competitive.
+ float score = scorer.Score();
+ Add(slot, doc, score);
+ if (queueFull)
+ {
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].SetBottom(bottom.slot);
+ }
+ }
+ }
+ }
+
+ public override void SetScorer(Scorer scorer)
+ {
+ this.scorer = scorer;
+ base.SetScorer(scorer);
+ }
+ }
+
+ /*
+ * Implements a TopFieldCollector over multiple SortField criteria, while
+ * tracking document scores but no maxScore, and assumes out-of-order doc Id
+ * collection.
+ */
+ private sealed class OutOfOrderMultiComparatorScoringNoMaxScoreCollector:MultiComparatorScoringNoMaxScoreCollector
+ {
+
+ public OutOfOrderMultiComparatorScoringNoMaxScoreCollector(FieldValueHitQueue queue, int numHits, bool fillFields):base(queue, numHits, fillFields)
+ {
+ }
+
+ public override void Collect(int doc)
+ {
+ ++internalTotalHits;
+ if (queueFull)
+ {
+ // Fastmatch: return if this hit is not competitive
+ for (int i = 0; ; i++)
+ {
+ int c = reverseMul[i] * comparators[i].CompareBottom(doc);
+ if (c < 0)
+ {
+ // Definitely not competitive.
+ return ;
+ }
+ else if (c > 0)
+ {
+ // Definitely competitive.
+ break;
+ }
+ else if (i == comparators.Length - 1)
+ {
+ // This is the equals case.
+ if (doc + docBase > bottom.Doc)
+ {
+ // Definitely not competitive
+ return ;
+ }
+ break;
+ }
+ }
+
+ // This hit is competitive - replace bottom element in queue & adjustTop
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].Copy(bottom.slot, doc);
+ }
+
+ // Compute score only if it is competitive.
+ float score = scorer.Score();
+ UpdateBottom(doc, score);
+
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].SetBottom(bottom.slot);
+ }
+ }
+ else
+ {
+ // Startup transient: queue hasn't gathered numHits yet
+ int slot = internalTotalHits - 1;
+ // Copy hit into queue
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].Copy(slot, doc);
+ }
+
+ // Compute score only if it is competitive.
+ float score = scorer.Score();
+ Add(slot, doc, score);
+ if (queueFull)
+ {
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].SetBottom(bottom.slot);
+ }
+ }
+ }
+ }
+
+ public override void SetScorer(Scorer scorer)
+ {
+ this.scorer = scorer;
+ base.SetScorer(scorer);
+ }
+
+ public override bool AcceptsDocsOutOfOrder
+ {
+ get { return true; }
+ }
+ }
+
+ private static readonly ScoreDoc[] EMPTY_SCOREDOCS = new ScoreDoc[0];
+
+ private bool fillFields;
+
+ /*
+ * Stores the maximum score value encountered, needed for normalizing. If
+ * document scores are not tracked, this value is initialized to NaN.
+ */
+ internal float maxScore = System.Single.NaN;
+
+ internal int numHits;
+ internal FieldValueHitQueue.Entry bottom = null;
+ internal bool queueFull;
+ internal int docBase;
+
+ // Declaring the constructor private prevents extending this class by anyone
+ // else. Note that the class cannot be sealed since it's extended by the
+ // internal versions. If someone defines a constructor with any other
+ // visibility, then anyone will be able to extend the class, which is not what
+ // we want.
+ private TopFieldCollector(PriorityQueue<Entry> pq, int numHits, bool fillFields)
+ : base(pq)
+ {
+ this.numHits = numHits;
+ this.fillFields = fillFields;
+ }
+
+ /// <summary> Creates a new <see cref="TopFieldCollector" /> from the given
+ /// arguments.
+ ///
+ /// <p/><b>NOTE</b>: The instances returned by this method
+ /// pre-allocate a full array of length
+ /// <c>numHits</c>.
+ ///
+ /// </summary>
+ /// <param name="sort">the sort criteria (SortFields).
+ /// </param>
+ /// <param name="numHits">the number of results to collect.
+ /// </param>
+ /// <param name="fillFields">specifies whether the actual field values should be returned on
+ /// the results (FieldDoc).
+ /// </param>
+ /// <param name="trackDocScores">specifies whether document scores should be tracked and set on the
+ /// results. Note that if set to false, then the results' scores will
+ /// be set to Float.NaN. Setting this to true affects performance, as
+ /// it incurs the score computation on each competitive result.
+ /// Therefore if document scores are not required by the application,
+ /// it is recommended to set it to false.
+ /// </param>
+ /// <param name="trackMaxScore">specifies whether the query's maxScore should be tracked and set
+ /// on the resulting <see cref="TopDocs" />. Note that if set to false,
+ /// <see cref="TopDocs.MaxScore" /> returns Float.NaN. Setting this to
+ /// true affects performance as it incurs the score computation on
+ /// each result. Also, setting this true automatically sets
+ /// <c>trackDocScores</c> to true as well.
+ /// </param>
+ /// <param name="docsScoredInOrder">specifies whether documents are scored in doc Id order or not by
+ /// the given <see cref="Scorer" /> in <see cref="Collector.SetScorer(Scorer)" />.
+ /// </param>
+ /// <returns> a <see cref="TopFieldCollector" /> instance which will sort the results by
+ /// the sort criteria.
+ /// </returns>
+ /// <throws> IOException </throws>
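+ /// <example>
+ /// A minimal sketch (illustrative only; assumes an existing <c>searcher</c>,
+ /// <c>query</c>, and an indexed long field named "date"):
+ /// <code>
+ /// Sort sort = new Sort(new SortField("date", SortField.LONG, true));
+ /// TopFieldCollector collector = TopFieldCollector.Create(sort, 20, true, false, false, true);
+ /// searcher.Search(query, collector);
+ /// TopDocs hits = collector.TopDocs();
+ /// </code>
+ /// </example>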
+ public static TopFieldCollector Create(Sort sort, int numHits, bool fillFields, bool trackDocScores, bool trackMaxScore, bool docsScoredInOrder)
+ {
+ if (sort.fields.Length == 0)
+ {
+ throw new System.ArgumentException("Sort must contain at least one field");
+ }
+
+ FieldValueHitQueue queue = FieldValueHitQueue.Create(sort.fields, numHits);
+ if (queue.GetComparators().Length == 1)
+ {
+ if (docsScoredInOrder)
+ {
+ if (trackMaxScore)
+ {
+ return new OneComparatorScoringMaxScoreCollector(queue, numHits, fillFields);
+ }
+ else if (trackDocScores)
+ {
+ return new OneComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields);
+ }
+ else
+ {
+ return new OneComparatorNonScoringCollector(queue, numHits, fillFields);
+ }
+ }
+ else
+ {
+ if (trackMaxScore)
+ {
+ return new OutOfOrderOneComparatorScoringMaxScoreCollector(queue, numHits, fillFields);
+ }
+ else if (trackDocScores)
+ {
+ return new OutOfOrderOneComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields);
+ }
+ else
+ {
+ return new OutOfOrderOneComparatorNonScoringCollector(queue, numHits, fillFields);
+ }
+ }
+ }
+
+ // multiple comparators.
+ if (docsScoredInOrder)
+ {
+ if (trackMaxScore)
+ {
+ return new MultiComparatorScoringMaxScoreCollector(queue, numHits, fillFields);
+ }
+ else if (trackDocScores)
+ {
+ return new MultiComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields);
+ }
+ else
+ {
+ return new MultiComparatorNonScoringCollector(queue, numHits, fillFields);
+ }
+ }
+ else
+ {
+ if (trackMaxScore)
+ {
+ return new OutOfOrderMultiComparatorScoringMaxScoreCollector(queue, numHits, fillFields);
+ }
+ else if (trackDocScores)
+ {
+ return new OutOfOrderMultiComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields);
+ }
+ else
+ {
+ return new OutOfOrderMultiComparatorNonScoringCollector(queue, numHits, fillFields);
+ }
+ }
+ }
+
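+ // Adds a hit to the priority queue; pq.Add returns the queue's new weakest
+ // (least competitive) entry, which is tracked as 'bottom'. queueFull flips to
+ // true once numHits entries have been collected.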
+ internal void Add(int slot, int doc, float score)
+ {
+ bottom = pq.Add(new Entry(slot, docBase + doc, score));
+ queueFull = internalTotalHits == numHits;
+ }
+
+ /*
+ * Only the following callback methods need to be overridden since
+ * topDocs(int, int) calls them to return the results.
+ */
+
+ protected internal override void PopulateResults(ScoreDoc[] results, int howMany)
+ {
+ if (fillFields)
+ {
+ // avoid casting if unnecessary.
+ FieldValueHitQueue queue = (FieldValueHitQueue) pq;
+ for (int i = howMany - 1; i >= 0; i--)
+ {
+ results[i] = queue.FillFields(queue.Pop());
+ }
+ }
+ else
+ {
+ for (int i = howMany - 1; i >= 0; i--)
+ {
+ Entry entry = pq.Pop();
+ results[i] = new FieldDoc(entry.Doc, entry.Score);
+ }
+ }
+ }
+
+ public /*protected internal*/ override TopDocs NewTopDocs(ScoreDoc[] results, int start)
+ {
+ if (results == null)
+ {
+ results = EMPTY_SCOREDOCS;
+ // Set maxScore to NaN, in case this is a maxScore tracking collector.
+ maxScore = System.Single.NaN;
+ }
+
+ // If this is a maxScore-tracking collector and there were no results, maxScore was set to NaN above.
+ return new TopFieldDocs(internalTotalHits, results, ((FieldValueHitQueue) pq).GetFields(), maxScore);
+ }
+
+ public override bool AcceptsDocsOutOfOrder
+ {
+ get { return false; }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/TopFieldDocs.cs b/src/core/Search/TopFieldDocs.cs
new file mode 100644
index 0000000..4d8662f
--- /dev/null
+++ b/src/core/Search/TopFieldDocs.cs
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary>
+ /// Represents hits returned by <see cref="Searcher.Search(Query,Filter,int,Sort)" />.
+ /// </summary>
+ [Serializable]
+ public class TopFieldDocs:TopDocs
+ {
+
+ /// <summary>The fields by which the results were sorted. </summary>
+ public SortField[] fields;
+
+ /// <summary>Creates a new <see cref="TopFieldDocs" /> instance.</summary>
+ /// <param name="totalHits"> Total number of hits for the query.
+ /// </param>
+ /// <param name="scoreDocs"> The top hits for the query.
+ /// </param>
+ /// <param name="fields"> The sort criteria used to find the top hits.
+ /// </param>
+ /// <param name="maxScore"> The maximum score encountered.
+ /// </param>
+ public TopFieldDocs(int totalHits, ScoreDoc[] scoreDocs, SortField[] fields, float maxScore):base(totalHits, scoreDocs, maxScore)
+ {
+ this.fields = fields;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/TopScoreDocCollector.cs b/src/core/Search/TopScoreDocCollector.cs
new file mode 100644
index 0000000..e5a1234
--- /dev/null
+++ b/src/core/Search/TopScoreDocCollector.cs
@@ -0,0 +1,177 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using IndexReader = Lucene.Net.Index.IndexReader;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> A <see cref="Collector" /> implementation that collects the top-scoring hits,
+ /// returning them as a <see cref="TopDocs" />. This is used by <see cref="IndexSearcher" /> to
+ /// implement <see cref="TopDocs" />-based search. Hits are sorted by score descending
+ /// and then (when the scores are tied) docID ascending. When you create an
+ /// instance of this collector you should know in advance whether documents are
+ /// going to be collected in doc Id order or not.
+ ///
+ /// <p/><b>NOTE</b>: The values <see cref="float.NaN" /> and
+ /// <see cref="float.NegativeInfinity" /> are not valid scores. This
+ /// collector will not properly collect hits with such
+ /// scores.
+ /// </summary>
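+ /// <example>
+ /// A minimal sketch (illustrative only; assumes an existing <c>searcher</c> and <c>query</c>):
+ /// <code>
+ /// TopScoreDocCollector collector = TopScoreDocCollector.Create(10, true);
+ /// searcher.Search(query, collector);
+ /// TopDocs hits = collector.TopDocs();
+ /// </code>
+ /// </example>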
+ public abstract class TopScoreDocCollector : TopDocsCollector<ScoreDoc>
+ {
+
+ // Assumes docs are scored in order.
+ private class InOrderTopScoreDocCollector:TopScoreDocCollector
+ {
+ internal InOrderTopScoreDocCollector(int numHits):base(numHits)
+ {
+ }
+
+ public override void Collect(int doc)
+ {
+ float score = scorer.Score();
+
+ // This collector cannot handle these scores:
+ System.Diagnostics.Debug.Assert(score != float.NegativeInfinity);
+ System.Diagnostics.Debug.Assert(!float.IsNaN(score));
+
+ internalTotalHits++;
+ if (score <= pqTop.Score)
+ {
+ // Since docs are returned in-order (i.e., increasing doc Id), a document
+ // with equal score to pqTop.score cannot compete since HitQueue favors
+ // documents with lower doc Ids. Therefore reject those docs too.
+ return ;
+ }
+ pqTop.Doc = doc + docBase;
+ pqTop.Score = score;
+ pqTop = pq.UpdateTop();
+ }
+
+ public override bool AcceptsDocsOutOfOrder
+ {
+ get { return false; }
+ }
+ }
+
+ // Assumes docs are scored out of order.
+ private class OutOfOrderTopScoreDocCollector:TopScoreDocCollector
+ {
+ internal OutOfOrderTopScoreDocCollector(int numHits):base(numHits)
+ {
+ }
+
+ public override void Collect(int doc)
+ {
+ float score = scorer.Score();
+
+ // This collector cannot handle NaN
+ System.Diagnostics.Debug.Assert(!float.IsNaN(score));
+
+ internalTotalHits++;
+ doc += docBase;
+ if (score < pqTop.Score || (score == pqTop.Score && doc > pqTop.Doc))
+ {
+ return ;
+ }
+ pqTop.Doc = doc;
+ pqTop.Score = score;
+ pqTop = pq.UpdateTop();
+ }
+
+ public override bool AcceptsDocsOutOfOrder
+ {
+ get { return true; }
+ }
+ }
+
+ /// <summary> Creates a new <see cref="TopScoreDocCollector" /> given the number of hits to
+ /// collect and whether documents are scored in order by the input
+ /// <see cref="Scorer" /> to <see cref="SetScorer(Scorer)" />.
+ ///
+ /// <p/><b>NOTE</b>: The instances returned by this method
+ /// pre-allocate a full array of length
+ /// <c>numHits</c>, and fill the array with sentinel
+ /// objects.
+ /// </summary>
+ public static TopScoreDocCollector Create(int numHits, bool docsScoredInOrder)
+ {
+
+ if (docsScoredInOrder)
+ {
+ return new InOrderTopScoreDocCollector(numHits);
+ }
+ else
+ {
+ return new OutOfOrderTopScoreDocCollector(numHits);
+ }
+ }
+
+ internal ScoreDoc pqTop;
+ internal int docBase = 0;
+ internal Scorer scorer;
+
+ // prevents instantiation
+ private TopScoreDocCollector(int numHits):base(new HitQueue(numHits, true))
+ {
+ // HitQueue implements getSentinelObject to return a ScoreDoc, so we know
+ // that at this point top() is already initialized.
+ pqTop = pq.Top();
+ }
+
+ public /*protected internal*/ override TopDocs NewTopDocs(ScoreDoc[] results, int start)
+ {
+ if (results == null)
+ {
+ return EMPTY_TOPDOCS;
+ }
+
+ // We need to compute maxScore in order to set it in TopDocs. If start == 0,
+ // it means the largest element is already in results, use its score as
+ // maxScore. Otherwise pop everything else, until the largest element is
+ // extracted and use its score as maxScore.
+ float maxScore = System.Single.NaN;
+ if (start == 0)
+ {
+ maxScore = results[0].Score;
+ }
+ else
+ {
+ for (int i = pq.Size(); i > 1; i--)
+ {
+ pq.Pop();
+ }
+ maxScore = pq.Pop().Score;
+ }
+
+ return new TopDocs(internalTotalHits, results, maxScore);
+ }
+
+ public override void SetNextReader(IndexReader reader, int base_Renamed)
+ {
+ docBase = base_Renamed;
+ }
+
+ public override void SetScorer(Scorer scorer)
+ {
+ this.scorer = scorer;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Search/Weight.cs b/src/core/Search/Weight.cs
new file mode 100644
index 0000000..7e274a1
--- /dev/null
+++ b/src/core/Search/Weight.cs
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using IndexReader = Lucene.Net.Index.IndexReader;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> Expert: Calculate query weights and build query scorers.
+ /// <p/>
+ /// The purpose of <see cref="Weight" /> is to ensure searching does not
+ /// modify a <see cref="Query" />, so that a <see cref="Query" /> instance can be reused. <br/>
+ /// <see cref="Searcher" /> dependent state of the query should reside in the
+ /// <see cref="Weight" />. <br/>
+ /// <see cref="IndexReader" /> dependent state should reside in the <see cref="Scorer" />.
+ /// <p/>
+ /// A <c>Weight</c> is used in the following way:
+ /// <list type="bullet">
+ /// <item>A <c>Weight</c> is constructed by a top-level query, given a
+ /// <c>Searcher</c> (<see cref="Lucene.Net.Search.Query.CreateWeight(Searcher)" />).</item>
+ /// <item>The <see cref="GetSumOfSquaredWeights()" /> method is called on the
+ /// <c>Weight</c> to compute the query normalization factor
+ /// <see cref="Similarity.QueryNorm(float)" /> of the query clauses contained in the
+ /// query.</item>
+ /// <item>The query normalization factor is passed to <see cref="Normalize(float)" />. At
+ /// this point the weighting is complete.</item>
+ /// <item>A <c>Scorer</c> is constructed by <see cref="Scorer(IndexReader,bool,bool)" />.</item>
+ /// </list>
+ ///
+ /// </summary>
+ /// <since> 2.9
+ /// </since>
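+ /// <example>
+ /// A rough sketch of the lifecycle above (illustrative only; assumes an existing
+ /// <c>searcher</c>, <c>query</c>, and sub-<c>reader</c>):
+ /// <code>
+ /// Weight weight = query.CreateWeight(searcher);
+ /// float sum = weight.GetSumOfSquaredWeights();
+ /// weight.Normalize(searcher.Similarity.QueryNorm(sum));
+ /// Scorer scorer = weight.Scorer(reader, true, false);
+ /// </code>
+ /// </example>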
+ [Serializable]
+ public abstract class Weight
+ {
+
+ /// <summary> An explanation of the score computation for the named document.
+ ///
+ /// </summary>
+ /// <param name="reader">sub-reader containing the give doc
+ /// </param>
+ /// <param name="doc">
+ /// </param>
+ /// <returns> an Explanation for the score
+ /// </returns>
+ /// <throws> IOException </throws>
+ public abstract Explanation Explain(IndexReader reader, int doc);
+
+ /// <summary>The query that this concerns. </summary>
+ public abstract Query Query { get; }
+
+ /// <summary>The weight for this query. </summary>
+ public abstract float Value { get; }
+
+ /// <summary>Assigns the query normalization factor to this. </summary>
+ public abstract void Normalize(float norm);
+
+ /// <summary> Returns a <see cref="Scorer" /> which scores documents in/out-of order according
+ /// to <c>scoreDocsInOrder</c>.
+ /// <p/>
+ /// <b>NOTE:</b> even if <c>scoreDocsInOrder</c> is false, it is
+ /// recommended to check whether the returned <c>Scorer</c> indeed scores
+ /// documents out of order (i.e., call <see cref="GetScoresDocsOutOfOrder()" />), as
+ /// some <c>Scorer</c> implementations will always return documents
+ /// in-order.<br/>
+ /// <b>NOTE:</b> null can be returned if no documents will be scored by this
+ /// query.
+ ///
+ /// </summary>
+ /// <param name="reader">
+ /// the <see cref="IndexReader" /> for which to return the <see cref="Lucene.Net.Search.Scorer" />.
+ /// </param>
+ /// <param name="scoreDocsInOrder">specifies whether in-order scoring of documents is required. Note
+ /// that if set to false (i.e., out-of-order scoring is required),
+ /// this method can return whatever scoring mode it supports, as every
+ /// in-order scorer is also an out-of-order one. However, an
+ /// out-of-order scorer may not support <see cref="DocIdSetIterator.NextDoc" />
+ /// and/or <see cref="DocIdSetIterator.Advance(int)" />, therefore it is recommended to
+ /// request an in-order scorer if use of these methods is required.
+ /// </param>
+ /// <param name="topScorer">
+ /// if true, <see cref="Lucene.Net.Search.Scorer.Score(Lucene.Net.Search.Collector)" /> will be called; if false,
+ /// <see cref="DocIdSetIterator.NextDoc" /> and/or <see cref="DocIdSetIterator.Advance(int)" /> will
+ /// be called.
+ /// </param>
+ /// <returns> a <see cref="Scorer" /> which scores documents in/out-of order.
+ /// </returns>
+ /// <throws> IOException </throws>
+ public abstract Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer);
+
+ /// <summary>The sum of squared weights of contained query clauses. </summary>
+ [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
+ public abstract float GetSumOfSquaredWeights();
+
+ /// <summary> Returns true iff this implementation scores docs only out of order. This
+ /// method is used in conjunction with <see cref="Collector" />'s
+ /// <see cref="Collector.AcceptsDocsOutOfOrder()">AcceptsDocsOutOfOrder</see> and
+ /// <see cref="Scorer(Lucene.Net.Index.IndexReader, bool, bool)" /> to
+ /// create a matching <see cref="Scorer" /> instance for a given <see cref="Collector" />, or
+ /// vice versa.
+ /// <p/>
+ /// <b>NOTE:</b> the default implementation returns <c>false</c>, i.e.
+ /// the <c>Scorer</c> scores documents in-order.
+ /// </summary>
+ [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
+ public virtual bool GetScoresDocsOutOfOrder()
+ {
+ return false;
+ }
+ }
+} \ No newline at end of file
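For illustration, the Weight lifecycle documented in the summary above might be driven like this from application code (a hypothetical sketch, not part of the commit itself; `searcher` and `reader` are assumed to be an existing Searcher and IndexReader):

    // 1. A Weight is constructed by the top-level query for a given Searcher.
    Query query = new TermQuery(new Term("contents", "lucene"));
    Weight weight = query.CreateWeight(searcher);
    // 2. The sum of squared weights feeds the query normalization factor.
    float sum = weight.GetSumOfSquaredWeights();
    float norm = searcher.Similarity.QueryNorm(sum);
    // 3. Normalization completes the weighting.
    weight.Normalize(norm);
    // 4. A Scorer is constructed for a (sub-)reader.
    Scorer scorer = weight.Scorer(reader, true, false);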
diff --git a/src/core/Search/WildcardQuery.cs b/src/core/Search/WildcardQuery.cs
new file mode 100644
index 0000000..fe02427
--- /dev/null
+++ b/src/core/Search/WildcardQuery.cs
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using IndexReader = Lucene.Net.Index.IndexReader;
+using Term = Lucene.Net.Index.Term;
+using ToStringUtils = Lucene.Net.Util.ToStringUtils;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary>Implements the wildcard search query. Supported wildcards are <c>*</c>, which
+ /// matches any character sequence (including the empty one), and <c>?</c>,
+ /// which matches any single character. Note this query can be slow, as it
+ /// needs to iterate over many terms. In order to prevent extremely slow WildcardQueries,
+ /// a Wildcard term should not start with one of the wildcards <c>*</c> or
+ /// <c>?</c>.
+ ///
+ /// <p/>This query uses the <see cref="MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT" />
+ /// rewrite method.
+ ///
+ /// </summary>
+ /// <seealso cref="WildcardTermEnum">
+ /// </seealso>
+ [Serializable]
+ public class WildcardQuery : MultiTermQuery
+ {
+ private readonly bool _termContainsWildcard;
+ private readonly bool _termIsPrefix;
+ protected internal Term internalTerm;
+
+ public WildcardQuery(Term term)
+ {
+ this.internalTerm = term;
+ string text = term.Text;
+ _termContainsWildcard = (term.Text.IndexOf('*') != -1)
+ || (term.Text.IndexOf('?') != -1);
+ _termIsPrefix = _termContainsWildcard
+ && (text.IndexOf('?') == -1)
+ && (text.IndexOf('*') == text.Length - 1);
+ }
+
+ protected internal override FilteredTermEnum GetEnum(IndexReader reader)
+ {
+ if (_termContainsWildcard)
+ {
+ return new WildcardTermEnum(reader, Term);
+ }
+ else
+ {
+ return new SingleTermEnum(reader, Term);
+ }
+ }
+
+ /// <summary> Returns the pattern term.</summary>
+ public Term Term
+ {
+ get { return internalTerm; }
+ }
+
+ public override Query Rewrite(IndexReader reader)
+ {
+ if (_termIsPrefix)
+ {
+ MultiTermQuery rewritten =
+ new PrefixQuery(internalTerm.CreateTerm(internalTerm.Text.Substring(0, internalTerm.Text.IndexOf('*'))));
+ rewritten.Boost = Boost;
+ rewritten.RewriteMethod = RewriteMethod;
+ return rewritten;
+ }
+ else
+ {
+ return base.Rewrite(reader);
+ }
+ }
+
+ /// <summary>Prints a user-readable version of this query. </summary>
+ public override System.String ToString(System.String field)
+ {
+ System.Text.StringBuilder buffer = new System.Text.StringBuilder();
+ if (!internalTerm.Field.Equals(field))
+ {
+ buffer.Append(internalTerm.Field);
+ buffer.Append(":");
+ }
+ buffer.Append(internalTerm.Text);
+ buffer.Append(ToStringUtils.Boost(Boost));
+ return buffer.ToString();
+ }
+
+ //@Override
+ public override int GetHashCode()
+ {
+ int prime = 31;
+ int result = base.GetHashCode();
+ result = prime * result + ((internalTerm == null)?0:internalTerm.GetHashCode());
+ return result;
+ }
+
+ //@Override
+ public override bool Equals(System.Object obj)
+ {
+ if (this == obj)
+ return true;
+ if (!base.Equals(obj))
+ return false;
+ if (GetType() != obj.GetType())
+ return false;
+ WildcardQuery other = (WildcardQuery) obj;
+ if (internalTerm == null)
+ {
+ if (other.internalTerm != null)
+ return false;
+ }
+ else if (!internalTerm.Equals(other.internalTerm))
+ return false;
+ return true;
+ }
+ }
+} \ No newline at end of file
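A hypothetical usage sketch for the class above (field names and patterns are made up; `searcher` stands for an existing Searcher):

    // A trailing '*' with no '?' is rewritten to a PrefixQuery by Rewrite().
    Query byPrefix = new WildcardQuery(new Term("name", "luc*"));
    // A pattern containing '?' enumerates matching terms through WildcardTermEnum.
    Query bySingleChar = new WildcardQuery(new Term("name", "l?cene"));
    // TopDocs hits = searcher.Search(byPrefix, 10);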
diff --git a/src/core/Search/WildcardTermEnum.cs b/src/core/Search/WildcardTermEnum.cs
new file mode 100644
index 0000000..f2d555f
--- /dev/null
+++ b/src/core/Search/WildcardTermEnum.cs
@@ -0,0 +1,196 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using IndexReader = Lucene.Net.Index.IndexReader;
+using Term = Lucene.Net.Index.Term;
+
+namespace Lucene.Net.Search
+{
+
+ /// <summary> Subclass of FilteredTermEnum for enumerating all terms that match the
+ /// specified wildcard filter term.
+ /// <p/>
+ /// Term enumerations are always ordered by Term.compareTo(). Each term in
+ /// the enumeration is greater than all that precede it.
+ /// </summary>
+ public class WildcardTermEnum : FilteredTermEnum
+ {
+ internal Term searchTerm;
+ internal System.String field;
+ internal System.String text;
+ internal System.String pre;
+ internal int preLen;
+ internal bool endEnum = false;
+
+ /// <summary> Creates a new <c>WildcardTermEnum</c>.
+ /// <p/>
+ /// After calling the constructor the enumeration is already pointing to the first
+ /// valid term if such a term exists.
+ /// </summary>
+ public WildcardTermEnum(IndexReader reader, Term term):base()
+ {
+ searchTerm = term;
+ field = searchTerm.Field;
+ System.String searchTermText = searchTerm.Text;
+
+ int sidx = searchTermText.IndexOf((System.Char) WILDCARD_STRING);
+ int cidx = searchTermText.IndexOf((System.Char) WILDCARD_CHAR);
+ int idx = sidx;
+ if (idx == - 1)
+ {
+ idx = cidx;
+ }
+ else if (cidx >= 0)
+ {
+ idx = System.Math.Min(idx, cidx);
+ }
+ pre = idx != -1 ? searchTerm.Text.Substring(0, idx) : "";
+
+ preLen = pre.Length;
+ text = searchTermText.Substring(preLen);
+ SetEnum(reader.Terms(new Term(searchTerm.Field, pre)));
+ }
+
+ /*protected internal*/ protected internal override bool TermCompare(Term term)
+ {
+ if ((System.Object) field == (System.Object) term.Field)
+ {
+ System.String searchText = term.Text;
+ if (searchText.StartsWith(pre))
+ {
+ return WildcardEquals(text, 0, searchText, preLen);
+ }
+ }
+ endEnum = true;
+ return false;
+ }
+
+ public override float Difference()
+ {
+ return 1.0f;
+ }
+
+ public override bool EndEnum()
+ {
+ return endEnum;
+ }
+
+ /// <summary> String equality with support for wildcards.</summary>
+
+ public const char WILDCARD_STRING = '*';
+ public const char WILDCARD_CHAR = '?';
+
+ /// <summary> Determines if a word matches a wildcard pattern.
+ /// <small>Work released by Granta Design Ltd after originally being done on
+ /// company time.</small>
+ /// </summary>
+ public static bool WildcardEquals(System.String pattern, int patternIdx, System.String string_Renamed, int stringIdx)
+ {
+ int p = patternIdx;
+
+ for (int s = stringIdx; ; ++p, ++s)
+ {
+ // End of string yet?
+ bool sEnd = (s >= string_Renamed.Length);
+ // End of pattern yet?
+ bool pEnd = (p >= pattern.Length);
+
+ // If we're looking at the end of the string...
+ if (sEnd)
+ {
+ // Assume the only thing left on the pattern is/are wildcards
+ bool justWildcardsLeft = true;
+
+ // Current wildcard position
+ int wildcardSearchPos = p;
+ // While we haven't found the end of the pattern,
+ // and haven't encountered any non-wildcard characters
+ while (wildcardSearchPos < pattern.Length && justWildcardsLeft)
+ {
+ // Check the character at the current position
+ char wildchar = pattern[wildcardSearchPos];
+
+ // If it's not a wildcard character, then there is more
+ // pattern information after this/these wildcards.
+ if (wildchar != WILDCARD_CHAR && wildchar != WILDCARD_STRING)
+ {
+ justWildcardsLeft = false;
+ }
+ else
+ {
+ // to prevent "cat" matches "ca??"
+ if (wildchar == WILDCARD_CHAR)
+ {
+ return false;
+ }
+
+ // Look at the next character
+ wildcardSearchPos++;
+ }
+ }
+
+ // This was a prefix wildcard search, and we've matched, so
+ // return true.
+ if (justWildcardsLeft)
+ {
+ return true;
+ }
+ }
+
+ // If we've gone past the end of the string, or the pattern,
+ // return false.
+ if (sEnd || pEnd)
+ {
+ break;
+ }
+
+ // Match a single character, so continue.
+ if (pattern[p] == WILDCARD_CHAR)
+ {
+ continue;
+ }
+
+ // Match any character sequence: skip consecutive '*' and try the rest of
+ // the pattern against every remaining suffix of the string.
+ if (pattern[p] == WILDCARD_STRING)
+ {
+ // Look at the character beyond the '*' characters.
+ while (p < pattern.Length && pattern[p] == WILDCARD_STRING)
+ ++p;
+ // Examine the string, starting at the last character.
+ for (int i = string_Renamed.Length; i >= s; --i)
+ {
+ if (WildcardEquals(pattern, p, string_Renamed, i))
+ {
+ return true;
+ }
+ }
+ break;
+ }
+ if (pattern[p] != string_Renamed[s])
+ {
+ break;
+ }
+ }
+ return false;
+ }
+ }
+} \ No newline at end of file
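Illustrative calls to the matcher above (a sketch only; the expected results follow from the algorithm as written):

    bool a = WildcardTermEnum.WildcardEquals("lu*ne", 0, "lucene", 0);  // true: '*' spans "ce"
    bool b = WildcardTermEnum.WildcardEquals("l?cene", 0, "lucene", 0); // true: '?' matches 'u'
    bool c = WildcardTermEnum.WildcardEquals("ca??", 0, "cat", 0);      // false: each '?' must consume a character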
diff --git a/src/core/Store/AlreadyClosedException.cs b/src/core/Store/AlreadyClosedException.cs
new file mode 100644
index 0000000..15188bf
--- /dev/null
+++ b/src/core/Store/AlreadyClosedException.cs
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Runtime.Serialization;
+
+namespace Lucene.Net.Store
+{
+ /// <summary> This exception is thrown when there is an attempt to
+ /// access something that has already been closed.
+ /// </summary>
+ [Serializable]
+ public class AlreadyClosedException : SystemException
+ {
+ public AlreadyClosedException()
+ {
+ }
+
+ public AlreadyClosedException(string message) : base(message)
+ {
+ }
+
+ public AlreadyClosedException(string message, Exception inner) : base(message, inner)
+ {
+ }
+
+ protected AlreadyClosedException(
+ SerializationInfo info,
+ StreamingContext context) : base(info, context)
+ {
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Store/BufferedIndexInput.cs b/src/core/Store/BufferedIndexInput.cs
new file mode 100644
index 0000000..55c8fdc
--- /dev/null
+++ b/src/core/Store/BufferedIndexInput.cs
@@ -0,0 +1,241 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Store
+{
+
+ /// <summary>Base implementation class for buffered <see cref="IndexInput" />. </summary>
+ public abstract class BufferedIndexInput : IndexInput, System.ICloneable
+ {
+
+ /// <summary>Default buffer size </summary>
+ public const int BUFFER_SIZE = 1024;
+
+ private int _bufferSize = BUFFER_SIZE;
+
+ protected internal byte[] buffer;
+
+ private long bufferStart = 0; // position in file of buffer
+ private int bufferLength = 0; // end of valid bytes
+ private int bufferPosition = 0; // next byte to read
+
+ public override byte ReadByte()
+ {
+ if (bufferPosition >= bufferLength)
+ Refill();
+ return buffer[bufferPosition++];
+ }
+
+ protected BufferedIndexInput()
+ {
+ }
+
+ /// <summary>Inits BufferedIndexInput with a specific bufferSize </summary>
+ protected BufferedIndexInput(int bufferSize)
+ {
+ CheckBufferSize(bufferSize);
+ this._bufferSize = bufferSize;
+ }
+
+ /// <summary>Change the buffer size used by this IndexInput </summary>
+ public virtual void SetBufferSize(int newSize)
+ {
+ System.Diagnostics.Debug.Assert(buffer == null || _bufferSize == buffer.Length, "buffer=" + buffer + " bufferSize=" + _bufferSize + " buffer.length=" +(buffer != null ? buffer.Length: 0));
+ if (newSize != _bufferSize)
+ {
+ CheckBufferSize(newSize);
+ _bufferSize = newSize;
+ if (buffer != null)
+ {
+ // Resize the existing buffer and carefully save as
+ // many bytes as possible starting from the current
+ // bufferPosition
+ byte[] newBuffer = new byte[newSize];
+ int leftInBuffer = bufferLength - bufferPosition;
+ int numToCopy;
+ if (leftInBuffer > newSize)
+ numToCopy = newSize;
+ else
+ numToCopy = leftInBuffer;
+ Array.Copy(buffer, bufferPosition, newBuffer, 0, numToCopy);
+ bufferStart += bufferPosition;
+ bufferPosition = 0;
+ bufferLength = numToCopy;
+ NewBuffer(newBuffer);
+ }
+ }
+ }
+
+ protected internal virtual void NewBuffer(byte[] newBuffer)
+ {
+ // Subclasses can do something here
+ buffer = newBuffer;
+ }
+
+ /// <seealso cref="SetBufferSize">
+ /// </seealso>
+ public virtual int BufferSize
+ {
+ get { return _bufferSize; }
+ }
+
+ private void CheckBufferSize(int bufferSize)
+ {
+ if (bufferSize <= 0)
+ throw new System.ArgumentException("bufferSize must be greater than 0 (got " + bufferSize + ")");
+ }
+
+ public override void ReadBytes(byte[] b, int offset, int len)
+ {
+ ReadBytes(b, offset, len, true);
+ }
+
+ public override void ReadBytes(byte[] b, int offset, int len, bool useBuffer)
+ {
+
+ if (len <= (bufferLength - bufferPosition))
+ {
+ // the buffer contains enough data to satisfy this request
+ if (len > 0)
+ // to allow b to be null if len is 0...
+ Array.Copy(buffer, bufferPosition, b, offset, len);
+ bufferPosition += len;
+ }
+ else
+ {
+ // the buffer does not have enough data. First serve all we've got.
+ int available = bufferLength - bufferPosition;
+ if (available > 0)
+ {
+ Array.Copy(buffer, bufferPosition, b, offset, available);
+ offset += available;
+ len -= available;
+ bufferPosition += available;
+ }
+ // and now, read the remaining 'len' bytes:
+ if (useBuffer && len < _bufferSize)
+ {
+ // If the amount left to read is small enough, and
+ // we are allowed to use our buffer, do it in the usual
+ // buffered way: fill the buffer and copy from it:
+ Refill();
+ if (bufferLength < len)
+ {
+ // Throw an exception when refill() could not read len bytes:
+ Array.Copy(buffer, 0, b, offset, bufferLength);
+ throw new System.IO.IOException("read past EOF");
+ }
+ else
+ {
+ Array.Copy(buffer, 0, b, offset, len);
+ bufferPosition = len;
+ }
+ }
+ else
+ {
+ // The amount left to read is larger than the buffer
+ // or we've been asked to not use our buffer -
+ // there's no performance reason not to read it all
+ // at once. Note that unlike the previous code of
+ // this function, there is no need to do a seek
+ // here, because there's no need to reread what we
+ // had in the buffer.
+ long after = bufferStart + bufferPosition + len;
+ if (after > Length())
+ throw new System.IO.IOException("read past EOF");
+ ReadInternal(b, offset, len);
+ bufferStart = after;
+ bufferPosition = 0;
+ bufferLength = 0; // trigger refill() on read
+ }
+ }
+ }
+
+ private void Refill()
+ {
+ long start = bufferStart + bufferPosition;
+ long end = start + _bufferSize;
+ if (end > Length())
+ // don't read past EOF
+ end = Length();
+ int newLength = (int) (end - start);
+ if (newLength <= 0)
+ throw new System.IO.IOException("read past EOF");
+
+ if (buffer == null)
+ {
+ NewBuffer(new byte[_bufferSize]); // allocate buffer lazily
+ SeekInternal(bufferStart);
+ }
+ ReadInternal(buffer, 0, newLength);
+ bufferLength = newLength;
+ bufferStart = start;
+ bufferPosition = 0;
+ }
+
+ /// <summary>Expert: implements buffer refill. Reads bytes from the current position
+ /// in the input.
+ /// </summary>
+ /// <param name="b">the array to read bytes into
+ /// </param>
+ /// <param name="offset">the offset in the array to start storing bytes
+ /// </param>
+ /// <param name="length">the number of bytes to read
+ /// </param>
+ public abstract void ReadInternal(byte[] b, int offset, int length);
+
+ public override long FilePointer
+ {
+ get { return bufferStart + bufferPosition; }
+ }
+
+ public override void Seek(long pos)
+ {
+ if (pos >= bufferStart && pos < (bufferStart + bufferLength))
+ bufferPosition = (int) (pos - bufferStart);
+ // seek within buffer
+ else
+ {
+ bufferStart = pos;
+ bufferPosition = 0;
+ bufferLength = 0; // trigger refill() on read()
+ SeekInternal(pos);
+ }
+ }
+
+ /// <summary>Expert: implements seek. Sets current position in this file, where the
+ /// next <see cref="ReadInternal(byte[],int,int)" /> will occur.
+ /// </summary>
+ /// <seealso cref="ReadInternal(byte[],int,int)">
+ /// </seealso>
+ public abstract void SeekInternal(long pos);
+
+ public override System.Object Clone()
+ {
+ BufferedIndexInput clone = (BufferedIndexInput) base.Clone();
+
+ clone.buffer = null;
+ clone.bufferLength = 0;
+ clone.bufferPosition = 0;
+ clone.bufferStart = FilePointer;
+
+ return clone;
+ }
+ }
+} \ No newline at end of file
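A minimal, hypothetical subclass sketch showing the two abstract hooks declared above (ReadInternal and SeekInternal), assuming Length() and Dispose(bool) are the remaining abstract members inherited from IndexInput; it is not part of the commit itself:

    public class ByteArrayIndexInput : BufferedIndexInput
    {
        private readonly byte[] data;
        private long pos;

        public ByteArrayIndexInput(byte[] data) { this.data = data; }

        // Fill the caller's array from the backing store at the current position.
        public override void ReadInternal(byte[] b, int offset, int length)
        {
            Array.Copy(data, pos, b, offset, length);
            pos += length;
        }

        // Reposition the underlying source; the base class manages the buffer.
        public override void SeekInternal(long position) { pos = position; }

        public override long Length() { return data.Length; }

        protected override void Dispose(bool disposing) { }
    }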
diff --git a/src/core/Store/BufferedIndexOutput.cs b/src/core/Store/BufferedIndexOutput.cs
new file mode 100644
index 0000000..af805a1
--- /dev/null
+++ b/src/core/Store/BufferedIndexOutput.cs
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Store
+{
+
+ /// <summary>Base implementation class for buffered <see cref="IndexOutput" />. </summary>
+ public abstract class BufferedIndexOutput:IndexOutput
+ {
+ internal const int BUFFER_SIZE = 16384;
+
+ private byte[] buffer = new byte[BUFFER_SIZE];
+ private long bufferStart = 0; // position in file of buffer
+ private int bufferPosition = 0; // position in buffer
+
+ private bool isDisposed;
+
+ /// <summary>Writes a single byte.</summary>
+ /// <seealso cref="IndexInput.ReadByte()">
+ /// </seealso>
+ public override void WriteByte(byte b)
+ {
+ if (bufferPosition >= BUFFER_SIZE)
+ Flush();
+ buffer[bufferPosition++] = b;
+ }
+
+ /// <summary>Writes an array of bytes.</summary>
+ /// <param name="b">the bytes to write
+ /// </param>
+ /// <param name="length">the number of bytes to write
+ /// </param>
+ /// <seealso cref="IndexInput.ReadBytes(byte[],int,int)">
+ /// </seealso>
+ public override void WriteBytes(byte[] b, int offset, int length)
+ {
+ int bytesLeft = BUFFER_SIZE - bufferPosition;
+ // is there enough space in the buffer?
+ if (bytesLeft >= length)
+ {
+ // we add the data to the end of the buffer
+ Array.Copy(b, offset, buffer, bufferPosition, length);
+ bufferPosition += length;
+ // if the buffer is full, flush it
+ if (BUFFER_SIZE - bufferPosition == 0)
+ Flush();
+ }
+ else
+ {
+ // is the data larger than the buffer?
+ if (length > BUFFER_SIZE)
+ {
+ // we flush the buffer
+ if (bufferPosition > 0)
+ Flush();
+ // and write data at once
+ FlushBuffer(b, offset, length);
+ bufferStart += length;
+ }
+ else
+ {
+ // we fill/flush the buffer (until the input is written)
+ int pos = 0; // position in the input data
+ int pieceLength;
+ while (pos < length)
+ {
+ pieceLength = (length - pos < bytesLeft)?length - pos:bytesLeft;
+ Array.Copy(b, pos + offset, buffer, bufferPosition, pieceLength);
+ pos += pieceLength;
+ bufferPosition += pieceLength;
+ // if the buffer is full, flush it
+ bytesLeft = BUFFER_SIZE - bufferPosition;
+ if (bytesLeft == 0)
+ {
+ Flush();
+ bytesLeft = BUFFER_SIZE;
+ }
+ }
+ }
+ }
+ }
+
+ /// <summary>Forces any buffered output to be written. </summary>
+ public override void Flush()
+ {
+ FlushBuffer(buffer, bufferPosition);
+ bufferStart += bufferPosition;
+ bufferPosition = 0;
+ }
+
+ /// <summary>Expert: implements buffer write. Writes bytes at the current position in
+ /// the output.
+ /// </summary>
+ /// <param name="b">the bytes to write
+ /// </param>
+ /// <param name="len">the number of bytes to write
+ /// </param>
+ private void FlushBuffer(byte[] b, int len)
+ {
+ FlushBuffer(b, 0, len);
+ }
+
+ /// <summary>Expert: implements buffer write. Writes bytes at the current position in
+ /// the output.
+ /// </summary>
+ /// <param name="b">the bytes to write
+ /// </param>
+ /// <param name="offset">the offset in the byte array
+ /// </param>
+ /// <param name="len">the number of bytes to write
+ /// </param>
+ public abstract void FlushBuffer(byte[] b, int offset, int len);
+
+ /// <summary>Closes this stream to further operations. </summary>
+ protected override void Dispose(bool disposing)
+ {
+ if (isDisposed) return;
+
+ if (disposing)
+ {
+ Flush();
+ }
+
+ isDisposed = true;
+ }
+
+ /// <summary>Returns the current position in this file, where the next write will
+ /// occur.
+ /// </summary>
+ /// <seealso cref="Seek(long)">
+ /// </seealso>
+ public override long FilePointer
+ {
+ get { return bufferStart + bufferPosition; }
+ }
+
+ /// <summary>Sets current position in this file, where the next write will occur.</summary>
+ /// <seealso cref="FilePointer">
+ /// </seealso>
+ public override void Seek(long pos)
+ {
+ Flush();
+ bufferStart = pos;
+ }
+
+ /// <summary>The number of bytes in the file. </summary>
+ public abstract override long Length { get; }
+ }
+} \ No newline at end of file
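A hypothetical counterpart sketch for the output side: FlushBuffer is the write hook a concrete subclass must provide, plus the abstract Length property (a real implementation would typically also override Seek to reposition its backing file; this sketch assumes a System.IO.Stream as the backing store):

    public class StreamIndexOutput : BufferedIndexOutput
    {
        private readonly System.IO.Stream stream;

        public StreamIndexOutput(System.IO.Stream stream) { this.stream = stream; }

        // Push the already-buffered bytes through to the backing stream.
        public override void FlushBuffer(byte[] b, int offset, int len)
        {
            stream.Write(b, offset, len);
        }

        public override long Length
        {
            get { return stream.Length; }
        }
    }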
diff --git a/src/core/Store/CheckSumIndexInput.cs b/src/core/Store/CheckSumIndexInput.cs
new file mode 100644
index 0000000..d8cfaf6
--- /dev/null
+++ b/src/core/Store/CheckSumIndexInput.cs
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Store
+{
+
+ /// <summary>Reads bytes from a primary IndexInput, computing a
+ /// checksum as it goes. Note that you cannot use seek().
+ /// </summary>
+ public class ChecksumIndexInput : IndexInput
+ {
+ internal IndexInput main;
+ internal IChecksum digest;
+
+ private bool isDisposed;
+
+ public ChecksumIndexInput(IndexInput main)
+ {
+ this.main = main;
+ digest = new CRC32();
+ }
+
+ public override byte ReadByte()
+ {
+ byte b = main.ReadByte();
+ digest.Update(b);
+ return b;
+ }
+
+ public override void ReadBytes(byte[] b, int offset, int len)
+ {
+ main.ReadBytes(b, offset, len);
+ digest.Update(b, offset, len);
+ }
+
+ public virtual long Checksum
+ {
+ get { return digest.Value; }
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ if (isDisposed) return;
+
+ if (disposing)
+ {
+ if (main != null)
+ {
+ main.Dispose();
+ }
+ }
+
+ main = null;
+ isDisposed = true;
+ }
+
+ public override long FilePointer
+ {
+ get { return main.FilePointer; }
+ }
+
+ public override void Seek(long pos)
+ {
+ throw new System.SystemException("not allowed");
+ }
+
+ public override long Length()
+ {
+ return main.Length();
+ }
+ }
+} \ No newline at end of file
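A hypothetical read-side sketch of how the wrapper above can verify a file whose final eight bytes are a checksum written by ChecksumIndexOutput (`dir` and the file name are assumptions, not part of the commit):

    ChecksumIndexInput input = new ChecksumIndexInput(dir.OpenInput("segments_1"));
    long payloadLength = input.Length() - 8;            // everything except the trailing checksum
    byte[] payload = new byte[payloadLength];
    input.ReadBytes(payload, 0, (int) payloadLength);   // digest is updated as bytes are read
    long computed = input.Checksum;                     // checksum of what has been read so far
    long stored = input.ReadLong();                     // checksum recorded at the end of the file
    bool intact = computed == stored;
    input.Dispose();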
diff --git a/src/core/Store/CheckSumIndexOutput.cs b/src/core/Store/CheckSumIndexOutput.cs
new file mode 100644
index 0000000..9abe54f
--- /dev/null
+++ b/src/core/Store/CheckSumIndexOutput.cs
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Support;
+using CRC32 = Lucene.Net.Support.CRC32;
+
+namespace Lucene.Net.Store
+{
+
+ /// <summary>Writes bytes through to a primary IndexOutput, computing
+ /// checksum. Note that you cannot use seek().
+ /// </summary>
+ public class ChecksumIndexOutput:IndexOutput
+ {
+ internal IndexOutput main;
+ internal IChecksum digest;
+
+ private bool isDisposed;
+
+ public ChecksumIndexOutput(IndexOutput main)
+ {
+ this.main = main;
+ digest = new CRC32();
+ }
+
+ public override void WriteByte(byte b)
+ {
+ digest.Update(b);
+ main.WriteByte(b);
+ }
+
+ public override void WriteBytes(byte[] b, int offset, int length)
+ {
+ digest.Update(b, offset, length);
+ main.WriteBytes(b, offset, length);
+ }
+
+ public virtual long Checksum
+ {
+ get { return digest.Value; }
+ }
+
+ public override void Flush()
+ {
+ main.Flush();
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ if (isDisposed) return;
+
+ if (disposing)
+ {
+ main.Close();
+ }
+
+ isDisposed = true;
+ }
+
+ public override long FilePointer
+ {
+ get { return main.FilePointer; }
+ }
+
+ public override void Seek(long pos)
+ {
+ throw new System.SystemException("not allowed");
+ }
+
+ /// <summary> Starts but does not complete the commit of this file (i.e.
+ /// writing of the final checksum at the end). After this
+ /// is called, you must call <see cref="FinishCommit" /> and then
+ /// <see cref="Dispose" /> to complete the commit.
+ /// </summary>
+ public virtual void PrepareCommit()
+ {
+ long checksum = Checksum;
+ // Intentionally write a mismatched checksum. This is
+ // because we want to 1) test, as best we can, that we
+ // are able to write a long to the file, but 2) not
+ // actually "commit" the file yet. This (prepare
+ // commit) is phase 1 of a two-phase commit.
+ long pos = main.FilePointer;
+ main.WriteLong(checksum - 1);
+ main.Flush();
+ main.Seek(pos);
+ }
+
+ /// <summary>See <see cref="PrepareCommit" /> </summary>
+ public virtual void FinishCommit()
+ {
+ main.WriteLong(Checksum);
+ }
+
+ public override long Length
+ {
+ get { return main.Length; }
+ }
+ }
+} \ No newline at end of file
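A hypothetical sketch of the PrepareCommit/FinishCommit contract documented above (`dir` and the file name are assumptions):

    ChecksumIndexOutput output = new ChecksumIndexOutput(dir.CreateOutput("segments_2"));
    output.WriteInt(42);        // the file's payload; the digest tracks every byte written
    output.PrepareCommit();     // phase 1: write a deliberately mismatched checksum, seek back
    // ... other work belonging to the same transaction would happen here ...
    output.FinishCommit();      // phase 2: overwrite with the real checksum
    output.Dispose();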
diff --git a/src/core/Store/Directory.cs b/src/core/Store/Directory.cs
new file mode 100644
index 0000000..4ec45a2
--- /dev/null
+++ b/src/core/Store/Directory.cs
@@ -0,0 +1,264 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using IndexFileNameFilter = Lucene.Net.Index.IndexFileNameFilter;
+
+namespace Lucene.Net.Store
+{
+
+ /// <summary>A Directory is a flat list of files. Files may be written once, when they
+ /// are created. Once a file is created it may only be opened for read, or
+ /// deleted. Random access is permitted both when reading and writing.
+ ///
+ /// <p/> Java's i/o APIs are not used directly; rather, all i/o is
+ /// done through this API. This permits things such as: <list>
+ /// <item> implementation of RAM-based indices;</item>
+ /// <item> implementation of indices stored in a database, via JDBC;</item>
+ /// <item> implementation of an index as a single file;</item>
+ /// </list>
+ ///
+ /// Directory locking is implemented by an instance of <see cref="LockFactory" />
+ ///, and can be changed for each Directory
+ /// instance using <see cref="SetLockFactory" />.
+ ///
+ /// </summary>
+ [Serializable]
+ public abstract class Directory : System.IDisposable
+ {
+ protected internal volatile bool isOpen = true;
+
+ /// <summary>Holds the LockFactory instance (implements locking for
+ /// this Directory instance).
+ /// </summary>
+ [NonSerialized]
+ protected internal LockFactory interalLockFactory;
+
+ /// <summary>Returns an array of strings, one for each file in the directory.</summary>
+ /// <exception cref="System.IO.IOException"></exception>
+ public abstract System.String[] ListAll();
+
+ /// <summary>Returns true iff a file with the given name exists. </summary>
+ public abstract bool FileExists(System.String name);
+
+ /// <summary>Returns the time the named file was last modified. </summary>
+ public abstract long FileModified(System.String name);
+
+ /// <summary>Set the modified time of an existing file to now. </summary>
+ public abstract void TouchFile(System.String name);
+
+ /// <summary>Removes an existing file in the directory. </summary>
+ public abstract void DeleteFile(System.String name);
+
+ /// <summary>Returns the length of a file in the directory. </summary>
+ public abstract long FileLength(System.String name);
+
+
+ /// <summary>Creates a new, empty file in the directory with the given name.
+ /// Returns a stream writing this file.
+ /// </summary>
+ public abstract IndexOutput CreateOutput(System.String name);
+
+ /// <summary>Ensure that any writes to this file are moved to
+ /// stable storage. Lucene uses this to properly commit
+ /// changes to the index, to prevent a machine/OS crash
+ /// from corrupting the index.
+ /// </summary>
+ public virtual void Sync(System.String name)
+ {
+ }
+
+ /// <summary>Returns a stream reading an existing file. </summary>
+ public abstract IndexInput OpenInput(System.String name);
+
+ /// <summary>Returns a stream reading an existing file, with the
+ /// specified read buffer size. The particular Directory
+ /// implementation may ignore the buffer size. Currently
+ /// the only Directory implementations that respect this
+ /// parameter are <see cref="FSDirectory" /> and <see cref="Lucene.Net.Index.CompoundFileReader" />
+ ///.
+ /// </summary>
+ public virtual IndexInput OpenInput(System.String name, int bufferSize)
+ {
+ return OpenInput(name);
+ }
+
+ /// <summary>Construct a <see cref="Lock" />.</summary>
+ /// <param name="name">the name of the lock file
+ /// </param>
+ public virtual Lock MakeLock(System.String name)
+ {
+ return interalLockFactory.MakeLock(name);
+ }
+ /// <summary> Attempt to clear (forcefully unlock and remove) the
+ /// specified lock. Only call this at a time when you are
+ /// certain this lock is no longer in use.
+ /// </summary>
+ /// <param name="name">name of the lock to be cleared.
+ /// </param>
+ public virtual void ClearLock(System.String name)
+ {
+ if (interalLockFactory != null)
+ {
+ interalLockFactory.ClearLock(name);
+ }
+ }
+
+ [Obsolete("Use Dispose() instead")]
+ public void Close()
+ {
+ Dispose();
+ }
+
+ /// <summary>Closes the store. </summary>
+ public void Dispose()
+ {
+ Dispose(true);
+ }
+
+ protected abstract void Dispose(bool disposing);
+
+ /// <summary> Set the LockFactory that this Directory instance should
+ /// use for its locking implementation. Each instance of
+ /// LockFactory should only be used for one directory (ie,
+ /// do not share a single instance across multiple
+ /// Directories).
+ ///
+ /// </summary>
+ /// <param name="lockFactory">instance of <see cref="LockFactory" />.
+ /// </param>
+ public virtual void SetLockFactory(LockFactory lockFactory)
+ {
+ System.Diagnostics.Debug.Assert(lockFactory != null);
+ this.interalLockFactory = lockFactory;
+ lockFactory.LockPrefix = this.GetLockId();
+ }
+
+ /// <summary> Get the LockFactory that this Directory instance is
+ /// using for its locking implementation. Note that this
+ /// may be null for Directory implementations that provide
+ /// their own locking implementation.
+ /// </summary>
+ public virtual LockFactory LockFactory
+ {
+ get { return this.interalLockFactory; }
+ }
+
+ /// <summary> Return a string identifier that uniquely differentiates
+ /// this Directory instance from other Directory instances.
+ /// This ID should be the same if two Directory instances
+ /// (even in different JVMs and/or on different machines)
+ /// are considered "the same index". This is how locking
+ /// "scopes" to the right index.
+ /// </summary>
+ [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
+ public virtual string GetLockId()
+ {
+ return ToString();
+ }
+
+ public override string ToString()
+ {
+ return base.ToString() + " lockFactory=" + LockFactory;
+ }
+
+ /// <summary> Copy contents of a directory src to a directory dest.
+ /// If a file in src already exists in dest then the
+ /// one in dest will be blindly overwritten.
+ ///
+ /// <p/><b>NOTE:</b> the source directory cannot change
+ /// while this method is running. Otherwise the results
+ /// are undefined and you could easily hit a
+ /// FileNotFoundException.
+ ///
+ /// <p/><b>NOTE:</b> this method only copies files that look
+ /// like index files (ie, have extensions matching the
+ /// known extensions of index files).
+ ///
+ /// </summary>
+ /// <param name="src">source directory
+ /// </param>
+ /// <param name="dest">destination directory
+ /// </param>
+ /// <param name="closeDirSrc">if <c>true</c>, call <see cref="Close()" /> method on source directory
+ /// </param>
+ /// <throws> IOException </throws>
+ public static void Copy(Directory src, Directory dest, bool closeDirSrc)
+ {
+ System.String[] files = src.ListAll();
+
+ IndexFileNameFilter filter = IndexFileNameFilter.Filter;
+
+ byte[] buf = new byte[BufferedIndexOutput.BUFFER_SIZE];
+ for (int i = 0; i < files.Length; i++)
+ {
+
+ if (!filter.Accept(null, files[i]))
+ continue;
+
+ IndexOutput os = null;
+ IndexInput is_Renamed = null;
+ try
+ {
+ // create file in dest directory
+ os = dest.CreateOutput(files[i]);
+ // read current file
+ is_Renamed = src.OpenInput(files[i]);
+ // and copy to dest directory
+ long len = is_Renamed.Length();
+ long readCount = 0;
+ while (readCount < len)
+ {
+ int toRead = readCount + BufferedIndexOutput.BUFFER_SIZE > len?(int) (len - readCount):BufferedIndexOutput.BUFFER_SIZE;
+ is_Renamed.ReadBytes(buf, 0, toRead);
+ os.WriteBytes(buf, toRead);
+ readCount += toRead;
+ }
+ }
+ finally
+ {
+ // graceful cleanup
+ try
+ {
+ if (os != null)
+ os.Close();
+ }
+ finally
+ {
+ if (is_Renamed != null)
+ is_Renamed.Close();
+ }
+ }
+ }
+ if (closeDirSrc)
+ src.Close();
+ }
+
+ /// <throws> AlreadyClosedException if this Directory is closed </throws>
+ public /*protected internal*/ void EnsureOpen()
+ {
+ if (!isOpen)
+ throw new AlreadyClosedException("this Directory is closed");
+ }
+
+ public bool isOpen_ForNUnit
+ {
+ get { return isOpen; }
+ }
+ }
+} \ No newline at end of file
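A hypothetical sketch of the copy and locking helpers above (paths and the lock name are made up; FSDirectory.Open appears later in this commit):

    Directory source = FSDirectory.Open("/tmp/index");
    Directory backup = FSDirectory.Open("/tmp/index-backup");
    Directory.Copy(source, backup, false);      // blind, file-by-file copy of index files

    Lock writeLock = backup.MakeLock("write.lock");
    if (writeLock.Obtain())
    {
        try { /* modify the index */ }
        finally { writeLock.Release(); }
    }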
diff --git a/src/core/Store/FSDirectory.cs b/src/core/Store/FSDirectory.cs
new file mode 100644
index 0000000..04c8c0a
--- /dev/null
+++ b/src/core/Store/FSDirectory.cs
@@ -0,0 +1,533 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+
+// Used only for WRITE_LOCK_NAME in deprecated create=true case:
+using System.IO;
+using Lucene.Net.Support;
+using IndexFileNameFilter = Lucene.Net.Index.IndexFileNameFilter;
+using IndexWriter = Lucene.Net.Index.IndexWriter;
+using Constants = Lucene.Net.Util.Constants;
+
+namespace Lucene.Net.Store
+{
+
+ /// <summary> <a name="subclasses"/>
+ /// Base class for Directory implementations that store index
+ /// files in the file system. There are currently three core
+ /// subclasses:
+ ///
+ /// <list type="bullet">
+ ///
+ /// <item> <see cref="SimpleFSDirectory" /> is a straightforward
+ /// implementation using java.io.RandomAccessFile.
+ /// However, it has poor concurrent performance
+ /// (multiple threads will bottleneck) as it
+ /// synchronizes when multiple threads read from the
+ /// same file.</item>
+ ///
+ /// <item> <see cref="NIOFSDirectory" /> uses java.nio's
+ /// FileChannel's positional io when reading to avoid
+ /// synchronization when reading from the same file.
+ /// Unfortunately, due to a Windows-only <a
+ /// href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6265734">Sun
+ /// JRE bug</a> this is a poor choice for Windows, but
+ /// on all other platforms this is the preferred
+ /// choice. Applications using <see cref="System.Threading.Thread.Interrupt()" /> or
+ /// <c>Future#cancel(boolean)</c> (on Java 1.5) should use
+ /// <see cref="SimpleFSDirectory" /> instead. See <see cref="NIOFSDirectory" /> java doc
+ /// for details.</item>
+ ///
+ /// <item> <see cref="MMapDirectory" /> uses memory-mapped IO when
+ /// reading. This is a good choice if you have plenty
+ /// of virtual memory relative to your index size, eg
+ /// if you are running on a 64 bit JRE, or you are
+ /// running on a 32 bit JRE but your index sizes are
+ /// small enough to fit into the virtual memory space.
+ /// Java has currently the limitation of not being able to
+ /// unmap files from user code. The files are unmapped, when GC
+ /// releases the byte buffers. Due to
+ /// <a href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4724038">
+ /// this bug</a> in Sun's JRE, MMapDirectory's <see cref="IndexInput.Close" />
+ /// is unable to close the underlying OS file handle. Only when
+ /// GC finally collects the underlying objects, which could be
+ /// quite some time later, will the file handle be closed.
+ /// This will consume additional transient disk usage: on Windows,
+ /// attempts to delete or overwrite the files will result in an
+ /// exception; on other platforms, which typically have a &quot;delete on
+ /// last close&quot; semantics, while such operations will succeed, the bytes
+ /// are still consuming space on disk. For many applications this
+ /// limitation is not a problem (e.g. if you have plenty of disk space,
+ /// and you don't rely on overwriting files on Windows) but it's still
+ /// an important limitation to be aware of. This class supplies a
+ /// (possibly dangerous) workaround mentioned in the bug report,
+ /// which may fail on non-Sun JVMs.
+ ///
+ /// Applications using <see cref="System.Threading.Thread.Interrupt()" /> or
+ /// <c>Future#cancel(boolean)</c> (on Java 1.5) should use
+ /// <see cref="SimpleFSDirectory" /> instead. See <see cref="MMapDirectory" />
+ /// java doc for details.</item>
+ /// </list>
+ ///
+ /// Unfortunately, because of system peculiarities, there is
+ /// no single overall best implementation. Therefore, we've
+ /// added the <see cref="Open(System.IO.DirectoryInfo)" /> method, to allow Lucene to choose
+ /// the best FSDirectory implementation given your
+ /// environment, and the known limitations of each
+ /// implementation. For users who have no reason to prefer a
+ /// specific implementation, it's best to simply use <see cref="FSDirectory.Open(System.IO.DirectoryInfo)" />
+ ///. For all others, you should instantiate the
+ /// desired implementation directly.
+ ///
+ /// <p/>The locking implementation is by default <see cref="NativeFSLockFactory" />
+ ///, but can be changed by
+ /// passing in a custom <see cref="LockFactory" /> instance.
+ /// </summary>
+ public abstract class FSDirectory : Directory
+ {
+ private static System.Security.Cryptography.HashAlgorithm DIGESTER;
+
+ static FSDirectory()
+ {
+ try
+ {
+ DIGESTER = Cryptography.HashAlgorithm;
+ }
+ catch (System.Exception e)
+ {
+ throw new System.SystemException(e.ToString(), e);
+ }
+ }
+
+ private bool checked_Renamed;
+
+ internal void CreateDir()
+ {
+ if (!checked_Renamed)
+ {
+ if (!this.internalDirectory.Exists)
+ {
+ try
+ {
+ this.internalDirectory.Create();
+ }
+ catch (Exception)
+ {
+ throw new System.IO.IOException("Cannot create directory: " + internalDirectory);
+ }
+ this.internalDirectory.Refresh(); // need to see the creation
+ }
+
+ checked_Renamed = true;
+ }
+ }
+
+ /// <summary>Initializes the directory to create a new file with the given name.
+ /// This method should be used in <see cref="Lucene.Net.Store.Directory.CreateOutput(string)" />.
+ /// </summary>
+ protected internal void InitOutput(System.String name)
+ {
+ EnsureOpen();
+ CreateDir();
+ System.IO.FileInfo file = new System.IO.FileInfo(System.IO.Path.Combine(internalDirectory.FullName, name));
+ if (file.Exists) // delete existing, if any
+ {
+ try
+ {
+ file.Delete();
+ }
+ catch (Exception)
+ {
+ throw new System.IO.IOException("Cannot overwrite: " + file);
+ }
+ }
+ }
+
+ /// <summary>The underlying filesystem directory </summary>
+ protected internal System.IO.DirectoryInfo internalDirectory = null;
+
+ /// <summary>Create a new FSDirectory for the named location (ctor for subclasses).</summary>
+ /// <param name="path">the path of the directory
+ /// </param>
+ /// <param name="lockFactory">the lock factory to use, or null for the default
+ /// (<see cref="NativeFSLockFactory" />);
+ /// </param>
+ /// <throws> IOException </throws>
+ protected internal FSDirectory(System.IO.DirectoryInfo path, LockFactory lockFactory)
+ {
+ // new ctors use always NativeFSLockFactory as default:
+ if (lockFactory == null)
+ {
+ lockFactory = new NativeFSLockFactory();
+ }
+ // Set up lockFactory with cascaded defaults: if an instance was passed in,
+ // use that; else if locks are disabled, use NoLockFactory; else if the
+ // system property Lucene.Net.Store.FSDirectoryLockFactoryClass is set,
+ // instantiate that; else, use SimpleFSLockFactory:
+
+ internalDirectory = path;
+
+ // due to differences in how Java & .NET refer to files, the checks are a bit different
+ if (!internalDirectory.Exists && System.IO.File.Exists(internalDirectory.FullName))
+ {
+ throw new NoSuchDirectoryException("file '" + internalDirectory.FullName + "' exists but is not a directory");
+ }
+ SetLockFactory(lockFactory);
+
+ // for filesystem based LockFactory, delete the lockPrefix, if the locks are placed
+ // in index dir. If no index dir is given, set ourselves
+ if (lockFactory is FSLockFactory)
+ {
+ FSLockFactory lf = (FSLockFactory)lockFactory;
+ System.IO.DirectoryInfo dir = lf.LockDir;
+ // if the lock factory has no lockDir set, use the this directory as lockDir
+ if (dir == null)
+ {
+ lf.LockDir = this.internalDirectory;
+ lf.LockPrefix = null;
+ }
+ else if (dir.FullName.Equals(this.internalDirectory.FullName))
+ {
+ lf.LockPrefix = null;
+ }
+ }
+ }
+
+ /// <summary>Creates an FSDirectory instance, trying to pick the
+ /// best implementation given the current environment.
+ /// The directory returned uses the <see cref="NativeFSLockFactory" />.
+ ///
+ /// <p/>Currently this returns <see cref="SimpleFSDirectory" /> as
+ /// NIOFSDirectory is currently not supported.
+ ///
+ /// <p/><b>NOTE</b>: this method may suddenly change which
+ /// implementation is returned from release to release, in
+ /// the event that higher performance defaults become
+ /// possible; if the precise implementation is important to
+ /// your application, please instantiate it directly,
+ /// instead. On 64 bit systems, it may also be good to
+ /// return <see cref="MMapDirectory" />, but this is disabled
+ /// because of officially missing unmap support in Java.
+ /// For optimal performance you should consider using
+ /// this implementation on 64 bit JVMs.
+ ///
+ /// <p/>See <a href="#subclasses">above</a>
+ /// </summary>
+ public static FSDirectory Open(string path)
+ {
+ return Open(new DirectoryInfo(path), null);
+ }
+
+ /// <summary>Creates an FSDirectory instance, trying to pick the
+ /// best implementation given the current environment.
+ /// The directory returned uses the <see cref="NativeFSLockFactory" />.
+ ///
+ /// <p/>Currently this returns <see cref="SimpleFSDirectory" /> as
+ /// NIOFSDirectory is currently not supported.
+ ///
+ /// <p/><b>NOTE</b>: this method may suddenly change which
+ /// implementation is returned from release to release, in
+ /// the event that higher performance defaults become
+ /// possible; if the precise implementation is important to
+ /// your application, please instantiate it directly,
+ /// instead. On 64 bit systems, it may also be good to
+ /// return <see cref="MMapDirectory" />, but this is disabled
+ /// because of officially missing unmap support in Java.
+ /// For optimal performance you should consider using
+ /// this implementation on 64 bit JVMs.
+ ///
+ /// <p/>See <a href="#subclasses">above</a>
+ /// </summary>
+ public static FSDirectory Open(System.IO.DirectoryInfo path)
+ {
+ return Open(path, null);
+ }
+
+ /// <summary>Just like <see cref="Open(System.IO.DirectoryInfo)" />, but allows you to
+ /// also specify a custom <see cref="LockFactory" />.
+ /// </summary>
+ public static FSDirectory Open(System.IO.DirectoryInfo path, LockFactory lockFactory)
+ {
+ /* For testing:
+ MMapDirectory dir=new MMapDirectory(path, lockFactory);
+ dir.setUseUnmap(true);
+ return dir;
+ */
+
+ if (Constants.WINDOWS)
+ {
+ return new SimpleFSDirectory(path, lockFactory);
+ }
+ else
+ {
+ //NIOFSDirectory is not implemented in Lucene.Net
+ //return new NIOFSDirectory(path, lockFactory);
+ return new SimpleFSDirectory(path, lockFactory);
+ }
+ }
+
+ /// <summary>Lists all files (not subdirectories) in the
+ /// directory. This method never returns null (throws
+ /// <see cref="System.IO.IOException" /> instead).
+ ///
+ /// </summary>
+ /// <throws> NoSuchDirectoryException if the directory
+ /// does not exist, or does exist but is not a
+ /// directory. </throws>
+ /// <throws> IOException if list() returns null </throws>
+ public static System.String[] ListAll(System.IO.DirectoryInfo dir)
+ {
+ if (!dir.Exists)
+ {
+ throw new NoSuchDirectoryException("directory '" + dir.FullName + "' does not exist");
+ }
+ else if (System.IO.File.Exists(dir.FullName))
+ {
+ throw new NoSuchDirectoryException("File '" + dir.FullName + "' does not exist");
+ }
+
+ // Exclude subdirs, only the file names, not the paths
+ System.IO.FileInfo[] files = dir.GetFiles();
+ System.String[] result = new System.String[files.Length];
+ for (int i = 0; i < files.Length; i++)
+ {
+ result[i] = files[i].Name;
+ }
+
+ // no reason to return null, if the directory cannot be listed, an exception
+ // will be thrown on the above call to dir.GetFiles()
+ // use of LINQ to create the return value array may be a bit more efficient
+
+ return result;
+ }
+
+ /// <summary>Lists all files (not subdirectories) in the
+ /// directory.
+ /// </summary>
+ /// <seealso cref="ListAll(System.IO.DirectoryInfo)">
+ /// </seealso>
+ public override System.String[] ListAll()
+ {
+ EnsureOpen();
+ return ListAll(internalDirectory);
+ }
+
+ /// <summary>Returns true iff a file with the given name exists. </summary>
+ public override bool FileExists(System.String name)
+ {
+ EnsureOpen();
+ System.IO.FileInfo file = new System.IO.FileInfo(System.IO.Path.Combine(internalDirectory.FullName, name));
+ return file.Exists;
+ }
+
+ /// <summary>Returns the time the named file was last modified. </summary>
+ public override long FileModified(System.String name)
+ {
+ EnsureOpen();
+ System.IO.FileInfo file = new System.IO.FileInfo(System.IO.Path.Combine(internalDirectory.FullName, name));
+ return (long)file.LastWriteTime.ToUniversalTime().Subtract(new DateTime(1970, 1, 1, 0, 0, 0)).TotalMilliseconds; //{{LUCENENET-353}}
+ }
+
+ /// <summary>Returns the time the named file was last modified. </summary>
+ public static long FileModified(System.IO.FileInfo directory, System.String name)
+ {
+ System.IO.FileInfo file = new System.IO.FileInfo(System.IO.Path.Combine(directory.FullName, name));
+ return (long)file.LastWriteTime.ToUniversalTime().Subtract(new DateTime(1970, 1, 1, 0, 0, 0)).TotalMilliseconds; //{{LUCENENET-353}}
+ }
+
+ /// <summary>Set the modified time of an existing file to now. </summary>
+ public override void TouchFile(System.String name)
+ {
+ EnsureOpen();
+ System.IO.FileInfo file = new System.IO.FileInfo(System.IO.Path.Combine(internalDirectory.FullName, name));
+ file.LastWriteTime = System.DateTime.Now;
+ }
+
+ /// <summary>Returns the length in bytes of a file in the directory. </summary>
+ public override long FileLength(System.String name)
+ {
+ EnsureOpen();
+ System.IO.FileInfo file = new System.IO.FileInfo(System.IO.Path.Combine(internalDirectory.FullName, name));
+ return file.Exists ? file.Length : 0;
+ }
+
+ /// <summary>Removes an existing file in the directory. </summary>
+ public override void DeleteFile(System.String name)
+ {
+ EnsureOpen();
+ System.IO.FileInfo file = new System.IO.FileInfo(System.IO.Path.Combine(internalDirectory.FullName, name));
+ try
+ {
+ file.Delete();
+ }
+ catch (Exception)
+ {
+ throw new System.IO.IOException("Cannot delete " + file);
+ }
+ }
+
+ public override void Sync(System.String name)
+ {
+ EnsureOpen();
+ System.IO.FileInfo fullFile = new System.IO.FileInfo(System.IO.Path.Combine(internalDirectory.FullName, name));
+ bool success = false;
+ int retryCount = 0;
+ System.IO.IOException exc = null;
+ while (!success && retryCount < 5)
+ {
+ retryCount++;
+ System.IO.FileStream file = null;
+ try
+ {
+ try
+ {
+ file = new System.IO.FileStream(fullFile.FullName, System.IO.FileMode.OpenOrCreate, System.IO.FileAccess.Write, System.IO.FileShare.ReadWrite);
+ FileSupport.Sync(file);
+ success = true;
+ }
+ finally
+ {
+ if (file != null)
+ file.Close();
+ }
+ }
+ catch (System.IO.IOException ioe)
+ {
+ if (exc == null)
+ exc = ioe;
+
+ // Pause 5 msec
+ System.Threading.Thread.Sleep(5);
+
+ }
+ }
+
+ if (!success && exc != null)
+ // Throw original exception
+ throw exc;
+ }
+
+ // Inherit javadoc
+ public override IndexInput OpenInput(System.String name)
+ {
+ EnsureOpen();
+ return OpenInput(name, BufferedIndexInput.BUFFER_SIZE);
+ }
+
+ /// <summary> So we can do some byte-to-hexchar conversion below</summary>
+ private static readonly char[] HEX_DIGITS = new char[]{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
+
+
+ public override string GetLockId()
+ {
+ EnsureOpen();
+ System.String dirName; // name to be hashed
+ try
+ {
+ dirName = internalDirectory.FullName;
+ }
+ catch (System.IO.IOException e)
+ {
+ throw new System.SystemException(e.ToString(), e);
+ }
+
+ byte[] digest;
+ lock (DIGESTER)
+ {
+ digest = DIGESTER.ComputeHash(System.Text.Encoding.UTF8.GetBytes(dirName));
+ }
+ System.Text.StringBuilder buf = new System.Text.StringBuilder();
+ buf.Append("lucene-");
+ for (int i = 0; i < digest.Length; i++)
+ {
+ int b = digest[i];
+ buf.Append(HEX_DIGITS[(b >> 4) & 0xf]);
+ buf.Append(HEX_DIGITS[b & 0xf]);
+ }
+
+ return buf.ToString();
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ lock (this)
+ {
+ isOpen = false;
+ }
+ }
+
+ // Java Lucene implements GetFile() which returns a FileInfo.
+ // For Lucene.Net, GetDirectory() is more appropriate
+
+ public virtual DirectoryInfo Directory
+ {
+ get
+ {
+ EnsureOpen();
+ return internalDirectory;
+ }
+ }
+
+ /// <summary>For debug output. </summary>
+ public override System.String ToString()
+ {
+ return this.GetType().FullName + "@" + internalDirectory + " lockFactory=" + LockFactory;
+ }
+
+ /// <summary> Default read chunk size. This is a conditional
+ /// default: on 32bit JVMs, it defaults to 100 MB. On
+ /// 64bit JVMs, it's <c>Integer.MAX_VALUE</c>.
+ /// </summary>
+ /// <seealso cref="ReadChunkSize">
+ /// </seealso>
+ public static readonly int DEFAULT_READ_CHUNK_SIZE = Constants.JRE_IS_64BIT ? int.MaxValue: 100 * 1024 * 1024;
+
+ // LUCENE-1566
+ private int chunkSize = DEFAULT_READ_CHUNK_SIZE;
+
+ /// <summary> The maximum number of bytes to read at once from the
+ /// underlying file during <see cref="IndexInput.ReadBytes(byte[],int,int)" />.
+ /// </summary>
+ /// <seealso cref="ReadChunkSize">
+ /// </seealso>
+ public int ReadChunkSize
+ {
+ get
+ {
+ // LUCENE-1566
+ return chunkSize;
+ }
+ set
+ {
+ // LUCENE-1566
+ if (value <= 0)
+ {
+ throw new System.ArgumentException("chunkSize must be positive");
+ }
+ if (!Constants.JRE_IS_64BIT)
+ {
+ this.chunkSize = value;
+ }
+ }
+ }
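+
+ // Illustrative sketch (not from the original commit): the setter only takes effect
+ // on 32-bit runtimes; on 64-bit runtimes the chunk size stays at int.MaxValue.
+ //
+ //   if (!Constants.JRE_IS_64BIT)
+ //       dir.ReadChunkSize = 8 * 1024 * 1024; // hypothetical 8 MB read chunks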
+ }
+} \ No newline at end of file
diff --git a/src/core/Store/FSLockFactory.cs b/src/core/Store/FSLockFactory.cs
new file mode 100644
index 0000000..47e8acf
--- /dev/null
+++ b/src/core/Store/FSLockFactory.cs
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.IO;
+
+namespace Lucene.Net.Store
+{
+
+ /// <summary> Base class for file system based locking implementation.</summary>
+
+ public abstract class FSLockFactory:LockFactory
+ {
+
+ /// <summary> Directory for the lock files.</summary>
+ protected internal System.IO.DirectoryInfo internalLockDir = null;
+
+ /// <summary> Gets the lock directory.
+ /// <para>Subclasses can use the protected setter to set the lock directory.
+ /// The lock directory can only be set
+ /// once, to initialize it. It is used by <see cref="FSDirectory" />
+ /// to set the lock directory to itself.
+ /// Subclasses can also use the setter to set the directory
+ /// in the constructor.
+ /// </para>
+ /// </summary>
+ public virtual DirectoryInfo LockDir
+ {
+ get { return internalLockDir; }
+ protected internal set
+ {
+ if (this.internalLockDir != null)
+ throw new System.SystemException("You can set the lock directory for this factory only once.");
+ this.internalLockDir = value;
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Store/FileSwitchDirectory.cs b/src/core/Store/FileSwitchDirectory.cs
new file mode 100644
index 0000000..e5d1f40
--- /dev/null
+++ b/src/core/Store/FileSwitchDirectory.cs
@@ -0,0 +1,167 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Store
+{
+
+ /// <summary> Expert: A Directory instance that switches files between
+ /// two other Directory instances.
+ /// <p/>Files with the specified extensions are placed in the
+ /// primary directory; others are placed in the secondary
+ /// directory. The provided Set must not change once passed
+ /// to this class, and must allow multiple threads to call
+ /// contains at once.<p/>
+ ///
+ /// <p/><b>NOTE</b>: this API is new and experimental and is
+ /// subject to sudden change in the next release.
+ /// </summary>
+
+ public class FileSwitchDirectory:Directory
+ {
+ private Directory secondaryDir;
+ private Directory primaryDir;
+ private System.Collections.Generic.HashSet<string> primaryExtensions;
+ private bool doClose;
+ private bool isDisposed;
+
+ public FileSwitchDirectory(System.Collections.Generic.HashSet<string> primaryExtensions,
+ Directory primaryDir,
+ Directory secondaryDir,
+ bool doClose)
+ {
+ this.primaryExtensions = primaryExtensions;
+ this.primaryDir = primaryDir;
+ this.secondaryDir = secondaryDir;
+ this.doClose = doClose;
+ this.interalLockFactory = primaryDir.LockFactory;
+ }
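+
+ // Usage sketch (illustrative only; RAMDirectory is assumed to be available elsewhere
+ // in this commit): files whose extensions are in the set go to the primary directory,
+ // everything else goes to the secondary one.
+ //
+ //   var exts = new System.Collections.Generic.HashSet<string> { "frq", "prx" };
+ //   Directory dir = new FileSwitchDirectory(exts, new RAMDirectory(), fsDir, true);
+ //   // fsDir: an already-opened FSDirectory acting as the secondary directory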
+
+ /// <summary>Return the primary directory </summary>
+ public virtual Directory PrimaryDir
+ {
+ get { return primaryDir; }
+ }
+
+ /// <summary>Return the secondary directory </summary>
+ public virtual Directory SecondaryDir
+ {
+ get { return secondaryDir; }
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ if (isDisposed) return;
+
+ if (doClose)
+ {
+ try
+ {
+ if (secondaryDir != null)
+ {
+ secondaryDir.Close();
+ }
+ }
+ finally
+ {
+ if (primaryDir != null)
+ {
+ primaryDir.Close();
+ }
+ }
+ doClose = false;
+ }
+
+ secondaryDir = null;
+ primaryDir = null;
+ isDisposed = true;
+ }
+
+ public override System.String[] ListAll()
+ {
+ var files = new System.Collections.Generic.List<string>();
+ files.AddRange(primaryDir.ListAll());
+ files.AddRange(secondaryDir.ListAll());
+ return files.ToArray();
+ }
+
+ /// <summary>Utility method to return a file's extension. </summary>
+ public static System.String GetExtension(System.String name)
+ {
+ int i = name.LastIndexOf('.');
+ if (i == - 1)
+ {
+ return "";
+ }
+ return name.Substring(i + 1, (name.Length) - (i + 1));
+ }
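+
+ // Examples (illustrative): GetExtension("_1.frq") returns "frq";
+ // GetExtension("segments") returns "" because the name contains no '.'.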
+
+ private Directory GetDirectory(System.String name)
+ {
+ System.String ext = GetExtension(name);
+ if (primaryExtensions.Contains(ext))
+ {
+ return primaryDir;
+ }
+ else
+ {
+ return secondaryDir;
+ }
+ }
+
+ public override bool FileExists(System.String name)
+ {
+ return GetDirectory(name).FileExists(name);
+ }
+
+ public override long FileModified(System.String name)
+ {
+ return GetDirectory(name).FileModified(name);
+ }
+
+ public override void TouchFile(System.String name)
+ {
+ GetDirectory(name).TouchFile(name);
+ }
+
+ public override void DeleteFile(System.String name)
+ {
+ GetDirectory(name).DeleteFile(name);
+ }
+
+ public override long FileLength(System.String name)
+ {
+ return GetDirectory(name).FileLength(name);
+ }
+
+ public override IndexOutput CreateOutput(System.String name)
+ {
+ return GetDirectory(name).CreateOutput(name);
+ }
+
+ public override void Sync(System.String name)
+ {
+ GetDirectory(name).Sync(name);
+ }
+
+ public override IndexInput OpenInput(System.String name)
+ {
+ return GetDirectory(name).OpenInput(name);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Store/IndexInput.cs b/src/core/Store/IndexInput.cs
new file mode 100644
index 0000000..b7012ce
--- /dev/null
+++ b/src/core/Store/IndexInput.cs
@@ -0,0 +1,290 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Store
+{
+
+ /// <summary>Abstract base class for input from a file in a <see cref="Directory" />. A
+ /// random-access input stream. Used for all Lucene index input operations.
+ /// </summary>
+ /// <seealso cref="Directory">
+ /// </seealso>
+ public abstract class IndexInput : System.ICloneable, IDisposable
+ {
+ private bool preUTF8Strings; // true if we are reading old (modified UTF8) string format
+
+ /// <summary>Reads and returns a single byte.</summary>
+ /// <seealso cref="IndexOutput.WriteByte(byte)">
+ /// </seealso>
+ public abstract byte ReadByte();
+
+ /// <summary>Reads a specified number of bytes into an array at the specified offset.</summary>
+ /// <param name="b">the array to read bytes into
+ /// </param>
+ /// <param name="offset">the offset in the array to start storing bytes
+ /// </param>
+ /// <param name="len">the number of bytes to read
+ /// </param>
+ /// <seealso cref="IndexOutput.WriteBytes(byte[],int)">
+ /// </seealso>
+ public abstract void ReadBytes(byte[] b, int offset, int len);
+
+ /// <summary>Reads a specified number of bytes into an array at the
+ /// specified offset with control over whether the read
+ /// should be buffered (callers who have their own buffer
+ /// should pass in "false" for useBuffer). Currently only
+ /// <see cref="BufferedIndexInput" /> respects this parameter.
+ /// </summary>
+ /// <param name="b">the array to read bytes into
+ /// </param>
+ /// <param name="offset">the offset in the array to start storing bytes
+ /// </param>
+ /// <param name="len">the number of bytes to read
+ /// </param>
+ /// <param name="useBuffer">set to false if the caller will handle
+ /// buffering.
+ /// </param>
+ /// <seealso cref="IndexOutput.WriteBytes(byte[],int)">
+ /// </seealso>
+ public virtual void ReadBytes(byte[] b, int offset, int len, bool useBuffer)
+ {
+ // Default to ignoring useBuffer entirely
+ ReadBytes(b, offset, len);
+ }
+
+ /// <summary>Reads four bytes and returns an int.</summary>
+ /// <seealso cref="IndexOutput.WriteInt(int)">
+ /// </seealso>
+ public virtual int ReadInt()
+ {
+ return ((ReadByte() & 0xFF) << 24) | ((ReadByte() & 0xFF) << 16) | ((ReadByte() & 0xFF) << 8) | (ReadByte() & 0xFF);
+ }
+
+ /// <summary>Reads an int stored in variable-length format. Reads between one and
+ /// five bytes. Smaller values take fewer bytes. Negative numbers are not
+ /// supported.
+ /// </summary>
+ /// <seealso cref="IndexOutput.WriteVInt(int)">
+ /// </seealso>
+ public virtual int ReadVInt()
+ {
+ byte b = ReadByte();
+ int i = b & 0x7F;
+ for (int shift = 7; (b & 0x80) != 0; shift += 7)
+ {
+ b = ReadByte();
+ i |= (b & 0x7F) << shift;
+ }
+ return i;
+ }
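+
+ // Worked example (illustrative, not part of the original source): the value 300 is
+ // stored low 7 bits first, with the high bit of each byte marking continuation:
+ // bytes 0xAC, 0x02. ReadVInt() returns (0xAC & 0x7F) | ((0x02 & 0x7F) << 7) == 44 + 256 == 300.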
+
+ /// <summary>Reads eight bytes and returns a long.</summary>
+ /// <seealso cref="IndexOutput.WriteLong(long)">
+ /// </seealso>
+ public virtual long ReadLong()
+ {
+ return (((long) ReadInt()) << 32) | (ReadInt() & 0xFFFFFFFFL);
+ }
+
+ /// <summary>Reads a long stored in variable-length format. Reads between one and
+ /// nine bytes. Smaller values take fewer bytes. Negative numbers are not
+ /// supported.
+ /// </summary>
+ public virtual long ReadVLong()
+ {
+ byte b = ReadByte();
+ long i = b & 0x7F;
+ for (int shift = 7; (b & 0x80) != 0; shift += 7)
+ {
+ b = ReadByte();
+ i |= (b & 0x7FL) << shift;
+ }
+ return i;
+ }
+
+ /// <summary>Call this if readString should read characters stored
+ /// in the old modified UTF8 format (length in java chars
+ /// and java's modified UTF8 encoding). This is used for
+ /// indices written pre-2.4. See LUCENE-510 for details.
+ /// </summary>
+ public virtual void SetModifiedUTF8StringsMode()
+ {
+ preUTF8Strings = true;
+ }
+
+ /// <summary>Reads a string.</summary>
+ /// <seealso cref="IndexOutput.WriteString(String)">
+ /// </seealso>
+ public virtual System.String ReadString()
+ {
+ if (preUTF8Strings)
+ return ReadModifiedUTF8String();
+ int length = ReadVInt();
+ byte[] bytes = new byte[length];
+ ReadBytes(bytes, 0, length);
+ return System.Text.Encoding.UTF8.GetString(bytes, 0, length);
+ }
+
+ private System.String ReadModifiedUTF8String()
+ {
+ int length = ReadVInt();
+ char[] chars = new char[length];
+ ReadChars(chars, 0, length);
+ return new System.String(chars, 0, length);
+ }
+
+ /// <summary>Reads Lucene's old "modified UTF-8" encoded
+ /// characters into an array.
+ /// </summary>
+ /// <param name="buffer">the array to read characters into
+ /// </param>
+ /// <param name="start">the offset in the array to start storing characters
+ /// </param>
+ /// <param name="length">the number of characters to read
+ /// </param>
+ /// <seealso cref="IndexOutput.WriteChars(String,int,int)">
+ /// </seealso>
+ /// <deprecated> -- please use readString or readBytes
+ /// instead, and construct the string
+ /// from those utf8 bytes
+ /// </deprecated>
+ [Obsolete("-- please use ReadString or ReadBytes instead, and construct the string from those utf8 bytes")]
+ public virtual void ReadChars(char[] buffer, int start, int length)
+ {
+ int end = start + length;
+ for (int i = start; i < end; i++)
+ {
+ byte b = ReadByte();
+ if ((b & 0x80) == 0)
+ buffer[i] = (char) (b & 0x7F);
+ else if ((b & 0xE0) != 0xE0)
+ {
+ buffer[i] = (char) (((b & 0x1F) << 6) | (ReadByte() & 0x3F));
+ }
+ else
+ buffer[i] = (char) (((b & 0x0F) << 12) | ((ReadByte() & 0x3F) << 6) | (ReadByte() & 0x3F));
+ }
+ }
+
+ /// <summary> Expert
+ ///
+ /// Similar to <see cref="ReadChars(char[], int, int)" /> but does not do any conversion operations on the bytes it is reading in. It still
+ /// has to invoke <see cref="ReadByte()" /> just as <see cref="ReadChars(char[], int, int)" /> does, but it does not need a buffer to store anything
+ /// and it does not have to do any of the bitwise operations, since we don't actually care what is in the byte except to determine
+ /// how many more bytes to read
+ /// </summary>
+ /// <param name="length">The number of chars to read
+ /// </param>
+ /// <deprecated> this method operates on old "modified utf8" encoded
+ /// strings
+ /// </deprecated>
+ [Obsolete("this method operates on old \"modified utf8\" encoded strings")]
+ public virtual void SkipChars(int length)
+ {
+ for (int i = 0; i < length; i++)
+ {
+ byte b = ReadByte();
+ if ((b & 0x80) == 0)
+ {
+ //do nothing, we only need one byte
+ }
+ else if ((b & 0xE0) != 0xE0)
+ {
+ ReadByte(); //read an additional byte
+ }
+ else
+ {
+ //read two additional bytes.
+ ReadByte();
+ ReadByte();
+ }
+ }
+ }
+
+ [Obsolete("Use Dispose() instead.")]
+ public void Close()
+ {
+ Dispose();
+ }
+
+ /// <summary>Closes the stream to further operations. </summary>
+ public void Dispose()
+ {
+ Dispose(true);
+ }
+
+ protected abstract void Dispose(bool disposing);
+
+ /// <summary>Returns the current position in this file, where the next read will
+ /// occur.
+ /// </summary>
+ /// <seealso cref="Seek(long)">
+ /// </seealso>
+ public abstract long FilePointer { get; }
+
+ /// <summary>Sets current position in this file, where the next read will occur.</summary>
+ /// <seealso cref="FilePointer">
+ /// </seealso>
+ public abstract void Seek(long pos);
+
+ /// <summary>The number of bytes in the file. </summary>
+ public abstract long Length();
+
+ /// <summary>Returns a clone of this stream.
+ ///
+ /// <p/>Clones of a stream access the same data, and are positioned at the same
+ /// point as the stream they were cloned from.
+ ///
+ /// <p/>Expert: Subclasses must ensure that clones may be positioned at
+ /// different points in the input from each other and from the stream they
+ /// were cloned from.
+ /// </summary>
+ public virtual System.Object Clone()
+ {
+ IndexInput clone = null;
+ try
+ {
+ clone = (IndexInput) base.MemberwiseClone();
+ }
+ catch (System.Exception)
+ {
+ }
+
+ return clone;
+ }
+
+ // returns Map<String, String>
+ public virtual System.Collections.Generic.IDictionary<string,string> ReadStringStringMap()
+ {
+ var map = new HashMap<string, string>();
+ int count = ReadInt();
+ for (int i = 0; i < count; i++)
+ {
+ System.String key = ReadString();
+ System.String val = ReadString();
+ map[key] = val;
+ }
+
+ return map;
+ }
+
+ /*public abstract void Dispose();*/
+ }
+} \ No newline at end of file
diff --git a/src/core/Store/IndexOutput.cs b/src/core/Store/IndexOutput.cs
new file mode 100644
index 0000000..687c99c
--- /dev/null
+++ b/src/core/Store/IndexOutput.cs
@@ -0,0 +1,285 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Support;
+using UnicodeUtil = Lucene.Net.Util.UnicodeUtil;
+
+namespace Lucene.Net.Store
+{
+
+ /// <summary>Abstract base class for output to a file in a Directory. A random-access
+ /// output stream. Used for all Lucene index output operations.
+ /// </summary>
+ /// <seealso cref="Directory">
+ /// </seealso>
+ /// <seealso cref="IndexInput">
+ /// </seealso>
+ public abstract class IndexOutput : IDisposable
+ {
+ /// <summary>Writes a single byte.</summary>
+ /// <seealso cref="IndexInput.ReadByte()">
+ /// </seealso>
+ public abstract void WriteByte(byte b);
+
+ /// <summary>Writes an array of bytes.</summary>
+ /// <param name="b">the bytes to write
+ /// </param>
+ /// <param name="length">the number of bytes to write
+ /// </param>
+ /// <seealso cref="IndexInput.ReadBytes(byte[],int,int)">
+ /// </seealso>
+ public virtual void WriteBytes(byte[] b, int length)
+ {
+ WriteBytes(b, 0, length);
+ }
+
+ /// <summary>Writes an array of bytes.</summary>
+ /// <param name="b">the bytes to write
+ /// </param>
+ /// <param name="offset">the offset in the byte array
+ /// </param>
+ /// <param name="length">the number of bytes to write
+ /// </param>
+ /// <seealso cref="IndexInput.ReadBytes(byte[],int,int)">
+ /// </seealso>
+ public abstract void WriteBytes(byte[] b, int offset, int length);
+
+ /// <summary>Writes an int as four bytes.</summary>
+ /// <seealso cref="IndexInput.ReadInt()">
+ /// </seealso>
+ public virtual void WriteInt(int i)
+ {
+ WriteByte((byte) (i >> 24));
+ WriteByte((byte) (i >> 16));
+ WriteByte((byte) (i >> 8));
+ WriteByte((byte) i);
+ }
+
+ /// <summary>Writes an int in a variable-length format. Writes between one and
+ /// five bytes. Smaller values take fewer bytes. Negative numbers are not
+ /// supported.
+ /// </summary>
+ /// <seealso cref="IndexInput.ReadVInt()">
+ /// </seealso>
+ public virtual void WriteVInt(int i)
+ {
+ while ((i & ~ 0x7F) != 0)
+ {
+ WriteByte((byte) ((i & 0x7f) | 0x80));
+ i = Number.URShift(i, 7);
+ }
+ WriteByte((byte) i);
+ }
+
+ /// <summary>Writes a long as eight bytes.</summary>
+ /// <seealso cref="IndexInput.ReadLong()">
+ /// </seealso>
+ public virtual void WriteLong(long i)
+ {
+ WriteInt((int) (i >> 32));
+ WriteInt((int) i);
+ }
+
+ /// <summary>Writes a long in a variable-length format. Writes between one and nine
+ /// bytes. Smaller values take fewer bytes. Negative numbers are not
+ /// supported.
+ /// </summary>
+ /// <seealso cref="IndexInput.ReadVLong()">
+ /// </seealso>
+ public virtual void WriteVLong(long i)
+ {
+ while ((i & ~ 0x7F) != 0)
+ {
+ WriteByte((byte) ((i & 0x7f) | 0x80));
+ i = Number.URShift(i, 7);
+ }
+ WriteByte((byte) i);
+ }
+
+ /// <summary>Writes a string.</summary>
+ /// <seealso cref="IndexInput.ReadString()">
+ /// </seealso>
+ public virtual void WriteString(System.String s)
+ {
+ UnicodeUtil.UTF8Result utf8Result = new UnicodeUtil.UTF8Result();
+ UnicodeUtil.UTF16toUTF8(s, 0, s.Length, utf8Result);
+ WriteVInt(utf8Result.length);
+ WriteBytes(utf8Result.result, 0, utf8Result.length);
+ }
+
+ /// <summary>Writes a sub sequence of characters from s as the old
+ /// format (modified UTF-8 encoded bytes).
+ /// </summary>
+ /// <param name="s">the source of the characters
+ /// </param>
+ /// <param name="start">the first character in the sequence
+ /// </param>
+ /// <param name="length">the number of characters in the sequence
+ /// </param>
+ /// <deprecated> -- please pre-convert to utf8 bytes
+ /// instead or use <see cref="WriteString" />
+ /// </deprecated>
+ [Obsolete("-- please pre-convert to utf8 bytes instead or use WriteString")]
+ public virtual void WriteChars(System.String s, int start, int length)
+ {
+ int end = start + length;
+ for (int i = start; i < end; i++)
+ {
+ int code = (int) s[i];
+ if (code >= 0x01 && code <= 0x7F)
+ WriteByte((byte) code);
+ else if (((code >= 0x80) && (code <= 0x7FF)) || code == 0)
+ {
+ WriteByte((byte) (0xC0 | (code >> 6)));
+ WriteByte((byte) (0x80 | (code & 0x3F)));
+ }
+ else
+ {
+ WriteByte((byte) (0xE0 | (Number.URShift(code, 12))));
+ WriteByte((byte) (0x80 | ((code >> 6) & 0x3F)));
+ WriteByte((byte) (0x80 | (code & 0x3F)));
+ }
+ }
+ }
+
+ /// <summary>Writes a sub sequence of characters from char[] as
+ /// the old format (modified UTF-8 encoded bytes).
+ /// </summary>
+ /// <param name="s">the source of the characters
+ /// </param>
+ /// <param name="start">the first character in the sequence
+ /// </param>
+ /// <param name="length">the number of characters in the sequence
+ /// </param>
+ /// <deprecated> -- please pre-convert to utf8 bytes instead or use <see cref="WriteString" />
+ /// </deprecated>
+ [Obsolete("-- please pre-convert to utf8 bytes instead or use WriteString")]
+ public virtual void WriteChars(char[] s, int start, int length)
+ {
+ int end = start + length;
+ for (int i = start; i < end; i++)
+ {
+ int code = (int) s[i];
+ if (code >= 0x01 && code <= 0x7F)
+ WriteByte((byte) code);
+ else if (((code >= 0x80) && (code <= 0x7FF)) || code == 0)
+ {
+ WriteByte((byte) (0xC0 | (code >> 6)));
+ WriteByte((byte) (0x80 | (code & 0x3F)));
+ }
+ else
+ {
+ WriteByte((byte) (0xE0 | (Number.URShift(code, 12))));
+ WriteByte((byte) (0x80 | ((code >> 6) & 0x3F)));
+ WriteByte((byte) (0x80 | (code & 0x3F)));
+ }
+ }
+ }
+
+ private static int COPY_BUFFER_SIZE = 16384;
+ private byte[] copyBuffer;
+
+ /// <summary>Copy numBytes bytes from input to ourself. </summary>
+ public virtual void CopyBytes(IndexInput input, long numBytes)
+ {
+ System.Diagnostics.Debug.Assert(numBytes >= 0, "numBytes=" + numBytes);
+ long left = numBytes;
+ if (copyBuffer == null)
+ copyBuffer = new byte[COPY_BUFFER_SIZE];
+ while (left > 0)
+ {
+ int toCopy;
+ if (left > COPY_BUFFER_SIZE)
+ toCopy = COPY_BUFFER_SIZE;
+ else
+ toCopy = (int) left;
+ input.ReadBytes(copyBuffer, 0, toCopy);
+ WriteBytes(copyBuffer, 0, toCopy);
+ left -= toCopy;
+ }
+ }
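+
+ // Usage sketch (illustrative only): copy a file between two Directory instances by
+ // streaming it through CopyBytes. "_1.cfs" is a hypothetical file name.
+ //
+ //   IndexInput src = srcDir.OpenInput("_1.cfs");
+ //   IndexOutput dst = dstDir.CreateOutput("_1.cfs");
+ //   try { dst.CopyBytes(src, src.Length()); }
+ //   finally { dst.Close(); src.Close(); }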
+
+ /// <summary>Forces any buffered output to be written. </summary>
+ public abstract void Flush();
+
+ /// <summary>Closes this stream to further operations. </summary>
+ [Obsolete("Use Dispose() instead.")]
+ public void Close()
+ {
+ Dispose();
+ }
+
+ /// <summary>Closes this stream to further operations. </summary>
+ public void Dispose()
+ {
+ Dispose(true);
+ }
+
+ protected abstract void Dispose(bool disposing);
+
+ /// <summary>Returns the current position in this file, where the next write will
+ /// occur.
+ /// </summary>
+ /// <seealso cref="Seek(long)">
+ /// </seealso>
+ public abstract long FilePointer { get; }
+
+ /// <summary>Sets current position in this file, where the next write will occur.</summary>
+ /// <seealso cref="FilePointer()">
+ /// </seealso>
+ public abstract void Seek(long pos);
+
+ /// <summary>The number of bytes in the file. </summary>
+ public abstract long Length { get; }
+
+ /// <summary>Set the file length. By default, this method does
+ /// nothing (it's optional for a Directory to implement
+ /// it). But, certain Directory implementations (for
+ /// example <see cref="FSDirectory" />) can use this to inform the
+ /// underlying IO system to pre-allocate the file to the
+ /// specified size. If the length is longer than the
+ /// current file length, the bytes added to the file are
+ /// undefined. Otherwise the file is truncated.
+ /// </summary>
+ /// <param name="length">file length
+ /// </param>
+ public virtual void SetLength(long length)
+ {
+ }
+
+
+ // map must be Map<String, String>
+ public virtual void WriteStringStringMap(System.Collections.Generic.IDictionary<string,string> map)
+ {
+ if (map == null)
+ {
+ WriteInt(0);
+ }
+ else
+ {
+ WriteInt(map.Count);
+ foreach (var entry in map)
+ {
+ WriteString(entry.Key);
+ WriteString(entry.Value);
+ }
+ }
+ }
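+
+ // On-disk layout note (illustrative): an int count followed by count (key, value)
+ // string pairs, so the data round-trips with IndexInput.ReadStringStringMap().
+ //
+ //   output.WriteStringStringMap(
+ //       new System.Collections.Generic.Dictionary<string, string> { { "os", "linux" } });
+ //   // ... reopen the file for reading ...
+ //   var map = input.ReadStringStringMap();   // map["os"] == "linux"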
+ }
+} \ No newline at end of file
diff --git a/src/core/Store/Lock.cs b/src/core/Store/Lock.cs
new file mode 100644
index 0000000..9c30012
--- /dev/null
+++ b/src/core/Store/Lock.cs
@@ -0,0 +1,163 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Store
+{
+
+ /// <summary>An interprocess mutex lock.
+ /// <p/>Typical use is through the <see cref="With" /> helper class: subclass it,
+ /// put the code to execute while locked into <c>DoBody()</c>, and call
+ /// <c>run()</c>; the lock is obtained before <c>DoBody()</c> runs and
+ /// released afterwards.
+ /// </summary>
+ /// <seealso cref="Directory.MakeLock(String)" />
+ public abstract class Lock
+ {
+
+ /// <summary>How long <see cref="Obtain(long)" /> waits, in milliseconds,
+ /// in between attempts to acquire the lock.
+ /// </summary>
+ public static long LOCK_POLL_INTERVAL = 1000;
+
+ /// <summary>Pass this value to <see cref="Obtain(long)" /> to try
+ /// forever to obtain the lock.
+ /// </summary>
+ public const long LOCK_OBTAIN_WAIT_FOREVER = - 1;
+
+ /// <summary>Attempts to obtain exclusive access and immediately return
+ /// upon success or failure.
+ /// </summary>
+ /// <returns> true iff exclusive access is obtained
+ /// </returns>
+ public abstract bool Obtain();
+
+ /// <summary> If a lock obtain fails, this failureReason may be set
+ /// to the "root cause" exception explaining why the lock was
+ /// not obtained.
+ /// </summary>
+ protected internal System.Exception failureReason;
+
+ /// <summary>Attempts to obtain an exclusive lock within amount of
+ /// time given. Polls once per <see cref="LOCK_POLL_INTERVAL" />
+ /// (currently 1000) milliseconds until lockWaitTimeout is
+ /// passed.
+ /// </summary>
+ /// <param name="lockWaitTimeout">length of time to wait in
+ /// milliseconds or <see cref="LOCK_OBTAIN_WAIT_FOREVER" />
+ /// to retry forever
+ /// </param>
+ /// <returns> true if lock was obtained
+ /// </returns>
+ /// <throws> LockObtainFailedException if lock wait times out </throws>
+ /// <throws> ArgumentException if lockWaitTimeout is out of bounds </throws>
+ /// <throws> IOException if obtain() throws IOException </throws>
+ public virtual bool Obtain(long lockWaitTimeout)
+ {
+ failureReason = null;
+ bool locked = Obtain();
+ if (lockWaitTimeout < 0 && lockWaitTimeout != LOCK_OBTAIN_WAIT_FOREVER)
+ throw new System.ArgumentException("lockWaitTimeout should be LOCK_OBTAIN_WAIT_FOREVER or a non-negative number (got " + lockWaitTimeout + ")");
+
+ long maxSleepCount = lockWaitTimeout / LOCK_POLL_INTERVAL;
+ long sleepCount = 0;
+ while (!locked)
+ {
+ if (lockWaitTimeout != LOCK_OBTAIN_WAIT_FOREVER && sleepCount++ >= maxSleepCount)
+ {
+ System.String reason = "Lock obtain timed out: " + this.ToString();
+ if (failureReason != null)
+ {
+ reason += (": " + failureReason);
+ }
+ var e = failureReason != null
+ ? new LockObtainFailedException(reason, failureReason)
+ : new LockObtainFailedException(reason);
+ throw e;
+ }
+ try
+ {
+ System.Threading.Thread.Sleep(TimeSpan.FromMilliseconds(LOCK_POLL_INTERVAL));
+ }
+ catch (System.Threading.ThreadInterruptedException)
+ {
+ throw;
+ }
+ locked = Obtain();
+ }
+ return locked;
+ }
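+
+ // Usage sketch (illustrative, not from the original commit): obtain with a timeout
+ // and always release in a finally block. "write.lock" is the conventional name used
+ // by the index writer.
+ //
+ //   Lock l = directory.MakeLock("write.lock");
+ //   l.Obtain(10000);   // polls every LOCK_POLL_INTERVAL ms; throws LockObtainFailedException on timeout
+ //   try { /* work that requires exclusive access */ }
+ //   finally { l.Release(); }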
+
+ /// <summary>Releases exclusive access. </summary>
+ public abstract void Release();
+
+ /// <summary>Returns true if the resource is currently locked. Note that one must
+ /// still call <see cref="Obtain()" /> before using the resource.
+ /// </summary>
+ public abstract bool IsLocked();
+
+
+ /// <summary>Utility class for executing code with exclusive access. </summary>
+ public abstract class With
+ {
+ private Lock lock_Renamed;
+ private long lockWaitTimeout;
+
+
+ /// <summary>Constructs an executor that will grab the named lock. </summary>
+ protected With(Lock lock_Renamed, long lockWaitTimeout)
+ {
+ this.lock_Renamed = lock_Renamed;
+ this.lockWaitTimeout = lockWaitTimeout;
+ }
+
+ /// <summary>Code to execute with exclusive access. </summary>
+ protected internal abstract System.Object DoBody();
+
+ /// <summary>Calls <see cref="DoBody" /> while <i>lock</i> is obtained. Blocks if lock
+ /// cannot be obtained immediately. Retries to obtain the lock once per
+ /// <see cref="LOCK_POLL_INTERVAL" /> until it is obtained, or until the
+ /// wait timeout passed to the constructor expires. The lock is released when
+ /// <see cref="DoBody" /> exits.
+ /// </summary>
+ /// <throws> LockObtainFailedException if the lock could not be obtained </throws>
+ /// <throws> IOException if <see cref="Lock.Obtain(long)" /> throws IOException </throws>
+ public virtual System.Object run()
+ {
+ bool locked = false;
+ try
+ {
+ locked = lock_Renamed.Obtain(lockWaitTimeout);
+ return DoBody();
+ }
+ finally
+ {
+ if (locked)
+ lock_Renamed.Release();
+ }
+ }
+ }
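+
+ // Usage sketch (illustrative only): subclass With, put the guarded code in DoBody(),
+ // and call run(); the lock is obtained before DoBody() and released afterwards.
+ //
+ //   class Commit : Lock.With
+ //   {
+ //       public Commit(Lock l) : base(l, 10000) { }
+ //       protected internal override object DoBody() { /* guarded work */ return null; }
+ //   }
+ //   object result = new Commit(directory.MakeLock("write.lock")).run();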
+ }
+} \ No newline at end of file
diff --git a/src/core/Store/LockFactory.cs b/src/core/Store/LockFactory.cs
new file mode 100644
index 0000000..b3f34fc
--- /dev/null
+++ b/src/core/Store/LockFactory.cs
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Store
+{
+
+ /// <summary> <p/>Base class for Locking implementation. <see cref="Directory" /> uses
+ /// instances of this class to implement locking.<p/>
+ ///
+ /// <p/>Note that there are some useful tools to verify that
+ /// your LockFactory is working correctly: <see cref="VerifyingLockFactory" />
+ ///, <see cref="LockStressTest" />, <see cref="LockVerifyServer" />
+ ///.<p/>
+ ///
+ /// </summary>
+ /// <seealso cref="LockVerifyServer">
+ /// </seealso>
+ /// <seealso cref="LockStressTest">
+ /// </seealso>
+ /// <seealso cref="VerifyingLockFactory">
+ /// </seealso>
+
+ public abstract class LockFactory
+ {
+ protected internal System.String internalLockPrefix = null;
+
+ /// <summary> Gets or sets the prefix in use for all locks created in this
+ /// LockFactory. This is normally called once, when a
+ /// Directory gets this LockFactory instance. However, you
+ /// can also call this (after this instance is assigned to
+ /// a Directory) to override the prefix in use. This
+ /// is helpful if you're running Lucene on machines that
+ /// have different mount points for the same shared
+ /// directory.
+ /// </summary>
+ public virtual string LockPrefix
+ {
+ get { return this.internalLockPrefix; }
+ set { this.internalLockPrefix = value; }
+ }
+
+ /// <summary> Return a new Lock instance identified by lockName.</summary>
+ /// <param name="lockName">name of the lock to be created.
+ /// </param>
+ public abstract Lock MakeLock(System.String lockName);
+
+ /// <summary> Attempt to clear (forcefully unlock and remove) the
+ /// specified lock. Only call this at a time when you are
+ /// certain this lock is no longer in use.
+ /// </summary>
+ /// <param name="lockName">name of the lock to be cleared.
+ /// </param>
+ abstract public void ClearLock(System.String lockName);
+ }
+} \ No newline at end of file
diff --git a/src/core/Store/LockObtainFailedException.cs b/src/core/Store/LockObtainFailedException.cs
new file mode 100644
index 0000000..065b362
--- /dev/null
+++ b/src/core/Store/LockObtainFailedException.cs
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Store
+{
+
+ /// <summary> This exception is thrown when the <c>write.lock</c>
+ /// could not be acquired. This
+ /// happens when a writer tries to open an index
+ /// that another writer already has open.
+ /// </summary>
+ /// <seealso cref="Lock.Obtain(long)">
+ /// </seealso>
+ [Serializable]
+ public class LockObtainFailedException:System.IO.IOException
+ {
+ public LockObtainFailedException(System.String message):base(message)
+ {
+ }
+
+ public LockObtainFailedException(System.String message, System.Exception ex) : base(message, ex)
+ {
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Store/LockReleaseFailedException.cs b/src/core/Store/LockReleaseFailedException.cs
new file mode 100644
index 0000000..121283e
--- /dev/null
+++ b/src/core/Store/LockReleaseFailedException.cs
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Store
+{
+
+ /// <summary> This exception is thrown when the <c>write.lock</c>
+ /// could not be released.
+ /// </summary>
+ /// <seealso cref="Lock.Release()">
+ /// </seealso>
+ [Serializable]
+ public class LockReleaseFailedException:System.IO.IOException
+ {
+ public LockReleaseFailedException(System.String message):base(message)
+ {
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Store/LockStressTest.cs b/src/core/Store/LockStressTest.cs
new file mode 100644
index 0000000..5ac81fe
--- /dev/null
+++ b/src/core/Store/LockStressTest.cs
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Store
+{
+
+ /// <summary> Simple standalone tool that forever acquires &amp; releases a
+ /// lock using a specific LockFactory. Run without any args
+ /// to see usage.
+ ///
+ /// </summary>
+ /// <seealso cref="VerifyingLockFactory">
+ /// </seealso>
+ /// <seealso cref="LockVerifyServer">
+ /// </seealso>
+
+ public class LockStressTest
+ {
+
+ [STAThread]
+ public static void Main(System.String[] args)
+ {
+
+ if (args.Length != 6)
+ {
+ System.Console.Out.WriteLine("\nUsage: java Lucene.Net.Store.LockStressTest myID verifierHostOrIP verifierPort lockFactoryClassName lockDirName sleepTime\n" + "\n" + " myID = int from 0 .. 255 (should be unique for test process)\n" + " verifierHostOrIP = host name or IP address where LockVerifyServer is running\n" + " verifierPort = port that LockVerifyServer is listening on\n" + " lockFactoryClassName = primary LockFactory class that we will use\n" + " lockDirName = path to the lock directory (only set for Simple/NativeFSLockFactory\n" + " sleepTimeMS = milliseconds to pause betweeen each lock obtain/release\n" + "\n" + "You should run multiple instances of this process, each with its own\n" + "unique ID, and each pointing to the same lock directory, to verify\n" + "that locking is working correctly.\n" + "\n" + "Make sure you are first running LockVerifyServer.\n" + "\n");
+ System.Environment.Exit(1);
+ }
+
+ int myID = System.Int32.Parse(args[0]);
+
+ if (myID < 0 || myID > 255)
+ {
+ System.Console.Out.WriteLine("myID must be a unique int 0..255");
+ System.Environment.Exit(1);
+ }
+
+ System.String verifierHost = args[1];
+ int verifierPort = System.Int32.Parse(args[2]);
+ System.String lockFactoryClassName = args[3];
+ System.String lockDirName = args[4];
+ int sleepTimeMS = System.Int32.Parse(args[5]);
+
+ System.Type c;
+ try
+ {
+ c = System.Type.GetType(lockFactoryClassName);
+ }
+ catch (System.Exception)
+ {
+ throw new System.IO.IOException("unable to find LockClass " + lockFactoryClassName);
+ }
+
+ LockFactory lockFactory;
+ try
+ {
+ lockFactory = (LockFactory) System.Activator.CreateInstance(c);
+ }
+ catch (System.UnauthorizedAccessException)
+ {
+ throw new System.IO.IOException("IllegalAccessException when instantiating LockClass " + lockFactoryClassName);
+ }
+ catch (System.InvalidCastException)
+ {
+ throw new System.IO.IOException("unable to cast LockClass " + lockFactoryClassName + " instance to a LockFactory");
+ }
+ catch (System.Exception)
+ {
+ throw new System.IO.IOException("InstantiationException when instantiating LockClass " + lockFactoryClassName);
+ }
+
+ System.IO.DirectoryInfo lockDir = new System.IO.DirectoryInfo(lockDirName);
+
+ if (lockFactory is NativeFSLockFactory)
+ {
+ ((NativeFSLockFactory) lockFactory).LockDir = lockDir;
+ }
+ else if (lockFactory is SimpleFSLockFactory)
+ {
+ ((SimpleFSLockFactory) lockFactory).LockDir = lockDir;
+ }
+
+ lockFactory.LockPrefix = "test";
+
+ LockFactory verifyLF = new VerifyingLockFactory((sbyte) myID, lockFactory, verifierHost, verifierPort);
+
+ Lock l = verifyLF.MakeLock("test.lock");
+
+ while (true)
+ {
+
+ bool obtained = false;
+
+ try
+ {
+ obtained = l.Obtain(10);
+ }
+ catch (LockObtainFailedException)
+ {
+ System.Console.Out.Write("x");
+ }
+
+ if (obtained)
+ {
+ System.Console.Out.Write("l");
+ l.Release();
+ }
+ System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * sleepTimeMS));
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Store/LockVerifyServer.cs b/src/core/Store/LockVerifyServer.cs
new file mode 100644
index 0000000..bf0a3c5
--- /dev/null
+++ b/src/core/Store/LockVerifyServer.cs
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Store
+{
+
+ /// <summary> Simple standalone server that must be running when you
+ /// use <see cref="VerifyingLockFactory" />. This server simply
+ /// verifies at most one process holds the lock at a time.
+ /// Run without any args to see usage.
+ ///
+ /// </summary>
+ /// <seealso cref="VerifyingLockFactory">
+ /// </seealso>
+ /// <seealso cref="LockStressTest">
+ /// </seealso>
+
+ public class LockVerifyServer
+ {
+
+ private static System.String GetTime(long startTime)
+ {
+ return "[" + (((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) - startTime) / 1000) + "s] ";
+ }
+
+ [STAThread]
+ public static void Main(System.String[] args)
+ {
+
+ if (args.Length != 1)
+ {
+ System.Console.Out.WriteLine("\nUsage: java Lucene.Net.Store.LockVerifyServer port\n");
+ System.Environment.Exit(1);
+ }
+
+ int port = System.Int32.Parse(args[0]);
+
+ System.Net.Sockets.TcpListener temp_tcpListener;
+ temp_tcpListener = new System.Net.Sockets.TcpListener(System.Net.Dns.GetHostEntry(System.Net.Dns.GetHostName()).AddressList[0], port);
+ temp_tcpListener.Server.SetSocketOption(System.Net.Sockets.SocketOptionLevel.Socket, System.Net.Sockets.SocketOptionName.ReuseAddress, 1);
+ temp_tcpListener.Start();
+ System.Net.Sockets.TcpListener s = temp_tcpListener;
+ System.Console.Out.WriteLine("\nReady on port " + port + "...");
+
+ int lockedID = 0;
+ long startTime = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
+
+ while (true)
+ {
+ System.Net.Sockets.TcpClient cs = s.AcceptTcpClient();
+ System.IO.Stream out_Renamed = cs.GetStream();
+ System.IO.Stream in_Renamed = cs.GetStream();
+
+ int id = in_Renamed.ReadByte();
+ int command = in_Renamed.ReadByte();
+
+ bool err = false;
+
+ if (command == 1)
+ {
+ // Locked
+ if (lockedID != 0)
+ {
+ err = true;
+ System.Console.Out.WriteLine(GetTime(startTime) + " ERROR: id " + id + " got lock, but " + lockedID + " already holds the lock");
+ }
+ lockedID = id;
+ }
+ else if (command == 0)
+ {
+ if (lockedID != id)
+ {
+ err = true;
+ System.Console.Out.WriteLine(GetTime(startTime) + " ERROR: id " + id + " released the lock, but " + lockedID + " is the one holding the lock");
+ }
+ lockedID = 0;
+ }
+ else
+ throw new System.SystemException("unrecognized command " + command);
+
+ System.Console.Out.Write(".");
+
+ if (err)
+ out_Renamed.WriteByte((System.Byte) 1);
+ else
+ out_Renamed.WriteByte((System.Byte) 0);
+
+ out_Renamed.Close();
+ in_Renamed.Close();
+ cs.Close();
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Store/MMapDirectory.cs b/src/core/Store/MMapDirectory.cs
new file mode 100644
index 0000000..65e68d5
--- /dev/null
+++ b/src/core/Store/MMapDirectory.cs
@@ -0,0 +1,535 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using Constants = Lucene.Net.Util.Constants;
+
+namespace Lucene.Net.Store
+{
+
+ /// <summary>File-based <see cref="Directory" /> implementation that uses
+ /// mmap for reading, and <see cref="SimpleFSDirectory.SimpleFSIndexOutput" />
+ /// for writing.
+ ///
+ /// <p/><b>NOTE</b>: memory mapping uses up a portion of the
+ /// virtual memory address space in your process equal to the
+ /// size of the file being mapped. Before using this class,
+ /// be sure you have plenty of virtual address space, e.g. by
+ /// using a 64 bit JRE, or a 32 bit JRE with indexes that are
+ /// guaranteed to fit within the address space.
+ /// On 32 bit platforms also consult <see cref="MaxChunkSize" />
+ /// if you have problems with mmap failing because of fragmented
+ /// address space. If you get an OutOfMemoryException, it is recommended
+ /// to reduce the chunk size until it works.
+ ///
+ /// <p/>Due to <a href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4724038">
+ /// this bug</a> in Sun's JRE, MMapDirectory's <see cref="IndexInput.Close" />
+ /// is unable to close the underlying OS file handle. Only when GC
+ /// finally collects the underlying objects, which could be quite
+ /// some time later, will the file handle be closed.
+ ///
+ /// <p/>This will consume additional transient disk usage: on Windows,
+ /// attempts to delete or overwrite the files will result in an
+ /// exception; on other platforms, which typically have a &quot;delete on
+ /// last close&quot; semantics, while such operations will succeed, the bytes
+ /// are still consuming space on disk. For many applications this
+ /// limitation is not a problem (e.g. if you have plenty of disk space,
+ /// and you don't rely on overwriting files on Windows) but it's still
+ /// an important limitation to be aware of.
+ ///
+ /// <p/>This class supplies the workaround mentioned in the bug report
+ /// (disabled by default, see <see cref="UseUnmap" />), which may fail on
+ /// non-Sun JVMs. It forcefully unmaps the buffer on close by using
+ /// an undocumented internal cleanup functionality.
+ /// <see cref="UNMAP_SUPPORTED" /> is <c>true</c>, if the workaround
+ /// can be enabled (with no guarantees).
+ /// </summary>
+ public class MMapDirectory:FSDirectory
+ {
+ private class AnonymousClassPrivilegedExceptionAction // : SupportClass.IPriviligedAction // {{Aroush-2.9}}
+ {
+ public AnonymousClassPrivilegedExceptionAction(byte[] buffer, MMapDirectory enclosingInstance)
+ {
+ InitBlock(buffer, enclosingInstance);
+ }
+ private void InitBlock(byte[] buffer, MMapDirectory enclosingInstance)
+ {
+ this.buffer = buffer;
+ this.enclosingInstance = enclosingInstance;
+ }
+ private byte[] buffer;
+ private MMapDirectory enclosingInstance;
+ public MMapDirectory Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ public virtual System.Object Run()
+ {
+ // {{Aroush-2.9
+ /*
+ System.Reflection.MethodInfo getCleanerMethod = buffer.GetType().GetMethod("cleaner", (Lucene.Net.Store.MMapDirectory.NO_PARAM_TYPES == null)?new System.Type[0]:(System.Type[]) Lucene.Net.Store.MMapDirectory.NO_PARAM_TYPES);
+ getCleanerMethod.SetAccessible(true);
+ System.Object cleaner = getCleanerMethod.Invoke(buffer, (System.Object[]) Lucene.Net.Store.MMapDirectory.NO_PARAMS);
+ if (cleaner != null)
+ {
+ cleaner.GetType().GetMethod("clean", (Lucene.Net.Store.MMapDirectory.NO_PARAM_TYPES == null)?new System.Type[0]:(System.Type[]) Lucene.Net.Store.MMapDirectory.NO_PARAM_TYPES).Invoke(cleaner, (System.Object[]) Lucene.Net.Store.MMapDirectory.NO_PARAMS);
+ }
+ */
+ //System.Diagnostics.Debug.Fail("Port issue:", "sun.misc.Cleaner()"); // {{Aroush-2.9}}
+ throw new NotImplementedException("Port issue: sun.misc.Cleaner()");
+ // Aroush-2.9}}
+ //return null;
+ }
+ }
+ private void InitBlock()
+ {
+ maxBBuf = Constants.JRE_IS_64BIT?System.Int32.MaxValue:(256 * 1024 * 1024);
+ }
+
+ /// <summary>Create a new MMapDirectory for the named location.
+ ///
+ /// </summary>
+ /// <param name="path">the path of the directory
+ /// </param>
+ /// <param name="lockFactory">the lock factory to use, or null for the default.
+ /// </param>
+ /// <throws> IOException </throws>
+ public MMapDirectory(System.IO.DirectoryInfo path, LockFactory lockFactory)
+ : base(path, lockFactory)
+ {
+ InitBlock();
+ }
+
+ /// <summary>Create a new MMapDirectory for the named location and the default lock factory.
+ ///
+ /// </summary>
+ /// <param name="path">the path of the directory
+ /// </param>
+ /// <throws> IOException </throws>
+ public MMapDirectory(System.IO.DirectoryInfo path)
+ : base(path, null)
+ {
+ InitBlock();
+ }
+
+ private bool useUnmapHack = false;
+ private int maxBBuf;
+
+ /// <summary> <c>true</c>, if this platform supports unmapping mmaped files.</summary>
+ public static bool UNMAP_SUPPORTED;
+
+ /// <summary> Enables or disables the workaround for unmapping the buffers
+ /// from address space after closing <see cref="IndexInput" />, that is
+ /// mentioned in the bug report. This hack may fail on non-Sun JVMs.
+ /// It forcefully unmaps the buffer on close by using
+ /// an undocumented internal cleanup functionality.
+ /// <p/><b>NOTE:</b> Enabling this is completely unsupported
+ /// by Java and may lead to JVM crashes if <c>IndexInput</c>
+ /// is closed while another thread is still accessing it (SIGSEGV).
+ /// </summary>
+ /// <throws> ArgumentException if <see cref="UNMAP_SUPPORTED" />
+ /// is <c>false</c> and the workaround cannot be enabled. </throws>
+ public virtual bool UseUnmap
+ {
+ get { return useUnmapHack; }
+ set
+ {
+ if (value && !UNMAP_SUPPORTED)
+ throw new System.ArgumentException("Unmap hack not supported on this platform!");
+ this.useUnmapHack = value;
+ }
+ }
+
+ /// <summary> Try to unmap the buffer; this method silently fails if the JVM
+ /// has no support for it. On Windows, this means that mmapped files
+ /// cannot be modified or deleted.
+ /// </summary>
+ internal void CleanMapping(System.IO.MemoryStream buffer)
+ {
+ if (useUnmapHack)
+ {
+ try
+ {
+ // {{Aroush-2.9}} Not converted: java.security.AccessController.doPrivileged()
+ //System.Diagnostics.Debug.Fail("Port issue:", "java.security.AccessController.doPrivileged()"); // {{Aroush-2.9}}
+ throw new NotImplementedException("Port issue: java.security.AccessController.doPrivileged()");
+ // AccessController.DoPrivileged(new AnonymousClassPrivilegedExceptionAction(buffer, this));
+ }
+ catch (System.Exception e)
+ {
+ System.IO.IOException ioe = new System.IO.IOException("unable to unmap the mapped buffer", e.InnerException);
+ throw ioe;
+ }
+ }
+ }
+
+ /// <summary> Gets or sets the maximum chunk size (default is <see cref="int.MaxValue" /> for
+ /// 64 bit JVMs and 256 MiBytes for 32 bit JVMs) used for memory mapping.
+ /// Especially on 32 bit platform, the address space can be very fragmented,
+ /// so large index files cannot be mapped.
+ /// Using a lower chunk size makes the directory implementation a little
+ /// bit slower (as the correct chunk must be resolved on each seek)
+ /// but the chance is higher that mmap does not fail. On 64 bit
+ /// Java platforms, this parameter should always be <see cref="int.MaxValue" />,
+ /// as the address space is big enough.
+ /// </summary>
+ public virtual int MaxChunkSize
+ {
+ get { return maxBBuf; }
+ set
+ {
+ if (value <= 0)
+ throw new System.ArgumentException("Maximum chunk size for mmap must be >0");
+ this.maxBBuf = value;
+ }
+ }
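+
+ // Usage sketch (illustrative only): on a 32-bit process with a fragmented address
+ // space, lower the chunk size so each mapped region is smaller.
+ //
+ //   var dir = new MMapDirectory(new System.IO.DirectoryInfo("/tmp/index")); // hypothetical path
+ //   if (!Constants.JRE_IS_64BIT)
+ //       dir.MaxChunkSize = 64 * 1024 * 1024;   // 64 MB per mapped chunk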
+
+ private class MMapIndexInput : IndexInput
+ {
+ private void InitBlock(MMapDirectory enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private MMapDirectory enclosingInstance;
+ public MMapDirectory Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+
+ private System.IO.MemoryStream buffer;
+ private long length;
+ private bool isClone;
+ private bool isDisposed;
+
+ internal MMapIndexInput(MMapDirectory enclosingInstance, System.IO.FileStream raf)
+ {
+ byte[] data = new byte[raf.Length];
+ raf.Read(data, 0, (int) raf.Length);
+
+ InitBlock(enclosingInstance);
+ this.length = raf.Length;
+ this.buffer = new System.IO.MemoryStream(data);
+ }
+
+ public override byte ReadByte()
+ {
+ try
+ {
+ return (byte) buffer.ReadByte();
+ }
+ catch (ObjectDisposedException)
+ {
+ throw new System.IO.IOException("read past EOF");
+ }
+ }
+
+ public override void ReadBytes(byte[] b, int offset, int len)
+ {
+ try
+ {
+ buffer.Read(b, offset, len);
+ }
+ catch (ObjectDisposedException)
+ {
+ throw new System.IO.IOException("read past EOF");
+ }
+ }
+
+ public override long FilePointer
+ {
+ get
+ {
+ return buffer.Position;
+ }
+ }
+
+ public override void Seek(long pos)
+ {
+ buffer.Seek(pos, System.IO.SeekOrigin.Begin);
+ }
+
+ public override long Length()
+ {
+ return length;
+ }
+
+ public override System.Object Clone()
+ {
+ if (buffer == null)
+ throw new AlreadyClosedException("MMapIndexInput already closed");
+ MMapIndexInput clone = (MMapIndexInput) base.Clone();
+ clone.isClone = true;
+ // clone.buffer = buffer.duplicate(); // {{Aroush-1.9}}
+ return clone;
+ }
+
+ protected override void Dispose(bool isDisposing)
+ {
+ if (isDisposed) return;
+
+ if (isDisposing)
+ {
+ if (isClone || buffer == null)
+ return;
+ // unmap the buffer (if enabled) and at least unset it for GC
+ try
+ {
+ Enclosing_Instance.CleanMapping(buffer);
+ }
+ finally
+ {
+ buffer = null;
+ }
+ }
+
+ isDisposed = true;
+ }
+ }
+
+ // Because Java's ByteBuffer uses an int to address the
+ // values, it's necessary to access a file >
+ // Integer.MAX_VALUE in size using multiple byte buffers.
+ protected internal class MultiMMapIndexInput:IndexInput, System.ICloneable
+ {
+ private void InitBlock(MMapDirectory enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private MMapDirectory enclosingInstance;
+ public MMapDirectory Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+
+ private System.IO.MemoryStream[] buffers;
+ private int[] bufSizes; // keep here, ByteBuffer.size() method is optional
+
+ private long length;
+
+ private bool isDisposed;
+
+ private int curBufIndex;
+ private int maxBufSize;
+
+ private System.IO.MemoryStream curBuf; // redundant for speed: buffers[curBufIndex]
+ private int curAvail; // redundant for speed: (bufSizes[curBufIndex] - curBuf.position())
+
+ private bool isClone = false;
+
+ public MultiMMapIndexInput(MMapDirectory enclosingInstance, System.IO.FileStream raf, int maxBufSize)
+ {
+ InitBlock(enclosingInstance);
+ this.length = raf.Length;
+ this.maxBufSize = maxBufSize;
+
+ if (maxBufSize <= 0)
+ throw new System.ArgumentException("Non positive maxBufSize: " + maxBufSize);
+
+ if ((length / maxBufSize) > System.Int32.MaxValue)
+ {
+ throw new System.ArgumentException("RandomAccessFile too big for maximum buffer size: " + raf.ToString());
+ }
+
+ int nrBuffers = (int) (length / maxBufSize);
+ if (((long) nrBuffers * maxBufSize) < length)
+ nrBuffers++;
+
+ this.buffers = new System.IO.MemoryStream[nrBuffers];
+ this.bufSizes = new int[nrBuffers];
+
+ long bufferStart = 0;
+ System.IO.FileStream rafc = raf;
+ for (int bufNr = 0; bufNr < nrBuffers; bufNr++)
+ {
+ byte[] data = new byte[rafc.Length];
+ raf.Read(data, 0, (int) rafc.Length);
+
+ int bufSize = (length > (bufferStart + maxBufSize))?maxBufSize:(int) (length - bufferStart);
+ this.buffers[bufNr] = new System.IO.MemoryStream(data);
+ this.bufSizes[bufNr] = bufSize;
+ bufferStart += bufSize;
+ }
+ Seek(0L);
+ }
+
+ public override byte ReadByte()
+ {
+ // Performance might be improved by reading ahead into an array of
+ // e.g. 128 bytes and readByte() from there.
+ if (curAvail == 0)
+ {
+ curBufIndex++;
+ if (curBufIndex >= buffers.Length)
+ throw new System.IO.IOException("read past EOF");
+ curBuf = buffers[curBufIndex];
+ curBuf.Seek(0, System.IO.SeekOrigin.Begin);
+ curAvail = bufSizes[curBufIndex];
+ }
+ curAvail--;
+ return (byte) curBuf.ReadByte();
+ }
+
+ public override void ReadBytes(byte[] b, int offset, int len)
+ {
+ while (len > curAvail)
+ {
+ curBuf.Read(b, offset, curAvail);
+ len -= curAvail;
+ offset += curAvail;
+ curBufIndex++;
+ if (curBufIndex >= buffers.Length)
+ throw new System.IO.IOException("read past EOF");
+ curBuf = buffers[curBufIndex];
+ curBuf.Seek(0, System.IO.SeekOrigin.Begin);
+ curAvail = bufSizes[curBufIndex];
+ }
+ curBuf.Read(b, offset, len);
+ curAvail -= len;
+ }
+
+ public override long FilePointer
+ {
+ get { return ((long) curBufIndex*maxBufSize) + curBuf.Position; }
+ }
+
+ public override void Seek(long pos)
+ {
+ curBufIndex = (int) (pos / maxBufSize);
+ curBuf = buffers[curBufIndex];
+ int bufOffset = (int) (pos - ((long) curBufIndex * maxBufSize));
+ curBuf.Seek(bufOffset, System.IO.SeekOrigin.Begin);
+ curAvail = bufSizes[curBufIndex] - bufOffset;
+ }
+
+ public override long Length()
+ {
+ return length;
+ }
+
+ public override System.Object Clone()
+ {
+ MultiMMapIndexInput clone = (MultiMMapIndexInput) base.Clone();
+ clone.isClone = true;
+ clone.buffers = new System.IO.MemoryStream[buffers.Length];
+ // No need to clone bufSizes.
+ // Since most clones will use only one buffer, duplicate() could also be
+ // done lazily in clones, e.g. when adapting curBuf.
+ for (int bufNr = 0; bufNr < buffers.Length; bufNr++)
+ {
+ clone.buffers[bufNr] = buffers[bufNr]; // clone.buffers[bufNr] = buffers[bufNr].duplicate(); // {{Aroush-1.9}} how do we clone?!
+ }
+ try
+ {
+ clone.Seek(FilePointer);
+ }
+ catch (System.IO.IOException ioe)
+ {
+ System.SystemException newException = new System.SystemException(ioe.Message, ioe);
+ throw newException;
+ }
+ return clone;
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ if (isDisposed) return;
+ if (isClone || buffers == null)
+ return;
+ try
+ {
+ for (int bufNr = 0; bufNr < buffers.Length; bufNr++)
+ {
+ // unmap the buffer (if enabled) and at least unset it for GC
+ try
+ {
+ Enclosing_Instance.CleanMapping(buffers[bufNr]);
+ }
+ finally
+ {
+ buffers[bufNr] = null;
+ }
+ }
+ }
+ finally
+ {
+ buffers = null;
+ }
+ isDisposed = true;
+ }
+ }
+
+ /// <summary>Creates an IndexInput for the file with the given name. </summary>
+ public override IndexInput OpenInput(System.String name, int bufferSize)
+ {
+ EnsureOpen();
+ System.String path = System.IO.Path.Combine(Directory.FullName, name);
+ System.IO.FileStream raf = new System.IO.FileStream(path, System.IO.FileMode.Open, System.IO.FileAccess.Read);
+ try
+ {
+ return (raf.Length <= (long) maxBBuf)?(IndexInput) new MMapIndexInput(this, raf):(IndexInput) new MultiMMapIndexInput(this, raf, maxBBuf);
+ }
+ finally
+ {
+ raf.Close();
+ }
+ }
+
+ /// <summary>Creates an IndexOutput for the file with the given name. </summary>
+ public override IndexOutput CreateOutput(System.String name)
+ {
+ InitOutput(name);
+ return new SimpleFSDirectory.SimpleFSIndexOutput(new System.IO.FileInfo(System.IO.Path.Combine(internalDirectory.FullName, name)));
+ }
+ static MMapDirectory()
+ {
+ {
+ bool v;
+ try
+ {
+ // {{Aroush-2.9
+ /*
+ System.Type.GetType("sun.misc.Cleaner"); // {{Aroush-2.9}} port issue?
+ System.Type.GetType("java.nio.DirectByteBuffer").GetMethod("cleaner", (NO_PARAM_TYPES == null)?new System.Type[0]:(System.Type[]) NO_PARAM_TYPES);
+ */
+ //System.Diagnostics.Debug.Fail("Port issue:", "sun.misc.Cleaner.clean()"); // {{Aroush-2.9}}
+ throw new NotImplementedException("Port issue: sun.misc.Cleaner.clean()");
+ // Aroush-2.9}}
+ //v = true;
+ }
+ catch (System.Exception)
+ {
+ v = false;
+ }
+ UNMAP_SUPPORTED = v;
+ }
+ }
+ }
+} \ No newline at end of file
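A minimal usage sketch for the MMapDirectory above (illustrative only; it assumes the FSDirectory-style constructor taking a DirectoryInfo and a LockFactory and the IDisposable pattern from stock Lucene.Net 3.0.3, and the path and file name are placeholders). OpenInput serves files that fit within maxBBuf through MMapIndexInput and larger files through MultiMMapIndexInput.

using System.IO;
using Lucene.Net.Store;

class MMapDirectoryExample
{
    static void Main()
    {
        // Assumed constructor; null selects the default lock factory.
        var dir = new MMapDirectory(new DirectoryInfo("/tmp/index"), null);

        // Small files are served by MMapIndexInput, files larger than maxBBuf by MultiMMapIndexInput.
        using (IndexInput input = dir.OpenInput("segments.gen", 1024))
        {
            byte first = input.ReadByte(); // reads from the in-memory copy, not the file handle
        }

        dir.Dispose();
    }
}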
diff --git a/src/core/Store/NIOFSDirectory.cs b/src/core/Store/NIOFSDirectory.cs
new file mode 100644
index 0000000..190a533
--- /dev/null
+++ b/src/core/Store/NIOFSDirectory.cs
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+namespace Lucene.Net.Store
+{
+ /// <summary>
+ /// Not implemented. Waiting for volunteers.
+ /// </summary>
+ public class NIOFSDirectory : Lucene.Net.Store.FSDirectory
+ {
+ public NIOFSDirectory(System.IO.DirectoryInfo dir, LockFactory lockFactory)
+ : base(dir, lockFactory)
+ {
+ throw new System.NotImplementedException("Waiting for volunteers to implement this class");
+ }
+
+ /// <summary>
+ /// Not implemented. Waiting for volunteers.
+ /// </summary>
+ public class NIOFSIndexInput
+ {
+ public NIOFSIndexInput()
+ {
+ throw new System.NotImplementedException("Waiting for volunteers to implement this class");
+ }
+ }
+
+ public override IndexOutput CreateOutput(string name)
+ {
+ throw new System.NotImplementedException("Waiting for volunteers to implement this class");
+ }
+ }
+}
+
+
+//namespace Lucene.Net.Store
+//{
+
+// /// <summary> An <see cref="FSDirectory" /> implementation that uses
+// /// java.nio's FileChannel's positional read, which allows
+// /// multiple threads to read from the same file without
+// /// synchronizing.
+// ///
+// /// <p/>This class only uses FileChannel when reading; writing
+// /// is achieved with <see cref="SimpleFSDirectory.SimpleFSIndexOutput" />.
+// ///
+// /// <p/><b>NOTE</b>: NIOFSDirectory is not recommended on Windows because of a bug
+// /// in how FileChannel.read is implemented in Sun's JRE.
+// /// Inside of the implementation the position is apparently
+// /// synchronized. See <a
+// /// href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6265734">here</a>
+// /// for details.
+// /// </summary>
+// public class NIOFSDirectory:FSDirectory
+// {
+
+// /// <summary>Create a new NIOFSDirectory for the named location.
+// ///
+// /// </summary>
+// /// <param name="path">the path of the directory
+// /// </param>
+// /// <param name="lockFactory">the lock factory to use, or null for the default.
+// /// </param>
+// /// <throws> IOException </throws>
+// [System.Obsolete("Use the constructor that takes a DirectoryInfo, this will be removed in the 3.0 release")]
+// public NIOFSDirectory(System.IO.FileInfo path, LockFactory lockFactory):base(new System.IO.DirectoryInfo(path.FullName), lockFactory)
+// {
+// }
+
+// /// <summary>Create a new NIOFSDirectory for the named location.
+// ///
+// /// </summary>
+// /// <param name="path">the path of the directory
+// /// </param>
+// /// <param name="lockFactory">the lock factory to use, or null for the default.
+// /// </param>
+// /// <throws> IOException </throws>
+// public NIOFSDirectory(System.IO.DirectoryInfo path, LockFactory lockFactory) : base(path, lockFactory)
+// {
+// }
+
+// /// <summary>Create a new NIOFSDirectory for the named location and the default lock factory.
+// ///
+// /// </summary>
+// /// <param name="path">the path of the directory
+// /// </param>
+// /// <throws> IOException </throws>
+// [System.Obsolete("Use the constructor that takes a DirectoryInfo, this will be removed in the 3.0 release")]
+// public NIOFSDirectory(System.IO.FileInfo path):base(new System.IO.DirectoryInfo(path.FullName), null)
+// {
+// }
+
+// /// <summary>Create a new NIOFSDirectory for the named location and the default lock factory.
+// ///
+// /// </summary>
+// /// <param name="path">the path of the directory
+// /// </param>
+// /// <throws> IOException </throws>
+// public NIOFSDirectory(System.IO.DirectoryInfo path) : base(path, null)
+// {
+// }
+
+// // back compatibility so FSDirectory can instantiate via reflection
+// /// <deprecated>
+// /// </deprecated>
+// [Obsolete]
+// internal NIOFSDirectory()
+// {
+// }
+
+// /// <summary>Creates an IndexInput for the file with the given name. </summary>
+// public override IndexInput OpenInput(System.String name, int bufferSize)
+// {
+// EnsureOpen();
+// return new NIOFSIndexInput(new System.IO.FileInfo(System.IO.Path.Combine(GetFile().FullName, name)), bufferSize, GetReadChunkSize());
+// }
+
+// /// <summary>Creates an IndexOutput for the file with the given name. </summary>
+// public override IndexOutput CreateOutput(System.String name)
+// {
+// InitOutput(name);
+// return new SimpleFSDirectory.SimpleFSIndexOutput(new System.IO.FileInfo(System.IO.Path.Combine(directory.FullName, name)));
+// }
+
+// public /*protected internal*/ class NIOFSIndexInput:SimpleFSDirectory.SimpleFSIndexInput
+// {
+
+// private System.IO.MemoryStream byteBuf; // wraps the buffer for NIO
+
+// private byte[] otherBuffer;
+// private System.IO.MemoryStream otherByteBuf;
+
+// internal System.IO.BinaryReader channel;
+
+// /// <deprecated> Please use ctor taking chunkSize
+// /// </deprecated>
+// [Obsolete("Please use ctor taking chunkSize")]
+// public NIOFSIndexInput(System.IO.FileInfo path, int bufferSize):this(path, bufferSize, FSDirectory.DEFAULT_READ_CHUNK_SIZE)
+// {
+// }
+
+// public NIOFSIndexInput(System.IO.FileInfo path, int bufferSize, int chunkSize):base(path, bufferSize, chunkSize)
+// {
+// channel = (System.IO.BinaryReader) file;
+// }
+
+// protected internal override void NewBuffer(byte[] newBuffer)
+// {
+// base.NewBuffer(newBuffer);
+// // {{Aroush-2.9}} byteBuf = ByteBuffer.wrap(newBuffer);
+// System.Diagnostics.Debug.Fail("Port issue:", "byteBuf = ByteBuffer.wrap(newBuffer)"); // {{Aroush-2.9}}
+// }
+
+// public override void Close()
+// {
+// if (!isClone && file.isOpen)
+// {
+// // Close the channel & file
+// try
+// {
+// channel.Close();
+// }
+// finally
+// {
+// file.Close();
+// }
+// }
+// }
+
+// public override void ReadInternal(byte[] b, int offset, int len)
+// {
+
+// System.IO.MemoryStream bb;
+
+// // Determine the ByteBuffer we should use
+// if (b == buffer && 0 == offset)
+// {
+// // Use our own pre-wrapped byteBuf:
+// System.Diagnostics.Debug.Assert(byteBuf != null);
+// byteBuf.Position = 0;
+// byteBuf.Capacity = len;
+// bb = byteBuf;
+// }
+// else
+// {
+// if (offset == 0)
+// {
+// if (otherBuffer != b)
+// {
+// // Now wrap this other buffer; with compound
+// // file, we are repeatedly called with its
+// // buffer, so we wrap it once and then re-use it
+// // on subsequent calls
+// otherBuffer = b;
+// // otherByteBuf = ByteBuffer.wrap(b); {{Aroush-2.9}}
+// System.Diagnostics.Debug.Fail("Port issue:", "otherByteBuf = ByteBuffer.wrap(b)"); // {{Aroush-2.9}}
+// }
+// else
+// otherByteBuf.Position = 0;
+// otherByteBuf.Capacity = len;
+// bb = otherByteBuf;
+// }
+// else
+// {
+// // Always wrap when offset != 0
+// bb = null; // bb = ByteBuffer.wrap(b, offset, len); {{Aroush-2.9}}
+// System.Diagnostics.Debug.Fail("Port issue:", "bb = ByteBuffer.wrap(b, offset, len)"); // {{Aroush-2.9}}
+// }
+// }
+
+// int readOffset = (int) bb.Position;
+// int readLength = bb.Capacity - readOffset;
+// System.Diagnostics.Debug.Assert(readLength == len);
+
+// long pos = GetFilePointer();
+
+// try
+// {
+// while (readLength > 0)
+// {
+// int limit;
+// if (readLength > chunkSize)
+// {
+// // LUCENE-1566 - work around JVM Bug by breaking
+// // very large reads into chunks
+// limit = readOffset + chunkSize;
+// }
+// else
+// {
+// limit = readOffset + readLength;
+// }
+// bb.Capacity = limit;
+// int i = -1; // int i = channel.Read(bb, pos, limit); // {{Aroush-2.9}} must read from 'channel' into 'bb'
+// System.Diagnostics.Debug.Fail("Port issue:", "channel.Read(bb, pos, limit)"); // {{Aroush-2.9}}
+// if (i == - 1)
+// {
+// throw new System.IO.IOException("read past EOF");
+// }
+// pos += i;
+// readOffset += i;
+// readLength -= i;
+// }
+// }
+// catch (System.OutOfMemoryException e)
+// {
+// // propagate OOM up and add a hint for 32bit VM Users hitting the bug
+// // with a large chunk size in the fast path.
+// System.OutOfMemoryException outOfMemoryError = new System.OutOfMemoryException("OutOfMemoryError likely caused by the Sun VM Bug described in " + "https://issues.apache.org/jira/browse/LUCENE-1566; try calling FSDirectory.setReadChunkSize " + "with a value smaller than the current chunk size (" + chunkSize + ")", e);
+// throw outOfMemoryError;
+// }
+// }
+// }
+// }
+//} \ No newline at end of file
diff --git a/src/core/Store/NativeFSLockFactory.cs b/src/core/Store/NativeFSLockFactory.cs
new file mode 100644
index 0000000..ffab47b
--- /dev/null
+++ b/src/core/Store/NativeFSLockFactory.cs
@@ -0,0 +1,440 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Store
+{
+
+ /// <summary> <p/>Implements <see cref="LockFactory" /> using native OS file
+ /// locks. Note that because this LockFactory relies on
+ /// java.nio.* APIs for locking, any problems with those APIs
+ /// will cause locking to fail. Specifically, on certain NFS
+ /// environments the java.nio.* locks will fail (the lock can
+ /// incorrectly be double acquired) whereas <see cref="SimpleFSLockFactory" />
+ /// worked perfectly in those same
+ /// environments. For NFS based access to an index, it's
+ /// recommended that you try <see cref="SimpleFSLockFactory" />
+ /// first and work around the one limitation that a lock file
+ /// could be left when the JVM exits abnormally.<p/>
+ ///
+ /// <p/>The primary benefit of <see cref="NativeFSLockFactory" /> is
+ /// that lock files will be properly removed (by the OS) if
+ /// the JVM has an abnormal exit.<p/>
+ ///
+ /// <p/>Note that, unlike <see cref="SimpleFSLockFactory" />, the existence of
+ /// leftover lock files in the filesystem on exiting the JVM
+ /// is fine because the OS will free the locks held against
+ /// these files even though the files still remain.<p/>
+ ///
+ /// <p/>If you suspect that this or any other LockFactory is
+ /// not working properly in your environment, you can easily
+ /// test it by using <see cref="VerifyingLockFactory" />, <see cref="LockVerifyServer" />
+ /// and <see cref="LockStressTest" />.<p/>
+ ///
+ /// </summary>
+ /// <seealso cref="LockFactory">
+ /// </seealso>
+
+ public class NativeFSLockFactory : FSLockFactory
+ {
+ /// <summary> Create a NativeFSLockFactory instance, with null (unset)
+ /// lock directory. When you pass this factory to a <see cref="FSDirectory" />
+ /// subclass, the lock directory is automatically set to the
+ /// directory itself. Be sure to create one instance for each directory
+ /// you create!
+ /// </summary>
+ public NativeFSLockFactory():this((System.IO.DirectoryInfo) null)
+ {
+ }
+
+ /// <summary> Create a NativeFSLockFactory instance, storing lock
+ /// files into the specified lockDirName:
+ ///
+ /// </summary>
+ /// <param name="lockDirName">where lock files are created.
+ /// </param>
+ public NativeFSLockFactory(System.String lockDirName):this(new System.IO.DirectoryInfo(lockDirName))
+ {
+ }
+
+ /// <summary> Create a NativeFSLockFactory instance, storing lock
+ /// files into the specified lockDir:
+ ///
+ /// </summary>
+ /// <param name="lockDir">where lock files are created.
+ /// </param>
+ public NativeFSLockFactory(System.IO.DirectoryInfo lockDir)
+ {
+ LockDir = lockDir;
+ }
+
+ public override Lock MakeLock(System.String lockName)
+ {
+ lock (this)
+ {
+ if (internalLockPrefix != null)
+ lockName = internalLockPrefix + "-" + lockName;
+ return new NativeFSLock(internalLockDir, lockName);
+ }
+ }
+
+ public override void ClearLock(System.String lockName)
+ {
+ // Note that this isn't strictly required anymore
+ // because the existence of these files does not mean
+ // they are locked, but we still do this in case people
+ // really want to see the files go away:
+ bool tmpBool;
+ if (System.IO.File.Exists(internalLockDir.FullName))
+ tmpBool = true;
+ else
+ tmpBool = System.IO.Directory.Exists(internalLockDir.FullName);
+ if (tmpBool)
+ {
+ if (internalLockPrefix != null)
+ {
+ lockName = internalLockPrefix + "-" + lockName;
+ }
+ System.IO.FileInfo lockFile = new System.IO.FileInfo(System.IO.Path.Combine(internalLockDir.FullName, lockName));
+ bool tmpBool2;
+ if (System.IO.File.Exists(lockFile.FullName))
+ tmpBool2 = true;
+ else
+ tmpBool2 = System.IO.Directory.Exists(lockFile.FullName);
+ bool tmpBool3;
+ if (System.IO.File.Exists(lockFile.FullName))
+ {
+ System.IO.File.Delete(lockFile.FullName);
+ tmpBool3 = true;
+ }
+ else if (System.IO.Directory.Exists(lockFile.FullName))
+ {
+ System.IO.Directory.Delete(lockFile.FullName);
+ tmpBool3 = true;
+ }
+ else
+ tmpBool3 = false;
+ if (tmpBool2 && !tmpBool3)
+ {
+ throw new System.IO.IOException("Cannot delete " + lockFile);
+ }
+ }
+ }
+ }
+
+
+ class NativeFSLock:Lock
+ {
+
+ private System.IO.FileStream f;
+ private System.IO.FileStream channel;
+ private bool lock_Renamed;
+ private System.IO.FileInfo path;
+ private System.IO.DirectoryInfo lockDir;
+
+ /*
+ * The javadocs for FileChannel state that you should have
+ * a single instance of a FileChannel (per JVM) for all
+ * locking against a given file. To ensure this, we have
+ * a single (static) HashSet that contains the file paths
+ * of all currently locked locks. This protects against
+ * possible cases where different Directory instances in
+ * one JVM (each with their own NativeFSLockFactory
+ * instance) have set the same lock dir and lock prefix.
+ */
+ private static HashSet<string> LOCK_HELD = new HashSet<string>();
+
+ public NativeFSLock(System.IO.DirectoryInfo lockDir, System.String lockFileName)
+ {
+ this.lockDir = lockDir;
+ path = new System.IO.FileInfo(System.IO.Path.Combine(lockDir.FullName, lockFileName));
+ }
+
+ private bool LockExists()
+ {
+ lock (this)
+ {
+ return lock_Renamed != false;
+ }
+ }
+
+ public override bool Obtain()
+ {
+ lock (this)
+ {
+
+ if (LockExists())
+ {
+ // Our instance is already locked:
+ return false;
+ }
+
+ // Ensure that lockDir exists and is a directory.
+ bool tmpBool;
+ if (System.IO.File.Exists(lockDir.FullName))
+ tmpBool = true;
+ else
+ tmpBool = System.IO.Directory.Exists(lockDir.FullName);
+ if (!tmpBool)
+ {
+ try
+ {
+ System.IO.Directory.CreateDirectory(lockDir.FullName);
+ }
+ catch
+ {
+ throw new System.IO.IOException("Cannot create directory: " + lockDir.FullName);
+ }
+ }
+ else if (!System.IO.Directory.Exists(lockDir.FullName))
+ {
+ throw new System.IO.IOException("Found regular file where directory expected: " + lockDir.FullName);
+ }
+
+ System.String canonicalPath = path.FullName;
+
+ bool markedHeld = false;
+
+ try
+ {
+
+ // Make sure nobody else in-process has this lock held
+ // already, and, mark it held if not:
+
+ lock (LOCK_HELD)
+ {
+ if (LOCK_HELD.Contains(canonicalPath))
+ {
+ // Someone else in this JVM already has the lock:
+ return false;
+ }
+ else
+ {
+ // This "reserves" the fact that we are the one
+ // thread trying to obtain this lock, so we own
+ // the only instance of a channel against this
+ // file:
+ LOCK_HELD.Add(canonicalPath);
+ markedHeld = true;
+ }
+ }
+
+ try
+ {
+ f = new System.IO.FileStream(path.FullName, System.IO.FileMode.OpenOrCreate, System.IO.FileAccess.ReadWrite);
+ }
+ catch (System.IO.IOException e)
+ {
+ // On Windows, we can get intermittent "Access
+ // Denied" here. So, we treat this as failure to
+ // acquire the lock, but, store the reason in case
+ // there is in fact a real error case.
+ failureReason = e;
+ f = null;
+ }
+ // lucene.net: UnauthorizedAccessException does not derive from IOException like in java
+ catch (System.UnauthorizedAccessException e)
+ {
+ // On Windows, we can get intermittent "Access
+ // Denied" here. So, we treat this as failure to
+ // acquire the lock, but, store the reason in case
+ // there is in fact a real error case.
+ failureReason = e;
+ f = null;
+ }
+
+ if (f != null)
+ {
+ try
+ {
+ channel = f;
+ lock_Renamed = false;
+ try
+ {
+ channel.Lock(0, channel.Length);
+ lock_Renamed = true;
+ }
+ catch (System.IO.IOException e)
+ {
+ // At least on OS X, we will sometimes get an
+ // intermittent "Permission Denied" IOException,
+ // which seems to simply mean "you failed to get
+ // the lock". But other IOExceptions could be
+ // "permanent" (eg, locking is not supported via
+ // the filesystem). So, we record the failure
+ // reason here; the timeout obtain (usually the
+ // one calling us) will use this as "root cause"
+ // if it fails to get the lock.
+ failureReason = e;
+ }
+ // lucene.net: UnauthorizedAccessException does not derive from IOException like in java
+ catch (System.UnauthorizedAccessException e)
+ {
+ // At least on OS X, we will sometimes get an
+ // intermittent "Permission Denied" IOException,
+ // which seems to simply mean "you failed to get
+ // the lock". But other IOExceptions could be
+ // "permanent" (eg, locking is not supported via
+ // the filesystem). So, we record the failure
+ // reason here; the timeout obtain (usually the
+ // one calling us) will use this as "root cause"
+ // if it fails to get the lock.
+ failureReason = e;
+ }
+ finally
+ {
+ if (lock_Renamed == false)
+ {
+ try
+ {
+ channel.Close();
+ }
+ finally
+ {
+ channel = null;
+ }
+ }
+ }
+ }
+ finally
+ {
+ if (channel == null)
+ {
+ try
+ {
+ f.Close();
+ }
+ finally
+ {
+ f = null;
+ }
+ }
+ }
+ }
+ }
+ finally
+ {
+ if (markedHeld && !LockExists())
+ {
+ lock (LOCK_HELD)
+ {
+ if (LOCK_HELD.Contains(canonicalPath))
+ {
+ LOCK_HELD.Remove(canonicalPath);
+ }
+ }
+ }
+ }
+ return LockExists();
+ }
+ }
+
+ public override void Release()
+ {
+ lock (this)
+ {
+ if (LockExists())
+ {
+ try
+ {
+ channel.Unlock(0, channel.Length);
+ }
+ finally
+ {
+ lock_Renamed = false;
+ try
+ {
+ channel.Close();
+ }
+ finally
+ {
+ channel = null;
+ try
+ {
+ f.Close();
+ }
+ finally
+ {
+ f = null;
+ lock (LOCK_HELD)
+ {
+ LOCK_HELD.Remove(path.FullName);
+ }
+ }
+ }
+ }
+ bool tmpBool;
+ if (System.IO.File.Exists(path.FullName))
+ {
+ System.IO.File.Delete(path.FullName);
+ tmpBool = true;
+ }
+ else if (System.IO.Directory.Exists(path.FullName))
+ {
+ System.IO.Directory.Delete(path.FullName);
+ tmpBool = true;
+ }
+ else
+ tmpBool = false;
+ if (!tmpBool)
+ throw new LockReleaseFailedException("failed to delete " + path);
+ }
+ }
+ }
+
+ public override bool IsLocked()
+ {
+ lock (this)
+ {
+ // The test for isLocked is not directly possible with native file locks:
+
+ // First a shortcut, if a lock reference in this instance is available
+ if (LockExists())
+ return true;
+
+ // Look if lock file is present; if not, there can definitely be no lock!
+ bool tmpBool;
+ if (System.IO.File.Exists(path.FullName))
+ tmpBool = true;
+ else
+ tmpBool = System.IO.Directory.Exists(path.FullName);
+ if (!tmpBool)
+ return false;
+
+ // Try to obtain and release (if was locked) the lock
+ try
+ {
+ bool obtained = Obtain();
+ if (obtained)
+ Release();
+ return !obtained;
+ }
+ catch (System.IO.IOException)
+ {
+ return false;
+ }
+ }
+ }
+
+ public override System.String ToString()
+ {
+ return "NativeFSLock@" + path;
+ }
+ }
+} \ No newline at end of file
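A sketch of how this factory is typically paired with an FSDirectory (Directory.MakeLock delegating to the lock factory, as in stock Lucene.Net; the path and lock name are placeholders):

using System.IO;
using Lucene.Net.Store;

class NativeLockExample
{
    static void Main()
    {
        // One factory instance per directory; the FSDirectory subclass sets the lock dir to the index dir.
        var dir = new SimpleFSDirectory(new DirectoryInfo("/tmp/index"), new NativeFSLockFactory());

        Lock writeLock = dir.MakeLock("write.lock");
        if (writeLock.Obtain())
        {
            try
            {
                // ... modify the index while holding the OS-level lock ...
            }
            finally
            {
                writeLock.Release(); // unlocks the FileStream and deletes the lock file
            }
        }

        dir.Dispose();
    }
}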
diff --git a/src/core/Store/NoLockFactory.cs b/src/core/Store/NoLockFactory.cs
new file mode 100644
index 0000000..4b5c5c5
--- /dev/null
+++ b/src/core/Store/NoLockFactory.cs
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Store
+{
+
+ /// <summary> Use this <see cref="LockFactory" /> to disable locking entirely.
+ /// Only one instance of this lock is created. You should call <see cref="Instance" />
+ /// to get the instance.
+ ///
+ /// </summary>
+ /// <seealso cref="LockFactory">
+ /// </seealso>
+
+ public class NoLockFactory : LockFactory
+ {
+
+ // Single instance returned whenever makeLock is called.
+ private static NoLock singletonLock = new NoLock();
+ private static NoLockFactory singleton = new NoLockFactory();
+
+ public static NoLockFactory Instance
+ {
+ get { return singleton; }
+ }
+
+ public override Lock MakeLock(System.String lockName)
+ {
+ return singletonLock;
+ }
+
+ public override void ClearLock(System.String lockName)
+ {
+ }
+
+ }
+
+
+ class NoLock:Lock
+ {
+ public override bool Obtain()
+ {
+ return true;
+ }
+
+ public override void Release()
+ {
+ }
+
+ public override bool IsLocked()
+ {
+ return false;
+ }
+
+ public override System.String ToString()
+ {
+ return "NoLock";
+ }
+ }
+} \ No newline at end of file
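When locking is handled entirely outside Lucene (for example a strictly read-only deployment), the singleton can be handed to any directory; a small sketch with a placeholder path:

using System.IO;
using Lucene.Net.Store;

class NoLockExample
{
    static void Main()
    {
        // Every MakeLock call returns the shared NoLock: Obtain() always succeeds and IsLocked() is always false.
        var dir = new SimpleFSDirectory(new DirectoryInfo("/srv/search/index"), NoLockFactory.Instance);
        dir.Dispose();
    }
}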
diff --git a/src/core/Store/NoSuchDirectoryException.cs b/src/core/Store/NoSuchDirectoryException.cs
new file mode 100644
index 0000000..c3b01ae
--- /dev/null
+++ b/src/core/Store/NoSuchDirectoryException.cs
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Store
+{
+
+ /// <summary> This exception is thrown when you try to list a
+ /// non-existent directory.
+ /// </summary>
+
+ [Serializable]
+ public class NoSuchDirectoryException:System.IO.FileNotFoundException
+ {
+ public NoSuchDirectoryException(System.String message):base(message)
+ {
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Store/RAMDirectory.cs b/src/core/Store/RAMDirectory.cs
new file mode 100644
index 0000000..c5c06e3
--- /dev/null
+++ b/src/core/Store/RAMDirectory.cs
@@ -0,0 +1,262 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Store
+{
+
+ /// <summary> A memory-resident <see cref="Directory"/> implementation. Locking
+ /// implementation is by default the <see cref="SingleInstanceLockFactory"/>
+ /// but can be changed with <see cref="Directory.SetLockFactory"/>.
+ /// </summary>
+ [Serializable]
+ public class RAMDirectory:Directory
+ {
+
+ private const long serialVersionUID = 1L;
+
+ internal protected HashMap<string, RAMFile> fileMap = new HashMap<string, RAMFile>();
+ internal protected long internalSizeInBytes = 0;
+
+ // *****
+ // Lock acquisition sequence: RAMDirectory, then RAMFile
+ // *****
+
+ /// <summary>Constructs an empty <see cref="Directory"/>. </summary>
+ public RAMDirectory()
+ {
+ SetLockFactory(new SingleInstanceLockFactory());
+ }
+
+ /// <summary> Creates a new <c>RAMDirectory</c> instance from a different
+ /// <c>Directory</c> implementation. This can be used to load
+ /// a disk-based index into memory.
+ /// <p/>
+ /// This should be used only with indices that can fit into memory.
+ /// <p/>
+ /// Note that the resulting <c>RAMDirectory</c> instance is fully
+ /// independent from the original <c>Directory</c> (it is a
+ /// complete copy). Any subsequent changes to the
+ /// original <c>Directory</c> will not be visible in the
+ /// <c>RAMDirectory</c> instance.
+ ///
+ /// </summary>
+ /// <param name="dir">a <c>Directory</c> value
+ /// </param>
+ /// <exception cref="System.IO.IOException">if an error occurs
+ /// </exception>
+ public RAMDirectory(Directory dir):this(dir, false)
+ {
+ }
+
+ private RAMDirectory(Directory dir, bool closeDir):this()
+ {
+ Directory.Copy(dir, this, closeDir);
+ }
+
+ //https://issues.apache.org/jira/browse/LUCENENET-174
+ [System.Runtime.Serialization.OnDeserialized]
+ void OnDeserialized(System.Runtime.Serialization.StreamingContext context)
+ {
+ if (interalLockFactory == null)
+ {
+ SetLockFactory(new SingleInstanceLockFactory());
+ }
+ }
+
+ public override System.String[] ListAll()
+ {
+ lock (this)
+ {
+ EnsureOpen();
+ // TODO: may have better performance if our HashMap implemented KeySet() instead of generating one via HashSet
+ System.Collections.Generic.ISet<string> fileNames = Support.Compatibility.SetFactory.CreateHashSet(fileMap.Keys);
+ System.String[] result = new System.String[fileNames.Count];
+ int i = 0;
+ foreach(string filename in fileNames)
+ {
+ result[i++] = filename;
+ }
+ return result;
+ }
+ }
+
+ /// <summary>Returns true iff the named file exists in this directory. </summary>
+ public override bool FileExists(System.String name)
+ {
+ EnsureOpen();
+ RAMFile file;
+ lock (this)
+ {
+ file = fileMap[name];
+ }
+ return file != null;
+ }
+
+ /// <summary>Returns the time the named file was last modified.</summary>
+ /// <throws> IOException if the file does not exist </throws>
+ public override long FileModified(System.String name)
+ {
+ EnsureOpen();
+ RAMFile file;
+ lock (this)
+ {
+ file = fileMap[name];
+ }
+ if (file == null)
+ throw new System.IO.FileNotFoundException(name);
+
+ // RAMOutputStream.Flush() was changed to use DateTime.UtcNow.
+ // Convert it back to local time before returning (previous behavior)
+ return new DateTime(file.LastModified*TimeSpan.TicksPerMillisecond, DateTimeKind.Utc).ToLocalTime().Ticks/
+ TimeSpan.TicksPerMillisecond;
+ }
+
+ /// <summary>Set the modified time of an existing file to now.</summary>
+ /// <throws> IOException if the file does not exist </throws>
+ public override void TouchFile(System.String name)
+ {
+ EnsureOpen();
+ RAMFile file;
+ lock (this)
+ {
+ file = fileMap[name];
+ }
+ if (file == null)
+ throw new System.IO.FileNotFoundException(name);
+
+ long ts2, ts1 = System.DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond;
+ do
+ {
+ try
+ {
+ System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 0 + 100 * 1));
+ }
+ catch (System.Threading.ThreadInterruptedException ie)
+ {
+ // In 3.0 we will change this to throw
+ // InterruptedException instead
+ ThreadClass.Current().Interrupt();
+ throw new System.SystemException(ie.Message, ie);
+ }
+ ts2 = System.DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond;
+ }
+ while (ts1 == ts2);
+
+ file.LastModified = ts2;
+ }
+
+ /// <summary>Returns the length in bytes of a file in the directory.</summary>
+ /// <throws> IOException if the file does not exist </throws>
+ public override long FileLength(System.String name)
+ {
+ EnsureOpen();
+ RAMFile file;
+ lock (this)
+ {
+ file = fileMap[name];
+ }
+ if (file == null)
+ throw new System.IO.FileNotFoundException(name);
+ return file.Length;
+ }
+
+ /// <summary>Return total size in bytes of all files in this
+ /// directory. This is currently quantized to
+ /// RAMOutputStream.BUFFER_SIZE.
+ /// </summary>
+ public long SizeInBytes()
+ {
+ lock (this)
+ {
+ EnsureOpen();
+ return internalSizeInBytes;
+ }
+ }
+
+ /// <summary>Removes an existing file in the directory.</summary>
+ /// <throws> IOException if the file does not exist </throws>
+ public override void DeleteFile(System.String name)
+ {
+ lock (this)
+ {
+ EnsureOpen();
+ RAMFile file = fileMap[name];
+ if (file != null)
+ {
+ fileMap.Remove(name);
+ file.directory = null;
+ internalSizeInBytes -= file.sizeInBytes;
+ }
+ else
+ throw new System.IO.FileNotFoundException(name);
+ }
+ }
+
+ /// <summary>Creates a new, empty file in the directory with the given name. Returns a stream writing this file. </summary>
+ public override IndexOutput CreateOutput(System.String name)
+ {
+ EnsureOpen();
+ RAMFile file = new RAMFile(this);
+ lock (this)
+ {
+ RAMFile existing = fileMap[name];
+ if (existing != null)
+ {
+ internalSizeInBytes -= existing.sizeInBytes;
+ existing.directory = null;
+ }
+ fileMap[name] = file;
+ }
+ return new RAMOutputStream(file);
+ }
+
+ /// <summary>Returns a stream reading an existing file. </summary>
+ public override IndexInput OpenInput(System.String name)
+ {
+ EnsureOpen();
+ RAMFile file;
+ lock (this)
+ {
+ file = fileMap[name];
+ }
+ if (file == null)
+ throw new System.IO.FileNotFoundException(name);
+ return new RAMInputStream(file);
+ }
+
+ /// <summary>Closes the store to future operations, releasing associated memory. </summary>
+ protected override void Dispose(bool disposing)
+ {
+ isOpen = false;
+ fileMap = null;
+ }
+
+ //public HashMap<string, RAMFile> fileMap_ForNUnit
+ //{
+ // get { return fileMap; }
+ //}
+
+ //public long sizeInBytes_ForNUnitTest
+ //{
+ // get { return sizeInBytes; }
+ // set { sizeInBytes = value; }
+ //}
+ }
+} \ No newline at end of file
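A sketch of the copy-constructor use case described in the summary, loading a disk-based index into memory (paths and file names are placeholders; Dispose is assumed to follow the stock Lucene.Net pattern):

using System.IO;
using Lucene.Net.Store;

class RAMDirectoryExample
{
    static void Main()
    {
        var onDisk = new SimpleFSDirectory(new DirectoryInfo("/tmp/index"), null);

        // Full, independent copy; later changes to onDisk are not visible in inMemory.
        var inMemory = new RAMDirectory(onDisk);
        onDisk.Dispose();

        bool hasSegments = inMemory.FileExists("segments.gen"); // placeholder file name
        long bytesUsed = inMemory.SizeInBytes();                 // quantized to RAMOutputStream.BUFFER_SIZE

        inMemory.Dispose();
    }
}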
diff --git a/src/core/Store/RAMFile.cs b/src/core/Store/RAMFile.cs
new file mode 100644
index 0000000..fd9daed
--- /dev/null
+++ b/src/core/Store/RAMFile.cs
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Store
+{
+
+ [Serializable]
+ public class RAMFile
+ {
+
+ private const long serialVersionUID = 1L;
+
+ protected System.Collections.Generic.List<byte[]> buffers = new System.Collections.Generic.List<byte[]>();
+ internal long length;
+ internal RAMDirectory directory;
+ internal long sizeInBytes;
+
+ // This is publicly modifiable via Directory.touchFile(), so direct access not supported
+ private long lastModified = (DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond);
+
+ // File used as a buffer, not attached to any RAMDirectory
+ public /*internal*/ RAMFile()
+ {
+ }
+
+ public /*internal*/ RAMFile(RAMDirectory directory)
+ {
+ this.directory = directory;
+ }
+
+ // For non-stream access from thread that might be concurrent with writing
+
+ internal virtual long Length
+ {
+ get
+ {
+ lock (this)
+ {
+ return length;
+ }
+ }
+ set
+ {
+ lock (this)
+ {
+ this.length = value;
+ }
+ }
+ }
+
+ // For non-stream access from thread that might be concurrent with writing
+
+ internal virtual long LastModified
+ {
+ get
+ {
+ lock (this)
+ {
+ return lastModified;
+ }
+ }
+ set
+ {
+ lock (this)
+ {
+ this.lastModified = value;
+ }
+ }
+ }
+
+ internal byte[] AddBuffer(int size)
+ {
+ byte[] buffer = NewBuffer(size);
+ lock (this)
+ {
+ buffers.Add(buffer);
+ sizeInBytes += size;
+ }
+
+ if (directory != null)
+ {
+ lock (directory) //{{DIGY}} what if directory gets null in the mean time?
+ {
+ directory.internalSizeInBytes += size;
+ }
+ }
+
+ return buffer;
+ }
+
+ public /*internal*/ byte[] GetBuffer(int index)
+ {
+ lock (this)
+ {
+ return buffers[index];
+ }
+ }
+
+ public /*internal*/ int NumBuffers()
+ {
+ lock (this)
+ {
+ return buffers.Count;
+ }
+ }
+
+ /// <summary> Expert: allocate a new buffer.
+ /// Subclasses can allocate differently.
+ /// </summary>
+ /// <param name="size">size of allocated buffer.
+ /// </param>
+ /// <returns> allocated buffer.
+ /// </returns>
+ public /*internal*/ virtual byte[] NewBuffer(int size)
+ {
+ return new byte[size];
+ }
+
+
+ public virtual long SizeInBytes
+ {
+ get
+ {
+ lock (this)
+ {
+ return sizeInBytes;
+ }
+ }
+ }
+ }
+} \ No newline at end of file
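NewBuffer is the documented extension point for custom allocation; a sketch of a subclass that merely counts allocations, to show the override shape (the counter is illustrative and not part of this class):

using System;
using Lucene.Net.Store;

[Serializable]
public class CountingRAMFile : RAMFile
{
    // Illustrative counter, not part of the original class.
    public int BuffersAllocated;

    public CountingRAMFile(RAMDirectory directory) : base(directory)
    {
    }

    public override byte[] NewBuffer(int size)
    {
        BuffersAllocated++;          // one call per BUFFER_SIZE-sized chunk added via AddBuffer
        return base.NewBuffer(size); // default behaviour: plain new byte[size]
    }
}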
diff --git a/src/core/Store/RAMInputStream.cs b/src/core/Store/RAMInputStream.cs
new file mode 100644
index 0000000..7e97f5f
--- /dev/null
+++ b/src/core/Store/RAMInputStream.cs
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Store
+{
+
+ /// <summary> A memory-resident <see cref="IndexInput" /> implementation.
+ ///
+ /// </summary>
+ public class RAMInputStream : IndexInput
+ {
+ internal static readonly int BUFFER_SIZE;
+
+ private RAMFile file;
+ private long length;
+
+ private byte[] currentBuffer;
+ private int currentBufferIndex;
+
+ private int bufferPosition;
+ private long bufferStart;
+ private int bufferLength;
+
+ public /*internal*/ RAMInputStream(RAMFile f)
+ {
+ file = f;
+ length = file.length;
+ if (length / BUFFER_SIZE >= System.Int32.MaxValue)
+ {
+ throw new System.IO.IOException("Too large RAMFile! " + length);
+ }
+
+ // make sure that we switch to the
+ // first needed buffer lazily
+ currentBufferIndex = - 1;
+ currentBuffer = null;
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ // do nothing
+ }
+
+ public override long Length()
+ {
+ return length;
+ }
+
+ public override byte ReadByte()
+ {
+ if (bufferPosition >= bufferLength)
+ {
+ currentBufferIndex++;
+ SwitchCurrentBuffer(true);
+ }
+ return currentBuffer[bufferPosition++];
+ }
+
+ public override void ReadBytes(byte[] b, int offset, int len)
+ {
+ while (len > 0)
+ {
+ if (bufferPosition >= bufferLength)
+ {
+ currentBufferIndex++;
+ SwitchCurrentBuffer(true);
+ }
+
+ int remainInBuffer = bufferLength - bufferPosition;
+ int bytesToCopy = len < remainInBuffer?len:remainInBuffer;
+ Array.Copy(currentBuffer, bufferPosition, b, offset, bytesToCopy);
+ offset += bytesToCopy;
+ len -= bytesToCopy;
+ bufferPosition += bytesToCopy;
+ }
+ }
+
+ private void SwitchCurrentBuffer(bool enforceEOF)
+ {
+ if (currentBufferIndex >= file.NumBuffers())
+ {
+ // end of file reached, no more buffers left
+ if (enforceEOF)
+ throw new System.IO.IOException("Read past EOF");
+ else
+ {
+ // Force EOF if a read takes place at this position
+ currentBufferIndex--;
+ bufferPosition = BUFFER_SIZE;
+ }
+ }
+ else
+ {
+ currentBuffer = file.GetBuffer(currentBufferIndex);
+ bufferPosition = 0;
+ bufferStart = (long) BUFFER_SIZE * (long) currentBufferIndex;
+ long buflen = length - bufferStart;
+ bufferLength = buflen > BUFFER_SIZE?BUFFER_SIZE:(int) buflen;
+ }
+ }
+
+ public override long FilePointer
+ {
+ get { return currentBufferIndex < 0 ? 0 : bufferStart + bufferPosition; }
+ }
+
+ public override void Seek(long pos)
+ {
+ if (currentBuffer == null || pos < bufferStart || pos >= bufferStart + BUFFER_SIZE)
+ {
+ currentBufferIndex = (int) (pos / BUFFER_SIZE);
+ SwitchCurrentBuffer(false);
+ }
+ bufferPosition = (int) (pos % BUFFER_SIZE);
+ }
+
+ static RAMInputStream()
+ {
+ BUFFER_SIZE = RAMOutputStream.BUFFER_SIZE;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Store/RAMOutputStream.cs b/src/core/Store/RAMOutputStream.cs
new file mode 100644
index 0000000..64e9165
--- /dev/null
+++ b/src/core/Store/RAMOutputStream.cs
@@ -0,0 +1,191 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Store
+{
+
+ /// <summary> A memory-resident <see cref="IndexOutput" /> implementation.
+ /// <para>For lucene internal use.</para>
+ /// </summary>
+ public class RAMOutputStream:IndexOutput
+ {
+ internal const int BUFFER_SIZE = 1024;
+
+ private RAMFile file;
+
+ private byte[] currentBuffer;
+ private int currentBufferIndex;
+
+ private bool isDisposed;
+
+ private int bufferPosition;
+ private long bufferStart;
+ private int bufferLength;
+
+ /// <summary>Construct an empty output buffer. </summary>
+ public RAMOutputStream():this(new RAMFile())
+ {
+ }
+
+ internal RAMOutputStream(RAMFile f)
+ {
+ file = f;
+
+ // make sure that we switch to the
+ // first needed buffer lazily
+ currentBufferIndex = - 1;
+ currentBuffer = null;
+ }
+
+ /// <summary>Copy the current contents of this buffer to the named output. </summary>
+ public virtual void WriteTo(IndexOutput out_Renamed)
+ {
+ Flush();
+ long end = file.length;
+ long pos = 0;
+ int buffer = 0;
+ while (pos < end)
+ {
+ int length = BUFFER_SIZE;
+ long nextPos = pos + length;
+ if (nextPos > end)
+ {
+ // at the last buffer
+ length = (int) (end - pos);
+ }
+ out_Renamed.WriteBytes(file.GetBuffer(buffer++), length);
+ pos = nextPos;
+ }
+ }
+
+ /// <summary>Resets this to an empty buffer. </summary>
+ public virtual void Reset()
+ {
+ currentBuffer = null;
+ currentBufferIndex = -1;
+ bufferPosition = 0;
+ bufferStart = 0;
+ bufferLength = 0;
+
+ file.Length = 0;
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ if (isDisposed) return;
+
+ if (disposing)
+ {
+ Flush();
+ }
+
+ isDisposed = true;
+ }
+
+ public override void Seek(long pos)
+ {
+ // set the file length in case we seek back
+ // and flush() has not been called yet
+ SetFileLength();
+ if (pos < bufferStart || pos >= bufferStart + bufferLength)
+ {
+ currentBufferIndex = (int) (pos / BUFFER_SIZE);
+ SwitchCurrentBuffer();
+ }
+
+ bufferPosition = (int) (pos % BUFFER_SIZE);
+ }
+
+ public override long Length
+ {
+ get { return file.length; }
+ }
+
+ public override void WriteByte(byte b)
+ {
+ if (bufferPosition == bufferLength)
+ {
+ currentBufferIndex++;
+ SwitchCurrentBuffer();
+ }
+ currentBuffer[bufferPosition++] = b;
+ }
+
+ public override void WriteBytes(byte[] b, int offset, int len)
+ {
+ System.Diagnostics.Debug.Assert(b != null);
+ while (len > 0)
+ {
+ if (bufferPosition == bufferLength)
+ {
+ currentBufferIndex++;
+ SwitchCurrentBuffer();
+ }
+
+ int remainInBuffer = currentBuffer.Length - bufferPosition;
+ int bytesToCopy = len < remainInBuffer?len:remainInBuffer;
+ Array.Copy(b, offset, currentBuffer, bufferPosition, bytesToCopy);
+ offset += bytesToCopy;
+ len -= bytesToCopy;
+ bufferPosition += bytesToCopy;
+ }
+ }
+
+ private void SwitchCurrentBuffer()
+ {
+ if (currentBufferIndex == file.NumBuffers())
+ {
+ currentBuffer = file.AddBuffer(BUFFER_SIZE);
+ }
+ else
+ {
+ currentBuffer = file.GetBuffer(currentBufferIndex);
+ }
+ bufferPosition = 0;
+ bufferStart = (long) BUFFER_SIZE * (long) currentBufferIndex;
+ bufferLength = currentBuffer.Length;
+ }
+
+ private void SetFileLength()
+ {
+ long pointer = bufferStart + bufferPosition;
+ if (pointer > file.length)
+ {
+ file.Length = pointer;
+ }
+ }
+
+ public override void Flush()
+ {
+ file.LastModified = (DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond);
+ SetFileLength();
+ }
+
+ public override long FilePointer
+ {
+ get { return currentBufferIndex < 0 ? 0 : bufferStart + bufferPosition; }
+ }
+
+ /// <summary>Returns byte usage of all buffers. </summary>
+ public virtual long SizeInBytes()
+ {
+ return file.NumBuffers() * BUFFER_SIZE;
+ }
+ }
+} \ No newline at end of file
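A round-trip sketch tying RAMOutputStream and RAMInputStream together through RAMDirectory (the file name is a placeholder; IndexOutput/IndexInput disposal is assumed to follow the Dispose pattern shown above):

using Lucene.Net.Store;

class RAMRoundTrip
{
    static void Main()
    {
        var dir = new RAMDirectory();
        byte[] payload = { 1, 2, 3, 4, 5 };

        using (IndexOutput output = dir.CreateOutput("demo.bin"))
        {
            output.WriteBytes(payload, 0, payload.Length);
            // Disposing flushes: LastModified is stamped and the final length is recorded on the backing RAMFile.
        }

        using (IndexInput input = dir.OpenInput("demo.bin"))
        {
            var read = new byte[payload.Length];
            input.ReadBytes(read, 0, read.Length);
        }

        // Storage is allocated in BUFFER_SIZE (1024-byte) chunks, so even 5 bytes occupy one full buffer.
        long quantized = dir.SizeInBytes();
        dir.Dispose();
    }
}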
diff --git a/src/core/Store/SimpleFSDirectory.cs b/src/core/Store/SimpleFSDirectory.cs
new file mode 100644
index 0000000..3eab359
--- /dev/null
+++ b/src/core/Store/SimpleFSDirectory.cs
@@ -0,0 +1,319 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Store
+{
+
+ /// <summary>A straightforward implementation of <see cref="FSDirectory" />
+ /// using java.io.RandomAccessFile. However, this class has
+ /// poor concurrent performance (multiple threads will
+ /// bottleneck) as it synchronizes when multiple threads
+ /// read from the same file. It's usually better to use
+ /// <see cref="NIOFSDirectory" /> or <see cref="MMapDirectory" /> instead.
+ /// </summary>
+ public class SimpleFSDirectory : FSDirectory
+ {
+ /// <summary>Create a new SimpleFSDirectory for the named location.
+ ///
+ /// </summary>
+ /// <param name="path">the path of the directory
+ /// </param>
+ /// <param name="lockFactory">the lock factory to use, or null for the default.
+ /// </param>
+ /// <throws> IOException </throws>
+ public SimpleFSDirectory(System.IO.DirectoryInfo path, LockFactory lockFactory)
+ : base(path, lockFactory)
+ {
+ }
+
+ /// <summary>Create a new SimpleFSDirectory for the named location and the default lock factory.
+ ///
+ /// </summary>
+ /// <param name="path">the path of the directory
+ /// </param>
+ /// <throws> IOException </throws>
+ public SimpleFSDirectory(System.IO.DirectoryInfo path) : base(path, null)
+ {
+ }
+
+ /// <summary>Creates an IndexOutput for the file with the given name. </summary>
+ public override IndexOutput CreateOutput(System.String name)
+ {
+ InitOutput(name);
+ return new SimpleFSIndexOutput(new System.IO.FileInfo(System.IO.Path.Combine(internalDirectory.FullName, name)));
+ }
+
+ /// <summary>Creates an IndexInput for the file with the given name. </summary>
+ public override IndexInput OpenInput(System.String name, int bufferSize)
+ {
+ EnsureOpen();
+
+ Exception e = null;
+ for (var i = 0; i < 10; i++)
+ {
+ try
+ {
+ return new SimpleFSIndexInput(new System.IO.FileInfo(
+ System.IO.Path.Combine(internalDirectory.FullName, name)), bufferSize, ReadChunkSize);
+ }
+ catch (System.UnauthorizedAccessException ex)
+ {
+ e = ex;
+ System.Threading.Thread.Sleep(1);
+ }
+ }
+
+ throw e;
+ }
+
+ protected internal class SimpleFSIndexInput : BufferedIndexInput
+ {
+ // TODO: This is a bad way to handle memory and disposing
+ protected internal class Descriptor : System.IO.BinaryReader
+ {
+ // remember if the file is open, so that we don't try to close it
+ // more than once
+ protected internal volatile bool isOpen;
+ internal long position;
+ internal long length;
+
+ private bool isDisposed;
+
+ public Descriptor(/*FSIndexInput enclosingInstance,*/ System.IO.FileInfo file, System.IO.FileAccess mode)
+ : base(new System.IO.FileStream(file.FullName, System.IO.FileMode.Open, mode, System.IO.FileShare.ReadWrite))
+ {
+ isOpen = true;
+ length = file.Length;
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ if (isDisposed) return;
+
+ if (disposing)
+ {
+ if (isOpen)
+ {
+ isOpen = false;
+ }
+ }
+
+ isDisposed = true;
+ base.Dispose(disposing);
+ }
+
+ ~Descriptor()
+ {
+ try
+ {
+ Dispose(false);
+ }
+ finally
+ {
+ }
+ }
+ }
+
+ protected internal Descriptor file;
+ internal bool isClone;
+ private bool isDisposed;
+ // LUCENE-1566 - maximum read length on a 32bit JVM to prevent incorrect OOM
+ protected internal int chunkSize;
+
+ public SimpleFSIndexInput(System.IO.FileInfo path, int bufferSize, int chunkSize)
+ : base(bufferSize)
+ {
+ file = new Descriptor(path, System.IO.FileAccess.Read);
+ this.chunkSize = chunkSize;
+ }
+
+ /// <summary>IndexInput methods </summary>
+ public override void ReadInternal(byte[] b, int offset, int len)
+ {
+ lock (file)
+ {
+ long position = FilePointer;
+ if (position != file.position)
+ {
+ file.BaseStream.Seek(position, System.IO.SeekOrigin.Begin);
+ file.position = position;
+ }
+ int total = 0;
+
+ try
+ {
+ do
+ {
+ int readLength;
+ if (total + chunkSize > len)
+ {
+ readLength = len - total;
+ }
+ else
+ {
+ // LUCENE-1566 - work around JVM Bug by breaking very large reads into chunks
+ readLength = chunkSize;
+ }
+ int i = file.Read(b, offset + total, readLength);
+ if (i == - 1)
+ {
+ throw new System.IO.IOException("read past EOF");
+ }
+ file.position += i;
+ total += i;
+ }
+ while (total < len);
+ }
+ catch (System.OutOfMemoryException e)
+ {
+ // propagate OOM up and add a hint for 32bit VM Users hitting the bug
+ // with a large chunk size in the fast path.
+ System.OutOfMemoryException outOfMemoryError = new System.OutOfMemoryException("OutOfMemoryError likely caused by the Sun VM Bug described in " + "https://issues.apache.org/jira/browse/LUCENE-1566; try calling FSDirectory.setReadChunkSize " + "with a value smaller than the current chunk size (" + chunkSize + ")", e);
+ throw outOfMemoryError;
+ }
+ }
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ if (isDisposed) return;
+ if (disposing)
+ {
+ // only close the file if this is not a clone
+ if (!isClone && file != null)
+ {
+ file.Close();
+ file = null;
+ }
+ }
+
+ isDisposed = true;
+ }
+
+ public override void SeekInternal(long position)
+ {
+ }
+
+ public override long Length()
+ {
+ return file.length;
+ }
+
+ public override System.Object Clone()
+ {
+ SimpleFSIndexInput clone = (SimpleFSIndexInput) base.Clone();
+ clone.isClone = true;
+ return clone;
+ }
+
+ /// <summary>Method used for testing. Returns true if the underlying
+ /// file descriptor is valid.
+ /// </summary>
+ public /*internal*/ virtual bool IsFDValid()
+ {
+ return file.BaseStream != null;
+ }
+
+ public bool isClone_ForNUnit
+ {
+ get { return isClone; }
+ }
+ }
+
+ /*protected internal*/ public class SimpleFSIndexOutput:BufferedIndexOutput
+ {
+ internal System.IO.FileStream file = null;
+
+ // remember if the file is open, so that we don't try to close it
+ // more than once
+ private volatile bool isOpen;
+
+ public SimpleFSIndexOutput(System.IO.FileInfo path)
+ {
+ file = new System.IO.FileStream(path.FullName, System.IO.FileMode.OpenOrCreate, System.IO.FileAccess.ReadWrite);
+ isOpen = true;
+ }
+
+ /// <summary>output methods: </summary>
+ public override void FlushBuffer(byte[] b, int offset, int size)
+ {
+ file.Write(b, offset, size);
+ // {{dougsale-2.4.0}}
+ // FSIndexOutput.Flush
+ // When writing frequently with small amounts of data, the data isn't flushed to disk.
+ // Thus, attempting to read the data soon after this method is invoked leads to
+ // BufferedIndexInput.Refill() throwing an IOException for reading past EOF.
+ // Test\Index\TestDoc.cs demonstrates such a situation.
+ // Forcing a flush here prevents said issue.
+ // {{DIGY 2.9.0}}
+ // This code is not available in Lucene.Java 2.9.X.
+ // Can there be an indexing-performance problem?
+ file.Flush();
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ // only close the file if it has not been closed yet
+ if (isOpen)
+ {
+ bool success = false;
+ try
+ {
+ base.Dispose(disposing);
+ success = true;
+ }
+ finally
+ {
+ isOpen = false;
+ if (!success)
+ {
+ try
+ {
+ file.Dispose();
+ }
+ catch (System.Exception)
+ {
+ // Suppress so we don't mask original exception
+ }
+ }
+ else
+ file.Dispose();
+ }
+ }
+ }
+
+ /// <summary>Random-access methods </summary>
+ public override void Seek(long pos)
+ {
+ base.Seek(pos);
+ file.Seek(pos, System.IO.SeekOrigin.Begin);
+ }
+
+ public override long Length
+ {
+ get { return file.Length; }
+ }
+
+ public override void SetLength(long length)
+ {
+ file.SetLength(length);
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Store/SimpleFSLockFactory.cs b/src/core/Store/SimpleFSLockFactory.cs
new file mode 100644
index 0000000..e7e284a
--- /dev/null
+++ b/src/core/Store/SimpleFSLockFactory.cs
@@ -0,0 +1,232 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Store
+{
+
+ /// <summary> <p/>Implements <see cref="LockFactory" /> using <see cref="System.IO.FileInfo.Create()" />
+ ///.<p/>
+ ///
+ /// <p/><b>NOTE:</b> the <a target="_top"
+ /// href="http://java.sun.com/j2se/1.4.2/docs/api/java/io/File.html#createNewFile()">javadocs
+ /// for <c>File.createNewFile</c></a> contain a vague
+ /// yet spooky warning about not using the API for file
+ /// locking. This warning was added due to <a target="_top"
+ /// href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4676183">this
+ /// bug</a>, and in fact the only known problem with using
+ /// this API for locking is that the Lucene write lock may
+ /// not be released when the JVM exits abnormally.<p/>
+ /// <p/>When this happens, a <see cref="LockObtainFailedException" />
+ /// is hit when trying to create a writer, in which case you
+ /// need to explicitly clear the lock file first. You can
+ /// either manually remove the file, or use the
+ /// <see cref="Lucene.Net.Index.IndexWriter.Unlock(Directory)" />
+ /// API. But, first be certain that no writer is in fact
+ /// writing to the index otherwise you can easily corrupt
+ /// your index.<p/>
+ ///
+ /// <p/>If you suspect that this or any other LockFactory is
+ /// not working properly in your environment, you can easily
+ /// test it by using <see cref="VerifyingLockFactory" />, <see cref="LockVerifyServer" />
+ /// and <see cref="LockStressTest" />.<p/>
+ ///
+ /// </summary>
+ /// <seealso cref="LockFactory">
+ /// </seealso>
+
+ public class SimpleFSLockFactory:FSLockFactory
+ {
+
+ /// <summary> Create a SimpleFSLockFactory instance, with null (unset)
+ /// lock directory. When you pass this factory to a <see cref="FSDirectory" />
+ /// subclass, the lock directory is automatically set to the
+ /// directory itself. Be sure to create one instance for each directory
+ /// you create!
+ /// </summary>
+ public SimpleFSLockFactory():this((System.IO.DirectoryInfo) null)
+ {
+ }
+
+ /// <summary> Instantiate using the provided directory (as a DirectoryInfo instance).</summary>
+ /// <param name="lockDir">where lock files should be created.
+ /// </param>
+ public SimpleFSLockFactory(System.IO.DirectoryInfo lockDir)
+ {
+ LockDir = lockDir;
+ }
+
+ /// <summary> Instantiate using the provided directory name (String).</summary>
+ /// <param name="lockDirName">where lock files should be created.
+ /// </param>
+ public SimpleFSLockFactory(System.String lockDirName)
+ : this(new System.IO.DirectoryInfo(lockDirName))
+ {
+ }
+
+ public override Lock MakeLock(System.String lockName)
+ {
+ if (internalLockPrefix != null)
+ {
+ lockName = internalLockPrefix + "-" + lockName;
+ }
+ return new SimpleFSLock(internalLockDir, lockName);
+ }
+
+ public override void ClearLock(System.String lockName)
+ {
+ bool tmpBool;
+ if (System.IO.File.Exists(internalLockDir.FullName))
+ tmpBool = true;
+ else
+ tmpBool = System.IO.Directory.Exists(internalLockDir.FullName);
+ if (tmpBool)
+ {
+ if (internalLockPrefix != null)
+ {
+ lockName = internalLockPrefix + "-" + lockName;
+ }
+ System.IO.FileInfo lockFile = new System.IO.FileInfo(System.IO.Path.Combine(internalLockDir.FullName, lockName));
+ bool tmpBool2;
+ if (System.IO.File.Exists(lockFile.FullName))
+ tmpBool2 = true;
+ else
+ tmpBool2 = System.IO.Directory.Exists(lockFile.FullName);
+ bool tmpBool3;
+ if (System.IO.File.Exists(lockFile.FullName))
+ {
+ System.IO.File.Delete(lockFile.FullName);
+ tmpBool3 = true;
+ }
+ else if (System.IO.Directory.Exists(lockFile.FullName))
+ {
+ System.IO.Directory.Delete(lockFile.FullName);
+ tmpBool3 = true;
+ }
+ else
+ tmpBool3 = false;
+ if (tmpBool2 && !tmpBool3)
+ {
+ throw new System.IO.IOException("Cannot delete " + lockFile);
+ }
+ }
+ }
+ }
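+
+ // Editorial usage sketch (not part of the original commit, illustrative only):
+ // one SimpleFSLockFactory per index directory, as the summary above requires.
+ // "indexPath" is a placeholder, and the FSDirectory.Open(DirectoryInfo, LockFactory)
+ // and IndexWriter.IsLocked/Unlock(Directory) overloads are assumed to exist in this codebase.
+ //
+ //   var dir = FSDirectory.Open(new System.IO.DirectoryInfo(indexPath), new SimpleFSLockFactory());
+ //   if (IndexWriter.IsLocked(dir))   // stale write.lock left by an abnormal exit
+ //       IndexWriter.Unlock(dir);     // only safe when no writer is actually running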
+
+
+ class SimpleFSLock:Lock
+ {
+
+ internal System.IO.FileInfo lockFile;
+ internal System.IO.DirectoryInfo lockDir;
+
+ [System.Obsolete("Use the constructor that takes a DirectoryInfo, this will be removed in the 3.0 release")]
+ public SimpleFSLock(System.IO.FileInfo lockDir, System.String lockFileName) : this(new System.IO.DirectoryInfo(lockDir.FullName), lockFileName)
+ {
+ }
+
+ public SimpleFSLock(System.IO.DirectoryInfo lockDir, System.String lockFileName)
+ {
+ this.lockDir = new System.IO.DirectoryInfo(lockDir.FullName);
+ lockFile = new System.IO.FileInfo(System.IO.Path.Combine(lockDir.FullName, lockFileName));
+ }
+
+ public override bool Obtain()
+ {
+
+ // Ensure that lockDir exists and is a directory:
+ bool tmpBool;
+ if (System.IO.File.Exists(lockDir.FullName))
+ tmpBool = true;
+ else
+ tmpBool = System.IO.Directory.Exists(lockDir.FullName);
+ if (!tmpBool)
+ {
+ try
+ {
+ System.IO.Directory.CreateDirectory(lockDir.FullName);
+ }
+ catch
+ {
+ throw new System.IO.IOException("Cannot create directory: " + lockDir.FullName);
+ }
+ }
+ else
+ {
+ // the path already exists; make sure it is a directory and not a regular file
+ if (!System.IO.Directory.Exists(lockDir.FullName))
+ {
+ throw new System.IO.IOException("Found regular file where directory expected: " + lockDir.FullName);
+ }
+ }
+
+ if (lockFile.Exists)
+ {
+ return false;
+ }
+ else
+ {
+ System.IO.FileStream createdFile = lockFile.Create();
+ createdFile.Close();
+ return true;
+ }
+ }
+
+ public override void Release()
+ {
+ bool tmpBool;
+ if (System.IO.File.Exists(lockFile.FullName))
+ tmpBool = true;
+ else
+ tmpBool = System.IO.Directory.Exists(lockFile.FullName);
+ bool tmpBool2;
+ if (System.IO.File.Exists(lockFile.FullName))
+ {
+ System.IO.File.Delete(lockFile.FullName);
+ tmpBool2 = true;
+ }
+ else if (System.IO.Directory.Exists(lockFile.FullName))
+ {
+ System.IO.Directory.Delete(lockFile.FullName);
+ tmpBool2 = true;
+ }
+ else
+ tmpBool2 = false;
+ if (tmpBool && !tmpBool2)
+ throw new LockReleaseFailedException("failed to delete " + lockFile);
+ }
+
+ public override bool IsLocked()
+ {
+ bool tmpBool;
+ if (System.IO.File.Exists(lockFile.FullName))
+ tmpBool = true;
+ else
+ tmpBool = System.IO.Directory.Exists(lockFile.FullName);
+ return tmpBool;
+ }
+
+ public override System.String ToString()
+ {
+ return "SimpleFSLock@" + lockFile;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Store/SingleInstanceLockFactory.cs b/src/core/Store/SingleInstanceLockFactory.cs
new file mode 100644
index 0000000..9b6828b
--- /dev/null
+++ b/src/core/Store/SingleInstanceLockFactory.cs
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Store
+{
+
+ /// <summary> Implements <see cref="LockFactory" /> for a single in-process instance,
+ /// meaning all locking will take place through this one instance.
+ /// Only use this <see cref="LockFactory" /> when you are certain all
+ /// IndexReaders and IndexWriters for a given index are running
+ /// against a single shared in-process Directory instance. This is
+ /// currently the default locking for RAMDirectory.
+ ///
+ /// </summary>
+ /// <seealso cref="LockFactory">
+ /// </seealso>
+
+ public class SingleInstanceLockFactory:LockFactory
+ {
+
+ private System.Collections.Generic.HashSet<string> locks = new System.Collections.Generic.HashSet<string>();
+
+ public override Lock MakeLock(System.String lockName)
+ {
+ // We do not use the LockPrefix at all, because the private
+ // HashSet instance effectively scopes the locking to this
+ // single Directory instance.
+ return new SingleInstanceLock(locks, lockName);
+ }
+
+ public override void ClearLock(System.String lockName)
+ {
+ lock (locks)
+ {
+ if (locks.Contains(lockName))
+ {
+ locks.Remove(lockName);
+ }
+ }
+ }
+ }
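+
+ // Editorial usage sketch (illustrative only): this factory is meant for a single
+ // in-process Directory, e.g. a RAMDirectory shared by all readers and writers.
+ // Directory.SetLockFactory is assumed to be the mutator exposed by this codebase.
+ //
+ //   var dir = new RAMDirectory();
+ //   dir.SetLockFactory(new SingleInstanceLockFactory());   // already the default for RAMDirectory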
+
+
+ class SingleInstanceLock:Lock
+ {
+
+ internal System.String lockName;
+ private System.Collections.Generic.HashSet<string> locks;
+
+ public SingleInstanceLock(System.Collections.Generic.HashSet<string> locks, System.String lockName)
+ {
+ this.locks = locks;
+ this.lockName = lockName;
+ }
+
+ public override bool Obtain()
+ {
+ lock (locks)
+ {
+ if (locks.Contains(lockName) == false)
+ {
+ locks.Add(lockName);
+ return true;
+ }
+
+ return false;
+ }
+ }
+
+ public override void Release()
+ {
+ lock (locks)
+ {
+ locks.Remove(lockName);
+ }
+ }
+
+ public override bool IsLocked()
+ {
+ lock (locks)
+ {
+ return locks.Contains(lockName);
+ }
+ }
+
+ public override System.String ToString()
+ {
+ return base.ToString() + ": " + lockName;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Store/VerifyingLockFactory.cs b/src/core/Store/VerifyingLockFactory.cs
new file mode 100644
index 0000000..24d52a1
--- /dev/null
+++ b/src/core/Store/VerifyingLockFactory.cs
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Store
+{
+
+ /// <summary> A <see cref="LockFactory" /> that wraps another <see cref="LockFactory" />
+ /// and verifies that each lock obtain/release
+ /// is "correct" (never results in two processes holding the
+ /// lock at the same time). It does this by contacting an
+ /// external server (<see cref="LockVerifyServer" />) to assert that
+ /// at most one process holds the lock at a time. To use
+ /// this, you should also run <see cref="LockVerifyServer" /> on the
+ /// host &amp; port matching what you pass to the constructor.
+ ///
+ /// </summary>
+ /// <seealso cref="LockVerifyServer">
+ /// </seealso>
+ /// <seealso cref="LockStressTest">
+ /// </seealso>
+
+ public class VerifyingLockFactory:LockFactory
+ {
+
+ internal LockFactory lf;
+ internal sbyte id;
+ internal System.String host;
+ internal int port;
+
+ private class CheckedLock:Lock
+ {
+ private void InitBlock(VerifyingLockFactory enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private VerifyingLockFactory enclosingInstance;
+ public VerifyingLockFactory Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ private Lock lock_Renamed;
+
+ public CheckedLock(VerifyingLockFactory enclosingInstance, Lock lock_Renamed)
+ {
+ InitBlock(enclosingInstance);
+ this.lock_Renamed = lock_Renamed;
+ }
+
+ private void Verify(sbyte message)
+ {
+ try
+ {
+ System.Net.Sockets.TcpClient s = new System.Net.Sockets.TcpClient(Enclosing_Instance.host, Enclosing_Instance.port);
+ System.IO.Stream out_Renamed = s.GetStream();
+ out_Renamed.WriteByte((byte) Enclosing_Instance.id);
+ out_Renamed.WriteByte((byte) message);
+ System.IO.Stream in_Renamed = s.GetStream();
+ int result = in_Renamed.ReadByte();
+ in_Renamed.Close();
+ out_Renamed.Close();
+ s.Close();
+ if (result != 0)
+ throw new System.SystemException("lock was double acquired");
+ }
+ catch (System.Exception e)
+ {
+ throw new System.SystemException(e.Message, e);
+ }
+ }
+
+ public override bool Obtain(long lockWaitTimeout)
+ {
+ lock (this)
+ {
+ bool obtained = lock_Renamed.Obtain(lockWaitTimeout);
+ if (obtained)
+ Verify((sbyte) 1);
+ return obtained;
+ }
+ }
+
+ public override bool Obtain()
+ {
+ lock (this)
+ {
+ return lock_Renamed.Obtain();
+ }
+ }
+
+ public override bool IsLocked()
+ {
+ lock (this)
+ {
+ return lock_Renamed.IsLocked();
+ }
+ }
+
+ public override void Release()
+ {
+ lock (this)
+ {
+ if (IsLocked())
+ {
+ Verify((sbyte) 0);
+ lock_Renamed.Release();
+ }
+ }
+ }
+ }
+
+ /// <param name="id">should be a unique id across all clients
+ /// </param>
+ /// <param name="lf">the LockFactory that we are testing
+ /// </param>
+ /// <param name="host">host or IP where <see cref="LockVerifyServer" />
+ /// is running
+ /// </param>
+ /// <param name="port">the port <see cref="LockVerifyServer" /> is
+ /// listening on
+ /// </param>
+ public VerifyingLockFactory(sbyte id, LockFactory lf, System.String host, int port)
+ {
+ this.id = id;
+ this.lf = lf;
+ this.host = host;
+ this.port = port;
+ }
+
+ public override Lock MakeLock(System.String lockName)
+ {
+ lock (this)
+ {
+ return new CheckedLock(this, lf.MakeLock(lockName));
+ }
+ }
+
+ public override void ClearLock(System.String lockName)
+ {
+ lock (this)
+ {
+ lf.ClearLock(lockName);
+ }
+ }
+ }
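+
+ // Editorial usage sketch (illustrative only): wrap the factory under test and point
+ // it at a LockVerifyServer already running on the given host/port; the id, host and
+ // port values below are placeholders.
+ //
+ //   LockFactory verified = new VerifyingLockFactory((sbyte) 1, new SimpleFSLockFactory(), "localhost", 8001);
+ //   Lock writeLock = verified.MakeLock("write.lock");
+ //   if (writeLock.Obtain())
+ //   {
+ //       try { /* ... critical section ... */ } finally { writeLock.Release(); }
+ //   }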
+} \ No newline at end of file
diff --git a/src/core/Support/AppSettings.cs b/src/core/Support/AppSettings.cs
new file mode 100644
index 0000000..a5e95cd
--- /dev/null
+++ b/src/core/Support/AppSettings.cs
@@ -0,0 +1,159 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+using System.Configuration;
+
+namespace Lucene.Net.Support
+{
+ /// <summary>
+ /// Provides typed access to application settings: values set programmatically
+ /// take precedence, otherwise the key is read from the appSettings section of
+ /// the application configuration file and cached.
+ /// </summary>
+ public class AppSettings
+ {
+ static System.Collections.Specialized.ListDictionary settings = new System.Collections.Specialized.ListDictionary();
+
+ /// <summary>
+ ///
+ /// </summary>
+ /// <param name="key"></param>
+ /// <param name="defValue"></param>
+ public static void Set(System.String key, int defValue)
+ {
+ settings[key] = defValue;
+ }
+
+ /// <summary>
+ ///
+ /// </summary>
+ /// <param name="key"></param>
+ /// <param name="defValue"></param>
+ public static void Set(System.String key, long defValue)
+ {
+ settings[key] = defValue;
+ }
+
+ /// <summary>
+ ///
+ /// </summary>
+ /// <param name="key"></param>
+ /// <param name="defValue"></param>
+ public static void Set(System.String key, System.String defValue)
+ {
+ settings[key] = defValue;
+ }
+
+ /// <summary>
+ ///
+ /// </summary>
+ /// <param name="key"></param>
+ /// <param name="defValue"></param>
+ public static void Set(System.String key, bool defValue)
+ {
+ settings[key] = defValue;
+ }
+
+ /// <summary>
+ /// Gets an integer setting: a value set programmatically takes precedence,
+ /// otherwise the key is read from the application configuration and cached.
+ /// </summary>
+ /// <param name="key">The setting name.</param>
+ /// <param name="defValue">The value returned when the key cannot be found.</param>
+ /// <returns>The resolved setting value, or <paramref name="defValue"/>.</returns>
+ public static int Get(System.String key, int defValue)
+ {
+ if (settings[key] != null)
+ {
+ return (int)settings[key];
+ }
+
+ System.String theValue = ConfigurationManager.AppSettings.Get(key);
+ if (theValue == null)
+ {
+ return defValue;
+ }
+ int retValue = Convert.ToInt32(theValue.Trim());
+ settings[key] = retValue;
+ return retValue;
+ }
+
+ /// <summary>
+ ///
+ /// </summary>
+ /// <param name="key"></param>
+ /// <param name="defValue"></param>
+ /// <returns></returns>
+ public static long Get(System.String key, long defValue)
+ {
+ if (settings[key] != null)
+ {
+ return (long)settings[key];
+ }
+
+ System.String theValue = ConfigurationManager.AppSettings.Get(key);
+ if (theValue == null)
+ {
+ return defValue;
+ }
+ long retValue = Convert.ToInt64(theValue.Trim());
+ settings[key] = retValue;
+ return retValue;
+ }
+
+ /// <summary>
+ ///
+ /// </summary>
+ /// <param name="key"></param>
+ /// <param name="defValue"></param>
+ /// <returns></returns>
+ public static System.String Get(System.String key, System.String defValue)
+ {
+ if (settings[key] != null)
+ {
+ return (System.String)settings[key];
+ }
+
+ System.String theValue = ConfigurationManager.AppSettings.Get(key);
+ if (theValue == null)
+ {
+ return defValue;
+ }
+ settings[key] = theValue;
+ return theValue;
+ }
+
+ public static bool Get(System.String key, bool defValue)
+ {
+ if (settings[key] != null)
+ {
+ return (bool)settings[key];
+ }
+
+ System.String theValue = ConfigurationManager.AppSettings.Get(key);
+ if (theValue == null)
+ {
+ return defValue;
+ }
+ bool retValue = Convert.ToBoolean(theValue.Trim());
+ settings[key] = retValue;
+ return retValue;
+ }
+ }
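+
+ // Editorial usage sketch (illustrative only): a programmatic value shadows the
+ // application configuration; otherwise the key is read once from appSettings and
+ // cached. The key name below is a placeholder.
+ //
+ //   AppSettings.Set("Lucene.Net.ReadBufferSize", 4096);
+ //   int bufferSize = AppSettings.Get("Lucene.Net.ReadBufferSize", 1024);   // returns 4096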
+}
diff --git a/src/core/Support/AttributeImplItem.cs b/src/core/Support/AttributeImplItem.cs
new file mode 100644
index 0000000..f0c4c5a
--- /dev/null
+++ b/src/core/Support/AttributeImplItem.cs
@@ -0,0 +1,41 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+
+namespace Lucene.Net.Support
+{
+ /// <summary>
+ /// A simple wrapper to allow for the use of the GeneralKeyedCollection. The
+ /// wrapper is required as there can be several keys for an object depending
+ /// on how many interfaces it implements.
+ /// </summary>
+ internal sealed class AttributeImplItem
+ {
+ internal AttributeImplItem(Type key, Util.Attribute value)
+ {
+ this.Key = key;
+ this.Value = value;
+ }
+ internal Type Key;
+ internal Util.Attribute Value;
+ }
+}
diff --git a/src/core/Support/BitSetSupport.cs b/src/core/Support/BitSetSupport.cs
new file mode 100644
index 0000000..751a15e
--- /dev/null
+++ b/src/core/Support/BitSetSupport.cs
@@ -0,0 +1,88 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+namespace Lucene.Net.Support
+{
+ /// <summary>
+ /// This class provides supporting methods of java.util.BitSet
+ /// that are not present in System.Collections.BitArray.
+ /// </summary>
+ public class BitSetSupport
+ {
+ /// <summary>
+ /// Returns the next set bit at or after index, or -1 if no such bit exists.
+ /// </summary>
+ /// <param name="bitArray"></param>
+ /// <param name="index">the index of bit array at which to start checking</param>
+ /// <returns>the next set bit or -1</returns>
+ public static int NextSetBit(System.Collections.BitArray bitArray, int index)
+ {
+ while (index < bitArray.Length)
+ {
+ // if index bit is set, return it
+ // otherwise check next index bit
+ if (bitArray.Get(index))
+ return index;
+ else
+ index++;
+ }
+ // if no bits are set at or after index, return -1
+ return -1;
+ }
+
+ /// <summary>
+ /// Returns the next un-set bit at or after index, or -1 if no such bit exists.
+ /// </summary>
+ /// <param name="bitArray"></param>
+ /// <param name="index">the index of bit array at which to start checking</param>
+ /// <returns>the next clear bit or -1</returns>
+ public static int NextClearBit(System.Collections.BitArray bitArray, int index)
+ {
+ while (index < bitArray.Length)
+ {
+ // if index bit is not set, return it
+ // otherwise check next index bit
+ if (!bitArray.Get(index))
+ return index;
+ else
+ index++;
+ }
+ // if no clear bits exist at or after index, return -1
+ return -1;
+ }
+
+ /// <summary>
+ /// Returns the number of bits set to true in this BitSet.
+ /// </summary>
+ /// <param name="bits">The BitArray object.</param>
+ /// <returns>The number of bits set to true in this BitSet.</returns>
+ public static int Cardinality(System.Collections.BitArray bits)
+ {
+ int count = 0;
+ for (int i = 0; i < bits.Count; i++)
+ {
+ if (bits[i])
+ count++;
+ }
+ return count;
+ }
+ }
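+
+ // Editorial usage sketch (illustrative only): iterating every set bit of a
+ // System.Collections.BitArray, mirroring the java.util.BitSet idiom.
+ //
+ //   var bits = new System.Collections.BitArray(64);
+ //   bits.Set(3, true); bits.Set(17, true);
+ //   for (int i = BitSetSupport.NextSetBit(bits, 0); i >= 0; i = BitSetSupport.NextSetBit(bits, i + 1))
+ //       System.Console.WriteLine(i);   // prints 3, then 17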
+}
diff --git a/src/core/Support/BuildType.cs b/src/core/Support/BuildType.cs
new file mode 100644
index 0000000..1a84245
--- /dev/null
+++ b/src/core/Support/BuildType.cs
@@ -0,0 +1,32 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+namespace Lucene.Net.Support
+{
+ public class BuildType
+ {
+#if DEBUG
+ public static bool Debug = true;
+#else
+ public static bool Debug = false;
+#endif
+ }
+}
diff --git a/src/core/Support/CRC32.cs b/src/core/Support/CRC32.cs
new file mode 100644
index 0000000..d1efde2
--- /dev/null
+++ b/src/core/Support/CRC32.cs
@@ -0,0 +1,83 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+
+namespace Lucene.Net.Support
+{
+ public class CRC32 : IChecksum
+ {
+ private static readonly UInt32[] crcTable = InitializeCRCTable();
+
+ private static UInt32[] InitializeCRCTable()
+ {
+ UInt32[] crcTable = new UInt32[256];
+ for (UInt32 n = 0; n < 256; n++)
+ {
+ UInt32 c = n;
+ for (int k = 8; --k >= 0; )
+ {
+ if ((c & 1) != 0)
+ c = 0xedb88320 ^ (c >> 1);
+ else
+ c = c >> 1;
+ }
+ crcTable[n] = c;
+ }
+ return crcTable;
+ }
+
+ private UInt32 crc = 0;
+
+ public long Value
+ {
+ get
+ {
+ return crc & 0xffffffffL;
+ }
+ }
+
+ public void Reset()
+ {
+ crc = 0;
+ }
+
+ public void Update(int bval)
+ {
+ UInt32 c = ~crc;
+ c = crcTable[(c ^ bval) & 0xff] ^ (c >> 8);
+ crc = ~c;
+ }
+
+ public void Update(byte[] buf, int off, int len)
+ {
+ UInt32 c = ~crc;
+ while (--len >= 0)
+ c = crcTable[(c ^ buf[off++]) & 0xff] ^ (c >> 8);
+ crc = ~c;
+ }
+
+ public void Update(byte[] buf)
+ {
+ Update(buf, 0, buf.Length);
+ }
+ }
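+
+ // Editorial usage sketch (illustrative only): CRC32 mirrors java.util.zip.CRC32
+ // through the IChecksum interface.
+ //
+ //   var crc = new CRC32();
+ //   crc.Update(System.Text.Encoding.UTF8.GetBytes("lucene"));
+ //   long checksum = crc.Value;   // 32-bit CRC held in the low bits of a long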
+}
diff --git a/src/core/Support/Character.cs b/src/core/Support/Character.cs
new file mode 100644
index 0000000..a306405
--- /dev/null
+++ b/src/core/Support/Character.cs
@@ -0,0 +1,81 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+namespace Lucene.Net.Support
+{
+ /// <summary>
+ /// Mimics Java's Character class.
+ /// </summary>
+ public class Character
+ {
+ private const char charNull = '\0';
+ private const char charZero = '0';
+ private const char charA = 'a';
+
+ /// <summary>
+ /// The maximum radix (36) available for conversion to and from strings, mirroring java.lang.Character.MAX_RADIX.
+ /// </summary>
+ public static int MAX_RADIX
+ {
+ get
+ {
+ return 36;
+ }
+ }
+
+ /// <summary>
+ /// The minimum radix (2) available for conversion to and from strings, mirroring java.lang.Character.MIN_RADIX.
+ /// </summary>
+ public static int MIN_RADIX
+ {
+ get
+ {
+ return 2;
+ }
+ }
+
+ /// <summary>
+ /// Determines the character representation of the given digit in the given radix,
+ /// mirroring java.lang.Character.forDigit.
+ /// </summary>
+ /// <param name="digit">The digit to convert.</param>
+ /// <param name="radix">The radix of the conversion.</param>
+ /// <returns>The character for the digit, or the null character if the digit or radix is out of range.</returns>
+ public static char ForDigit(int digit, int radix)
+ {
+ // if radix or digit is out of range,
+ // return the null character.
+ if (radix < Character.MIN_RADIX)
+ return charNull;
+ if (radix > Character.MAX_RADIX)
+ return charNull;
+ if (digit < 0)
+ return charNull;
+ if (digit >= radix)
+ return charNull;
+
+ // if digit is less than 10,
+ // return '0' plus digit
+ if (digit < 10)
+ return (char)((int)charZero + digit);
+
+ // otherwise, return 'a' plus digit.
+ return (char)((int)charA + digit - 10);
+ }
+ }
+}
diff --git a/src/core/Support/CloseableThreadLocalProfiler.cs b/src/core/Support/CloseableThreadLocalProfiler.cs
new file mode 100644
index 0000000..b67a4b8
--- /dev/null
+++ b/src/core/Support/CloseableThreadLocalProfiler.cs
@@ -0,0 +1,45 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+
+namespace Lucene.Net.Support
+{
+ /// <summary>
+ /// For debugging purposes.
+ /// </summary>
+ public class CloseableThreadLocalProfiler
+ {
+ private static bool _enableCloseableThreadLocalProfiler = false;
+ public static System.Collections.Generic.List<WeakReference> Instances = new System.Collections.Generic.List<WeakReference>();
+
+ public static bool EnableCloseableThreadLocalProfiler
+ {
+ get { return _enableCloseableThreadLocalProfiler; }
+ set
+ {
+ _enableCloseableThreadLocalProfiler = value;
+ lock (Instances)
+ Instances.Clear();
+ }
+ }
+ }
+}
diff --git a/src/core/Support/CollectionsHelper.cs b/src/core/Support/CollectionsHelper.cs
new file mode 100644
index 0000000..74a07ab
--- /dev/null
+++ b/src/core/Support/CollectionsHelper.cs
@@ -0,0 +1,339 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+using System.Collections;
+
+namespace Lucene.Net.Support
+{
+ /// <summary>
+ /// Support class for collection operations, such as Hashtable addition with a
+ /// uniqueness check, array fill/compare helpers, and collection-to-string conversion.
+ /// </summary>
+ public class CollectionsHelper
+ {
+ public static void Add(System.Collections.Hashtable hashtable, System.Object item)
+ {
+ hashtable.Add(item, item);
+ }
+
+ public static void AddIfNotContains(System.Collections.Hashtable hashtable, System.Object item)
+ {
+ // Added lock around check. Even though the collection should already have
+ // a synchronized wrapper around it, it doesn't prevent this test from having
+ // race conditions. Two threads can (and have in TestIndexReaderReopen) call
+ // hashtable.Contains(item) == false at the same time, then both try to add to
+ // the hashtable, causing an ArgumentException. locking on the collection
+ // prevents this. -- cc
+ lock (hashtable)
+ {
+ if (hashtable.Contains(item) == false)
+ {
+ hashtable.Add(item, item);
+ }
+ }
+ }
+
+ public static void AddIfNotContains(System.Collections.ArrayList hashtable, System.Object item)
+ {
+ // see AddIfNotContains(Hashtable, object) for information about the lock
+ lock (hashtable)
+ {
+ if (hashtable.Contains(item) == false)
+ {
+ hashtable.Add(item);
+ }
+ }
+ }
+
+ public static void AddAll(System.Collections.Hashtable hashtable, System.Collections.ICollection items)
+ {
+ System.Collections.IEnumerator iter = items.GetEnumerator();
+ System.Object item;
+ while (iter.MoveNext())
+ {
+ item = iter.Current;
+ hashtable.Add(item, item);
+ }
+ }
+
+ public static void AddAllIfNotContains(System.Collections.Hashtable hashtable, System.Collections.IList items)
+ {
+ System.Object item;
+ for (int i = 0; i < items.Count; i++)
+ {
+ item = items[i];
+ if (hashtable.Contains(item) == false)
+ {
+ hashtable.Add(item, item);
+ }
+ }
+ }
+
+ public static void AddAllIfNotContains(System.Collections.Hashtable hashtable, System.Collections.ICollection items)
+ {
+ System.Collections.IEnumerator iter = items.GetEnumerator();
+ System.Object item;
+ while (iter.MoveNext())
+ {
+ item = iter.Current;
+ if (hashtable.Contains(item) == false)
+ {
+ hashtable.Add(item, item);
+ }
+ }
+ }
+
+ public static void AddAllIfNotContains(System.Collections.Generic.IDictionary<string, string> hashtable, System.Collections.Generic.ICollection<string> items)
+ {
+ foreach (string s in items)
+ {
+ if (hashtable.ContainsKey(s) == false)
+ {
+ hashtable.Add(s, s);
+ }
+ }
+ }
+
+ public static void AddAll(System.Collections.Generic.IDictionary<string, string> hashtable, System.Collections.Generic.ICollection<string> items)
+ {
+ foreach (string s in items)
+ {
+ hashtable.Add(s, s);
+ }
+ }
+
+ public static bool Contains(System.Collections.Generic.ICollection<string> col, string item)
+ {
+ foreach (string s in col) if (s == item) return true;
+ return false;
+ }
+
+ public static bool Contains(System.Collections.ICollection col, System.Object item)
+ {
+ System.Collections.IEnumerator iter = col.GetEnumerator();
+ while (iter.MoveNext())
+ {
+ if (iter.Current.Equals(item))
+ return true;
+ }
+ return false;
+ }
+
+
+ public static System.String CollectionToString(System.Collections.Generic.IDictionary<string, string> c)
+ {
+ Hashtable t = new Hashtable();
+ foreach (string key in c.Keys)
+ {
+ t.Add(key, c[key]);
+ }
+ return CollectionToString(t);
+ }
+
+ /// <summary>
+ /// Converts the specified collection to its string representation.
+ /// </summary>
+ /// <param name="c">The collection to convert to string.</param>
+ /// <returns>A string representation of the specified collection.</returns>
+ public static System.String CollectionToString(System.Collections.ICollection c)
+ {
+ System.Text.StringBuilder s = new System.Text.StringBuilder();
+
+ if (c != null)
+ {
+
+ System.Collections.ArrayList l = new System.Collections.ArrayList(c);
+
+ bool isDictionary = (c is System.Collections.BitArray || c is System.Collections.Hashtable || c is System.Collections.IDictionary || c is System.Collections.Specialized.NameValueCollection || (l.Count > 0 && l[0] is System.Collections.DictionaryEntry));
+ for (int index = 0; index < l.Count; index++)
+ {
+ if (l[index] == null)
+ s.Append("null");
+ else if (!isDictionary)
+ s.Append(l[index]);
+ else
+ {
+ isDictionary = true;
+ if (c is System.Collections.Specialized.NameValueCollection)
+ s.Append(((System.Collections.Specialized.NameValueCollection)c).GetKey(index));
+ else
+ s.Append(((System.Collections.DictionaryEntry)l[index]).Key);
+ s.Append("=");
+ if (c is System.Collections.Specialized.NameValueCollection)
+ s.Append(((System.Collections.Specialized.NameValueCollection)c).GetValues(index)[0]);
+ else
+ s.Append(((System.Collections.DictionaryEntry)l[index]).Value);
+
+ }
+ if (index < l.Count - 1)
+ s.Append(", ");
+ }
+
+ if (isDictionary)
+ {
+ if (c is System.Collections.ArrayList)
+ isDictionary = false;
+ }
+ if (isDictionary)
+ {
+ s.Insert(0, "{");
+ s.Append("}");
+ }
+ else
+ {
+ s.Insert(0, "[");
+ s.Append("]");
+ }
+ }
+ else
+ s.Insert(0, "null");
+ return s.ToString();
+ }
+
+ /// <summary>
+ /// Compares two string arrays for equality.
+ /// </summary>
+ /// <param name="l1">First string array list to compare</param>
+ /// <param name="l2">Second string array list to compare</param>
+ /// <returns>true if the strings are equal in both arrays, false otherwise</returns>
+ public static bool CompareStringArrays(System.String[] l1, System.String[] l2)
+ {
+ if (l1.Length != l2.Length)
+ return false;
+ for (int i = 0; i < l1.Length; i++)
+ {
+ if (l1[i] != l2[i])
+ return false;
+ }
+ return true;
+ }
+
+ /// <summary>
+ /// Sorts an IList collections
+ /// </summary>
+ /// <param name="list">The System.Collections.IList instance that will be sorted</param>
+ /// <param name="Comparator">The Comparator criteria, null to use natural comparator.</param>
+ public static void Sort(System.Collections.IList list, System.Collections.IComparer Comparator)
+ {
+ if (((System.Collections.ArrayList)list).IsReadOnly)
+ throw new System.NotSupportedException();
+
+ if ((Comparator == null) || (Comparator is System.Collections.Comparer))
+ {
+ try
+ {
+ ((System.Collections.ArrayList)list).Sort();
+ }
+ catch (System.InvalidOperationException e)
+ {
+ throw new System.InvalidCastException(e.Message);
+ }
+ }
+ else
+ {
+ try
+ {
+ ((System.Collections.ArrayList)list).Sort(Comparator);
+ }
+ catch (System.InvalidOperationException e)
+ {
+ throw new System.InvalidCastException(e.Message);
+ }
+ }
+ }
+
+ /// <summary>
+ /// Fills a range of the array with a specific value.
+ /// </summary>
+ /// <param name="array">The array to be filled.</param>
+ /// <param name="fromindex">The first index to be filled.</param>
+ /// <param name="toindex">The last index to be filled.</param>
+ /// <param name="val">The value to fill the array with.</param>
+ public static void Fill(System.Array array, System.Int32 fromindex, System.Int32 toindex, System.Object val)
+ {
+ System.Object Temp_Object = val;
+ System.Type elementtype = array.GetType().GetElementType();
+ if (elementtype != val.GetType())
+ Temp_Object = Convert.ChangeType(val, elementtype);
+ if (array.Length == 0)
+ throw (new System.NullReferenceException());
+ if (fromindex > toindex)
+ throw (new System.ArgumentException());
+ if ((fromindex < 0) || ((System.Array)array).Length < toindex)
+ throw (new System.IndexOutOfRangeException());
+ for (int index = fromindex; index < toindex; index++)
+ array.SetValue(Temp_Object, index);
+ }
+
+
+ /// <summary>
+ /// Fills the array with a specific value.
+ /// </summary>
+ /// <param name="array">The array to be filled.</param>
+ /// <param name="val">The value to fill the array with.</param>
+ public static void Fill(System.Array array, System.Object val)
+ {
+ Fill(array, 0, array.Length, val);
+ }
+
+ /// <summary>
+ /// Compares all members of one array with those of another.
+ /// </summary>
+ /// <param name="array1">The array to be compared.</param>
+ /// <param name="array2">The array to be compared with.</param>
+ /// <returns>Returns true if the two specified arrays of Objects are equal
+ /// to one another. The two arrays are considered equal if both arrays
+ /// contain the same number of elements, and all corresponding pairs of
+ /// elements in the two arrays are equal. Two objects e1 and e2 are
+ /// considered equal if (e1==null ? e2==null : e1.equals(e2)). In other
+ /// words, the two arrays are equal if they contain the same elements in
+ /// the same order. Also, two array references are considered equal if
+ /// both are null.</returns>
+ public static bool Equals(System.Array array1, System.Array array2)
+ {
+ bool result = false;
+ if ((array1 == null) && (array2 == null))
+ result = true;
+ else if ((array1 != null) && (array2 != null))
+ {
+ if (array1.Length == array2.Length)
+ {
+ int length = array1.Length;
+ result = true;
+ for (int index = 0; index < length; index++)
+ {
+ System.Object o1 = array1.GetValue(index);
+ System.Object o2 = array2.GetValue(index);
+ if (o1 == null && o2 == null)
+ continue; // they match
+ else if (o1 == null || !o1.Equals(o2))
+ {
+ result = false;
+ break;
+ }
+ }
+ }
+ }
+ return result;
+ }
+ }
+}
diff --git a/src/core/Support/Compare.cs b/src/core/Support/Compare.cs
new file mode 100644
index 0000000..3e15846
--- /dev/null
+++ b/src/core/Support/Compare.cs
@@ -0,0 +1,49 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+namespace Lucene.Net.Support
+{
+ /// <summary>
+ /// Support class for comparing arrays of Lucene objects.
+ /// </summary>
+ public class Compare
+ {
+ /// <summary>
+ /// Compares two Term arrays for equality.
+ /// </summary>
+ /// <param name="t1">First Term array to compare</param>
+ /// <param name="t2">Second Term array to compare</param>
+ /// <returns>true if the Terms are equal in both arrays, false otherwise</returns>
+ public static bool CompareTermArrays(Index.Term[] t1, Index.Term[] t2)
+ {
+ if (t1.Length != t2.Length)
+ return false;
+ for (int i = 0; i < t1.Length; i++)
+ {
+ // the arrays differ as soon as any corresponding pair of terms differs
+ if (t1[i].CompareTo(t2[i]) != 0)
+ {
+ return false;
+ }
+ }
+ return true;
+ }
+ }
+}
diff --git a/src/core/Support/Compatibility/ConcurrentDictionary.cs b/src/core/Support/Compatibility/ConcurrentDictionary.cs
new file mode 100644
index 0000000..47914da
--- /dev/null
+++ b/src/core/Support/Compatibility/ConcurrentDictionary.cs
@@ -0,0 +1,312 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Linq;
+using System.Threading;
+
+#if NET35
+
+namespace Lucene.Net.Support.Compatibility
+{
+ /// <summary>
+ /// Support class that emulates the behavior of the ConcurrentDictionary
+ /// from .NET 4.0. This class will, in most cases, perform slightly slower
+ /// than the 4.0 equivalent. Note that all behavior is emulated, which means
+ /// that <see cref="GetEnumerator"/>, <see cref="Keys"/>, and <see cref="Values"/>
+ /// all return a snapshot of the data at the time it was called.
+ /// </summary>
+ [Serializable]
+ public class ConcurrentDictionary<TKey, TValue> : IDictionary<TKey, TValue>
+ {
+ private readonly object _lockObj = new object();
+ private readonly Dictionary<TKey, TValue> _dictInst;
+
+ public ConcurrentDictionary()
+ : this(16)
+ { }
+
+ public ConcurrentDictionary(int capacity)
+ : this(capacity, EqualityComparer<TKey>.Default)
+ { }
+
+ public ConcurrentDictionary(int capacity, IEqualityComparer<TKey> comparer)
+ {
+ _dictInst = new Dictionary<TKey, TValue>(capacity, comparer);
+ }
+
+ public ConcurrentDictionary(IEnumerable<KeyValuePair<TKey, TValue>> keyValuePairs)
+ : this(16)
+ {
+ foreach(var value in keyValuePairs)
+ {
+ _dictInst.Add(value.Key, value.Value);
+ }
+ }
+
+ #region Concurrent Dictionary Special Methods
+
+ public TValue AddOrUpdate(TKey key, Func<TKey, TValue> addValueFactory, Func<TKey, TValue, TValue> updateValueFactory)
+ {
+ lock(_lockObj)
+ {
+ if(_dictInst.ContainsKey(key))
+ {
+ _dictInst[key] = updateValueFactory(key, _dictInst[key]);
+ }
+ else
+ {
+ _dictInst[key] = addValueFactory(key);
+ }
+
+ return _dictInst[key];
+ }
+ }
+
+ public TValue AddOrUpdate(TKey key, TValue addValue, Func<TKey, TValue, TValue> updateValueFactory)
+ {
+ lock (_lockObj)
+ {
+ if (_dictInst.ContainsKey(key))
+ {
+ _dictInst[key] = updateValueFactory(key, _dictInst[key]);
+ }
+ else
+ {
+ _dictInst[key] = addValue;
+ }
+
+ return _dictInst[key];
+ }
+ }
+
+ public TValue GetOrAdd(TKey key, Func<TKey, TValue> valueFactory)
+ {
+ lock (_lockObj)
+ {
+ if (!_dictInst.ContainsKey(key))
+ {
+ _dictInst[key] = valueFactory(key);
+ }
+
+ return _dictInst[key];
+ }
+ }
+
+ public TValue GetOrAdd(TKey key, TValue value)
+ {
+ lock (_lockObj)
+ {
+ if (!_dictInst.ContainsKey(key))
+ {
+ _dictInst[key] = value;
+ }
+
+ return _dictInst[key];
+ }
+ }
+
+ public bool TryAdd(TKey key, TValue value)
+ {
+ lock (_lockObj)
+ {
+ if (_dictInst.ContainsKey(key))
+ {
+ return false;
+ }
+
+ _dictInst[key] = value;
+ return true;
+ }
+ }
+
+ public bool TryRemove(TKey key, out TValue value)
+ {
+ lock (_lockObj)
+ {
+ if (_dictInst.ContainsKey(key))
+ {
+ value = _dictInst[key];
+ _dictInst.Remove(key);
+ return true;
+ }
+
+ value = default(TValue);
+ return false;
+ }
+ }
+
+ public bool TryUpdate(TKey key, TValue newValue, TValue comparisonValue)
+ {
+ lock (_lockObj)
+ {
+ if (_dictInst.ContainsKey(key) && _dictInst[key].Equals(comparisonValue))
+ {
+ _dictInst[key] = newValue;
+ return true;
+ }
+
+ return false;
+ }
+ }
+
+ #endregion
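+
+ // Editorial usage sketch (illustrative only) for the emulated methods above:
+ //
+ //   var map = new ConcurrentDictionary<string, int>();
+ //   int v = map.GetOrAdd("key", k => 1);                        // adds 1, returns 1
+ //   v = map.AddOrUpdate("key", k => 1, (k, old) => old + 1);    // updates to 2, returns 2
+ //   int removed;
+ //   map.TryRemove("key", out removed);                          // removed == 2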
+
+ #region IDictionary Methods
+
+ // .NET4 ConcurrentDictionary returns an enumerator that can enumerate even
+ // if the collection is modified. We can't do that, so create a copy (expensive)
+ public IEnumerator<KeyValuePair<TKey, TValue>> GetEnumerator()
+ {
+ lock (_lockObj)
+ {
+ return _dictInst.ToList().GetEnumerator();
+ }
+ }
+
+ public bool TryGetValue(TKey key, out TValue value)
+ {
+ lock (_lockObj)
+ {
+ return _dictInst.TryGetValue(key, out value);
+ }
+ }
+
+ IEnumerator IEnumerable.GetEnumerator()
+ {
+ return GetEnumerator();
+ }
+
+ public void Clear()
+ {
+ lock (_lockObj)
+ {
+ _dictInst.Clear();
+ }
+ }
+
+ public int Count
+ {
+ get
+ {
+ lock (_lockObj)
+ {
+ return _dictInst.Count;
+ }
+ }
+ }
+
+ public bool ContainsKey(TKey key)
+ {
+ lock (_lockObj)
+ {
+ return _dictInst.ContainsKey(key);
+ }
+ }
+
+ public TValue this[TKey key]
+ {
+ get
+ {
+ lock (_lockObj)
+ {
+ return _dictInst[key];
+ }
+ }
+ set
+ {
+ lock (_lockObj)
+ {
+ _dictInst[key] = value;
+ }
+ }
+ }
+
+ public ICollection<TKey> Keys
+ {
+ // take the snapshot under the same lock used by every other member
+ get { lock (_lockObj) { return _dictInst.Keys.ToArray(); } }
+ }
+
+ public ICollection<TValue> Values
+ {
+ get { lock (_lockObj) { return _dictInst.Values.ToArray(); } }
+ }
+
+ #endregion
+
+ #region Explicit Interface Definitions
+
+ bool ICollection<KeyValuePair<TKey, TValue>>.IsReadOnly
+ {
+ get { return ((ICollection<KeyValuePair<TKey, TValue>>) _dictInst).IsReadOnly; }
+ }
+
+ void IDictionary<TKey, TValue>.Add(TKey key, TValue value)
+ {
+ lock (_lockObj)
+ {
+ _dictInst.Add(key, value);
+ }
+ }
+
+ bool ICollection<KeyValuePair<TKey, TValue>>.Contains(KeyValuePair<TKey, TValue> item)
+ {
+ lock (_lockObj)
+ {
+ return _dictInst.Contains(item);
+ }
+ }
+
+ bool IDictionary<TKey, TValue>.Remove(TKey key)
+ {
+ lock (_lockObj)
+ {
+ return _dictInst.Remove(key);
+ }
+ }
+
+ void ICollection<KeyValuePair<TKey, TValue>>.Add(KeyValuePair<TKey, TValue> item)
+ {
+ lock (_lockObj)
+ {
+ ((ICollection<KeyValuePair<TKey, TValue>>)_dictInst).Add(item);
+ }
+ }
+
+ void ICollection<KeyValuePair<TKey, TValue>>.CopyTo(KeyValuePair<TKey, TValue>[] array, int arrayIndex)
+ {
+ lock (_lockObj)
+ {
+ ((ICollection<KeyValuePair<TKey, TValue>>)_dictInst).CopyTo(array, arrayIndex);
+ }
+ }
+
+ bool ICollection<KeyValuePair<TKey, TValue>>.Remove(KeyValuePair<TKey, TValue> item)
+ {
+ lock (_lockObj)
+ {
+ return ((ICollection<KeyValuePair<TKey, TValue>>)_dictInst).Remove(item);
+ }
+ }
+
+ #endregion
+ }
+}
+
+#endif \ No newline at end of file
diff --git a/src/core/Support/Compatibility/Func.cs b/src/core/Support/Compatibility/Func.cs
new file mode 100644
index 0000000..508c6a7
--- /dev/null
+++ b/src/core/Support/Compatibility/Func.cs
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace System
+{
+#if NET35
+ public delegate TResult Func<T1, T2, T3, T4, T5, T6, T7, T8, T9, TResult>(T1 arg1, T2 arg2, T3 arg3, T4 arg4,
+ T5 arg5, T6 arg6, T7 arg7, T8 arg8,
+ T9 arg9);
+
+ public delegate TResult Func<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, TResult>(T1 arg1, T2 arg2, T3 arg3, T4 arg4,
+ T5 arg5, T6 arg6, T7 arg7, T8 arg8,
+ T9 arg9, T10 arg10);
+#endif
+}
diff --git a/src/core/Support/Compatibility/ISet.cs b/src/core/Support/Compatibility/ISet.cs
new file mode 100644
index 0000000..fc9fa79
--- /dev/null
+++ b/src/core/Support/Compatibility/ISet.cs
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if NET35
+
+namespace System.Collections.Generic
+{
+ public interface ISet<T> : ICollection<T>
+ {
+#region METHODS
+
+ new bool Add(T item);
+
+ void ExceptWith(IEnumerable<T> other);
+
+ void IntersectWith(IEnumerable<T> other);
+
+ bool IsProperSubsetOf(IEnumerable<T> other);
+
+ bool IsProperSupersetOf(IEnumerable<T> other);
+
+ bool IsSubsetOf(IEnumerable<T> other);
+
+ bool IsSupersetOf(IEnumerable<T> other);
+
+ bool Overlaps(IEnumerable<T> other);
+
+ bool SetEquals(IEnumerable<T> other);
+
+ void SymmetricExceptWith(IEnumerable<T> other);
+
+ void UnionWith(IEnumerable<T> other);
+
+ #endregion
+
+#region EXTENSION METHODS
+
+
+
+ #endregion
+ }
+
+}
+
+#endif \ No newline at end of file
diff --git a/src/core/Support/Compatibility/SetFactory.cs b/src/core/Support/Compatibility/SetFactory.cs
new file mode 100644
index 0000000..18b2520
--- /dev/null
+++ b/src/core/Support/Compatibility/SetFactory.cs
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.Collections.Generic;
+
+namespace Lucene.Net.Support.Compatibility
+{
+ public static class SetFactory
+ {
+ public static ISet<T> CreateHashSet<T>()
+ {
+#if NET35
+ return new WrappedHashSet<T>();
+#else
+ return new HashSet<T>();
+#endif
+ }
+
+ public static ISet<T> CreateHashSet<T>(IEnumerable<T> other)
+ {
+#if NET35
+ return new WrappedHashSet<T>(other);
+#else
+ return new HashSet<T>(other);
+#endif
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Support/Compatibility/SortedSet.cs b/src/core/Support/Compatibility/SortedSet.cs
new file mode 100644
index 0000000..0dad5e9
--- /dev/null
+++ b/src/core/Support/Compatibility/SortedSet.cs
@@ -0,0 +1,187 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if NET35
+
+namespace System.Collections.Generic
+{
+ [Serializable]
+ public class SortedSet<T> : ISet<T>, ICollection
+ {
+ private readonly SortedList<T, byte> _list;
+
+ public SortedSet()
+ : this(Comparer<T>.Default)
+ { }
+
+ public SortedSet(IComparer<T> comparer)
+ {
+ _list = new SortedList<T, byte>(comparer);
+ }
+
+ public T Min { get { return (_list.Count) >= 1 ? _list.Keys[0] : default(T); } }
+
+ public T Max { get { return (_list.Count) >= 1 ? _list.Keys[_list.Count - 1] : default(T); } }
+
+
+ /// <summary>
+ /// Removes all items from the <see cref="T:System.Collections.Generic.ICollection`1"/>.
+ /// </summary>
+ /// <exception cref="T:System.NotSupportedException">The <see cref="T:System.Collections.Generic.ICollection`1"/> is read-only.
+ /// </exception>
+ public void Clear()
+ {
+ _list.Clear();
+ }
+
+ public void CopyTo(T[] array, int arrayIndex)
+ {
+ _list.Keys.CopyTo(array, arrayIndex);
+ }
+
+ public bool Remove(T item)
+ {
+ return _list.Remove(item);
+ }
+
+ public bool Contains(T value)
+ {
+ return _list.ContainsKey(value);
+ }
+
+ public bool Add(T item)
+ {
+ if (!_list.ContainsKey(item))
+ {
+ _list.Add(item, 0);
+ return true;
+ }
+ return false;
+ }
+
+ public void UnionWith(IEnumerable<T> other)
+ {
+ foreach (var obj in other)
+ Add(obj);
+ }
+
+ public IEnumerator<T> GetEnumerator()
+ {
+ return _list.Keys.GetEnumerator();
+ }
+
+ public IComparer<T> Comparer { get { return _list.Comparer; } }
+
+ public int Count
+ {
+ get { return _list.Count; }
+ }
+
+ #region Explicit Interface Implementations
+
+ void ICollection<T>.Add(T item)
+ {
+ Add(item);
+ }
+
+ void ICollection.CopyTo(Array array, int index)
+ {
+ CopyTo((T[]) array, index);
+ }
+
+ bool ICollection<T>.IsReadOnly
+ {
+ get { return false; }
+ }
+
+ bool ICollection.IsSynchronized
+ {
+ get { return false; }
+ }
+
+ object ICollection.SyncRoot
+ {
+ get { throw new NotSupportedException(); }
+ }
+
+ IEnumerator IEnumerable.GetEnumerator()
+ {
+ return GetEnumerator();
+ }
+
+ int ICollection.Count
+ {
+ get { return Count; }
+ }
+
+ #endregion
+
+ #region ISet<T> Implementation
+
+ void ISet<T>.ExceptWith(IEnumerable<T> other)
+ {
+ foreach(var obj in other)
+ {
+ _list.Remove(obj);
+ }
+ }
+
+ void ISet<T>.IntersectWith(IEnumerable<T> other)
+ {
+ throw new NotImplementedException();
+ }
+
+ bool ISet<T>.IsProperSubsetOf(IEnumerable<T> other)
+ {
+ throw new NotImplementedException();
+ }
+
+ bool ISet<T>.IsProperSupersetOf(IEnumerable<T> other)
+ {
+ throw new NotImplementedException();
+ }
+
+ bool ISet<T>.IsSubsetOf(IEnumerable<T> other)
+ {
+ throw new NotImplementedException();
+ }
+
+ bool ISet<T>.IsSupersetOf(IEnumerable<T> other)
+ {
+ throw new NotImplementedException();
+ }
+
+ bool ISet<T>.Overlaps(IEnumerable<T> other)
+ {
+ throw new NotImplementedException();
+ }
+
+ bool ISet<T>.SetEquals(IEnumerable<T> other)
+ {
+ throw new NotImplementedException();
+ }
+
+ void ISet<T>.SymmetricExceptWith(IEnumerable<T> other)
+ {
+ throw new NotImplementedException();
+ }
+
+ #endregion
+ }
+}
+
+#endif \ No newline at end of file
diff --git a/src/core/Support/Compatibility/ThreadLocal.cs b/src/core/Support/Compatibility/ThreadLocal.cs
new file mode 100644
index 0000000..167228b
--- /dev/null
+++ b/src/core/Support/Compatibility/ThreadLocal.cs
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if NET35
+
+using System;
+
+namespace Lucene.Net.Support.Compatibility
+{
+ public class ThreadLocal<T> : IDisposable
+ {
+ [ThreadStatic]
+ static WeakDictionary<ThreadLocal<T>, T> slots;
+
+ static void Init()
+ {
+ if (slots == null) slots = new WeakDictionary<ThreadLocal<T>, T>();
+ }
+
+ public T Value
+ {
+ set
+ {
+ Init();
+ slots.Add(this, value);
+ }
+ get
+ {
+ Init();
+ return (T)slots[this];
+ }
+ }
+
+ public void Dispose()
+ {
+ if (slots != null) slots.Remove(this);
+ }
+ }
+}
+
+#endif \ No newline at end of file
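
For the NET35 build, the shim above keeps each thread's value in a [ThreadStatic] WeakDictionary keyed by the ThreadLocal instance. A minimal usage sketch, assuming WeakDictionary behaves like an ordinary dictionary for live keys and that each thread assigns its slot before reading it (the setter simply adds to the per-thread table):

    var local = new Lucene.Net.Support.Compatibility.ThreadLocal<int>();
    local.Value = 1;                                   // stored in the current thread's slot table

    var worker = new System.Threading.Thread(() =>
    {
        local.Value = 2;                               // a different [ThreadStatic] table on this thread
        System.Console.WriteLine(local.Value);         // 2
    });
    worker.Start();
    worker.Join();

    System.Console.WriteLine(local.Value);             // still 1 on the original thread
    local.Dispose();                                   // clears this instance's slot on the current thread
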
diff --git a/src/core/Support/Compatibility/WrappedHashSet.cs b/src/core/Support/Compatibility/WrappedHashSet.cs
new file mode 100644
index 0000000..c2ffd21
--- /dev/null
+++ b/src/core/Support/Compatibility/WrappedHashSet.cs
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if NET35
+
+using System;
+using System.Collections.Generic;
+using System.Runtime.Serialization;
+
+namespace Lucene.Net.Support.Compatibility
+{
+ [Serializable]
+ class WrappedHashSet<T> : HashSet<T>, ISet<T>
+ {
+ public WrappedHashSet()
+ { }
+
+ public WrappedHashSet(IEnumerable<T> items)
+ : base(items)
+ { }
+
+ protected WrappedHashSet(SerializationInfo info, StreamingContext context)
+ : base (info, context)
+ {
+
+ }
+ }
+}
+
+#endif \ No newline at end of file
diff --git a/src/core/Support/Cryptography.cs b/src/core/Support/Cryptography.cs
new file mode 100644
index 0000000..3b6e593
--- /dev/null
+++ b/src/core/Support/Cryptography.cs
@@ -0,0 +1,45 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System.Security.Cryptography;
+
+namespace Lucene.Net.Support
+{
+ public static class Cryptography
+ {
+ public static bool FIPSCompliant = false;
+
+ public static HashAlgorithm HashAlgorithm
+ {
+ get
+ {
+ if (FIPSCompliant)
+ {
+ //LUCENENET-175
+ //No assumptions should be made about which algorithm this returns;
+ //it may change over time (SHA256, SHA384, SHA512, etc.).
+ return SHA1.Create();
+ }
+ return MD5.Create();
+ }
+ }
+}
+} \ No newline at end of file
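
A short usage sketch of the factory above: each read of the property creates a fresh HashAlgorithm, MD5 by default or SHA1 when FIPSCompliant is set (see LUCENENET-175).

    byte[] data = System.Text.Encoding.UTF8.GetBytes("lucene");

    // Default: FIPSCompliant == false, so a new MD5 instance is created.
    byte[] md5Digest = Lucene.Net.Support.Cryptography.HashAlgorithm.ComputeHash(data);

    // FIPS mode switches the factory to SHA1.
    Lucene.Net.Support.Cryptography.FIPSCompliant = true;
    byte[] sha1Digest = Lucene.Net.Support.Cryptography.HashAlgorithm.ComputeHash(data);
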
diff --git a/src/core/Support/Deflater.cs b/src/core/Support/Deflater.cs
new file mode 100644
index 0000000..03473de
--- /dev/null
+++ b/src/core/Support/Deflater.cs
@@ -0,0 +1,97 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+
+namespace Lucene.Net.Support
+{
+ public class Deflater
+ {
+ delegate void SetLevelDelegate(int level);
+ delegate void SetInputDelegate(byte[] input, int offset, int count);
+ delegate void FinishDelegate();
+ delegate bool GetIsFinishedDelegate();
+ delegate int DeflateDelegate(byte[] output);
+
+ SetLevelDelegate setLevelMethod;
+ SetInputDelegate setInputMethod;
+ FinishDelegate finishMethod;
+ GetIsFinishedDelegate getIsFinishedMethod;
+ DeflateDelegate deflateMethod;
+
+ public const int BEST_COMPRESSION = 9;
+
+ internal Deflater(object deflaterInstance)
+ {
+ Type type = deflaterInstance.GetType();
+
+ setLevelMethod = (SetLevelDelegate)Delegate.CreateDelegate(
+ typeof(SetLevelDelegate),
+ deflaterInstance,
+ type.GetMethod("SetLevel", new Type[] { typeof(int) }));
+
+ setInputMethod = (SetInputDelegate)Delegate.CreateDelegate(
+ typeof(SetInputDelegate),
+ deflaterInstance,
+ type.GetMethod("SetInput", new Type[] { typeof(byte[]), typeof(int), typeof(int) }));
+
+ finishMethod = (FinishDelegate)Delegate.CreateDelegate(
+ typeof(FinishDelegate),
+ deflaterInstance,
+ type.GetMethod("Finish", Type.EmptyTypes));
+
+ getIsFinishedMethod = (GetIsFinishedDelegate)Delegate.CreateDelegate(
+ typeof(GetIsFinishedDelegate),
+ deflaterInstance,
+ type.GetMethod("get_IsFinished", Type.EmptyTypes));
+
+ deflateMethod = (DeflateDelegate)Delegate.CreateDelegate(
+ typeof(DeflateDelegate),
+ deflaterInstance,
+ type.GetMethod("Deflate", new Type[] { typeof(byte[]) }));
+ }
+
+ public void SetLevel(int level)
+ {
+ setLevelMethod(level);
+ }
+
+ public void SetInput(byte[] input, int offset, int count)
+ {
+ setInputMethod(input, offset, count);
+ }
+
+ public void Finish()
+ {
+ finishMethod();
+ }
+
+ public bool IsFinished
+ {
+ get { return getIsFinishedMethod(); }
+ }
+
+ public int Deflate(byte[] output)
+ {
+ return deflateMethod(output);
+ }
+ }
+}
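
The constructor above binds typed delegates to methods of a SharpZipLib deflater instance discovered by reflection, so the library stays a soft dependency and later calls avoid MethodInfo.Invoke overhead. A minimal sketch of the same late-binding pattern, using a plain string in place of the SharpZipLib object:

    object instance = "deflate me";                     // stands in for the reflected SharpZipLib instance
    System.Reflection.MethodInfo mi = instance.GetType()
        .GetMethod("ToUpperInvariant", System.Type.EmptyTypes);

    // Bind once, call many times without per-call reflection.
    var toUpper = (System.Func<string>)System.Delegate.CreateDelegate(
        typeof(System.Func<string>), instance, mi);

    string shouted = toUpper();                         // "DEFLATE ME"
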
diff --git a/src/core/Support/Double.cs b/src/core/Support/Double.cs
new file mode 100644
index 0000000..a8cff94
--- /dev/null
+++ b/src/core/Support/Double.cs
@@ -0,0 +1,44 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+using System.Globalization;
+
+namespace Lucene.Net.Support
+{
+ /// <summary>
+ /// Parsing helper for System.Double that always accepts '.' as the decimal separator; values that overflow parse as System.Double.MaxValue.
+ /// </summary>
+ public class Double
+ {
+ public static System.Double Parse(System.String s)
+ {
+ try
+ {
+ return System.Double.Parse(s.Replace(".", CultureInfo.CurrentCulture.NumberFormat.NumberDecimalSeparator));
+ }
+ catch (OverflowException)
+ {
+ return System.Double.MaxValue;
+ }
+ }
+ }
+}
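
A sketch of why the wrapper above exists: it rewrites '.' to the current culture's decimal separator before parsing, so Java-style literals survive cultures that use a decimal comma (the culture below is only an example).

    System.Threading.Thread.CurrentThread.CurrentCulture =
        new System.Globalization.CultureInfo("de-DE");     // decimal-comma culture, for illustration

    // System.Double.Parse("3.14") would misread the '.' under de-DE;
    // the wrapper turns "3.14" into "3,14" first and parses that.
    double d = Lucene.Net.Support.Double.Parse("3.14");    // 3.14
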
diff --git a/src/core/Support/EquatableList.cs b/src/core/Support/EquatableList.cs
new file mode 100644
index 0000000..ab6fb47
--- /dev/null
+++ b/src/core/Support/EquatableList.cs
@@ -0,0 +1,339 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Support
+{
+ /// <summary>Represents a strongly typed list of objects that can be accessed by index.
+ /// Provides methods to search, sort, and manipulate lists. Also provides functionality
+ /// to compare lists against each other through an implementation of
+ /// <see cref="IEquatable{T}"/>.</summary>
+ /// <typeparam name="T">The type of elements in the list.</typeparam>
+ [Serializable]
+ public class EquatableList<T> : System.Collections.Generic.List<T>,
+ IEquatable<System.Collections.Generic.IEnumerable<T>>,
+ ICloneable
+ {
+ /// <summary>Initializes a new instance of the
+ /// <see cref="EquatableList{T}"/> class that is empty and has the
+ /// default initial capacity.</summary>
+ public EquatableList() : base() { }
+
+ /// <summary>Initializes a new instance of the <see cref="EquatableList{T}"/>
+ /// class that contains elements copied from the specified collection and has
+ /// sufficient capacity to accommodate the number of elements copied.</summary>
+ /// <param name="collection">The collection whose elements are copied to the new list.</param>
+ public EquatableList(System.Collections.Generic.IEnumerable<T> collection) : base(collection) { }
+
+ /// <summary>Initializes a new instance of the <see cref="EquatableList{T}"/>
+ /// class that is empty and has the specified initial capacity.</summary>
+ /// <param name="capacity">The number of elements that the new list can initially store.</param>
+ public EquatableList(int capacity) : base(capacity) { }
+
+ /// <summary>Adds a range of objects represented by the <see cref="ICollection"/>
+ /// implementation.</summary>
+ /// <param name="c">The <see cref="ICollection"/>
+ /// implementation to add to this list.</param>
+ public void AddRange(ICollection c)
+ {
+ // If the collection is null, throw an exception.
+ if (c == null) throw new ArgumentNullException("c");
+
+ // Pre-compute capacity.
+ Capacity = Math.Max(c.Count + Count, Capacity);
+
+ // Cycle through the items and add.
+ foreach (T item in c)
+ {
+ // Add the item.
+ Add(item);
+ }
+ }
+
+ /// <summary>Compares the counts of two <see cref="System.Collections.Generic.IEnumerable{T}"/>
+ /// implementations.</summary>
+ /// <remarks>This borrows a trick from LINQ: sniffing the runtime types for
+ /// interface implementations that can supply shortcuts when making comparisons.
+ /// In this case, that is the <see cref="System.Collections.Generic.ICollection{T}"/> and
+ /// <see cref="ICollection"/> interfaces, either of which can provide a count
+ /// which can be used in determining the equality of sequences (if they don't have
+ /// the same count, then they can't be equal).</remarks>
+ /// <param name="x">The <see cref="System.Collections.Generic.IEnumerable{T}"/> from the left hand side of the
+ /// comparison to check the count of.</param>
+ /// <param name="y">The <see cref="System.Collections.Generic.IEnumerable{T}"/> from the right hand side of the
+ /// comparison to check the count of.</param>
+ /// <returns>Null if the result is indeterminate. This occurs when either <paramref name="x"/>
+ /// or <paramref name="y"/> doesn't implement <see cref="ICollection"/> or <see cref="System.Collections.Generic.ICollection{T}"/>.
+ /// Otherwise, it will get the count from each and return true if they are equal, false otherwise.</returns>
+ private static bool? EnumerableCountsEqual(System.Collections.Generic.IEnumerable<T> x, System.Collections.Generic.IEnumerable<T> y)
+ {
+ // Get the ICollection<T> and ICollection interfaces.
+ System.Collections.Generic.ICollection<T> xOfTCollection = x as System.Collections.Generic.ICollection<T>;
+ System.Collections.Generic.ICollection<T> yOfTCollection = y as System.Collections.Generic.ICollection<T>;
+ ICollection xCollection = x as ICollection;
+ ICollection yCollection = y as ICollection;
+
+ // The count in x and y.
+ int? xCount = xOfTCollection != null ? xOfTCollection.Count : xCollection != null ? xCollection.Count : (int?)null;
+ int? yCount = yOfTCollection != null ? yOfTCollection.Count : yCollection != null ? yCollection.Count : (int?)null;
+
+ // If either are null, return null, the result is indeterminate.
+ if (xCount == null || yCount == null)
+ {
+ // Return null, indeterminate.
+ return null;
+ }
+
+ // Both counts are non-null, compare.
+ return xCount == yCount;
+ }
+
+ /// <summary>Compares the contents of a <see cref="System.Collections.Generic.IEnumerable{T}"/>
+ /// implementation to another one to determine equality.</summary>
+ /// <remarks>The two sequences are walked in lock step and compared
+ /// element by element with the default equality comparer for the element
+ /// type. If any pair of elements differs, or one sequence ends before the
+ /// other (that is, the lengths differ), the sequences are not equal.
+ /// A cheap count comparison is attempted first via
+ /// <see cref="EnumerableCountsEqual"/> to short-circuit the walk.</remarks>
+ /// <param name="x">The <see cref="System.Collections.Generic.IEnumerable{T}"/> implementation
+ /// that is considered the left hand side.</param>
+ /// <param name="y">The <see cref="System.Collections.Generic.IEnumerable{T}"/> implementation
+ /// that is considered the right hand side.</param>
+ /// <returns>True if the items are equal, false otherwise.</returns>
+ private static bool Equals(System.Collections.Generic.IEnumerable<T> x,
+ System.Collections.Generic.IEnumerable<T> y)
+ {
+ // If x and y are null, then return true, they are the same.
+ if (x == null && y == null)
+ {
+ // They are the same, return true.
+ return true;
+ }
+
+ // If only one of them is null, they cannot be equal
+ // (the case where both are null was handled above).
+ if (x == null || y == null)
+ {
+ // Return false, one is null, the other is not.
+ return false;
+ }
+
+ // Check to see if the counts on the IEnumerable implementations are equal.
+ // This is a shortcut, if they are not equal, then the lists are not equal.
+ // If the result is indeterminate, then get out.
+ bool? enumerableCountsEqual = EnumerableCountsEqual(x, y);
+
+ // If the enumerable counts have been able to be calculated (indicated by
+ // a non-null value) and it is false, then no need to iterate through the items.
+ if (enumerableCountsEqual != null && !enumerableCountsEqual.Value)
+ {
+ // The sequences are not equal.
+ return false;
+ }
+
+ // The counts of the items in the enumerations are equal, or indeterminate
+ // so a full iteration needs to be made to compare each item.
+ // Get the default comparer for T first.
+ System.Collections.Generic.EqualityComparer<T> defaultComparer =
+ EqualityComparer<T>.Default;
+
+ // Get the enumerator for y.
+ System.Collections.Generic.IEnumerator<T> otherEnumerator = y.GetEnumerator();
+
+ // Call Dispose on IDisposable if there is an implementation on the
+ // IEnumerator<T> returned by a call to y.GetEnumerator().
+ using (otherEnumerator as IDisposable)
+ {
+ // Cycle through the items in this list.
+ foreach (T item in x)
+ {
+ // If there isn't an item to get, then this has more
+ // items than that, they are not equal.
+ if (!otherEnumerator.MoveNext())
+ {
+ // Return false.
+ return false;
+ }
+
+ // Perform a comparison. Must check this on the left hand side
+ // and that on the right hand side.
+ bool comparison = defaultComparer.Equals(item, otherEnumerator.Current);
+
+ // If the value is false, return false.
+ if (!comparison)
+ {
+ // Return the value.
+ return comparison;
+ }
+ }
+
+ // If there are no more items, then return true, the sequences
+ // are equal.
+ if (!otherEnumerator.MoveNext())
+ {
+ // The sequences are equal.
+ return true;
+ }
+
+ // The other sequence has more items than this one, return
+ // false, these are not equal.
+ return false;
+ }
+ }
+
+ #region IEquatable<IEnumerable<T>> Members
+ /// <summary>Compares this sequence to another <see cref="System.Collections.Generic.IEnumerable{T}"/>
+ /// implementation, returning true if they are equal, false otherwise.</summary>
+ /// <param name="other">The other <see cref="System.Collections.Generic.IEnumerable{T}"/> implementation
+ /// to compare against.</param>
+ /// <returns>True if the sequence in <paramref name="other"/>
+ /// is the same as this one.</returns>
+ public bool Equals(System.Collections.Generic.IEnumerable<T> other)
+ {
+ // Delegate to the static sequence comparison.
+ return Equals(this, other);
+ }
+ #endregion
+
+ /// <summary>Compares this object for equality against other.</summary>
+ /// <param name="obj">The other object to compare this object against.</param>
+ /// <returns>True if this object and <paramref name="obj"/> are equal, false
+ /// otherwise.</returns>
+ public override bool Equals(object obj)
+ {
+ // Call the strongly typed version.
+ return Equals(obj as System.Collections.Generic.IEnumerable<T>);
+ }
+
+ /// <summary>Gets the hash code for the list.</summary>
+ /// <returns>The hash code value.</returns>
+ public override int GetHashCode()
+ {
+ // Call the static method, passing this.
+ return GetHashCode(this);
+ }
+
+#if __MonoCS__
+ public static int GetHashCode<T>(System.Collections.Generic.IEnumerable<T> source)
+#else
+ /// <summary>Gets the hash code for the list.</summary>
+ /// <param name="source">The <see cref="System.Collections.Generic.IEnumerable{T}"/>
+ /// implementation which will have all the contents hashed.</param>
+ /// <returns>The hash code value.</returns>
+ public static int GetHashCode(System.Collections.Generic.IEnumerable<T> source)
+#endif
+ {
+ // If source is null, then return 0.
+ if (source == null) return 0;
+
+ // Seed the hash code with the hash code of the type.
+ // This is done so that empty EquatableList instances do not all
+ // collide when placed in dictionaries and other structures
+ // that rely on hash codes.
+ int hashCode = typeof(T).GetHashCode();
+
+ // Iterate through the items in this implementation.
+ foreach (T item in source)
+ {
+ // Adjust the hash code.
+ hashCode = 31 * hashCode + (item == null ? 0 : item.GetHashCode());
+ }
+
+ // Return the hash code.
+ return hashCode;
+ }
+
+ // TODO: When diverging from Java version of Lucene, can uncomment these to adhere to best practices when overriding the Equals method and implementing IEquatable<T>.
+ ///// <summary>Overload of the == operator, it compares a
+ ///// <see cref="ComparableList{T}"/> to an <see cref="IEnumerable{T}"/>
+ ///// implementation.</summary>
+ ///// <param name="x">The <see cref="ComparableList{T}"/> to compare
+ ///// against <paramref name="y"/>.</param>
+ ///// <param name="y">The <see cref="IEnumerable{T}"/> to compare
+ ///// against <paramref name="x"/>.</param>
+ ///// <returns>True if the instances are equal, false otherwise.</returns>
+ //public static bool operator ==(EquatableList<T> x, System.Collections.Generic.IEnumerable<T> y)
+ //{
+ // // Call Equals.
+ // return Equals(x, y);
+ //}
+
+ ///// <summary>Overload of the == operator, it compares a
+ ///// <see cref="ComparableList{T}"/> to an <see cref="IEnumerable{T}"/>
+ ///// implementation.</summary>
+ ///// <param name="y">The <see cref="ComparableList{T}"/> to compare
+ ///// against <paramref name="x"/>.</param>
+ ///// <param name="x">The <see cref="IEnumerable{T}"/> to compare
+ ///// against <paramref name="y"/>.</param>
+ ///// <returns>True if the instances are equal, false otherwise.</returns>
+ //public static bool operator ==(System.Collections.Generic.IEnumerable<T> x, EquatableList<T> y)
+ //{
+ // // Call equals.
+ // return Equals(x, y);
+ //}
+
+ ///// <summary>Overload of the != operator, it compares a
+ ///// <see cref="ComparableList{T}"/> to an <see cref="IEnumerable{T}"/>
+ ///// implementation.</summary>
+ ///// <param name="x">The <see cref="ComparableList{T}"/> to compare
+ ///// against <paramref name="y"/>.</param>
+ ///// <param name="y">The <see cref="IEnumerable{T}"/> to compare
+ ///// against <paramref name="x"/>.</param>
+ ///// <returns>True if the instances are not equal, false otherwise.</returns>
+ //public static bool operator !=(EquatableList<T> x, System.Collections.Generic.IEnumerable<T> y)
+ //{
+ // // Return the negative of the equals operation.
+ // return !(x == y);
+ //}
+
+ ///// <summary>Overload of the != operator, it compares a
+ ///// <see cref="ComparableList{T}"/> to an <see cref="IEnumerable{T}"/>
+ ///// implementation.</summary>
+ ///// <param name="y">The <see cref="ComparableList{T}"/> to compare
+ ///// against <paramref name="x"/>.</param>
+ ///// <param name="x">The <see cref="IEnumerable{T}"/> to compare
+ ///// against <paramref name="y"/>.</param>
+ ///// <returns>True if the instances are not equal, false otherwise.</returns>
+ //public static bool operator !=(System.Collections.Generic.IEnumerable<T> x, EquatableList<T> y)
+ //{
+ // // Return the negative of the equals operation.
+ // return !(x == y);
+ //}
+
+ #region ICloneable Members
+
+ /// <summary>Clones the <see cref="EquatableList{T}"/>.</summary>
+ /// <remarks>This is a shallow clone.</remarks>
+ /// <returns>A new shallow clone of this
+ /// <see cref="EquatableList{T}"/>.</returns>
+ public object Clone()
+ {
+ // Just create a new one, passing this to the constructor.
+ return new EquatableList<T>(this);
+ }
+
+ #endregion
+ }
+}
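
A quick sketch of the value semantics the class above adds on top of List<T>: two distinct instances holding the same elements compare equal and produce the same hash code.

    var a = new Lucene.Net.Support.EquatableList<int> { 1, 2, 3 };
    var b = new Lucene.Net.Support.EquatableList<int> { 1, 2, 3 };

    bool sameReference = object.ReferenceEquals(a, b);        // false
    bool sameContents  = a.Equals(b);                         // true: element-wise comparison
    bool sameHash      = a.GetHashCode() == b.GetHashCode();  // true for equal sequences
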
diff --git a/src/core/Support/FileSupport.cs b/src/core/Support/FileSupport.cs
new file mode 100644
index 0000000..b6236c8
--- /dev/null
+++ b/src/core/Support/FileSupport.cs
@@ -0,0 +1,121 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+using System.IO;
+
+namespace Lucene.Net.Support
+{
+ /// <summary>
+ /// Represents the methods to support some operations over files.
+ /// </summary>
+ public class FileSupport
+ {
+ /// <summary>
+ /// Returns an array of abstract pathnames representing the files and directories of the specified path.
+ /// </summary>
+ /// <param name="path">The abstract pathname whose children should be listed.</param>
+ /// <returns>An array of abstract pathnames for the children of the specified path, or null if the path is not a directory.</returns>
+ public static System.IO.FileInfo[] GetFiles(System.IO.FileInfo path)
+ {
+ if ((path.Attributes & FileAttributes.Directory) > 0)
+ {
+ String[] fullpathnames = Directory.GetFileSystemEntries(path.FullName);
+ System.IO.FileInfo[] result = new System.IO.FileInfo[fullpathnames.Length];
+ for (int i = 0; i < result.Length; i++)
+ result[i] = new System.IO.FileInfo(fullpathnames[i]);
+ return result;
+ }
+ else
+ return null;
+ }
+
+ // TODO: This filesupport thing is silly. Same goes with _TestUtil's RMDir.
+ // If we're removing a directory
+ public static System.IO.FileInfo[] GetFiles(System.IO.DirectoryInfo path)
+ {
+ return GetFiles(new FileInfo(path.FullName));
+ }
+
+ /// <summary>
+ /// Returns a list of files in a given directory.
+ /// </summary>
+ /// <param name="fullName">The full path name to the directory.</param>
+ /// <param name="indexFileNameFilter"></param>
+ /// <returns>An array containing the files.</returns>
+ public static System.String[] GetLuceneIndexFiles(System.String fullName,
+ Index.IndexFileNameFilter indexFileNameFilter)
+ {
+ System.IO.DirectoryInfo dInfo = new System.IO.DirectoryInfo(fullName);
+ System.Collections.ArrayList list = new System.Collections.ArrayList();
+ foreach (System.IO.FileInfo fInfo in dInfo.GetFiles())
+ {
+ if (indexFileNameFilter.Accept(fInfo, fInfo.Name) == true)
+ {
+ list.Add(fInfo.Name);
+ }
+ }
+ System.String[] retFiles = new System.String[list.Count];
+ list.CopyTo(retFiles);
+ return retFiles;
+ }
+
+ // Disable the obsolete warning since we must use FileStream.Handle
+ // because Mono does not support FileStream.SafeFileHandle at present.
+#pragma warning disable 618
+
+ /// <summary>
+ /// Flushes the specified file stream. Ensures that all buffered
+ /// data is actually written to the file system.
+ /// </summary>
+ /// <param name="fileStream">The file stream.</param>
+ public static void Sync(System.IO.FileStream fileStream)
+ {
+ if (fileStream == null)
+ throw new ArgumentNullException("fileStream");
+
+ fileStream.Flush();
+
+ //if (OS.IsWindows)
+ //{
+ // if (!FlushFileBuffers(fileStream.Handle))
+ // throw new System.IO.IOException();
+ //}
+ //else if (OS.IsUnix)
+ //{
+ // if (fsync(fileStream.Handle) != IntPtr.Zero)
+ // throw new System.IO.IOException();
+ //}
+ //else
+ //{
+ // throw new NotImplementedException();
+ //}
+ }
+
+#pragma warning restore 618
+
+ //[System.Runtime.InteropServices.DllImport("libc")]
+ //extern static IntPtr fsync(IntPtr fd);
+
+ //[System.Runtime.InteropServices.DllImport("kernel32.dll")]
+ //extern static bool FlushFileBuffers(IntPtr hFile);
+ }
+}
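
A usage sketch of the helpers above (the paths are hypothetical); note that Sync currently only calls Flush, since the fsync/FlushFileBuffers branches are commented out.

    var dir = new System.IO.DirectoryInfo("/tmp/index");                           // hypothetical directory
    System.IO.FileInfo[] children = Lucene.Net.Support.FileSupport.GetFiles(dir);  // null if not a directory

    using (var fs = new System.IO.FileStream("/tmp/index/segments.gen",
                                             System.IO.FileMode.Create))
    {
        fs.WriteByte(1);
        Lucene.Net.Support.FileSupport.Sync(fs);   // flush buffered data toward the file system
    }
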
diff --git a/src/core/Support/GeneralKeyedCollection.cs b/src/core/Support/GeneralKeyedCollection.cs
new file mode 100644
index 0000000..1b2e29b
--- /dev/null
+++ b/src/core/Support/GeneralKeyedCollection.cs
@@ -0,0 +1,96 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+using System.Diagnostics;
+
+namespace Lucene.Net.Support
+{
+ /// <summary>A collection of <typeparamref name="TItem"/> which can be
+ /// looked up by instances of <typeparamref name="TKey"/>.</summary>
+ /// <typeparam name="TItem">The type of the items contained in this
+ /// collection.</typeparam>
+ /// <typeparam name="TKey">The type of the keys that can be used to look
+ /// up the items.</typeparam>
+ internal class GeneralKeyedCollection<TKey, TItem> : System.Collections.ObjectModel.KeyedCollection<TKey, TItem>
+ {
+ /// <summary>Creates a new instance of the
+ /// <see cref="GeneralKeyedCollection{TKey, TItem}"/> class.</summary>
+ /// <param name="converter">The <see cref="Converter{TInput, TOutput}"/> which will convert
+ /// instances of <typeparamref name="TItem"/> to <typeparamref name="TKey"/>
+ /// when the override of <see cref="GetKeyForItem(TItem)"/> is called.</param>
+ internal GeneralKeyedCollection(Converter<TItem, TKey> converter)
+ : base()
+ {
+ // If the converter is null, throw an exception.
+ if (converter == null) throw new ArgumentNullException("converter");
+
+ // Store the converter.
+ this.converter = converter;
+
+ // That's all folks.
+ return;
+ }
+
+ /// <summary>The <see cref="Converter{TInput, TOutput}"/> which will convert
+ /// instances of <typeparamref name="TItem"/> to <typeparamref name="TKey"/>
+ /// when the override of <see cref="GetKeyForItem(TItem)"/> is called.</summary>
+ private readonly Converter<TItem, TKey> converter;
+
+ /// <summary>Converts an item that is added to the collection to
+ /// a key.</summary>
+ /// <param name="item">The instance of <typeparamref name="TItem"/>
+ /// to convert into an instance of <typeparamref name="TKey"/>.</param>
+ /// <returns>The instance of <typeparamref name="TKey"/> which is the
+ /// key for this item.</returns>
+ protected override TKey GetKeyForItem(TItem item)
+ {
+ // The converter is not null.
+ Debug.Assert(converter != null);
+
+ // Call the converter.
+ return converter(item);
+ }
+
+ /// <summary>Determines if a key for an item exists in this
+ /// collection.</summary>
+ /// <param name="key">The instance of <typeparamref name="TKey"/>
+ /// to see if it exists in this collection.</param>
+ /// <returns>True if the key exists in the collection, false otherwise.</returns>
+ public bool ContainsKey(TKey key)
+ {
+ // Call the dictionary - it is lazily created when the first item is added
+ if (Dictionary != null)
+ {
+ return Dictionary.ContainsKey(key);
+ }
+ else
+ {
+ return false;
+ }
+ }
+
+ public System.Collections.Generic.IList<TItem> Values()
+ {
+ return base.Items;
+ }
+ }
+}
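
Because GetKeyForItem is delegated to the converter passed at construction, the collection can key any item type without subclassing KeyedCollection. A sketch under the assumption that the caller lives inside the assembly (the class is internal) and with an invented item type for illustration:

    // Hypothetical item type, for illustration only.
    class NamedThing { public string Name; }

    var byName = new GeneralKeyedCollection<string, NamedThing>(item => item.Name);
    byName.Add(new NamedThing { Name = "contents" });

    bool known = byName.ContainsKey("contents");   // true, via the lazily created lookup dictionary
    NamedThing thing = byName["contents"];         // KeyedCollection's indexer by key
    System.Collections.Generic.IList<NamedThing> all = byName.Values();
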
diff --git a/src/core/Support/HashMap.cs b/src/core/Support/HashMap.cs
new file mode 100644
index 0000000..04e09c2
--- /dev/null
+++ b/src/core/Support/HashMap.cs
@@ -0,0 +1,449 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Support
+{
+ /// <summary>
+ /// A C# emulation of the <a href="http://download.oracle.com/javase/1,5.0/docs/api/java/util/HashMap.html">Java Hashmap</a>
+ /// <para>
+ /// A <see cref="Dictionary{TKey, TValue}" /> is a close equivalent to the Java
+ /// A <see cref="Dictionary{TKey, TValue}" /> is a close equivalent to the Java
+ /// HashMap. One difference from the Java implementation is that
+ /// the HashMap supports both null keys and values, where the C# Dictionary
+ /// only supports null values, not keys. Also, the <c>V Get(TKey)</c>
+ /// an exception. This implementation doesn't throw an exception when a key
+ /// doesn't exist, it will return null. This class is slower than using a
+ /// <see cref="Dictionary{TKey, TValue}"/>, because of extra checks that have to be
+ /// done on each access, to check for null.
+ /// </para>
+ /// <para>
+ /// <b>NOTE:</b> This class works best with nullable types. default(TValue) is returned
+ /// when a key doesn't exist in the collection (this being similar to how Java returns
+ /// null). Therefore, if the expected behavior of the java code is to execute code
+ /// based on if the key exists, when the key is an integer type, it will return 0 instead of null.
+ /// </para>
+ /// <remarks>
+ /// Consider also implementing IDictionary, IEnumerable, and ICollection
+ /// like <see cref="Dictionary{TKey, TValue}" /> does, so HashMap can be
+ /// substituted in place of the same interfaces it implements.
+ /// </remarks>
+ /// </summary>
+ /// <typeparam name="TKey">The type of keys in the dictionary</typeparam>
+ /// <typeparam name="TValue">The type of values in the dictionary</typeparam>
+ [Serializable]
+ public class HashMap<TKey, TValue> : IDictionary<TKey, TValue>
+ {
+ internal IEqualityComparer<TKey> _comparer;
+ internal Dictionary<TKey, TValue> _dict;
+
+ // Indicates if a null key has been assigned, used for iteration
+ private bool _hasNullValue;
+ // stores the value for the null key
+ private TValue _nullValue;
+ // Indicates the type of key is a non-nullable valuetype
+ private bool _isValueType;
+
+ public HashMap()
+ : this(0)
+ { }
+
+ public HashMap(IEqualityComparer<TKey> comparer)
+ : this(0, comparer)
+ {
+
+ }
+
+ public HashMap(int initialCapacity)
+ : this(initialCapacity, EqualityComparer<TKey>.Default)
+ {
+
+ }
+
+ public HashMap(int initialCapacity, IEqualityComparer<TKey> comparer)
+ {
+ _comparer = comparer;
+ _dict = new Dictionary<TKey, TValue>(initialCapacity, _comparer);
+ _hasNullValue = false;
+
+ if (typeof(TKey).IsValueType)
+ {
+ _isValueType = Nullable.GetUnderlyingType(typeof(TKey)) == null;
+ }
+ }
+
+ public HashMap(IEnumerable<KeyValuePair<TKey, TValue>> other)
+ : this(0)
+ {
+ foreach (var kvp in other)
+ {
+ Add(kvp.Key, kvp.Value);
+ }
+ }
+
+ public bool ContainsValue(TValue value)
+ {
+ if (!_isValueType && _hasNullValue && _nullValue.Equals(value))
+ return true;
+
+ return _dict.ContainsValue(value);
+ }
+
+ #region Implementation of IEnumerable
+
+ public IEnumerator<KeyValuePair<TKey, TValue>> GetEnumerator()
+ {
+ if (!_isValueType && _hasNullValue)
+ {
+ yield return new KeyValuePair<TKey, TValue>(default(TKey), _nullValue);
+ }
+ foreach (var kvp in _dict)
+ {
+ yield return kvp;
+ }
+ }
+
+ IEnumerator IEnumerable.GetEnumerator()
+ {
+ return GetEnumerator();
+ }
+
+ #endregion
+
+ #region Implementation of ICollection<KeyValuePair<TKey,TValue>>
+
+ void ICollection<KeyValuePair<TKey, TValue>>.Add(KeyValuePair<TKey, TValue> item)
+ {
+ Add(item.Key, item.Value);
+ }
+
+ public void Clear()
+ {
+ _hasNullValue = false;
+ _nullValue = default(TValue);
+ _dict.Clear();
+ }
+
+ bool ICollection<KeyValuePair<TKey, TValue>>.Contains(KeyValuePair<TKey, TValue> item)
+ {
+ if (!_isValueType && _comparer.Equals(item.Key, default(TKey)))
+ {
+ return _hasNullValue && EqualityComparer<TValue>.Default.Equals(item.Value, _nullValue);
+ }
+
+ return ((ICollection<KeyValuePair<TKey, TValue>>)_dict).Contains(item);
+ }
+
+ void ICollection<KeyValuePair<TKey, TValue>>.CopyTo(KeyValuePair<TKey, TValue>[] array, int arrayIndex)
+ {
+ ((ICollection<KeyValuePair<TKey, TValue>>) _dict).CopyTo(array, arrayIndex);
+ if(!_isValueType && _hasNullValue)
+ {
+ array[arrayIndex + _dict.Count] = new KeyValuePair<TKey, TValue>(default(TKey), _nullValue); // place it right after the copied entries, not at the end of the destination array
+ }
+ }
+
+ public bool Remove(KeyValuePair<TKey, TValue> item)
+ {
+ if (!_isValueType && _comparer.Equals(item.Key, default(TKey)))
+ {
+ if (!_hasNullValue)
+ return false;
+
+ _hasNullValue = false;
+ _nullValue = default(TValue);
+ return true;
+ }
+
+ return ((ICollection<KeyValuePair<TKey, TValue>>)_dict).Remove(item);
+ }
+
+ public int Count
+ {
+ get { return _dict.Count + (_hasNullValue ? 1 : 0); }
+ }
+
+ public bool IsReadOnly
+ {
+ get { return false; }
+ }
+
+ #endregion
+
+ #region Implementation of IDictionary<TKey,TValue>
+
+ public bool ContainsKey(TKey key)
+ {
+ if (!_isValueType && _comparer.Equals(key, default(TKey)))
+ {
+ if (_hasNullValue)
+ {
+ return true;
+ }
+ return false;
+ }
+
+ return _dict.ContainsKey(key);
+ }
+
+ public virtual void Add(TKey key, TValue value)
+ {
+ if (!_isValueType && _comparer.Equals(key, default(TKey)))
+ {
+ _hasNullValue = true;
+ _nullValue = value;
+ }
+ else
+ {
+ _dict[key] = value;
+ }
+ }
+
+ public bool Remove(TKey key)
+ {
+ if (!_isValueType && _comparer.Equals(key, default(TKey)))
+ {
+ if (!_hasNullValue)
+ return false;
+
+ _hasNullValue = false;
+ _nullValue = default(TValue);
+ return true;
+ }
+ else
+ {
+ return _dict.Remove(key);
+ }
+ }
+
+ public bool TryGetValue(TKey key, out TValue value)
+ {
+ if (!_isValueType && _comparer.Equals(key, default(TKey)))
+ {
+ if (_hasNullValue)
+ {
+ value = _nullValue;
+ return true;
+ }
+
+ value = default(TValue);
+ return false;
+ }
+ else
+ {
+ return _dict.TryGetValue(key, out value);
+ }
+ }
+
+ public TValue this[TKey key]
+ {
+ get
+ {
+ if (!_isValueType && _comparer.Equals(key, default(TKey)))
+ {
+ if (!_hasNullValue)
+ {
+ return default(TValue);
+ }
+ return _nullValue;
+ }
+ return _dict.ContainsKey(key) ? _dict[key] : default(TValue);
+ }
+ set { Add(key, value); }
+ }
+
+ public ICollection<TKey> Keys
+ {
+ get
+ {
+ if (!_hasNullValue) return _dict.Keys;
+
+ // Using a List<T> to generate an ICollection<TKey>
+ // would incur a costly copy of the dict's KeyCollection
+ // use our own wrapper instead
+ return new NullKeyCollection(_dict);
+ }
+ }
+
+ public ICollection<TValue> Values
+ {
+ get
+ {
+ if (!_hasNullValue) return _dict.Values;
+
+ // Using a List<T> to generate an ICollection<TValue>
+ // would incur a costly copy of the dict's ValueCollection
+ // use our own wrapper instead
+ return new NullValueCollection(_dict, _nullValue);
+ }
+ }
+
+ #endregion
+
+ #region NullValueCollection
+
+ /// <summary>
+ /// Wraps a dictionary and adds the value
+ /// represented by the null key
+ /// </summary>
+ class NullValueCollection : ICollection<TValue>
+ {
+ private readonly TValue _nullValue;
+ private readonly Dictionary<TKey, TValue> _internalDict;
+
+ public NullValueCollection(Dictionary<TKey, TValue> dict, TValue nullValue)
+ {
+ _internalDict = dict;
+ _nullValue = nullValue;
+ }
+
+ #region Implementation of IEnumerable
+
+ public IEnumerator<TValue> GetEnumerator()
+ {
+ yield return _nullValue;
+
+ foreach (var val in _internalDict.Values)
+ {
+ yield return val;
+ }
+ }
+
+ IEnumerator IEnumerable.GetEnumerator()
+ {
+ return GetEnumerator();
+ }
+
+ #endregion
+
+ #region Implementation of ICollection<TValue>
+
+ public void CopyTo(TValue[] array, int arrayIndex)
+ {
+ throw new NotImplementedException("Implement as needed");
+ }
+
+ public int Count
+ {
+ get { return _internalDict.Count + 1; }
+ }
+
+ public bool IsReadOnly
+ {
+ get { return true; }
+ }
+
+ #region Explicit Interface Methods
+
+ void ICollection<TValue>.Add(TValue item)
+ {
+ throw new NotSupportedException();
+ }
+
+ void ICollection<TValue>.Clear()
+ {
+ throw new NotSupportedException();
+ }
+
+ bool ICollection<TValue>.Contains(TValue item)
+ {
+ throw new NotSupportedException();
+ }
+
+ bool ICollection<TValue>.Remove(TValue item)
+ {
+ throw new NotSupportedException("Collection is read only!");
+ }
+ #endregion
+
+ #endregion
+ }
+
+ #endregion
+
+ #region NullKeyCollection
+ /// <summary>
+ /// Wraps a dictionary's collection, adding in a
+ /// null key.
+ /// </summary>
+ class NullKeyCollection : ICollection<TKey>
+ {
+ private readonly Dictionary<TKey, TValue> _internalDict;
+
+ public NullKeyCollection(Dictionary<TKey, TValue> dict)
+ {
+ _internalDict = dict;
+ }
+
+ public IEnumerator<TKey> GetEnumerator()
+ {
+ yield return default(TKey);
+ foreach (var key in _internalDict.Keys)
+ {
+ yield return key;
+ }
+ }
+
+ IEnumerator IEnumerable.GetEnumerator()
+ {
+ return GetEnumerator();
+ }
+
+ public void CopyTo(TKey[] array, int arrayIndex)
+ {
+ throw new NotImplementedException("Implement this as needed");
+ }
+
+ public int Count
+ {
+ get { return _internalDict.Count + 1; }
+ }
+
+ public bool IsReadOnly
+ {
+ get { return true; }
+ }
+
+ #region Explicit Interface Definitions
+ bool ICollection<TKey>.Contains(TKey item)
+ {
+ throw new NotSupportedException();
+ }
+
+ void ICollection<TKey>.Add(TKey item)
+ {
+ throw new NotSupportedException();
+ }
+
+ void ICollection<TKey>.Clear()
+ {
+ throw new NotSupportedException();
+ }
+
+ bool ICollection<TKey>.Remove(TKey item)
+ {
+ throw new NotSupportedException();
+ }
+ #endregion
+ }
+ #endregion
+ }
+}
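
A sketch of the Java-like behaviour described in the remarks above: missing keys yield default(TValue) instead of throwing, and a null key is kept in a dedicated slot (for value-type keys, default(TKey) is treated as an ordinary key).

    var map = new Lucene.Net.Support.HashMap<string, string>();
    map["field"] = "contents";               // the indexer adds or overwrites, like Java's put()
    map[null] = "kept in the null slot";

    string missing = map["unknown"];         // null (default(TValue)) rather than KeyNotFoundException
    bool hasNull = map.ContainsKey(null);    // true
    map.Remove(null);                        // clears the null-key slot
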
diff --git a/src/core/Support/IChecksum.cs b/src/core/Support/IChecksum.cs
new file mode 100644
index 0000000..1b2d393
--- /dev/null
+++ b/src/core/Support/IChecksum.cs
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+using System;
+
+namespace Lucene.Net.Support
+{
+ /// <summary>
+ /// Represents a running checksum computed over a stream of bytes (the equivalent of java.util.zip.Checksum).
+ /// </summary>
+ public interface IChecksum
+ {
+ void Reset();
+ void Update(int b);
+ void Update(byte[] b);
+ void Update(byte[] b, int offset, int length);
+ long Value { get; }
+ }
+} \ No newline at end of file
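
A minimal, purely illustrative implementation of the interface above (a plain byte sum, not a real CRC; the store code would normally rely on a CRC32-style implementation):

    public class SumChecksum : Lucene.Net.Support.IChecksum
    {
        private long _value;

        public void Reset() { _value = 0; }

        public void Update(int b) { _value = (_value + (b & 0xFF)) & 0xFFFFFFFFL; }

        public void Update(byte[] b) { Update(b, 0, b.Length); }

        public void Update(byte[] b, int offset, int length)
        {
            for (int i = offset; i < offset + length; i++)
                Update(b[i]);
        }

        public long Value { get { return _value; } }
    }
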
diff --git a/src/core/Support/IThreadRunnable.cs b/src/core/Support/IThreadRunnable.cs
new file mode 100644
index 0000000..309979a
--- /dev/null
+++ b/src/core/Support/IThreadRunnable.cs
@@ -0,0 +1,36 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+namespace Lucene.Net.Support
+{
+ /// <summary>
+ /// This interface should be implemented by any class whose instances are intended
+ /// to be executed by a thread.
+ /// </summary>
+ public interface IThreadRunnable
+ {
+ /// <summary>
+ /// This method has to be implemented in order that starting of the thread causes the object's
+ /// run method to be called in that separately executing thread.
+ /// </summary>
+ void Run();
+ }
+}
diff --git a/src/core/Support/Inflater.cs b/src/core/Support/Inflater.cs
new file mode 100644
index 0000000..a67add0
--- /dev/null
+++ b/src/core/Support/Inflater.cs
@@ -0,0 +1,71 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+
+namespace Lucene.Net.Support
+{
+ public class Inflater
+ {
+ delegate void SetInputDelegate(byte[] buffer);
+ delegate bool GetIsFinishedDelegate();
+ delegate int InflateDelegate(byte[] buffer);
+
+ SetInputDelegate setInputMethod;
+ GetIsFinishedDelegate getIsFinishedMethod;
+ InflateDelegate inflateMethod;
+
+ internal Inflater(object inflaterInstance)
+ {
+ Type type = inflaterInstance.GetType();
+
+ setInputMethod = (SetInputDelegate)Delegate.CreateDelegate(
+ typeof(SetInputDelegate),
+ inflaterInstance,
+ type.GetMethod("SetInput", new Type[] { typeof(byte[]) }));
+
+ getIsFinishedMethod = (GetIsFinishedDelegate)Delegate.CreateDelegate(
+ typeof(GetIsFinishedDelegate),
+ inflaterInstance,
+ type.GetMethod("get_IsFinished", Type.EmptyTypes));
+
+ inflateMethod = (InflateDelegate)Delegate.CreateDelegate(
+ typeof(InflateDelegate),
+ inflaterInstance,
+ type.GetMethod("Inflate", new Type[] { typeof(byte[]) }));
+ }
+
+ public void SetInput(byte[] buffer)
+ {
+ setInputMethod(buffer);
+ }
+
+ public bool IsFinished
+ {
+ get { return getIsFinishedMethod(); }
+ }
+
+ public int Inflate(byte[] buffer)
+ {
+ return inflateMethod(buffer);
+ }
+ }
+}
diff --git a/src/core/Support/Number.cs b/src/core/Support/Number.cs
new file mode 100644
index 0000000..70f35a9
--- /dev/null
+++ b/src/core/Support/Number.cs
@@ -0,0 +1,252 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+using System.Globalization;
+
+namespace Lucene.Net.Support
+{
+ /// <summary>
+ /// A simple class for number conversions.
+ /// </summary>
+ public class Number
+ {
+ /// <summary>
+ /// Min radix value.
+ /// </summary>
+ public const int MIN_RADIX = 2;
+ /// <summary>
+ /// Max radix value.
+ /// </summary>
+ public const int MAX_RADIX = 36;
+
+ private const System.String digits = "0123456789abcdefghijklmnopqrstuvwxyz";
+
+
+ /// <summary>
+ /// Converts a number to a base-36 System.String.
+ /// </summary>
+ /// <param name="number"></param>
+ /// <returns></returns>
+ public static System.String ToString(long number)
+ {
+ System.Text.StringBuilder s = new System.Text.StringBuilder();
+
+ if (number == 0)
+ {
+ s.Append("0");
+ }
+ else
+ {
+ if (number < 0)
+ {
+ s.Append("-");
+ number = -number;
+ }
+
+ while (number > 0)
+ {
+ char c = digits[(int)number % 36];
+ s.Insert(0, c);
+ number = number / 36;
+ }
+ }
+
+ return s.ToString();
+ }
+
+
+ /// <summary>
+ /// Converts a number to System.String.
+ /// </summary>
+ /// <param name="f"></param>
+ /// <returns></returns>
+ public static System.String ToString(float f)
+ {
+ if (((float)(int)f) == f)
+ {
+ return ((int)f).ToString() + ".0";
+ }
+ else
+ {
+ return f.ToString(NumberFormatInfo.InvariantInfo);
+ }
+ }
+
+ /// <summary>
+ /// Converts a number to System.String in the specified radix.
+ /// </summary>
+ /// <param name="i">A number to be converted.</param>
+ /// <param name="radix">A radix.</param>
+ /// <returns>A System.String representation of the number in the specified radix.</returns>
+ public static System.String ToString(long i, int radix)
+ {
+ if (radix < MIN_RADIX || radix > MAX_RADIX)
+ radix = 10;
+
+ char[] buf = new char[65];
+ int charPos = 64;
+ bool negative = (i < 0);
+
+ if (!negative)
+ {
+ i = -i;
+ }
+
+ while (i <= -radix)
+ {
+ buf[charPos--] = digits[(int)(-(i % radix))];
+ i = i / radix;
+ }
+ buf[charPos] = digits[(int)(-i)];
+
+ if (negative)
+ {
+ buf[--charPos] = '-';
+ }
+
+ return new System.String(buf, charPos, (65 - charPos));
+ }
+
+ /// <summary>
+ /// Parses a number in the specified radix.
+ /// </summary>
+ /// <param name="s">An input System.String.</param>
+ /// <param name="radix">A radix.</param>
+ /// <returns>The parsed number in the specified radix.</returns>
+ public static long Parse(System.String s, int radix)
+ {
+ if (s == null)
+ {
+ throw new ArgumentNullException("s");
+ }
+
+ if (radix < MIN_RADIX)
+ {
+ throw new NotSupportedException("radix " + radix +
+ " less than Number.MIN_RADIX");
+ }
+ if (radix > MAX_RADIX)
+ {
+ throw new NotSupportedException("radix " + radix +
+ " greater than Number.MAX_RADIX");
+ }
+
+ long result = 0;
+ long mult = 1;
+
+ s = s.ToLower();
+
+ for (int i = s.Length - 1; i >= 0; i--)
+ {
+ int weight = digits.IndexOf(s[i]);
+ if (weight == -1)
+ throw new FormatException("Invalid number for the specified radix");
+
+ result += (weight * mult);
+ mult *= radix;
+ }
+
+ return result;
+ }
+
+ /// <summary>
+ /// Performs an unsigned bitwise right shift with the specified number
+ /// </summary>
+ /// <param name="number">Number to operate on</param>
+ /// <param name="bits">Number of bits to shift</param>
+ /// <returns>The resulting number from the shift operation</returns>
+ public static int URShift(int number, int bits)
+ {
+ return (int)(((uint)number) >> bits);
+ }
+
+
+ /// <summary>
+ /// Performs an unsigned bitwise right shift with the specified number
+ /// </summary>
+ /// <param name="number">Number to operate on</param>
+ /// <param name="bits">Number of bits to shift</param>
+ /// <returns>The resulting number from the shift operation</returns>
+ public static long URShift(long number, int bits)
+ {
+ return (long)(((ulong)number) >> bits);
+ }
+
+
+ /// <summary>
+ /// Returns the index of the first bit that is set to true that occurs
+ /// on or after the specified starting index. If no such bit exists
+ /// then -1 is returned.
+ /// </summary>
+ /// <param name="bits">The BitArray object.</param>
+ /// <param name="fromIndex">The index to start checking from (inclusive).</param>
+ /// <returns>The index of the next set bit.</returns>
+ public static int NextSetBit(System.Collections.BitArray bits, int fromIndex)
+ {
+ for (int i = fromIndex; i < bits.Length; i++)
+ {
+ if (bits[i] == true)
+ {
+ return i;
+ }
+ }
+ return -1;
+ }
+
+ /// <summary>
+ /// Converts a base-36 System.String number to a long.
+ /// </summary>
+ /// <param name="s"></param>
+ /// <returns></returns>
+ public static long ToInt64(System.String s)
+ {
+ long number = 0;
+ long factor;
+
+ // handle negative number
+ if (s.StartsWith("-"))
+ {
+ s = s.Substring(1);
+ factor = -1;
+ }
+ else
+ {
+ factor = 1;
+ }
+
+ // generate number
+ for (int i = s.Length - 1; i > -1; i--)
+ {
+ int n = digits.IndexOf(s[i]);
+
+ // not supporting fractional or scientific notations
+ if (n < 0)
+ throw new System.ArgumentException("Invalid or unsupported character in number: " + s[i]);
+
+ number += (n * factor);
+ factor *= 36;
+ }
+
+ return number;
+ }
+ }
+}
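
A few worked calls against the helpers above: the plain ToString(long)/ToInt64 pair and the radix overloads both speak base 36, and URShift gives Java's unsigned >>> semantics, which C#'s signed >> on int does not.

    string base36 = Lucene.Net.Support.Number.ToString(46656L, 36);   // "1000", since 46656 == 36^3
    long back = Lucene.Net.Support.Number.Parse("1000", 36);          // 46656

    string segName = Lucene.Net.Support.Number.ToString(36L);         // "10": the long overload is also base 36
    long segValue = Lucene.Net.Support.Number.ToInt64("10");          // 36

    int unsignedShift = Lucene.Net.Support.Number.URShift(-1, 28);    // 15 (0xF), Java's >>> semantics
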
diff --git a/src/core/Support/OS.cs b/src/core/Support/OS.cs
new file mode 100644
index 0000000..7f80abf
--- /dev/null
+++ b/src/core/Support/OS.cs
@@ -0,0 +1,62 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+
+namespace Lucene.Net.Support
+{
+ /// <summary>
+ /// Provides platform information (whether we are running on Unix or Windows).
+ /// </summary>
+ public class OS
+ {
+ static bool isUnix;
+ static bool isWindows;
+
+ static OS()
+ {
+ PlatformID pid = Environment.OSVersion.Platform;
+ isWindows = pid == PlatformID.Win32NT || pid == PlatformID.Win32Windows;
+
+ // we use integers instead of enum tags because "MacOS"
+ // requires 2.0 SP2, 3.0 SP2 or 3.5 SP1.
+ // 128 is mono's old platform tag for Unix.
+ int id = (int)pid;
+ isUnix = id == 4 || id == 6 || id == 128;
+ }
+
+ /// <summary>
+ /// Whether we run under a Unix platform.
+ /// </summary>
+ public static bool IsUnix
+ {
+ get { return isUnix; }
+ }
+
+ /// <summary>
+ /// Whether we run under a supported Windows platform.
+ /// </summary>
+ public static bool IsWindows
+ {
+ get { return isWindows; }
+ }
+ }
+}
diff --git a/src/core/Support/SharpZipLib.cs b/src/core/Support/SharpZipLib.cs
new file mode 100644
index 0000000..c788277
--- /dev/null
+++ b/src/core/Support/SharpZipLib.cs
@@ -0,0 +1,51 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System.Reflection;
+
+namespace Lucene.Net.Support
+{
+ public class SharpZipLib
+ {
+ static System.Reflection.Assembly asm = null;
+
+ static SharpZipLib()
+ {
+ try
+ {
+ asm = Assembly.Load("ICSharpCode.SharpZipLib");
+ }
+ catch { }
+ }
+
+ public static Deflater CreateDeflater()
+ {
+ if (asm == null) throw new System.IO.FileNotFoundException("Can not load ICSharpCode.SharpZipLib.dll");
+ return new Deflater(asm.CreateInstance("ICSharpCode.SharpZipLib.Zip.Compression.Deflater"));
+ }
+
+ public static Inflater CreateInflater()
+ {
+ if (asm == null) throw new System.IO.FileNotFoundException("Can not load ICSharpCode.SharpZipLib.dll");
+ return new Inflater(asm.CreateInstance("ICSharpCode.SharpZipLib.Zip.Compression.Inflater"));
+ }
+ }
+}
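
The factory above loads ICSharpCode.SharpZipLib by name at runtime and hands back the reflection-bound Deflater/Inflater wrappers defined earlier. A round-trip sketch in the style of CompressionTools, assuming the SharpZipLib assembly can actually be loaded:

    byte[] input = System.Text.Encoding.UTF8.GetBytes("some stored field value");

    var compressor = Lucene.Net.Support.SharpZipLib.CreateDeflater();
    compressor.SetLevel(Lucene.Net.Support.Deflater.BEST_COMPRESSION);
    compressor.SetInput(input, 0, input.Length);
    compressor.Finish();

    var compressed = new System.IO.MemoryStream(input.Length);
    byte[] buffer = new byte[1024];
    while (!compressor.IsFinished)
        compressed.Write(buffer, 0, compressor.Deflate(buffer));

    var decompressor = Lucene.Net.Support.SharpZipLib.CreateInflater();
    decompressor.SetInput(compressed.ToArray());

    var restored = new System.IO.MemoryStream(input.Length);
    while (!decompressor.IsFinished)
        restored.Write(buffer, 0, decompressor.Inflate(buffer));
    // restored.ToArray() now equals input
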
diff --git a/src/core/Support/Single.cs b/src/core/Support/Single.cs
new file mode 100644
index 0000000..12fa500
--- /dev/null
+++ b/src/core/Support/Single.cs
@@ -0,0 +1,131 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+using System.Globalization;
+
+namespace Lucene.Net.Support
+{
+ /// <summary>
+ /// Parsing and formatting helpers for System.Single that mimic Java semantics: a trailing 'f'/'F' suffix is accepted when parsing, and '.' is always used as the decimal separator.
+ /// </summary>
+ public class Single
+ {
+ /// <summary>
+ ///
+ /// </summary>
+ /// <param name="s"></param>
+ /// <param name="style"></param>
+ /// <param name="provider"></param>
+ /// <returns></returns>
+ public static System.Single Parse(System.String s, System.Globalization.NumberStyles style, System.IFormatProvider provider)
+ {
+ if (s.EndsWith("f") || s.EndsWith("F"))
+ return System.Single.Parse(s.Substring(0, s.Length - 1), style, provider);
+ else
+ return System.Single.Parse(s, style, provider);
+ }
+
+ /// <summary>
+ /// Parses s as a float, stripping a trailing 'f' or 'F' suffix, using the given format provider.
+ /// </summary>
+ /// <param name="s"></param>
+ /// <param name="provider"></param>
+ /// <returns></returns>
+ public static System.Single Parse(System.String s, System.IFormatProvider provider)
+ {
+ if (s.EndsWith("f") || s.EndsWith("F"))
+ return System.Single.Parse(s.Substring(0, s.Length - 1), provider);
+ else
+ return System.Single.Parse(s, provider);
+ }
+
+ /// <summary>
+ /// Parses s as a float, stripping a trailing 'f' or 'F' suffix, using the given number style.
+ /// </summary>
+ /// <param name="s"></param>
+ /// <param name="style"></param>
+ /// <returns></returns>
+ public static System.Single Parse(System.String s, System.Globalization.NumberStyles style)
+ {
+ if (s.EndsWith("f") || s.EndsWith("F"))
+ return System.Single.Parse(s.Substring(0, s.Length - 1), style);
+ else
+ return System.Single.Parse(s, style);
+ }
+
+ /// <summary>
+ /// Parses s as a float, stripping a trailing 'f' or 'F' suffix and mapping '.' to the current culture's decimal separator.
+ /// </summary>
+ /// <param name="s"></param>
+ /// <returns></returns>
+ public static System.Single Parse(System.String s)
+ {
+ if (s.EndsWith("f") || s.EndsWith("F"))
+ return System.Single.Parse(s.Substring(0, s.Length - 1).Replace(".", CultureInfo.CurrentCulture.NumberFormat.NumberDecimalSeparator));
+ else
+ return System.Single.Parse(s.Replace(".", CultureInfo.CurrentCulture.NumberFormat.NumberDecimalSeparator));
+ }
+
+ public static bool TryParse(System.String s, out float f)
+ {
+ bool ok = false;
+
+ if (s.EndsWith("f") || s.EndsWith("F"))
+ ok = System.Single.TryParse(s.Substring(0, s.Length - 1).Replace(".", CultureInfo.CurrentCulture.NumberFormat.NumberDecimalSeparator), out f);
+ else
+ ok = System.Single.TryParse(s.Replace(".", CultureInfo.CurrentCulture.NumberFormat.NumberDecimalSeparator), out f);
+
+ return ok;
+ }
+
+ /// <summary>
+ /// Formats f and normalizes the current culture's decimal separator to '.'.
+ /// </summary>
+ /// <param name="f"></param>
+ /// <returns></returns>
+ public static string ToString(float f)
+ {
+ return f.ToString().Replace(CultureInfo.CurrentCulture.NumberFormat.NumberDecimalSeparator, ".");
+ }
+
+ /// <summary>
+ /// Formats f with the given format string and normalizes the current culture's decimal separator to '.'.
+ /// </summary>
+ /// <param name="f"></param>
+ /// <param name="format"></param>
+ /// <returns></returns>
+ public static string ToString(float f, string format)
+ {
+ return f.ToString(format).Replace(CultureInfo.CurrentCulture.NumberFormat.NumberDecimalSeparator, ".");
+ }
+
+ public static int FloatToIntBits(float value)
+ {
+ return BitConverter.ToInt32(BitConverter.GetBytes(value), 0);
+ }
+
+ public static float IntBitsToFloat(int value)
+ {
+ return BitConverter.ToSingle(BitConverter.GetBytes(value), 0);
+ }
+ }
+}
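
An illustrative sketch of the culture normalization and bit-level round trip provided by this class:

    float f = Lucene.Net.Support.Single.Parse("3.14f");          // the trailing 'f'/'F' suffix is stripped before parsing
    string s = Lucene.Net.Support.Single.ToString(f);            // always rendered with '.' as the decimal separator
    int bits = Lucene.Net.Support.Single.FloatToIntBits(f);      // IEEE 754 bit pattern, like Java's Float.floatToIntBits
    float back = Lucene.Net.Support.Single.IntBitsToFloat(bits); // back == f
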
diff --git a/src/core/Support/TextSupport.cs b/src/core/Support/TextSupport.cs
new file mode 100644
index 0000000..de4da46
--- /dev/null
+++ b/src/core/Support/TextSupport.cs
@@ -0,0 +1,49 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+namespace Lucene.Net.Support
+{
+ public class TextSupport
+ {
+ /// <summary>
+ /// Copies an array of chars obtained from a String into a specified array of chars
+ /// </summary>
+ /// <param name="sourceString">The String to get the chars from</param>
+ /// <param name="sourceStart">Position of the String to start getting the chars</param>
+ /// <param name="sourceEnd">Position of the String to end getting the chars</param>
+ /// <param name="destinationArray">Array to return the chars</param>
+ /// <param name="destinationStart">Position of the destination array of chars to start storing the chars</param>
+ /// <remarks>Chars are written into destinationArray; the copied range is [sourceStart, sourceEnd), i.e. sourceEnd is exclusive.</remarks>
+ public static void GetCharsFromString(string sourceString, int sourceStart, int sourceEnd, char[] destinationArray, int destinationStart)
+ {
+ int sourceCounter;
+ int destinationCounter;
+ sourceCounter = sourceStart;
+ destinationCounter = destinationStart;
+ while (sourceCounter < sourceEnd)
+ {
+ destinationArray[destinationCounter] = (char)sourceString[sourceCounter];
+ sourceCounter++;
+ destinationCounter++;
+ }
+ }
+ }
+}
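
An illustrative call, equivalent to Java's String.getChars(srcBegin, srcEnd, dst, dstBegin); note that sourceEnd is exclusive:

    char[] dest = new char[5];
    Lucene.Net.Support.TextSupport.GetCharsFromString("lucene.net", 0, 5, dest, 0);
    // dest now holds 'l', 'u', 'c', 'e', 'n'
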
diff --git a/src/core/Support/ThreadClass.cs b/src/core/Support/ThreadClass.cs
new file mode 100644
index 0000000..6657424
--- /dev/null
+++ b/src/core/Support/ThreadClass.cs
@@ -0,0 +1,315 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+using System.Threading;
+
+namespace Lucene.Net.Support
+{
+ /// <summary>
+ /// Support class used to handle threads
+ /// </summary>
+ public class ThreadClass : IThreadRunnable
+ {
+ /// <summary>
+ /// The instance of System.Threading.Thread
+ /// </summary>
+ private System.Threading.Thread threadField;
+
+
+ /// <summary>
+ /// Initializes a new instance of the ThreadClass class
+ /// </summary>
+ public ThreadClass()
+ {
+ threadField = new System.Threading.Thread(new System.Threading.ThreadStart(Run));
+ }
+
+ /// <summary>
+ /// Initializes a new instance of the Thread class.
+ /// </summary>
+ /// <param name="Name">The name of the thread</param>
+ public ThreadClass(System.String Name)
+ {
+ threadField = new System.Threading.Thread(new System.Threading.ThreadStart(Run));
+ this.Name = Name;
+ }
+
+ /// <summary>
+ /// Initializes a new instance of the Thread class.
+ /// </summary>
+ /// <param name="Start">A ThreadStart delegate that references the methods to be invoked when this thread begins executing</param>
+ public ThreadClass(System.Threading.ThreadStart Start)
+ {
+ threadField = new System.Threading.Thread(Start);
+ }
+
+ /// <summary>
+ /// Initializes a new instance of the Thread class.
+ /// </summary>
+ /// <param name="Start">A ThreadStart delegate that references the methods to be invoked when this thread begins executing</param>
+ /// <param name="Name">The name of the thread</param>
+ public ThreadClass(System.Threading.ThreadStart Start, System.String Name)
+ {
+ threadField = new System.Threading.Thread(Start);
+ this.Name = Name;
+ }
+
+ /// <summary>
+ /// This method has no functionality unless the method is overridden
+ /// </summary>
+ public virtual void Run()
+ {
+ }
+
+ /// <summary>
+ /// Causes the operating system to change the state of the current thread instance to ThreadState.Running
+ /// </summary>
+ public virtual void Start()
+ {
+ threadField.Start();
+ }
+
+ /// <summary>
+ /// Interrupts a thread that is in the WaitSleepJoin thread state
+ /// </summary>
+ public virtual void Interrupt()
+ {
+ threadField.Interrupt();
+ }
+
+ /// <summary>
+ /// Gets the current thread instance
+ /// </summary>
+ public System.Threading.Thread Instance
+ {
+ get
+ {
+ return threadField;
+ }
+ set
+ {
+ threadField = value;
+ }
+ }
+
+ /// <summary>
+ /// Gets or sets the name of the thread
+ /// </summary>
+ public System.String Name
+ {
+ get
+ {
+ return threadField.Name;
+ }
+ set
+ {
+ if (threadField.Name == null)
+ threadField.Name = value;
+ }
+ }
+
+ public void SetDaemon(bool isDaemon)
+ {
+ threadField.IsBackground = isDaemon;
+ }
+
+ /// <summary>
+ /// Gets or sets a value indicating the scheduling priority of a thread
+ /// </summary>
+ public System.Threading.ThreadPriority Priority
+ {
+ get
+ {
+ try
+ {
+ return threadField.Priority;
+ }
+ catch
+ {
+ return ThreadPriority.Normal;
+ }
+ }
+ set
+ {
+ try
+ {
+ threadField.Priority = value;
+ }
+ catch { }
+
+ }
+ }
+
+ /// <summary>
+ /// Gets a value indicating the execution status of the current thread
+ /// </summary>
+ public bool IsAlive
+ {
+ get
+ {
+ return threadField.IsAlive;
+ }
+ }
+
+ /// <summary>
+ /// Gets or sets a value indicating whether or not a thread is a background thread.
+ /// </summary>
+ public bool IsBackground
+ {
+ get
+ {
+ return threadField.IsBackground;
+ }
+ set
+ {
+ threadField.IsBackground = value;
+ }
+ }
+
+ /// <summary>
+ /// Blocks the calling thread until a thread terminates
+ /// </summary>
+ public void Join()
+ {
+ threadField.Join();
+ }
+
+ /// <summary>
+ /// Blocks the calling thread until a thread terminates or the specified time elapses
+ /// </summary>
+ /// <param name="MiliSeconds">Time of wait in milliseconds</param>
+ public void Join(long MiliSeconds)
+ {
+ threadField.Join(new System.TimeSpan(MiliSeconds * 10000));
+ }
+
+ /// <summary>
+ /// Blocks the calling thread until a thread terminates or the specified time elapses
+ /// </summary>
+ /// <param name="MiliSeconds">Time of wait in milliseconds</param>
+ /// <param name="NanoSeconds">Time of wait in nanoseconds</param>
+ public void Join(long MiliSeconds, int NanoSeconds)
+ {
+ threadField.Join(new System.TimeSpan(MiliSeconds * 10000 + NanoSeconds * 100));
+ }
+
+ /// <summary>
+ /// Resumes a thread that has been suspended
+ /// </summary>
+ public void Resume()
+ {
+ Monitor.PulseAll(threadField);
+ }
+
+ /// <summary>
+ /// Raises a ThreadAbortException in the thread on which it is invoked,
+ /// to begin the process of terminating the thread. Calling this method
+ /// usually terminates the thread
+ /// </summary>
+ public void Abort()
+ {
+ threadField.Abort();
+ }
+
+ /// <summary>
+ /// Raises a ThreadAbortException in the thread on which it is invoked,
+ /// to begin the process of terminating the thread while also providing
+ /// exception information about the thread termination.
+ /// Calling this method usually terminates the thread.
+ /// </summary>
+ /// <param name="stateInfo">An object that contains application-specific information, such as state, which can be used by the thread being aborted</param>
+ public void Abort(object stateInfo)
+ {
+ threadField.Abort(stateInfo);
+ }
+
+ /// <summary>
+ /// Suspends the thread; if the thread is already suspended, this has no effect.
+ /// </summary>
+ public void Suspend()
+ {
+ Monitor.Wait(threadField);
+ }
+
+ /// <summary>
+ /// Obtain a String that represents the current object
+ /// </summary>
+ /// <returns>A String that represents the current object</returns>
+ public override System.String ToString()
+ {
+ return "Thread[" + Name + "," + Priority.ToString() + "]";
+ }
+
+ [ThreadStatic]
+ static ThreadClass This = null;
+
+ // named as the Java version
+ public static ThreadClass CurrentThread()
+ {
+ return Current();
+ }
+
+ public static void Sleep(long ms)
+ {
+ // casting long ms to int ms could lose resolution, however unlikely
+ // that someone would want to sleep for that long...
+ Thread.Sleep((int)ms);
+ }
+
+ /// <summary>
+ /// Gets the currently running thread
+ /// </summary>
+ /// <returns>The currently running thread</returns>
+ public static ThreadClass Current()
+ {
+ if (This == null)
+ {
+ This = new ThreadClass();
+ This.Instance = Thread.CurrentThread;
+ }
+ return This;
+ }
+
+ public static bool operator ==(ThreadClass t1, object t2)
+ {
+ if (((object)t1) == null) return t2 == null;
+ return t1.Equals(t2);
+ }
+
+ public static bool operator !=(ThreadClass t1, object t2)
+ {
+ return !(t1 == t2);
+ }
+
+ public override bool Equals(object obj)
+ {
+ if (obj == null) return false;
+ if (obj is ThreadClass) return this.threadField.Equals(((ThreadClass)obj).threadField);
+ return false;
+ }
+
+ public override int GetHashCode()
+ {
+ return this.threadField.GetHashCode();
+ }
+ }
+}
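
An illustrative sketch of the Java-style usage this wrapper enables; the MergeWorker class is hypothetical:

    class MergeWorker : Lucene.Net.Support.ThreadClass
    {
        public override void Run()
        {
            // long-running work goes here
        }
    }

    var worker = new MergeWorker();
    worker.SetDaemon(true);   // maps onto Thread.IsBackground
    worker.Start();
    worker.Join();            // block until Run() returns
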
diff --git a/src/core/Support/ThreadLock.cs b/src/core/Support/ThreadLock.cs
new file mode 100644
index 0000000..93cd719
--- /dev/null
+++ b/src/core/Support/ThreadLock.cs
@@ -0,0 +1,82 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System.Threading;
+
+namespace Lucene.Net.Support
+{
+ /// <summary>
+ /// Abstract base class that provides a synchronization interface
+ /// for derived lock types
+ /// </summary>
+ public abstract class ThreadLock
+ {
+ public abstract void Enter(object obj);
+ public abstract void Exit(object obj);
+
+ private static readonly ThreadLock _nullLock = new NullThreadLock();
+ private static readonly ThreadLock _monitorLock = new MonitorThreadLock();
+
+ /// <summary>
+ /// A ThreadLock class that actually does no locking
+ /// Used in ParallelMultiSearcher/MultiSearcher
+ /// </summary>
+ public static ThreadLock NullLock
+ {
+ get { return _nullLock; }
+ }
+
+ /// <summary>
+ /// Wrapper class for the Monitor Enter/Exit methods
+ /// using the <see cref="ThreadLock"/> interface
+ /// </summary>
+ public static ThreadLock MonitorLock
+ {
+ get { return _monitorLock; }
+ }
+
+ private sealed class NullThreadLock : ThreadLock
+ {
+ public override void Enter(object obj)
+ {
+ // Do nothing
+ }
+
+ public override void Exit(object obj)
+ {
+ // Do nothing
+ }
+ }
+
+ private sealed class MonitorThreadLock : ThreadLock
+ {
+ public override void Enter(object obj)
+ {
+ Monitor.Enter(obj);
+ }
+
+ public override void Exit(object obj)
+ {
+ Monitor.Exit(obj);
+ }
+ }
+ }
+}
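
An illustrative sketch of choosing a locking strategy through this abstraction; useLocking and syncRoot are hypothetical names:

    Lucene.Net.Support.ThreadLock sync = useLocking
        ? Lucene.Net.Support.ThreadLock.MonitorLock   // real Monitor.Enter/Exit
        : Lucene.Net.Support.ThreadLock.NullLock;     // no-op, for single-threaded callers
    sync.Enter(syncRoot);
    try
    {
        // guarded work
    }
    finally
    {
        sync.Exit(syncRoot);
    }
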
diff --git a/src/core/Support/WeakDictionary.cs b/src/core/Support/WeakDictionary.cs
new file mode 100644
index 0000000..880b02a
--- /dev/null
+++ b/src/core/Support/WeakDictionary.cs
@@ -0,0 +1,296 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace Lucene.Net.Support
+{
+ public sealed class WeakDictionary<TKey, TValue> : IDictionary<TKey, TValue>
+ {
+ private HashMap<WeakKey<TKey>, TValue> _hm;
+ private int _gcCollections = 0;
+
+ public WeakDictionary(int initialCapacity) : this(initialCapacity, Enumerable.Empty<KeyValuePair<TKey, TValue>>())
+ { }
+
+ public WeakDictionary() : this(32, Enumerable.Empty<KeyValuePair<TKey, TValue>>())
+ { }
+
+ public WeakDictionary(IEnumerable<KeyValuePair<TKey, TValue>> otherDictionary) : this(32, otherDictionary)
+ { }
+
+ private WeakDictionary(int initialCapacity, IEnumerable<KeyValuePair<TKey, TValue>> otherDict)
+ {
+ _hm = new HashMap<WeakKey<TKey>, TValue>(initialCapacity);
+ foreach (var kvp in otherDict)
+ {
+ _hm.Add(new WeakKey<TKey>(kvp.Key), kvp.Value);
+ }
+ }
+
+ private void Clean()
+ {
+ if (_hm.Count == 0) return;
+ var newHm = new HashMap<WeakKey<TKey>, TValue>();
+ foreach (var entry in _hm.Where(x => x.Key != null && x.Key.IsAlive))
+ {
+ newHm.Add(entry.Key, entry.Value);
+ }
+ _hm = newHm;
+ }
+
+ private void CleanIfNeeded()
+ {
+ int currentColCount = GC.CollectionCount(0);
+ if (currentColCount > _gcCollections)
+ {
+ Clean();
+ _gcCollections = currentColCount;
+ }
+ }
+
+ public IEnumerator<KeyValuePair<TKey, TValue>> GetEnumerator()
+ {
+ foreach (var kvp in _hm.Where(x => x.Key.IsAlive))
+ {
+ yield return new KeyValuePair<TKey, TValue>(kvp.Key.Target, kvp.Value);
+ }
+ }
+
+ IEnumerator IEnumerable.GetEnumerator()
+ {
+ return GetEnumerator();
+ }
+
+ void ICollection<KeyValuePair<TKey, TValue>>.Add(KeyValuePair<TKey, TValue> item)
+ {
+ CleanIfNeeded();
+ ((ICollection<KeyValuePair<WeakKey<TKey>, TValue>>) _hm).Add(
+ new KeyValuePair<WeakKey<TKey>, TValue>(new WeakKey<TKey>(item.Key), item.Value));
+ }
+
+ public void Clear()
+ {
+ _hm.Clear();
+ }
+
+ bool ICollection<KeyValuePair<TKey, TValue>>.Contains(KeyValuePair<TKey, TValue> item)
+ {
+ return ((ICollection<KeyValuePair<WeakKey<TKey>, TValue>>)_hm).Contains(
+ new KeyValuePair<WeakKey<TKey>, TValue>(new WeakKey<TKey>(item.Key), item.Value));
+ }
+
+ bool ICollection<KeyValuePair<TKey, TValue>>.Remove(KeyValuePair<TKey, TValue> item)
+ {
+ return ((ICollection<KeyValuePair<WeakKey<TKey>, TValue>>)_hm).Remove(
+ new KeyValuePair<WeakKey<TKey>, TValue>(new WeakKey<TKey>(item.Key), item.Value));
+ }
+
+ public int Count
+ {
+ get
+ {
+ CleanIfNeeded();
+ return _hm.Count;
+ }
+ }
+
+ public bool IsReadOnly
+ {
+ get { return false; }
+ }
+
+ public bool ContainsKey(TKey key)
+ {
+ return _hm.ContainsKey(new WeakKey<TKey>(key));
+ }
+
+ public void Add(TKey key, TValue value)
+ {
+ CleanIfNeeded();
+ _hm.Add(new WeakKey<TKey>(key), value);
+ }
+
+ public bool Remove(TKey key)
+ {
+ return _hm.Remove(new WeakKey<TKey>(key));
+ }
+
+ public bool TryGetValue(TKey key, out TValue value)
+ {
+ return _hm.TryGetValue(new WeakKey<TKey>(key), out value);
+ }
+
+ public TValue this[TKey key]
+ {
+ get { return _hm[new WeakKey<TKey>(key)]; }
+ set
+ {
+ CleanIfNeeded();
+ _hm[new WeakKey<TKey>(key)] = value;
+ }
+ }
+
+ public ICollection<TKey> Keys
+ {
+ get
+ {
+ CleanIfNeeded();
+ return new KeyCollection(_hm);
+ }
+ }
+
+ public ICollection<TValue> Values
+ {
+ get
+ {
+ CleanIfNeeded();
+ return _hm.Values;
+ }
+ }
+
+ void ICollection<KeyValuePair<TKey, TValue>>.CopyTo(KeyValuePair<TKey, TValue>[] array, int arrayIndex)
+ {
+ throw new NotSupportedException();
+ }
+
+ #region KeyCollection
+ class KeyCollection : ICollection<TKey>
+ {
+ private readonly HashMap<WeakKey<TKey>, TValue> _internalDict;
+
+ public KeyCollection(HashMap<WeakKey<TKey>, TValue> dict)
+ {
+ _internalDict = dict;
+ }
+
+ public IEnumerator<TKey> GetEnumerator()
+ {
+ foreach (var key in _internalDict.Keys)
+ {
+ yield return key.Target;
+ }
+ }
+
+ IEnumerator IEnumerable.GetEnumerator()
+ {
+ return GetEnumerator();
+ }
+
+ public void CopyTo(TKey[] array, int arrayIndex)
+ {
+ throw new NotImplementedException("Implement this as needed");
+ }
+
+ public int Count
+ {
+ get { return _internalDict.Count; }
+ }
+
+ public bool IsReadOnly
+ {
+ get { return true; }
+ }
+
+ #region Explicit Interface Definitions
+ bool ICollection<TKey>.Contains(TKey item)
+ {
+ throw new NotSupportedException();
+ }
+
+ void ICollection<TKey>.Add(TKey item)
+ {
+ throw new NotSupportedException();
+ }
+
+ void ICollection<TKey>.Clear()
+ {
+ throw new NotSupportedException();
+ }
+
+ bool ICollection<TKey>.Remove(TKey item)
+ {
+ throw new NotSupportedException();
+ }
+ #endregion
+ }
+ #endregion
+
+
+ /// <summary>
+ /// A weak-reference wrapper for the hashtable keys. Whenever a key/value pair
+ /// is added to the hashtable, the key is wrapped in a WeakKey. WeakKey caches the
+ /// original object's hash code for fast comparison.
+ /// </summary>
+ class WeakKey<T>
+ {
+ WeakReference reference;
+ int hashCode;
+
+ public WeakKey(T key)
+ {
+ if (key == null)
+ throw new ArgumentNullException("key");
+
+ hashCode = key.GetHashCode();
+ reference = new WeakReference(key);
+ }
+
+ public override int GetHashCode()
+ {
+ return hashCode;
+ }
+
+ public override bool Equals(object obj)
+ {
+ if (!reference.IsAlive || obj == null) return false;
+
+ if (object.ReferenceEquals(this, obj))
+ {
+ return true;
+ }
+
+ if (obj is WeakKey<T>)
+ {
+ var other = (WeakKey<T>)obj;
+
+ var referenceTarget = reference.Target; // Careful: can be null in the mean time...
+ return referenceTarget != null && referenceTarget.Equals(other.Target);
+ }
+
+ return false;
+ }
+
+ public T Target
+ {
+ get { return (T)reference.Target; }
+ }
+
+ public bool IsAlive
+ {
+ get { return reference.IsAlive; }
+ }
+ }
+ }
+}
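
An illustrative sketch of the weak-key behavior: once a key is no longer strongly referenced elsewhere, its entry is dropped by a later Clean pass (triggered after a gen-0 collection):

    var cache = new Lucene.Net.Support.WeakDictionary<object, string>();
    var key = new object();
    cache.Add(key, "cached value");
    // While 'key' stays reachable, cache[key] returns "cached value".
    key = null;
    System.GC.Collect();
    // The dead entry is purged the next time Count, Keys, Add or the indexer setter calls CleanIfNeeded().
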
diff --git a/src/core/Util/ArrayUtil.cs b/src/core/Util/ArrayUtil.cs
new file mode 100644
index 0000000..7ab69c9
--- /dev/null
+++ b/src/core/Util/ArrayUtil.cs
@@ -0,0 +1,282 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Util
+{
+
+ /// <summary> Methods for manipulating arrays.</summary>
+ public sealed class ArrayUtil
+ {
+ /*
+ Begin Apache Harmony code
+
+ Revision taken on Friday, June 12. https://svn.apache.org/repos/asf/harmony/enhanced/classlib/archive/java6/modules/luni/src/main/java/java/lang/Integer.java
+
+ */
+
+ /// <summary> Parses the string argument as if it was an int value and returns the
+ /// result. Throws NumberFormatException if the string does not represent an
+ /// int quantity.
+ ///
+ /// </summary>
+ /// <param name="chars">a string representation of an int quantity.
+ /// </param>
+ /// <returns> int the value represented by the argument
+ /// </returns>
+ /// <throws> NumberFormatException if the argument could not be parsed as an int quantity. </throws>
+ public static int ParseInt(char[] chars)
+ {
+ return ParseInt(chars, 0, chars.Length, 10);
+ }
+
+ /// <summary> Parses a char array into an int.</summary>
+ /// <param name="chars">the character array
+ /// </param>
+ /// <param name="offset">The offset into the array
+ /// </param>
+ /// <param name="len">The length
+ /// </param>
+ /// <returns> the int
+ /// </returns>
+ /// <throws> NumberFormatException if it can't parse </throws>
+ public static int ParseInt(char[] chars, int offset, int len)
+ {
+ return ParseInt(chars, offset, len, 10);
+ }
+
+ /// <summary> Parses the string argument as if it was an int value and returns the
+ /// result. Throws NumberFormatException if the string does not represent an
+ /// int quantity. The second argument specifies the radix to use when parsing
+ /// the value.
+ ///
+ /// </summary>
+ /// <param name="chars">a string representation of an int quantity.
+ /// </param>
+ /// <param name="offset"></param>
+ /// <param name="len"></param>
+ /// <param name="radix">the base to use for conversion.
+ /// </param>
+ /// <returns> int the value represented by the argument
+ /// </returns>
+ /// <throws> NumberFormatException if the argument could not be parsed as an int quantity. </throws>
+ public static int ParseInt(char[] chars, int offset, int len, int radix)
+ {
+ if (chars == null || radix < 2 || radix > 36)
+ {
+ throw new System.FormatException();
+ }
+ int i = 0;
+ if (len == 0)
+ {
+ throw new System.FormatException("chars length is 0");
+ }
+ bool negative = chars[offset + i] == '-';
+ if (negative && ++i == len)
+ {
+ throw new System.FormatException("can't convert to an int");
+ }
+ if (negative == true)
+ {
+ offset++;
+ len--;
+ }
+ return Parse(chars, offset, len, radix, negative);
+ }
+
+
+ private static int Parse(char[] chars, int offset, int len, int radix, bool negative)
+ {
+ int max = System.Int32.MinValue / radix;
+ int result = 0;
+ for (int i = 0; i < len; i++)
+ {
+ int digit = (int) System.Char.GetNumericValue(chars[i + offset]);
+ if (digit == - 1)
+ {
+ throw new System.FormatException("Unable to parse");
+ }
+ if (max > result)
+ {
+ throw new System.FormatException("Unable to parse");
+ }
+ int next = result * radix - digit;
+ if (next > result)
+ {
+ throw new System.FormatException("Unable to parse");
+ }
+ result = next;
+ }
+ /*while (offset < len) {
+
+ }*/
+ if (!negative)
+ {
+ result = - result;
+ if (result < 0)
+ {
+ throw new System.FormatException("Unable to parse");
+ }
+ }
+ return result;
+ }
+
+
+ /*
+
+ END APACHE HARMONY CODE
+ */
+
+
+ public static int GetNextSize(int targetSize)
+ {
+ /* This over-allocates proportional to the list size, making room
+ * for additional growth. The over-allocation is mild, but is
+ * enough to give linear-time amortized behavior over a long
+ * sequence of appends() in the presence of a poorly-performing
+ * system realloc().
+ * The growth pattern is: 0, 4, 8, 16, 25, 35, 46, 58, 72, 88, ...
+ */
+ return (targetSize >> 3) + (targetSize < 9?3:6) + targetSize;
+ }
+
+ public static int GetShrinkSize(int currentSize, int targetSize)
+ {
+ int newSize = GetNextSize(targetSize);
+ // Only reallocate if we are "substantially" smaller.
+ // This saves us from "running hot" (constantly making a
+ // bit bigger then a bit smaller, over and over):
+ if (newSize < currentSize / 2)
+ return newSize;
+ else
+ return currentSize;
+ }
+
+ public static int[] Grow(int[] array, int minSize)
+ {
+ if (array.Length < minSize)
+ {
+ int[] newArray = new int[GetNextSize(minSize)];
+ Array.Copy(array, 0, newArray, 0, array.Length);
+ return newArray;
+ }
+ else
+ return array;
+ }
+
+ public static int[] Grow(int[] array)
+ {
+ return Grow(array, 1 + array.Length);
+ }
+
+ public static int[] Shrink(int[] array, int targetSize)
+ {
+ int newSize = GetShrinkSize(array.Length, targetSize);
+ if (newSize != array.Length)
+ {
+ int[] newArray = new int[newSize];
+ Array.Copy(array, 0, newArray, 0, newSize);
+ return newArray;
+ }
+ else
+ return array;
+ }
+
+ public static long[] Grow(long[] array, int minSize)
+ {
+ if (array.Length < minSize)
+ {
+ long[] newArray = new long[GetNextSize(minSize)];
+ Array.Copy(array, 0, newArray, 0, array.Length);
+ return newArray;
+ }
+ else
+ return array;
+ }
+
+ public static long[] Grow(long[] array)
+ {
+ return Grow(array, 1 + array.Length);
+ }
+
+ public static long[] Shrink(long[] array, int targetSize)
+ {
+ int newSize = GetShrinkSize(array.Length, targetSize);
+ if (newSize != array.Length)
+ {
+ long[] newArray = new long[newSize];
+ Array.Copy(array, 0, newArray, 0, newSize);
+ return newArray;
+ }
+ else
+ return array;
+ }
+
+ public static byte[] Grow(byte[] array, int minSize)
+ {
+ if (array.Length < minSize)
+ {
+ byte[] newArray = new byte[GetNextSize(minSize)];
+ Array.Copy(array, 0, newArray, 0, array.Length);
+ return newArray;
+ }
+ else
+ return array;
+ }
+
+ public static byte[] Grow(byte[] array)
+ {
+ return Grow(array, 1 + array.Length);
+ }
+
+ public static byte[] Shrink(byte[] array, int targetSize)
+ {
+ int newSize = GetShrinkSize(array.Length, targetSize);
+ if (newSize != array.Length)
+ {
+ byte[] newArray = new byte[newSize];
+ Array.Copy(array, 0, newArray, 0, newSize);
+ return newArray;
+ }
+ else
+ return array;
+ }
+
+ /// <summary> Returns hash of chars in range start (inclusive) to
+ /// end (exclusive)
+ /// </summary>
+ public static int HashCode(char[] array, int start, int end)
+ {
+ int code = 0;
+ for (int i = end - 1; i >= start; i--)
+ code = code * 31 + array[i];
+ return code;
+ }
+
+ /// <summary> Returns hash of bytes in range start (inclusive) to
+ /// end (exclusive)
+ /// </summary>
+ public static int HashCode(byte[] array, int start, int end)
+ {
+ int code = 0;
+ for (int i = end - 1; i >= start; i--)
+ code = code * 31 + array[i];
+ return code;
+ }
+ }
+} \ No newline at end of file
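
An illustrative sketch of the amortized growth and shrink policy; the sizes follow directly from GetNextSize:

    int[] buf = new int[0];
    buf = Lucene.Net.Util.ArrayUtil.Grow(buf, 10);
    // GetNextSize(10) = (10 >> 3) + 6 + 10 = 17, so buf.Length is now 17
    buf = Lucene.Net.Util.ArrayUtil.Shrink(buf, 2);
    // GetNextSize(2) = 5, which is less than 17 / 2, so a 5-element array is returned
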
diff --git a/src/core/Util/Attribute.cs b/src/core/Util/Attribute.cs
new file mode 100644
index 0000000..b0a76b8
--- /dev/null
+++ b/src/core/Util/Attribute.cs
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Util
+{
+
+ /// <summary> Base class for Attributes that can be added to a
+ /// <see cref="Lucene.Net.Util.AttributeSource" />.
+ /// <p/>
+ /// Attributes are used to add data in a dynamic, yet type-safe way to a source
+ /// of usually streamed objects, e.g. a <see cref="Lucene.Net.Analysis.TokenStream" />.
+ /// </summary>
+ [Serializable]
+ public abstract class Attribute : System.ICloneable, IAttribute
+ {
+ /// <summary> Clears the values in this AttributeImpl and resets it to its
+ /// default value. If this implementation implements more than one Attribute interface
+ /// it clears all.
+ /// </summary>
+ public abstract void Clear();
+
+ /// <summary> The default implementation of this method accesses all declared
+ /// fields of this object and prints the values in the following syntax:
+ ///
+ /// <code>
+ /// public String toString() {
+ /// return "start=" + startOffset + ",end=" + endOffset;
+ /// }
+ /// </code>
+ ///
+ /// This method may be overridden by subclasses.
+ /// </summary>
+ public override System.String ToString()
+ {
+ System.Text.StringBuilder buffer = new System.Text.StringBuilder();
+ System.Type clazz = this.GetType();
+ System.Reflection.FieldInfo[] fields = clazz.GetFields(System.Reflection.BindingFlags.Instance | System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Public | System.Reflection.BindingFlags.DeclaredOnly | System.Reflection.BindingFlags.Static);
+ try
+ {
+ for (int i = 0; i < fields.Length; i++)
+ {
+ System.Reflection.FieldInfo f = fields[i];
+ if (f.IsStatic)
+ continue;
+ //f.setAccessible(true); // {{Aroush-2.9}} java.lang.reflect.AccessibleObject.setAccessible
+ System.Object value_Renamed = f.GetValue(this);
+ if (buffer.Length > 0)
+ {
+ buffer.Append(',');
+ }
+ if (value_Renamed == null)
+ {
+ buffer.Append(f.Name + "=null");
+ }
+ else
+ {
+ buffer.Append(f.Name + "=" + value_Renamed);
+ }
+ }
+ }
+ catch (System.UnauthorizedAccessException e)
+ {
+ // this should never happen, because we're just accessing fields
+ // from 'this'
+ throw new System.SystemException(e.Message, e);
+ }
+
+ return buffer.ToString();
+ }
+
+ /// <summary> Subclasses must implement this method and should compute
+ /// a hashCode similar to this:
+ /// <code>
+ /// public int hashCode() {
+ /// int code = startOffset;
+ /// code = code * 31 + endOffset;
+ /// return code;
+ /// }
+ /// </code>
+ ///
+ /// see also <see cref="Equals(Object)" />
+ /// </summary>
+ abstract public override int GetHashCode();
+
+ /// <summary> All values used for computation of <see cref="GetHashCode()" />
+ /// should be checked here for equality.
+ ///
+ /// see also <see cref="Object.Equals(Object)" />
+ /// </summary>
+ abstract public override bool Equals(System.Object other);
+
+ /// <summary> Copies the values from this Attribute into the passed-in
+ /// target attribute. The target implementation must support all the
+ /// Attributes this implementation supports.
+ /// </summary>
+ public abstract void CopyTo(Attribute target);
+
+ /// <summary> Shallow clone. Subclasses must override this if they
+ /// need to clone any members deeply.
+ /// </summary>
+ public virtual System.Object Clone()
+ {
+ System.Object clone = null;
+ try
+ {
+ clone = base.MemberwiseClone();
+ }
+ catch (System.Exception e)
+ {
+ throw new System.SystemException(e.Message, e); // shouldn't happen
+ }
+ return clone;
+ }
+ }
+} \ No newline at end of file
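
An illustrative sketch of a concrete attribute built on this base class; it assumes IFlagsAttribute (src/core/Analysis/Tokenattributes) exposes an int Flags property, as the shipped FlagsAttribute does:

    public class MyFlagsAttribute : Lucene.Net.Util.Attribute, Lucene.Net.Analysis.Tokenattributes.IFlagsAttribute
    {
        private int flags;

        public int Flags
        {
            get { return flags; }
            set { flags = value; }
        }

        public override void Clear() { flags = 0; }

        public override int GetHashCode() { return flags; }

        public override bool Equals(object other)
        {
            return other is MyFlagsAttribute && ((MyFlagsAttribute)other).flags == flags;
        }

        public override void CopyTo(Lucene.Net.Util.Attribute target)
        {
            ((Lucene.Net.Analysis.Tokenattributes.IFlagsAttribute)target).Flags = flags;
        }
    }
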
diff --git a/src/core/Util/AttributeSource.cs b/src/core/Util/AttributeSource.cs
new file mode 100644
index 0000000..6d92651
--- /dev/null
+++ b/src/core/Util/AttributeSource.cs
@@ -0,0 +1,510 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Reflection;
+using Lucene.Net.Support;
+using TokenStream = Lucene.Net.Analysis.TokenStream;
+
+namespace Lucene.Net.Util
+{
+
+ /// <summary> An AttributeSource contains a list of different <see cref="Attribute" />s,
+ /// and methods to add and get them. There can only be a single instance
+ /// of an attribute in the same AttributeSource instance. This is ensured
+ /// by passing in the actual type of the Attribute (Class&lt;Attribute&gt;) to
+ /// the <see cref="AddAttribute{T}()" />, which then checks if an instance of
+ /// that type is already present. If yes, it returns the instance, otherwise
+ /// it creates a new instance and returns it.
+ /// </summary>
+ public class AttributeSource
+ {
+ /// <summary> An AttributeFactory creates instances of <see cref="Attribute" />s.</summary>
+ public abstract class AttributeFactory
+ {
+ /// <summary> returns an <see cref="Attribute" /> for the supplied <see cref="IAttribute" /> interface class.</summary>
+ public abstract Attribute CreateAttributeInstance<T>() where T : IAttribute;
+
+ /// <summary> This is the default factory that creates <see cref="Attribute" />s using the
+ /// class name of the supplied <see cref="IAttribute" /> interface class by appending <c>Impl</c> to it.
+ /// </summary>
+ public static readonly AttributeFactory DEFAULT_ATTRIBUTE_FACTORY = new DefaultAttributeFactory();
+
+ private sealed class DefaultAttributeFactory:AttributeFactory
+ {
+ // This should be WeakDictionary<T, WeakReference<TImpl>> where typeof(T) is Attribute and TImpl is typeof(AttributeImpl)
+ private static readonly WeakDictionary<Type, WeakReference> attClassImplMap =
+ new WeakDictionary<Type, WeakReference>();
+
+ internal DefaultAttributeFactory()
+ {
+ }
+
+ public override Attribute CreateAttributeInstance<TAttImpl>()
+ {
+ try
+ {
+ return (Attribute)System.Activator.CreateInstance(GetClassForInterface<TAttImpl>());
+ }
+ catch (System.UnauthorizedAccessException)
+ {
+ throw new System.ArgumentException("Could not instantiate implementing class for " + typeof(TAttImpl).FullName);
+ }
+ //catch (System.Exception e)
+ //{
+ // throw new System.ArgumentException("Could not instantiate implementing class for " + typeof(TAttImpl).FullName);
+ //}
+ }
+
+ private static System.Type GetClassForInterface<T>() where T : IAttribute
+ {
+ lock (attClassImplMap)
+ {
+ var attClass = typeof (T);
+ WeakReference refz = attClassImplMap[attClass];
+ System.Type clazz = (refz == null) ? null : ((System.Type) refz.Target);
+ if (clazz == null)
+ {
+ try
+ {
+ string name = attClass.FullName.Replace(attClass.Name, attClass.Name.Substring(1)) + ", " + attClass.Assembly.FullName;
+ attClassImplMap.Add(attClass, new WeakReference( clazz = System.Type.GetType(name, true))); //OK
+ }
+ catch (System.TypeLoadException) // was System.Exception
+ {
+ throw new System.ArgumentException("Could not find implementing class for " + attClass.FullName);
+ }
+ }
+ return clazz;
+ }
+ }
+ }
+ }
+
+ // These two maps must always be in sync!!!
+ // So they are private, final and read-only from the outside (read-only iterators)
+ private GeneralKeyedCollection<Type, AttributeImplItem> attributes;
+ private GeneralKeyedCollection<Type, AttributeImplItem> attributeImpls;
+
+ private State[] currentState = null;
+ private AttributeFactory factory;
+
+ /// <summary> An AttributeSource using the default attribute factory <see cref="AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY" />.</summary>
+ public AttributeSource():this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY)
+ {
+ }
+
+ /// <summary> An AttributeSource that uses the same attributes as the supplied one.</summary>
+ public AttributeSource(AttributeSource input)
+ {
+ if (input == null)
+ {
+ throw new System.ArgumentException("input AttributeSource must not be null");
+ }
+ this.attributes = input.attributes;
+ this.attributeImpls = input.attributeImpls;
+ this.currentState = input.currentState;
+ this.factory = input.factory;
+ }
+
+ /// <summary> An AttributeSource using the supplied <see cref="AttributeFactory" /> for creating new <see cref="IAttribute" /> instances.</summary>
+ public AttributeSource(AttributeFactory factory)
+ {
+ this.attributes = new GeneralKeyedCollection<Type, AttributeImplItem>(att => att.Key);
+ this.attributeImpls = new GeneralKeyedCollection<Type, AttributeImplItem>(att => att.Key);
+ this.currentState = new State[1];
+ this.factory = factory;
+ }
+
+ /// <summary>Returns the used AttributeFactory.</summary>
+ public virtual AttributeFactory Factory
+ {
+ get { return factory; }
+ }
+
+ /// <summary>Returns a new iterator that iterates the attribute classes
+ /// in the same order they were added in.
+ /// Signature for Java 1.5: <c>public Iterator&lt;Class&lt;? extends Attribute&gt;&gt; getAttributeClassesIterator()</c>
+ ///
+ /// Note that this return value is different from Java in that it enumerates over the values
+ /// and not the keys
+ /// </summary>
+ [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
+ public virtual IEnumerable<Type> GetAttributeTypesIterator()
+ {
+ return this.attributes.Select(item => item.Key);
+ }
+
+ /// <summary>Returns a new iterator that iterates all unique Attribute implementations.
+ /// This iterator may contain fewer entries than <see cref="GetAttributeTypesIterator" />,
+ /// if one instance implements more than one Attribute interface.
+ /// Signature for Java 1.5: <c>public Iterator&lt;AttributeImpl&gt; getAttributeImplsIterator()</c>
+ /// </summary>
+ [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
+ public virtual IEnumerable<Attribute> GetAttributeImplsIterator()
+ {
+ var initState = GetCurrentState();
+ while (initState != null)
+ {
+ var att = initState.attribute;
+ initState = initState.next;
+ yield return att;
+ }
+ }
+
+ /// <summary>a cache that stores all interfaces for known implementation classes for performance (slow reflection) </summary>
+ private static readonly WeakDictionary<Type, System.Collections.Generic.LinkedList<WeakReference>>
+ knownImplClasses = new WeakDictionary<Type, System.Collections.Generic.LinkedList<WeakReference>>();
+
+ /// <summary>
+ /// <b>Expert:</b> Adds a custom AttributeImpl instance with one or more Attribute interfaces.
+ /// <p><font color="red"><b>Please note:</b> It is not guaranteed, that <c>att</c> is added to
+ /// the <c>AttributeSource</c>, because the provided attributes may already exist.
+ /// You should always retrieve the wanted attributes using <see cref="GetAttribute{T}"/> after adding
+ /// with this method and cast to your class.
+ /// The recommended way to use custom implementations is using an <see cref="AttributeFactory"/>
+ /// </font></p>
+ /// </summary>
+ public virtual void AddAttributeImpl(Attribute att)
+ {
+ System.Type clazz = att.GetType();
+ if (attributeImpls.Contains(clazz))
+ return ;
+ System.Collections.Generic.LinkedList<WeakReference> foundInterfaces;
+ lock (knownImplClasses)
+ {
+ foundInterfaces = knownImplClasses[clazz];
+ if (foundInterfaces == null)
+ {
+ // we have a strong reference to the class instance holding all interfaces in the list (parameter "att"),
+ // so all WeakReferences are never evicted by GC
+ knownImplClasses.Add(clazz, foundInterfaces = new LinkedList<WeakReference>());
+ // find all interfaces that this attribute instance implements
+ // and that extend the Attribute interface
+ System.Type actClazz = clazz;
+ do
+ {
+ System.Type[] interfaces = actClazz.GetInterfaces();
+ for (int i = 0; i < interfaces.Length; i++)
+ {
+ System.Type curInterface = interfaces[i];
+ if (curInterface != typeof(IAttribute) && typeof(IAttribute).IsAssignableFrom(curInterface))
+ {
+ foundInterfaces.AddLast(new WeakReference(curInterface));
+ }
+ }
+ actClazz = actClazz.BaseType;
+ }
+ while (actClazz != null);
+ }
+ }
+
+ // add all interfaces of this AttributeImpl to the maps
+ foreach(var curInterfaceRef in foundInterfaces)
+ {
+ System.Type curInterface = (System.Type) curInterfaceRef.Target;
+ System.Diagnostics.Debug.Assert(curInterface != null,
+ "We have a strong reference on the class holding the interfaces, so they should never get evicted");
+ // Attribute is a superclass of this interface
+ if (!attributes.ContainsKey(curInterface))
+ {
+ // invalidate state to force recomputation in captureState()
+ this.currentState[0] = null;
+ attributes.Add(new AttributeImplItem(curInterface, att));
+ if (!attributeImpls.ContainsKey(clazz))
+ {
+ attributeImpls.Add(new AttributeImplItem(clazz, att));
+ }
+ }
+ }
+ }
+
+ /// <summary> The caller must pass in a Class&lt;? extends Attribute&gt; value.
+ /// This method first checks if an instance of that class is
+ /// already in this AttributeSource and returns it. Otherwise a
+ /// new instance is created, added to this AttributeSource and returned.
+ /// </summary>
+ // NOTE: Java has Class<T>, .NET has no Type<T>, this is not a perfect port
+ public virtual T AddAttribute<T>() where T : IAttribute
+ {
+ var attClass = typeof (T);
+ if (!attributes.ContainsKey(attClass))
+ {
+ if (!(attClass.IsInterface && typeof(IAttribute).IsAssignableFrom(attClass)))
+ {
+ throw new ArgumentException(
+ "AddAttribute() only accepts an interface that extends Attribute, but " +
+ attClass.FullName + " does not fulfil this contract."
+ );
+ }
+
+ AddAttributeImpl(this.factory.CreateAttributeInstance<T>());
+ }
+
+ return (T)(IAttribute)attributes[attClass].Value;
+ }
+
+ /// <summary>Returns true, iff this AttributeSource has any attributes </summary>
+ public virtual bool HasAttributes
+ {
+ get { return this.attributes.Count != 0; }
+ }
+
+ /// <summary> The caller must pass in a Class&lt;? extends Attribute&gt; value.
+ /// Returns true, iff this AttributeSource contains the passed-in Attribute.
+ /// </summary>
+ public virtual bool HasAttribute<T>() where T : IAttribute
+ {
+ return this.attributes.Contains(typeof(T));
+ }
+
+ /// <summary>
+ /// The caller must pass in a Class&lt;? extends Attribute&gt; value.
+ /// Returns the instance of the passed in Attribute contained in this AttributeSource
+ /// </summary>
+ /// <throws>
+ /// IllegalArgumentException if this AttributeSource does not contain the Attribute.
+ /// It is recommended to always use <see cref="AddAttribute{T}" /> even in consumers
+ /// of TokenStreams, because you cannot know if a specific TokenStream really uses
+ /// a specific Attribute. <see cref="AddAttribute{T}" /> will automatically make the attribute
+ /// available. If you want to only use the attribute, if it is available (to optimize
+ /// consuming), use <see cref="HasAttribute" />.
+ /// </throws>
+ // NOTE: Java has Class<T>, .NET has no Type<T>, this is not a perfect port
+ public virtual T GetAttribute<T>() where T : IAttribute
+ {
+ var attClass = typeof (T);
+ if (!this.attributes.ContainsKey(attClass))
+ {
+ throw new System.ArgumentException("This AttributeSource does not have the attribute '" + attClass.FullName + "'.");
+ }
+ else
+ {
+ return (T)(IAttribute)this.attributes[attClass].Value;
+ }
+ }
+
+ /// <summary> This class holds the state of an AttributeSource.</summary>
+ /// <seealso cref="CaptureState">
+ /// </seealso>
+ /// <seealso cref="RestoreState">
+ /// </seealso>
+ public sealed class State : System.ICloneable
+ {
+ internal /*private*/ Attribute attribute;
+ internal /*private*/ State next;
+
+ public System.Object Clone()
+ {
+ State clone = new State();
+ clone.attribute = (Attribute) attribute.Clone();
+
+ if (next != null)
+ {
+ clone.next = (State) next.Clone();
+ }
+
+ return clone;
+ }
+ }
+
+ private State GetCurrentState()
+ {
+ var s = currentState[0];
+ if (s != null || !HasAttributes)
+ {
+ return s;
+ }
+
+ var c = s = currentState[0] = new State();
+ var it = attributeImpls.Values().GetEnumerator();
+ it.MoveNext();
+ c.attribute = it.Current.Value;
+
+ while (it.MoveNext())
+ {
+ c.next = new State();
+ c = c.next;
+ c.attribute = it.Current.Value;
+ }
+
+ return s;
+ }
+
+ /// <summary> Resets all Attributes in this AttributeSource by calling
+ /// <see cref="Attribute.Clear()" /> on each Attribute implementation.
+ /// </summary>
+ public virtual void ClearAttributes()
+ {
+ for (var state = GetCurrentState(); state != null; state = state.next)
+ {
+ state.attribute.Clear();
+ }
+ }
+
+ /// <summary> Captures the state of all Attributes. The return value can be passed to
+ /// <see cref="RestoreState" /> to restore the state of this or another AttributeSource.
+ /// </summary>
+ public virtual State CaptureState()
+ {
+ var state = this.GetCurrentState();
+ return (state == null) ? null : (State) state.Clone();
+ }
+
+ /// <summary> Restores this state by copying the values of all attribute implementations
+ /// that this state contains into the attributes implementations of the targetStream.
+ /// The targetStream must contain a corresponding instance for each argument
+ /// contained in this state (e.g. it is not possible to restore the state of
+ /// an AttributeSource containing a TermAttribute into an AttributeSource using
+ /// a Token instance as implementation).
+ ///
+ /// Note that this method does not affect attributes of the targetStream
+ /// that are not contained in this state. In other words, if for example
+ /// the targetStream contains an OffsetAttribute, but this state doesn't, then
+ /// the value of the OffsetAttribute remains unchanged. It might be desirable to
+ /// reset its value to the default, in which case the caller should first
+ /// call <see cref="AttributeSource.ClearAttributes()" /> on the targetStream.
+ /// </summary>
+ public virtual void RestoreState(State state)
+ {
+ if (state == null)
+ return ;
+
+ do
+ {
+ if (!attributeImpls.ContainsKey(state.attribute.GetType()))
+ {
+ throw new System.ArgumentException("State contains an AttributeImpl that is not in this AttributeSource");
+ }
+ state.attribute.CopyTo(attributeImpls[state.attribute.GetType()].Value);
+ state = state.next;
+ }
+ while (state != null);
+ }
+
+ public override int GetHashCode()
+ {
+ var code = 0;
+
+ for (var state = GetCurrentState(); state != null; state = state.next)
+ {
+ code = code*31 + state.attribute.GetHashCode();
+ }
+
+ return code;
+ }
+
+ public override bool Equals(System.Object obj)
+ {
+ if (obj == this)
+ {
+ return true;
+ }
+
+ if (obj is AttributeSource)
+ {
+ AttributeSource other = (AttributeSource) obj;
+
+ if (HasAttributes)
+ {
+ if (!other.HasAttributes)
+ {
+ return false;
+ }
+
+ if (this.attributeImpls.Count != other.attributeImpls.Count)
+ {
+ return false;
+ }
+
+ // it is only equal if all attribute impls are the same in the same order
+ var thisState = this.GetCurrentState();
+ var otherState = other.GetCurrentState();
+ while (thisState != null && otherState != null)
+ {
+ if (otherState.attribute.GetType() != thisState.attribute.GetType() || !otherState.attribute.Equals(thisState.attribute))
+ {
+ return false;
+ }
+ thisState = thisState.next;
+ otherState = otherState.next;
+ }
+ return true;
+ }
+ else
+ {
+ return !other.HasAttributes;
+ }
+ }
+ else
+ return false;
+ }
+
+ public override System.String ToString()
+ {
+ System.Text.StringBuilder sb = new System.Text.StringBuilder().Append('(');
+
+ if (HasAttributes)
+ {
+ if (currentState[0] == null)
+ {
+ currentState[0] = GetCurrentState();
+ }
+ for (var state = currentState[0]; state != null; state = state.next)
+ {
+ if (state != currentState[0])
+ sb.Append(',');
+ sb.Append(state.attribute.ToString());
+ }
+ }
+ return sb.Append(')').ToString();
+ }
+
+ /// <summary> Performs a clone of all <see cref="Attribute" /> instances returned in a new
+ /// AttributeSource instance. This method can be used to e.g. create another TokenStream
+ /// with exactly the same attributes (using <see cref="AttributeSource(AttributeSource)" />)
+ /// </summary>
+ public virtual AttributeSource CloneAttributes()
+ {
+ var clone = new AttributeSource(this.factory);
+
+ // first clone the impls
+ if (HasAttributes)
+ {
+ for (var state = GetCurrentState(); state != null; state = state.next)
+ {
+ var impl = (Attribute) state.attribute.Clone();
+
+ if (!clone.attributeImpls.ContainsKey(impl.GetType()))
+ {
+ clone.attributeImpls.Add(new AttributeImplItem(impl.GetType(), impl));
+ }
+ }
+ }
+
+ // now the interfaces
+ foreach (var att in this.attributes)
+ {
+ clone.attributes.Add(new AttributeImplItem(att.Key, clone.attributeImpls[att.Value.GetType()].Value));
+ }
+
+ return clone;
+ }
+ }
+} \ No newline at end of file
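
An illustrative sketch of the add/get pattern described above, using the ITermAttribute interface from src/core/Analysis/Tokenattributes (the default factory resolves it to TermAttribute by dropping the leading 'I'):

    var source = new Lucene.Net.Util.AttributeSource();
    var term = source.AddAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();  // created on first request
    var same = source.AddAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();  // returns the existing instance
    bool has = source.HasAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();  // true
    var snapshot = source.CaptureState();  // can later be replayed with source.RestoreState(snapshot)
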
diff --git a/src/core/Util/AverageGuessMemoryModel.cs b/src/core/Util/AverageGuessMemoryModel.cs
new file mode 100644
index 0000000..43dae7a
--- /dev/null
+++ b/src/core/Util/AverageGuessMemoryModel.cs
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Util
+{
+
+ /// <summary> An average, best guess, MemoryModel that should work okay on most systems.
+ ///
+ /// </summary>
+ public class AverageGuessMemoryModel:MemoryModel
+ {
+ public AverageGuessMemoryModel()
+ {
+ InitBlock();
+ }
+
+ private void InitBlock()
+ {
+ sizes = new IdentityDictionary<Type, int>()
+ {
+ {typeof (bool), 1},
+ {typeof (byte), 1},
+ {typeof(sbyte), 1},
+ {typeof (char), 2},
+ {typeof (short), 2},
+ {typeof (int), 4},
+ {typeof (float), 4},
+ {typeof (double), 8},
+ {typeof (long), 8}
+ };
+ }
+ // best guess primitive sizes
+ private System.Collections.Generic.Dictionary<Type, int> sizes;
+
+ /*
+ * (non-Javadoc)
+ *
+ * <see cref="Lucene.Net.Util.MemoryModel.getArraySize()"/>
+ */
+
+ public override int ArraySize
+ {
+ get { return 16; }
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * <see cref="Lucene.Net.Util.MemoryModel.getClassSize()"/>
+ */
+
+ public override int ClassSize
+ {
+ get { return 8; }
+ }
+
+ /* (non-Javadoc)
+ * <see cref="Lucene.Net.Util.MemoryModel.getPrimitiveSize(java.lang.Class)"/>
+ */
+ public override int GetPrimitiveSize(Type clazz)
+ {
+ return sizes[clazz];
+ }
+
+ /* (non-Javadoc)
+ * <see cref="Lucene.Net.Util.MemoryModel.getReferenceSize()"/>
+ */
+
+ public override int ReferenceSize
+ {
+ get { return 4; }
+ }
+ }
+} \ No newline at end of file
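
An illustrative back-of-the-envelope use of this model; the combination shown is an assumption, not the exact formula used by RamUsageEstimator:

    var model = new Lucene.Net.Util.AverageGuessMemoryModel();
    long intArrayBytes = model.ArraySize + 128L * model.GetPrimitiveSize(typeof(int)); // 16 + 128 * 4 = 528
    long objectOverhead = model.ClassSize + model.ReferenceSize;                       // 8 + 4 = 12
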
diff --git a/src/core/Util/BitUtil.cs b/src/core/Util/BitUtil.cs
new file mode 100644
index 0000000..f5cbd79
--- /dev/null
+++ b/src/core/Util/BitUtil.cs
@@ -0,0 +1,894 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Util
+{
+ // from org.apache.solr.util rev 555343
+
+ /// <summary>A variety of highly efficient bit twiddling routines.
+ ///
+ /// </summary>
+ /// <version> $Id$
+ /// </version>
+ public class BitUtil
+ {
+
+ /// <summary>Returns the number of bits set in the long </summary>
+ public static int Pop(long x)
+ {
+ /* Hacker's Delight 32 bit pop function:
+ * http://www.hackersdelight.org/HDcode/newCode/pop_arrayHS.cc
+ *
+ int pop(unsigned x) {
+ x = x - ((x >> 1) & 0x55555555);
+ x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
+ x = (x + (x >> 4)) & 0x0F0F0F0F;
+ x = x + (x >> 8);
+ x = x + (x >> 16);
+ return x & 0x0000003F;
+ }
+ ***/
+
+ // 64 bit java version of the C function from above
+ x = x - ((Number.URShift(x, 1)) & 0x5555555555555555L);
+ x = (x & 0x3333333333333333L) + ((Number.URShift(x, 2)) & 0x3333333333333333L);
+ x = (x + (Number.URShift(x, 4))) & 0x0F0F0F0F0F0F0F0FL;
+ x = x + (Number.URShift(x, 8));
+ x = x + (Number.URShift(x, 16));
+ x = x + (Number.URShift(x, 32));
+ return ((int) x) & 0x7F;
+ }
+
+ /// <summary> Returns the number of set bits in an array of longs. </summary>
+ public static long Pop_array(long[] A, int wordOffset, int numWords)
+ {
+ /*
+ * Robert Harley and David Seal's bit counting algorithm, as documented
+ * in the revisions of Hacker's Delight
+ * http://www.hackersdelight.org/revisions.pdf
+ * http://www.hackersdelight.org/HDcode/newCode/pop_arrayHS.cc
+ *
+ * This function was adapted to Java, and extended to use 64 bit words.
+ * if only we had access to wider registers like SSE from java...
+ *
+ * This function can be transformed to compute the popcount of other functions
+ * on bitsets via something like this:
+ * sed 's/A\[\([^]]*\)\]/\(A[\1] \& B[\1]\)/g'
+ *
+ */
+ int n = wordOffset + numWords;
+ long tot = 0, tot8 = 0;
+ long ones = 0, twos = 0, fours = 0;
+
+ int i;
+ for (i = wordOffset; i <= n - 8; i += 8)
+ {
+ /* C macro from Hacker's Delight
+ #define CSA(h,l, a,b,c) \
+ {unsigned u = a ^ b; unsigned v = c; \
+ h = (a & b) | (u & v); l = u ^ v;}
+ ***/
+
+ long twosA, twosB, foursA, foursB, eights;
+
+ // CSA(twosA, ones, ones, A[i], A[i+1])
+ {
+ long b = A[i], c = A[i + 1];
+ long u = ones ^ b;
+ twosA = (ones & b) | (u & c);
+ ones = u ^ c;
+ }
+ // CSA(twosB, ones, ones, A[i+2], A[i+3])
+ {
+ long b = A[i + 2], c = A[i + 3];
+ long u = ones ^ b;
+ twosB = (ones & b) | (u & c);
+ ones = u ^ c;
+ }
+ //CSA(foursA, twos, twos, twosA, twosB)
+ {
+ long u = twos ^ twosA;
+ foursA = (twos & twosA) | (u & twosB);
+ twos = u ^ twosB;
+ }
+ //CSA(twosA, ones, ones, A[i+4], A[i+5])
+ {
+ long b = A[i + 4], c = A[i + 5];
+ long u = ones ^ b;
+ twosA = (ones & b) | (u & c);
+ ones = u ^ c;
+ }
+ // CSA(twosB, ones, ones, A[i+6], A[i+7])
+ {
+ long b = A[i + 6], c = A[i + 7];
+ long u = ones ^ b;
+ twosB = (ones & b) | (u & c);
+ ones = u ^ c;
+ }
+ //CSA(foursB, twos, twos, twosA, twosB)
+ {
+ long u = twos ^ twosA;
+ foursB = (twos & twosA) | (u & twosB);
+ twos = u ^ twosB;
+ }
+
+ //CSA(eights, fours, fours, foursA, foursB)
+ {
+ long u = fours ^ foursA;
+ eights = (fours & foursA) | (u & foursB);
+ fours = u ^ foursB;
+ }
+ tot8 += Pop(eights);
+ }
+
+ // handle trailing words in a binary-search manner...
+ // derived from the loop above by setting specific elements to 0.
+ // the original method in Hacker's Delight used a simple for loop:
+ // for (i = i; i < n; i++) // Add in the last elements
+ // tot = tot + pop(A[i]);
+
+ if (i <= n - 4)
+ {
+ long twosA, twosB, foursA, eights;
+ {
+ long b = A[i], c = A[i + 1];
+ long u = ones ^ b;
+ twosA = (ones & b) | (u & c);
+ ones = u ^ c;
+ }
+ {
+ long b = A[i + 2], c = A[i + 3];
+ long u = ones ^ b;
+ twosB = (ones & b) | (u & c);
+ ones = u ^ c;
+ }
+ {
+ long u = twos ^ twosA;
+ foursA = (twos & twosA) | (u & twosB);
+ twos = u ^ twosB;
+ }
+ eights = fours & foursA;
+ fours = fours ^ foursA;
+
+ tot8 += Pop(eights);
+ i += 4;
+ }
+
+ if (i <= n - 2)
+ {
+ long b = A[i], c = A[i + 1];
+ long u = ones ^ b;
+ long twosA = (ones & b) | (u & c);
+ ones = u ^ c;
+
+ long foursA = twos & twosA;
+ twos = twos ^ twosA;
+
+ long eights = fours & foursA;
+ fours = fours ^ foursA;
+
+ tot8 += Pop(eights);
+ i += 2;
+ }
+
+ if (i < n)
+ {
+ tot += Pop(A[i]);
+ }
+
+ tot += (Pop(fours) << 2) + (Pop(twos) << 1) + Pop(ones) + (tot8 << 3);
+
+ return tot;
+ }
+
+ /// <summary>Returns the popcount or cardinality of the two sets after an intersection.
+ /// Neither array is modified.
+ /// </summary>
+ public static long Pop_intersect(long[] A, long[] B, int wordOffset, int numWords)
+ {
+ // generated from pop_array via sed 's/A\[\([^]]*\)\]/\(A[\1] \& B[\1]\)/g'
+ int n = wordOffset + numWords;
+ long tot = 0, tot8 = 0;
+ long ones = 0, twos = 0, fours = 0;
+
+ int i;
+ for (i = wordOffset; i <= n - 8; i += 8)
+ {
+ long twosA, twosB, foursA, foursB, eights;
+
+ // CSA(twosA, ones, ones, (A[i] & B[i]), (A[i+1] & B[i+1]))
+ {
+ long b = (A[i] & B[i]), c = (A[i + 1] & B[i + 1]);
+ long u = ones ^ b;
+ twosA = (ones & b) | (u & c);
+ ones = u ^ c;
+ }
+ // CSA(twosB, ones, ones, (A[i+2] & B[i+2]), (A[i+3] & B[i+3]))
+ {
+ long b = (A[i + 2] & B[i + 2]), c = (A[i + 3] & B[i + 3]);
+ long u = ones ^ b;
+ twosB = (ones & b) | (u & c);
+ ones = u ^ c;
+ }
+ //CSA(foursA, twos, twos, twosA, twosB)
+ {
+ long u = twos ^ twosA;
+ foursA = (twos & twosA) | (u & twosB);
+ twos = u ^ twosB;
+ }
+ //CSA(twosA, ones, ones, (A[i+4] & B[i+4]), (A[i+5] & B[i+5]))
+ {
+ long b = (A[i + 4] & B[i + 4]), c = (A[i + 5] & B[i + 5]);
+ long u = ones ^ b;
+ twosA = (ones & b) | (u & c);
+ ones = u ^ c;
+ }
+ // CSA(twosB, ones, ones, (A[i+6] & B[i+6]), (A[i+7] & B[i+7]))
+ {
+ long b = (A[i + 6] & B[i + 6]), c = (A[i + 7] & B[i + 7]);
+ long u = ones ^ b;
+ twosB = (ones & b) | (u & c);
+ ones = u ^ c;
+ }
+ //CSA(foursB, twos, twos, twosA, twosB)
+ {
+ long u = twos ^ twosA;
+ foursB = (twos & twosA) | (u & twosB);
+ twos = u ^ twosB;
+ }
+
+ //CSA(eights, fours, fours, foursA, foursB)
+ {
+ long u = fours ^ foursA;
+ eights = (fours & foursA) | (u & foursB);
+ fours = u ^ foursB;
+ }
+ tot8 += Pop(eights);
+ }
+
+
+ if (i <= n - 4)
+ {
+ long twosA, twosB, foursA, eights;
+ {
+ long b = (A[i] & B[i]), c = (A[i + 1] & B[i + 1]);
+ long u = ones ^ b;
+ twosA = (ones & b) | (u & c);
+ ones = u ^ c;
+ }
+ {
+ long b = (A[i + 2] & B[i + 2]), c = (A[i + 3] & B[i + 3]);
+ long u = ones ^ b;
+ twosB = (ones & b) | (u & c);
+ ones = u ^ c;
+ }
+ {
+ long u = twos ^ twosA;
+ foursA = (twos & twosA) | (u & twosB);
+ twos = u ^ twosB;
+ }
+ eights = fours & foursA;
+ fours = fours ^ foursA;
+
+ tot8 += Pop(eights);
+ i += 4;
+ }
+
+ if (i <= n - 2)
+ {
+ long b = (A[i] & B[i]), c = (A[i + 1] & B[i + 1]);
+ long u = ones ^ b;
+ long twosA = (ones & b) | (u & c);
+ ones = u ^ c;
+
+ long foursA = twos & twosA;
+ twos = twos ^ twosA;
+
+ long eights = fours & foursA;
+ fours = fours ^ foursA;
+
+ tot8 += Pop(eights);
+ i += 2;
+ }
+
+ if (i < n)
+ {
+ tot += Pop((A[i] & B[i]));
+ }
+
+ tot += (Pop(fours) << 2) + (Pop(twos) << 1) + Pop(ones) + (tot8 << 3);
+
+ return tot;
+ }
+
+ /// <summary>Returns the popcount or cardinality of the union of two sets.
+ /// Neither array is modified.
+ /// </summary>
+ public static long Pop_union(long[] A, long[] B, int wordOffset, int numWords)
+ {
+ // generated from pop_array via sed 's/A\[\([^]]*\)\]/\(A[\1] \| B[\1]\)/g'
+ int n = wordOffset + numWords;
+ long tot = 0, tot8 = 0;
+ long ones = 0, twos = 0, fours = 0;
+
+ int i;
+ for (i = wordOffset; i <= n - 8; i += 8)
+ {
+ /* C macro from Hacker's Delight
+ #define CSA(h,l, a,b,c) \
+ {unsigned u = a ^ b; unsigned v = c; \
+ h = (a & b) | (u & v); l = u ^ v;}
+ ***/
+
+ long twosA, twosB, foursA, foursB, eights;
+
+ // CSA(twosA, ones, ones, (A[i] | B[i]), (A[i+1] | B[i+1]))
+ {
+ long b = (A[i] | B[i]), c = (A[i + 1] | B[i + 1]);
+ long u = ones ^ b;
+ twosA = (ones & b) | (u & c);
+ ones = u ^ c;
+ }
+ // CSA(twosB, ones, ones, (A[i+2] | B[i+2]), (A[i+3] | B[i+3]))
+ {
+ long b = (A[i + 2] | B[i + 2]), c = (A[i + 3] | B[i + 3]);
+ long u = ones ^ b;
+ twosB = (ones & b) | (u & c);
+ ones = u ^ c;
+ }
+ //CSA(foursA, twos, twos, twosA, twosB)
+ {
+ long u = twos ^ twosA;
+ foursA = (twos & twosA) | (u & twosB);
+ twos = u ^ twosB;
+ }
+ //CSA(twosA, ones, ones, (A[i+4] | B[i+4]), (A[i+5] | B[i+5]))
+ {
+ long b = (A[i + 4] | B[i + 4]), c = (A[i + 5] | B[i + 5]);
+ long u = ones ^ b;
+ twosA = (ones & b) | (u & c);
+ ones = u ^ c;
+ }
+ // CSA(twosB, ones, ones, (A[i+6] | B[i+6]), (A[i+7] | B[i+7]))
+ {
+ long b = (A[i + 6] | B[i + 6]), c = (A[i + 7] | B[i + 7]);
+ long u = ones ^ b;
+ twosB = (ones & b) | (u & c);
+ ones = u ^ c;
+ }
+ //CSA(foursB, twos, twos, twosA, twosB)
+ {
+ long u = twos ^ twosA;
+ foursB = (twos & twosA) | (u & twosB);
+ twos = u ^ twosB;
+ }
+
+ //CSA(eights, fours, fours, foursA, foursB)
+ {
+ long u = fours ^ foursA;
+ eights = (fours & foursA) | (u & foursB);
+ fours = u ^ foursB;
+ }
+ tot8 += Pop(eights);
+ }
+
+
+ if (i <= n - 4)
+ {
+ long twosA, twosB, foursA, eights;
+ {
+ long b = (A[i] | B[i]), c = (A[i + 1] | B[i + 1]);
+ long u = ones ^ b;
+ twosA = (ones & b) | (u & c);
+ ones = u ^ c;
+ }
+ {
+ long b = (A[i + 2] | B[i + 2]), c = (A[i + 3] | B[i + 3]);
+ long u = ones ^ b;
+ twosB = (ones & b) | (u & c);
+ ones = u ^ c;
+ }
+ {
+ long u = twos ^ twosA;
+ foursA = (twos & twosA) | (u & twosB);
+ twos = u ^ twosB;
+ }
+ eights = fours & foursA;
+ fours = fours ^ foursA;
+
+ tot8 += Pop(eights);
+ i += 4;
+ }
+
+ if (i <= n - 2)
+ {
+ long b = (A[i] | B[i]), c = (A[i + 1] | B[i + 1]);
+ long u = ones ^ b;
+ long twosA = (ones & b) | (u & c);
+ ones = u ^ c;
+
+ long foursA = twos & twosA;
+ twos = twos ^ twosA;
+
+ long eights = fours & foursA;
+ fours = fours ^ foursA;
+
+ tot8 += Pop(eights);
+ i += 2;
+ }
+
+ if (i < n)
+ {
+ tot += Pop((A[i] | B[i]));
+ }
+
+ tot += (Pop(fours) << 2) + (Pop(twos) << 1) + Pop(ones) + (tot8 << 3);
+
+ return tot;
+ }
+
+ /// <summary>Returns the popcount or cardinality of A &amp; ~B.
+ /// Neither array is modified.
+ /// </summary>
+ public static long Pop_andnot(long[] A, long[] B, int wordOffset, int numWords)
+ {
+ // generated from pop_array via sed 's/A\[\([^]]*\)\]/\(A[\1] \& ~B[\1]\)/g'
+ int n = wordOffset + numWords;
+ long tot = 0, tot8 = 0;
+ long ones = 0, twos = 0, fours = 0;
+
+ int i;
+ for (i = wordOffset; i <= n - 8; i += 8)
+ {
+ /* C macro from Hacker's Delight
+ #define CSA(h,l, a,b,c) \
+ {unsigned u = a ^ b; unsigned v = c; \
+ h = (a & b) | (u & v); l = u ^ v;}
+ ***/
+
+ long twosA, twosB, foursA, foursB, eights;
+
+ // CSA(twosA, ones, ones, (A[i] & ~B[i]), (A[i+1] & ~B[i+1]))
+ {
+ long b = (A[i] & ~ B[i]), c = (A[i + 1] & ~ B[i + 1]);
+ long u = ones ^ b;
+ twosA = (ones & b) | (u & c);
+ ones = u ^ c;
+ }
+ // CSA(twosB, ones, ones, (A[i+2] & ~B[i+2]), (A[i+3] & ~B[i+3]))
+ {
+ long b = (A[i + 2] & ~ B[i + 2]), c = (A[i + 3] & ~ B[i + 3]);
+ long u = ones ^ b;
+ twosB = (ones & b) | (u & c);
+ ones = u ^ c;
+ }
+ //CSA(foursA, twos, twos, twosA, twosB)
+ {
+ long u = twos ^ twosA;
+ foursA = (twos & twosA) | (u & twosB);
+ twos = u ^ twosB;
+ }
+ //CSA(twosA, ones, ones, (A[i+4] & ~B[i+4]), (A[i+5] & ~B[i+5]))
+ {
+ long b = (A[i + 4] & ~ B[i + 4]), c = (A[i + 5] & ~ B[i + 5]);
+ long u = ones ^ b;
+ twosA = (ones & b) | (u & c);
+ ones = u ^ c;
+ }
+ // CSA(twosB, ones, ones, (A[i+6] & ~B[i+6]), (A[i+7] & ~B[i+7]))
+ {
+ long b = (A[i + 6] & ~ B[i + 6]), c = (A[i + 7] & ~ B[i + 7]);
+ long u = ones ^ b;
+ twosB = (ones & b) | (u & c);
+ ones = u ^ c;
+ }
+ //CSA(foursB, twos, twos, twosA, twosB)
+ {
+ long u = twos ^ twosA;
+ foursB = (twos & twosA) | (u & twosB);
+ twos = u ^ twosB;
+ }
+
+ //CSA(eights, fours, fours, foursA, foursB)
+ {
+ long u = fours ^ foursA;
+ eights = (fours & foursA) | (u & foursB);
+ fours = u ^ foursB;
+ }
+ tot8 += Pop(eights);
+ }
+
+
+ if (i <= n - 4)
+ {
+ long twosA, twosB, foursA, eights;
+ {
+ long b = (A[i] & ~ B[i]), c = (A[i + 1] & ~ B[i + 1]);
+ long u = ones ^ b;
+ twosA = (ones & b) | (u & c);
+ ones = u ^ c;
+ }
+ {
+ long b = (A[i + 2] & ~ B[i + 2]), c = (A[i + 3] & ~ B[i + 3]);
+ long u = ones ^ b;
+ twosB = (ones & b) | (u & c);
+ ones = u ^ c;
+ }
+ {
+ long u = twos ^ twosA;
+ foursA = (twos & twosA) | (u & twosB);
+ twos = u ^ twosB;
+ }
+ eights = fours & foursA;
+ fours = fours ^ foursA;
+
+ tot8 += Pop(eights);
+ i += 4;
+ }
+
+ if (i <= n - 2)
+ {
+ long b = (A[i] & ~ B[i]), c = (A[i + 1] & ~ B[i + 1]);
+ long u = ones ^ b;
+ long twosA = (ones & b) | (u & c);
+ ones = u ^ c;
+
+ long foursA = twos & twosA;
+ twos = twos ^ twosA;
+
+ long eights = fours & foursA;
+ fours = fours ^ foursA;
+
+ tot8 += Pop(eights);
+ i += 2;
+ }
+
+ if (i < n)
+ {
+ tot += Pop((A[i] & ~ B[i]));
+ }
+
+ tot += (Pop(fours) << 2) + (Pop(twos) << 1) + Pop(ones) + (tot8 << 3);
+
+ return tot;
+ }
+
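+ /// <summary>Returns the popcount or cardinality of the exclusive-or (symmetric difference) of the two sets.
+ /// Neither array is modified.
+ /// </summary>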
+ public static long Pop_xor(long[] A, long[] B, int wordOffset, int numWords)
+ {
+ int n = wordOffset + numWords;
+ long tot = 0, tot8 = 0;
+ long ones = 0, twos = 0, fours = 0;
+
+ int i;
+ for (i = wordOffset; i <= n - 8; i += 8)
+ {
+ /* C macro from Hacker's Delight
+ #define CSA(h,l, a,b,c) \
+ {unsigned u = a ^ b; unsigned v = c; \
+ h = (a & b) | (u & v); l = u ^ v;}
+ ***/
+
+ long twosA, twosB, foursA, foursB, eights;
+
+ // CSA(twosA, ones, ones, (A[i] ^ B[i]), (A[i+1] ^ B[i+1]))
+ {
+ long b = (A[i] ^ B[i]), c = (A[i + 1] ^ B[i + 1]);
+ long u = ones ^ b;
+ twosA = (ones & b) | (u & c);
+ ones = u ^ c;
+ }
+ // CSA(twosB, ones, ones, (A[i+2] ^ B[i+2]), (A[i+3] ^ B[i+3]))
+ {
+ long b = (A[i + 2] ^ B[i + 2]), c = (A[i + 3] ^ B[i + 3]);
+ long u = ones ^ b;
+ twosB = (ones & b) | (u & c);
+ ones = u ^ c;
+ }
+ //CSA(foursA, twos, twos, twosA, twosB)
+ {
+ long u = twos ^ twosA;
+ foursA = (twos & twosA) | (u & twosB);
+ twos = u ^ twosB;
+ }
+ //CSA(twosA, ones, ones, (A[i+4] ^ B[i+4]), (A[i+5] ^ B[i+5]))
+ {
+ long b = (A[i + 4] ^ B[i + 4]), c = (A[i + 5] ^ B[i + 5]);
+ long u = ones ^ b;
+ twosA = (ones & b) | (u & c);
+ ones = u ^ c;
+ }
+ // CSA(twosB, ones, ones, (A[i+6] ^ B[i+6]), (A[i+7] ^ B[i+7]))
+ {
+ long b = (A[i + 6] ^ B[i + 6]), c = (A[i + 7] ^ B[i + 7]);
+ long u = ones ^ b;
+ twosB = (ones & b) | (u & c);
+ ones = u ^ c;
+ }
+ //CSA(foursB, twos, twos, twosA, twosB)
+ {
+ long u = twos ^ twosA;
+ foursB = (twos & twosA) | (u & twosB);
+ twos = u ^ twosB;
+ }
+
+ //CSA(eights, fours, fours, foursA, foursB)
+ {
+ long u = fours ^ foursA;
+ eights = (fours & foursA) | (u & foursB);
+ fours = u ^ foursB;
+ }
+ tot8 += Pop(eights);
+ }
+
+
+ if (i <= n - 4)
+ {
+ long twosA, twosB, foursA, eights;
+ {
+ long b = (A[i] ^ B[i]), c = (A[i + 1] ^ B[i + 1]);
+ long u = ones ^ b;
+ twosA = (ones & b) | (u & c);
+ ones = u ^ c;
+ }
+ {
+ long b = (A[i + 2] ^ B[i + 2]), c = (A[i + 3] ^ B[i + 3]);
+ long u = ones ^ b;
+ twosB = (ones & b) | (u & c);
+ ones = u ^ c;
+ }
+ {
+ long u = twos ^ twosA;
+ foursA = (twos & twosA) | (u & twosB);
+ twos = u ^ twosB;
+ }
+ eights = fours & foursA;
+ fours = fours ^ foursA;
+
+ tot8 += Pop(eights);
+ i += 4;
+ }
+
+ if (i <= n - 2)
+ {
+ long b = (A[i] ^ B[i]), c = (A[i + 1] ^ B[i + 1]);
+ long u = ones ^ b;
+ long twosA = (ones & b) | (u & c);
+ ones = u ^ c;
+
+ long foursA = twos & twosA;
+ twos = twos ^ twosA;
+
+ long eights = fours & foursA;
+ fours = fours ^ foursA;
+
+ tot8 += Pop(eights);
+ i += 2;
+ }
+
+ if (i < n)
+ {
+ tot += Pop((A[i] ^ B[i]));
+ }
+
+ tot += (Pop(fours) << 2) + (Pop(twos) << 1) + Pop(ones) + (tot8 << 3);
+
+ return tot;
+ }
+
+ /* python code to generate ntzTable
+ def ntz(val):
+ if val==0: return 8
+ i=0
+ while (val&0x01)==0:
+ i = i+1
+ val >>= 1
+ return i
+ print ','.join([ str(ntz(i)) for i in range(256) ])
+ ***/
+
+ /// <summary>Table of the number of trailing zeros in each byte value.</summary>
+ public static readonly byte[] ntzTable = new byte[]
+ {
+ 8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1,
+ 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0,
+ 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2,
+ 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0,
+ 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1,
+ 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0,
+ 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5,
+ 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0,
+ 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1,
+ 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0,
+ 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,
+ 0, 1, 0
+ };
+
+
+ /// <summary>Returns the number of trailing zeros in a 64 bit long value.</summary>
+ public static int Ntz(long val)
+ {
+ // A full binary search to determine the low byte was slower than
+ // a linear search for nextSetBit(). This is most likely because
+ // the implementation of nextSetBit() shifts bits to the right, increasing
+ // the probability that the first non-zero byte is in the rhs.
+ //
+ // This implementation does a single binary search at the top level only
+ // so that all other bit shifting can be done on ints instead of longs to
+ // remain friendly to 32 bit architectures. In addition, the case of a
+ // non-zero first byte is checked for first because it is the most common
+ // in dense bit arrays.
+
+ int lower = (int) val;
+ int lowByte = lower & 0xff;
+ if (lowByte != 0)
+ return ntzTable[lowByte];
+
+ if (lower != 0)
+ {
+ lowByte = (Number.URShift(lower, 8)) & 0xff;
+ if (lowByte != 0)
+ return ntzTable[lowByte] + 8;
+ lowByte = (Number.URShift(lower, 16)) & 0xff;
+ if (lowByte != 0)
+ return ntzTable[lowByte] + 16;
+ // no need to mask off low byte for the last byte in the 32 bit word
+ // no need to check for zero on the last byte either.
+ return ntzTable[Number.URShift(lower, 24)] + 24;
+ }
+ else
+ {
+ // grab upper 32 bits
+ int upper = (int) (val >> 32);
+ lowByte = upper & 0xff;
+ if (lowByte != 0)
+ return ntzTable[lowByte] + 32;
+ lowByte = (Number.URShift(upper, 8)) & 0xff;
+ if (lowByte != 0)
+ return ntzTable[lowByte] + 40;
+ lowByte = (Number.URShift(upper, 16)) & 0xff;
+ if (lowByte != 0)
+ return ntzTable[lowByte] + 48;
+ // no need to mask off low byte for the last byte in the 32 bit word
+ // no need to check for zero on the last byte either.
+ return ntzTable[Number.URShift(upper, 24)] + 56;
+ }
+ }
+
+ /// <summary>Returns the number of trailing zeros in a 32 bit int value.</summary>
+ public static int Ntz(int val)
+ {
+ // This implementation does a single binary search at the top level only.
+ // In addition, the case of a non-zero first byte is checked for first
+ // because it is the most common in dense bit arrays.
+
+ int lowByte = val & 0xff;
+ if (lowByte != 0)
+ return ntzTable[lowByte];
+ lowByte = (Number.URShift(val, 8)) & 0xff;
+ if (lowByte != 0)
+ return ntzTable[lowByte] + 8;
+ lowByte = (Number.URShift(val, 16)) & 0xff;
+ if (lowByte != 0)
+ return ntzTable[lowByte] + 16;
+ // no need to mask off low byte for the last byte.
+ // no need to check for zero on the last byte either.
+ return ntzTable[Number.URShift(val, 24)] + 24;
+ }
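+
+ // Illustrative usage sketch (values are arbitrary examples):
+ //
+ //   BitUtil.Ntz(0x10L);  // 4  (lowest set bit is bit 4)
+ //   BitUtil.Ntz(0L);     // 64 (no set bits in the 64 bit word)
+ //   BitUtil.Ntz(0x10);   // 4  (32 bit overload)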
+
+ /// <summary>returns 0 based index of first set bit
+ /// (only works for x!=0)
+ /// <br/> This is an alternate implementation of ntz()
+ /// </summary>
+ public static int Ntz2(long x)
+ {
+ int n = 0;
+ int y = (int) x;
+ if (y == 0)
+ {
+ n += 32; y = (int) (Number.URShift(x, 32));
+ } // the only 64 bit shift necessary
+ if ((y & 0x0000FFFF) == 0)
+ {
+ n += 16; y = Number.URShift(y, 16);
+ }
+ if ((y & 0x000000FF) == 0)
+ {
+ n += 8; y = Number.URShift(y, 8);
+ }
+ return (ntzTable[y & 0xff]) + n;
+ }
+
+ /// <summary>returns 0 based index of first set bit
+ /// <br/> This is an alternate implementation of ntz()
+ /// </summary>
+ public static int Ntz3(long x)
+ {
+ // another implementation taken from Hacker's Delight, extended to 64 bits
+ // and converted to Java.
+ // Many 32 bit ntz algorithms are at http://www.hackersdelight.org/HDcode/ntz.cc
+ int n = 1;
+
+ // do the first step as a long, all others as ints.
+ int y = (int) x;
+ if (y == 0)
+ {
+ n += 32; y = (int) (Number.URShift(x, 32));
+ }
+ if ((y & 0x0000FFFF) == 0)
+ {
+ n += 16; y = Number.URShift(y, 16);
+ }
+ if ((y & 0x000000FF) == 0)
+ {
+ n += 8; y = Number.URShift(y, 8);
+ }
+ if ((y & 0x0000000F) == 0)
+ {
+ n += 4; y = Number.URShift(y, 4);
+ }
+ if ((y & 0x00000003) == 0)
+ {
+ n += 2; y = Number.URShift(y, 2);
+ }
+ return n - (y & 1);
+ }
+
+
+ /// <summary>returns true if v is a power of two or zero</summary>
+ public static bool IsPowerOfTwo(int v)
+ {
+ return ((v & (v - 1)) == 0);
+ }
+
+ /// <summary>returns true if v is a power of two or zero</summary>
+ public static bool IsPowerOfTwo(long v)
+ {
+ return ((v & (v - 1)) == 0);
+ }
+
+ /// <summary>returns the next highest power of two, or the current value if it's already a power of two or zero</summary>
+ public static int NextHighestPowerOfTwo(int v)
+ {
+ v--;
+ v |= v >> 1;
+ v |= v >> 2;
+ v |= v >> 4;
+ v |= v >> 8;
+ v |= v >> 16;
+ v++;
+ return v;
+ }
+
+ /// <summary>returns the next highest power of two, or the current value if it's already a power of two or zero</summary>
+ public static long NextHighestPowerOfTwo(long v)
+ {
+ v--;
+ v |= v >> 1;
+ v |= v >> 2;
+ v |= v >> 4;
+ v |= v >> 8;
+ v |= v >> 16;
+ v |= v >> 32;
+ v++;
+ return v;
+ }
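+
+ // Illustrative usage sketch (values are arbitrary examples):
+ //
+ //   BitUtil.IsPowerOfTwo(64);          // true
+ //   BitUtil.IsPowerOfTwo(80);          // false
+ //   BitUtil.NextHighestPowerOfTwo(17); // 32
+ //   BitUtil.NextHighestPowerOfTwo(32); // 32 (already a power of two)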
+ }
+} \ No newline at end of file
diff --git a/src/core/Util/BitVector.cs b/src/core/Util/BitVector.cs
new file mode 100644
index 0000000..17b1212
--- /dev/null
+++ b/src/core/Util/BitVector.cs
@@ -0,0 +1,315 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Support;
+using Directory = Lucene.Net.Store.Directory;
+using IndexInput = Lucene.Net.Store.IndexInput;
+using IndexOutput = Lucene.Net.Store.IndexOutput;
+
+namespace Lucene.Net.Util
+{
+
+ /// <summary>Optimized implementation of a vector of bits. This is more-or-less like
+ /// java.util.BitSet, but also includes the following:
+ /// <list type="bullet">
+ /// <item>a count() method, which efficiently computes the number of one bits;</item>
+ /// <item>optimized read from and write to disk;</item>
+ /// <item>inlinable get() method;</item>
+ /// <item>store and load, as bit set or d-gaps, depending on sparseness;</item>
+ /// </list>
+ /// </summary>
+ public sealed class BitVector : ICloneable
+ {
+
+ private byte[] bits;
+ private int size;
+ private int count;
+
+ /// <summary>Constructs a vector capable of holding <c>n</c> bits. </summary>
+ public BitVector(int n)
+ {
+ size = n;
+ bits = new byte[(size >> 3) + 1];
+ count = 0;
+ }
+
+ internal BitVector(byte[] bits, int size)
+ {
+ this.bits = bits;
+ this.size = size;
+ count = -1;
+ }
+
+ public System.Object Clone()
+ {
+ byte[] copyBits = new byte[bits.Length];
+ Array.Copy(bits, 0, copyBits, 0, bits.Length);
+ BitVector clone = new BitVector(copyBits, size);
+ clone.count = count;
+ return clone;
+ }
+
+ /// <summary>Sets the value of <c>bit</c> to one. </summary>
+ public void Set(int bit)
+ {
+ if (bit >= size)
+ {
+ throw new System.IndexOutOfRangeException("Index out of bounds: " + bit);
+ }
+ bits[bit >> 3] |= (byte) (1 << (bit & 7));
+ count = - 1;
+ }
+
+ /// <summary>Sets the value of <c>bit</c> to true, and
+ /// returns true if bit was already set
+ /// </summary>
+ public bool GetAndSet(int bit)
+ {
+ if (bit >= size)
+ {
+ throw new System.IndexOutOfRangeException("Index out of bounds: " + bit);
+ }
+ int pos = bit >> 3;
+ int v = bits[pos];
+ int flag = 1 << (bit & 7);
+ if ((flag & v) != 0)
+ return true;
+ else
+ {
+ bits[pos] = (byte) (v | flag);
+ if (count != - 1)
+ count++;
+ return false;
+ }
+ }
+
+ /// <summary>Sets the value of <c>bit</c> to zero. </summary>
+ public void Clear(int bit)
+ {
+ if (bit >= size)
+ {
+ throw new System.IndexOutOfRangeException("Index out of bounds: " + bit);
+ }
+ bits[bit >> 3] &= (byte) (~ (1 << (bit & 7)));
+ count = - 1;
+ }
+
+ /// <summary>Returns <c>true</c> if <c>bit</c> is one and
+ /// <c>false</c> if it is zero.
+ /// </summary>
+ public bool Get(int bit)
+ {
+ System.Diagnostics.Debug.Assert(bit >= 0 && bit < size, "bit " + bit + " is out of bounds 0.." +(size - 1));
+ return (bits[bit >> 3] & (1 << (bit & 7))) != 0;
+ }
+
+ /// <summary>Returns the number of bits in this vector. This is also one greater than
+ /// the number of the largest valid bit number.
+ /// </summary>
+ public int Size()
+ {
+ return size;
+ }
+
+ /// <summary>Returns the total number of one bits in this vector. This is efficiently
+ /// computed and cached, so that, if the vector is not changed, no
+ /// recomputation is done for repeated calls.
+ /// </summary>
+ public int Count()
+ {
+ // if the vector has been modified
+ if (count == - 1)
+ {
+ int c = 0;
+ int end = bits.Length;
+ for (int i = 0; i < end; i++)
+ c += BYTE_COUNTS[bits[i] & 0xFF]; // sum bits per byte
+ count = c;
+ }
+ return count;
+ }
+
+ /// <summary>
+ /// For testing
+ /// </summary>
+ [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
+ public int GetRecomputedCount()
+ {
+ int c = 0;
+ int end = bits.Length;
+ for (int i = 0; i < end; i++)
+ c += BYTE_COUNTS[bits[i] & 0xFF]; // sum bits per byte
+ return c;
+ }
+
+ private static readonly byte[] BYTE_COUNTS = new byte[]{0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
+
+
+ /// <summary>Writes this vector to the file <c>name</c> in Directory
+ /// <c>d</c>, in a format that can be read by the constructor
+ /// <see cref="BitVector(Directory, String)" />.
+ /// </summary>
+ public void Write(Directory d, System.String name)
+ {
+ IndexOutput output = d.CreateOutput(name);
+ try
+ {
+ if (IsSparse())
+ {
+ WriteDgaps(output); // sparse bit-set more efficiently saved as d-gaps.
+ }
+ else
+ {
+ WriteBits(output);
+ }
+ }
+ finally
+ {
+ output.Close();
+ }
+ }
+
+ /// <summary>Write as a bit set </summary>
+ private void WriteBits(IndexOutput output)
+ {
+ output.WriteInt(Size()); // write size
+ output.WriteInt(Count()); // write count
+ output.WriteBytes(bits, bits.Length);
+ }
+
+ /// <summary>Write as a d-gaps list </summary>
+ private void WriteDgaps(IndexOutput output)
+ {
+ output.WriteInt(- 1); // mark using d-gaps
+ output.WriteInt(Size()); // write size
+ output.WriteInt(Count()); // write count
+ int last = 0;
+ int n = Count();
+ int m = bits.Length;
+ for (int i = 0; i < m && n > 0; i++)
+ {
+ if (bits[i] != 0)
+ {
+ output.WriteVInt(i - last);
+ output.WriteByte(bits[i]);
+ last = i;
+ n -= BYTE_COUNTS[bits[i] & 0xFF];
+ }
+ }
+ }
+
+ /// <summary>Indicates if the bit vector is sparse and should be saved as a d-gaps list, or dense, and should be saved as a bit set. </summary>
+ private bool IsSparse()
+ {
+ // note: order of comparisons below set to favor smaller values (no binary range search.)
+ // note: adding 4 because we start with ((int) -1) to indicate d-gaps format.
+ // note: we write the d-gap for the byte number, and the byte (bits[i]) itself, therefore
+ // multiplying count by (8+8) or (8+16) or (8+24) etc.:
+ // - first 8 for writing bits[i] (1 byte vs. 1 bit), and
+ // - second part for writing the byte-number d-gap as vint.
+ // note: factor is for read/write of byte-arrays being faster than vints.
+ int factor = 10;
+ if (bits.Length < (1 << 7))
+ return factor * (4 + (8 + 8) * Count()) < Size();
+ if (bits.Length < (1 << 14))
+ return factor * (4 + (8 + 16) * Count()) < Size();
+ if (bits.Length < (1 << 21))
+ return factor * (4 + (8 + 24) * Count()) < Size();
+ if (bits.Length < (1 << 28))
+ return factor * (4 + (8 + 32) * Count()) < Size();
+ return factor * (4 + (8 + 40) * Count()) < Size();
+ }
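+
+ // Worked example of the heuristic above (numbers are arbitrary):
+ // for a vector of Size() == 1000 bits, bits.Length == 126 < (1 << 7), so the first branch applies.
+ // With 5 set bits, 10 * (4 + (8 + 8) * 5) == 840 < 1000 and the vector is saved as d-gaps;
+ // with 7 set bits the estimate is 1160 >= 1000 and the plain bit set is written instead.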
+
+ /// <summary>Constructs a bit vector from the file <c>name</c> in Directory
+ /// <c>d</c>, as written by the <see cref="Write" /> method.
+ /// </summary>
+ public BitVector(Directory d, System.String name)
+ {
+ IndexInput input = d.OpenInput(name);
+ try
+ {
+ size = input.ReadInt(); // read size
+ if (size == - 1)
+ {
+ ReadDgaps(input);
+ }
+ else
+ {
+ ReadBits(input);
+ }
+ }
+ finally
+ {
+ input.Close();
+ }
+ }
+
+ /// <summary>Read as a bit set </summary>
+ private void ReadBits(IndexInput input)
+ {
+ count = input.ReadInt(); // read count
+ bits = new byte[(size >> 3) + 1]; // allocate bits
+ input.ReadBytes(bits, 0, bits.Length);
+ }
+
+ /// <summary>read as a d-gaps list </summary>
+ private void ReadDgaps(IndexInput input)
+ {
+ size = input.ReadInt(); // (re)read size
+ count = input.ReadInt(); // read count
+ bits = new byte[(size >> 3) + 1]; // allocate bits
+ int last = 0;
+ int n = Count();
+ while (n > 0)
+ {
+ last += input.ReadVInt();
+ bits[last] = input.ReadByte();
+ n -= BYTE_COUNTS[bits[last] & 0xFF];
+ }
+ }
+
+ /// <summary> Retrieve a subset of this BitVector.
+ ///
+ /// </summary>
+ /// <param name="start">starting index, inclusive
+ /// </param>
+ /// <param name="end">ending index, exclusive
+ /// </param>
+ /// <returns> subset
+ /// </returns>
+ public BitVector Subset(int start, int end)
+ {
+ if (start < 0 || end > Size() || end < start)
+ throw new System.IndexOutOfRangeException();
+ // Special case -- return an empty vector if start == end
+ if (end == start)
+ return new BitVector(0);
+ byte[] bits = new byte[(Number.URShift((end - start - 1), 3)) + 1];
+ int s = Number.URShift(start, 3);
+ for (int i = 0; i < bits.Length; i++)
+ {
+ int cur = 0xFF & this.bits[i + s];
+ int next = i + s + 1 >= this.bits.Length?0:0xFF & this.bits[i + s + 1];
+ bits[i] = (byte) ((Number.URShift(cur, (start & 7))) | ((next << (8 - (start & 7)))));
+ }
+ int bitsToClear = (bits.Length * 8 - (end - start)) % 8;
+ bits[bits.Length - 1] &= (byte) (~ (0xFF << (8 - bitsToClear)));
+ return new BitVector(bits, end - start);
+ }
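+
+ // Illustrative usage sketch (values are arbitrary examples):
+ //
+ //   BitVector bv = new BitVector(10);
+ //   bv.Set(3);
+ //   bv.Set(8);
+ //   BitVector sub = bv.Subset(2, 6); // sub.Size() == 4
+ //   bool b = sub.Get(1);             // true: original bit 3 maps to index 1; bit 8 lies outside [2, 6)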
+ }
+} \ No newline at end of file
diff --git a/src/core/Util/Cache/Cache.cs b/src/core/Util/Cache/Cache.cs
new file mode 100644
index 0000000..34ded72
--- /dev/null
+++ b/src/core/Util/Cache/Cache.cs
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Util.Cache
+{
+
+
+ /// <summary> Base class for cache implementations.</summary>
+ public abstract class Cache<TKey, TValue> : IDisposable
+ {
+
+ /// <summary> Simple Cache wrapper that synchronizes all
+ /// calls that access the cache.
+ /// </summary>
+ internal class SynchronizedCache_Renamed_Class : Cache<TKey, TValue>
+ {
+ internal System.Object mutex;
+ internal Cache<TKey,TValue> cache;
+
+ internal SynchronizedCache_Renamed_Class(Cache<TKey, TValue> cache)
+ {
+ this.cache = cache;
+ this.mutex = this;
+ }
+
+ internal SynchronizedCache_Renamed_Class(Cache<TKey, TValue> cache, System.Object mutex)
+ {
+ this.cache = cache;
+ this.mutex = mutex;
+ }
+
+ public override void Put(TKey key, TValue value_Renamed)
+ {
+ lock (mutex)
+ {
+ cache.Put(key, value_Renamed);
+ }
+ }
+
+ public override TValue Get(System.Object key)
+ {
+ lock (mutex)
+ {
+ return cache.Get(key);
+ }
+ }
+
+ public override bool ContainsKey(System.Object key)
+ {
+ lock (mutex)
+ {
+ return cache.ContainsKey(key);
+ }
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ lock (mutex)
+ {
+ cache.Dispose();
+ }
+ }
+
+ internal override Cache<TKey,TValue> GetSynchronizedCache()
+ {
+ return this;
+ }
+ }
+
+ /// <summary> Returns a thread-safe cache backed by the specified cache.
+ /// In order to guarantee thread-safety, all access to the backed cache must
+ /// be accomplished through the returned cache.
+ /// </summary>
+ public static Cache<TKey, TValue> SynchronizedCache(Cache<TKey, TValue> cache)
+ {
+ return cache.GetSynchronizedCache();
+ }
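+
+ // Illustrative usage sketch; SimpleMapCache is the map-backed implementation elsewhere in this namespace:
+ //
+ //   Cache<string, string> cache =
+ //       Cache<string, string>.SynchronizedCache(new SimpleMapCache<string, string>());
+ //   cache.Put("key", "value");
+ //   string v = cache.Get("key"); // all access goes through the synchronized wrapper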
+
+ /// <summary> Called by <see cref="SynchronizedCache(Cache{TKey,TValue})" />. This method
+ /// returns a <see cref="SynchronizedCache" /> instance that wraps
+ /// this instance by default and can be overridden to return
+ /// e.g. subclasses of <see cref="SynchronizedCache" /> or this
+ /// in case this cache is already synchronized.
+ /// </summary>
+ internal virtual Cache<TKey, TValue> GetSynchronizedCache()
+ {
+ return new SynchronizedCache_Renamed_Class(this);
+ }
+
+ /// <summary> Puts a (key, value)-pair into the cache. </summary>
+ public abstract void Put(TKey key, TValue value_Renamed);
+
+ /// <summary> Returns the value for the given key. </summary>
+ public abstract TValue Get(System.Object key);
+
+ /// <summary> Returns whether the given key is in this cache. </summary>
+ public abstract bool ContainsKey(System.Object key);
+
+ /// <summary> Closes the cache.</summary>
+ [Obsolete("Use Dispose() instead")]
+ public void Close()
+ {
+ Dispose();
+ }
+
+ public void Dispose()
+ {
+ Dispose(true);
+ }
+
+ protected abstract void Dispose(bool disposing);
+ }
+} \ No newline at end of file
diff --git a/src/core/Util/Cache/SimpleLRUCache.cs b/src/core/Util/Cache/SimpleLRUCache.cs
new file mode 100644
index 0000000..2cde655
--- /dev/null
+++ b/src/core/Util/Cache/SimpleLRUCache.cs
@@ -0,0 +1,166 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Util.Cache
+{
+ public class SimpleLRUCache<TKey, TValue> : SimpleMapCache<TKey, TValue>
+ {
+ /// <summary>
+ /// The maximum number of items to cache.
+ /// </summary>
+ private int capacity;
+
+ /// <summary>
+ /// The list to efficiently maintain the LRU state.
+ /// </summary>
+ private LinkedList<ListValueEntry<TKey, TValue>> list;
+
+ /// <summary>
+ /// The dictionary to hash into any location in the list.
+ /// </summary>
+ private Dictionary<TKey, LinkedListNode<ListValueEntry<TKey, TValue>>> lookup;
+
+ /// <summary>
+ /// The node instance to use/re-use when adding an item to the cache.
+ /// </summary>
+ private LinkedListNode<ListValueEntry<TKey, TValue>> openNode;
+
+ public SimpleLRUCache(int Capacity)
+ {
+ this.capacity = Capacity;
+ this.list = new LinkedList<ListValueEntry<TKey, TValue>>();
+ this.lookup = new Dictionary<TKey, LinkedListNode<ListValueEntry<TKey, TValue>>>(Capacity + 1);
+ this.openNode = new LinkedListNode<ListValueEntry<TKey, TValue>>(new ListValueEntry<TKey, TValue>(default(TKey), default(TValue)));
+ }
+
+ public override void Put(TKey Key, TValue Value)
+ {
+ if (Get(Key) == null)
+ {
+ this.openNode.Value.ItemKey = Key;
+ this.openNode.Value.ItemValue = Value;
+ this.list.AddFirst(this.openNode);
+ this.lookup.Add(Key, this.openNode);
+
+ if (this.list.Count > this.capacity)
+ {
+ // last node is to be removed and saved for the next addition to the cache
+ this.openNode = this.list.Last;
+
+ // remove from list & dictionary
+ this.list.RemoveLast();
+ this.lookup.Remove(this.openNode.Value.ItemKey);
+ }
+ else
+ {
+ // still filling the cache, create a new open node for the next time
+ this.openNode = new LinkedListNode<ListValueEntry<TKey, TValue>>(new ListValueEntry<TKey, TValue>(default(TKey), default(TValue)));
+ }
+ }
+ }
+
+ public override TValue Get(object Key)
+ {
+ LinkedListNode<ListValueEntry<TKey, TValue>> node = null;
+ if (!this.lookup.TryGetValue((TKey)Key, out node))
+ {
+ return default(TValue);
+ }
+ this.list.Remove(node);
+ this.list.AddFirst(node);
+ return node.Value.ItemValue;
+ }
+
+ /// <summary>
+ /// Container to hold the key and value to aid in removal from
+ /// the <see cref="lookup"/> dictionary when an item is removed from cache.
+ /// </summary>
+ class ListValueEntry<K, V> where K : TKey
+ where V : TValue
+ {
+ internal V ItemValue;
+ internal K ItemKey;
+
+ internal ListValueEntry(K key, V value)
+ {
+ this.ItemKey = key;
+ this.ItemValue = value;
+ }
+ }
+ }
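+
+ // Illustrative usage sketch (values are arbitrary examples):
+ //
+ //   var lru = new SimpleLRUCache<string, string>(2); // capacity of two entries
+ //   lru.Put("a", "1");
+ //   lru.Put("b", "2");
+ //   lru.Get("a");      // touching "a" makes "b" the least recently used entry
+ //   lru.Put("c", "3"); // exceeds capacity, so "b" is evicted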
+
+
+#region NOT_USED_FROM_JLCA_PORT
+/*
+
+ //
+ // This is the original port as it was generated via JLCA.
+ // This code is not used. It's here for reference only.
+ //
+
+
+ /// <summary> Simple LRU cache implementation that uses a LinkedHashMap.
+ /// This cache is not synchronized, use <see cref="Cache.SynchronizedCache(Cache)" />
+ /// if needed.
+ ///
+ /// </summary>
+ public class SimpleLRUCache:SimpleMapCache
+ {
+ private class AnonymousClassLinkedHashMap : LinkedHashMap
+ {
+ public AnonymousClassLinkedHashMap(SimpleLRUCache enclosingInstance)
+ {
+ InitBlock(enclosingInstance);
+ }
+ private void InitBlock(SimpleLRUCache enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private SimpleLRUCache enclosingInstance;
+ public SimpleLRUCache Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ protected internal virtual bool RemoveEldestEntry(System.Collections.DictionaryEntry eldest)
+ {
+ return size() > Enclosing_Instance.cacheSize;
+ }
+ }
+ private const float LOADFACTOR = 0.75f;
+
+ private int cacheSize;
+
+ /// <summary> Creates a last-recently-used cache with the specified size. </summary>
+ public SimpleLRUCache(int cacheSize):base(null)
+ {
+ this.cacheSize = cacheSize;
+ int capacity = (int) System.Math.Ceiling(cacheSize / LOADFACTOR) + 1;
+
+ base.map = new AnonymousClassLinkedHashMap(this, capacity, LOADFACTOR, true);
+ }
+ }
+*/
+#endregion
+
+} \ No newline at end of file
diff --git a/src/core/Util/Cache/SimpleMapCache.cs b/src/core/Util/Cache/SimpleMapCache.cs
new file mode 100644
index 0000000..b424bd1
--- /dev/null
+++ b/src/core/Util/Cache/SimpleMapCache.cs
@@ -0,0 +1,141 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Util.Cache
+{
+
+ /// <summary> Simple cache implementation that uses a HashMap to store (key, value) pairs.
+ /// This cache is not synchronized, use <see cref="Cache{TKey, TValue}.SynchronizedCache(Cache{TKey, TValue})" />
+ /// if needed.
+ /// </summary>
+ public class SimpleMapCache<TKey, TValue> : Cache<TKey, TValue>
+ {
+ internal System.Collections.Generic.Dictionary<TKey, TValue> map;
+
+ public SimpleMapCache()
+ : this(new System.Collections.Generic.Dictionary<TKey, TValue>())
+ {
+ }
+
+ public SimpleMapCache(System.Collections.Generic.Dictionary<TKey, TValue> map)
+ {
+ this.map = map;
+ }
+
+ public override TValue Get(System.Object key)
+ {
+ return map[(TKey)key];
+ }
+
+ public override void Put(TKey key, TValue value_Renamed)
+ {
+ map[key] = value_Renamed;
+ }
+
+ public override bool ContainsKey(System.Object key)
+ {
+ return map.ContainsKey((TKey)key);
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ // do nothing
+ }
+
+ /// <summary> Returns a Set containing all keys in this cache.</summary>
+ public virtual System.Collections.Generic.HashSet<TKey> KeySet()
+ {
+ return new HashSet<TKey>(map.Keys);
+ }
+
+ internal override Cache<TKey, TValue> GetSynchronizedCache()
+ {
+ return new SynchronizedSimpleMapCache(this);
+ }
+
+ // Why does this use both inheritance and composition?
+ private class SynchronizedSimpleMapCache : SimpleMapCache<TKey, TValue>
+ {
+ private System.Object mutex;
+ private SimpleMapCache<TKey, TValue> cache;
+
+ private bool isDisposed;
+
+ internal SynchronizedSimpleMapCache(SimpleMapCache<TKey, TValue> cache)
+ {
+ this.cache = cache;
+ this.mutex = this;
+ }
+
+ public override void Put(TKey key, TValue value_Renamed)
+ {
+ lock (mutex)
+ {
+ cache.Put(key, value_Renamed);
+ }
+ }
+
+ public override TValue Get(System.Object key)
+ {
+ lock (mutex)
+ {
+ return cache.Get(key);
+ }
+ }
+
+ public override bool ContainsKey(System.Object key)
+ {
+ lock (mutex)
+ {
+ return cache.ContainsKey(key);
+ }
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ lock (mutex)
+ {
+ if (isDisposed) return;
+
+ if (disposing)
+ {
+ cache.Dispose(disposing);
+ }
+
+ isDisposed = true;
+ base.Dispose(disposing);
+ }
+ }
+
+ public override HashSet<TKey> KeySet()
+ {
+ lock (mutex)
+ {
+ return cache.KeySet();
+ }
+ }
+
+ internal override Cache<TKey, TValue> GetSynchronizedCache()
+ {
+ return this;
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Util/CloseableThreadLocal.cs b/src/core/Util/CloseableThreadLocal.cs
new file mode 100644
index 0000000..84e16e8
--- /dev/null
+++ b/src/core/Util/CloseableThreadLocal.cs
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Threading;
+using Lucene.Net.Support;
+
+#if NET35
+using Lucene.Net.Support.Compatibility;
+#endif
+
+namespace Lucene.Net.Util
+{
+
+ /// <summary>Java's builtin ThreadLocal has a serious flaw:
+ /// it can take an arbitrarily long amount of time to
+ /// dereference the things you had stored in it, even once the
+ /// ThreadLocal instance itself is no longer referenced.
+ /// This is because there is a single master map stored for
+ /// each thread, which all ThreadLocals share, and that
+ /// master map only periodically purges "stale" entries.
+ ///
+ /// While not technically a memory leak, because eventually
+ /// the memory will be reclaimed, it can take a long time
+ /// and you can easily hit OutOfMemoryError because from the
+ /// GC's standpoint the stale entries are not reclaimable.
+ ///
+ /// This class works around that, by only enrolling
+ /// WeakReference values into the ThreadLocal, and
+ /// separately holding a hard reference to each stored
+ /// value. When you call <see cref="Close" />, these hard
+ /// references are cleared and then GC is freely able to
+ /// reclaim the space used by the objects stored in it.
+ /// </summary>
+ ///
+
+ public class CloseableThreadLocal<T> : IDisposable where T : class
+ {
+ // NOTE: Java has WeakReference<T>. This isn't available for .Net until 4.5 (according to msdn docs)
+ private ThreadLocal<WeakReference> t = new ThreadLocal<WeakReference>();
+
+ private IDictionary<Thread, T> hardRefs = new HashMap<Thread, T>();
+
+ private bool isDisposed;
+
+ public virtual T InitialValue()
+ {
+ return null;
+ }
+
+ public virtual T Get()
+ {
+ WeakReference weakRef = t.Get();
+ if (weakRef == null)
+ {
+ T iv = InitialValue();
+ if (iv != null)
+ {
+ Set(iv);
+ return iv;
+ }
+ else
+ return null;
+ }
+ else
+ {
+ return (T)weakRef.Get();
+ }
+ }
+
+ public virtual void Set(T @object)
+ {
+ //+-- For Debugging
+ if (CloseableThreadLocalProfiler.EnableCloseableThreadLocalProfiler == true)
+ {
+ lock (CloseableThreadLocalProfiler.Instances)
+ {
+ CloseableThreadLocalProfiler.Instances.Add(new WeakReference(@object));
+ }
+ }
+ //+--
+
+ t.Set(new WeakReference(@object));
+
+ lock (hardRefs)
+ {
+ //hardRefs[Thread.CurrentThread] = @object;
+ hardRefs.Add(Thread.CurrentThread, @object);
+
+ // Java's iterator can remove, .NET's cannot
+ var threadsToRemove = hardRefs.Keys.Where(thread => !thread.IsAlive).ToList();
+ // Purge dead threads
+ foreach (var thread in threadsToRemove)
+ {
+ hardRefs.Remove(thread);
+ }
+ }
+ }
+
+ [Obsolete("Use Dispose() instead")]
+ public virtual void Close()
+ {
+ Dispose();
+ }
+
+ public void Dispose()
+ {
+ Dispose(true);
+ }
+
+ protected virtual void Dispose(bool disposing)
+ {
+ if (isDisposed) return;
+
+ if (disposing)
+ {
+ // Clear the hard refs; then, the only remaining refs to
+ // all values we were storing are weak (unless somewhere
+ // else is still using them) and so GC may reclaim them:
+ hardRefs = null;
+ // Take care of the current thread right now; others will be
+ // taken care of via the WeakReferences.
+ if (t != null)
+ {
+ t.Remove();
+ }
+ t = null;
+ }
+
+ isDisposed = true;
+ }
+ }
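+
+ // Illustrative usage sketch:
+ //
+ //   var local = new CloseableThreadLocal<string>();
+ //   local.Set("per-thread value"); // stored as a WeakReference plus a hard reference
+ //   string v = local.Get();        // "per-thread value" on the same thread
+ //   local.Dispose();               // clears the hard references so the GC can reclaim the values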
+
+ internal static class CloseableThreadLocalExtensions
+ {
+ public static void Set<T>(this ThreadLocal<T> t, T val)
+ {
+ t.Value = val;
+ }
+
+ public static T Get<T>(this ThreadLocal<T> t)
+ {
+ return t.Value;
+ }
+
+ public static void Remove<T>(this ThreadLocal<T> t)
+ {
+ t.Dispose();
+ }
+
+ public static object Get(this WeakReference w)
+ {
+ return w.Target;
+ }
+ }
+
+ //// {{DIGY}}
+ //// To compile against Framework 2.0
+ //// Uncomment below class
+ //public class ThreadLocal<T> : IDisposable
+ //{
+ // [ThreadStatic]
+ // static SupportClass.WeakHashTable slots;
+
+ // void Init()
+ // {
+ // if (slots == null) slots = new SupportClass.WeakHashTable();
+ // }
+
+ // public T Value
+ // {
+ // set
+ // {
+ // Init();
+ // slots.Add(this, value);
+ // }
+ // get
+ // {
+ // Init();
+ // return (T)slots[this];
+ // }
+ // }
+
+ // public void Dispose()
+ // {
+ // if (slots != null) slots.Remove(this);
+ // }
+ //}
+}
diff --git a/src/core/Util/Constants.cs b/src/core/Util/Constants.cs
new file mode 100644
index 0000000..88761d2
--- /dev/null
+++ b/src/core/Util/Constants.cs
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Support;
+using LucenePackage = Lucene.Net.LucenePackage;
+
+namespace Lucene.Net.Util
+{
+
+ /// <summary> Some useful constants.</summary>
+ public sealed class Constants
+ {
+ private Constants()
+ {
+ } // can't construct
+
+ /// <summary>The value of <tt>System.getProperty("java.version")</tt>. *</summary>
+ public static readonly System.String JAVA_VERSION = AppSettings.Get("java.version", "");
+ /// <summary>True iff this is Java version 1.1. </summary>
+ public static readonly bool JAVA_1_1 = JAVA_VERSION.StartsWith("1.1.");
+ /// <summary>True iff this is Java version 1.2. </summary>
+ public static readonly bool JAVA_1_2 = JAVA_VERSION.StartsWith("1.2.");
+ /// <summary>True iff this is Java version 1.3. </summary>
+ public static readonly bool JAVA_1_3 = JAVA_VERSION.StartsWith("1.3.");
+
+ /// <summary>The value of <tt>System.getProperty("os.name")</tt>. *</summary>
+ public static readonly System.String OS_NAME = GetEnvironmentVariable("OS","Windows_NT") ?? "Linux";
+ /// <summary>True iff running on Linux. </summary>
+ public static readonly bool LINUX = OS_NAME.StartsWith("Linux");
+ /// <summary>True iff running on Windows. </summary>
+ public static readonly bool WINDOWS = OS_NAME.StartsWith("Windows");
+ /// <summary>True iff running on SunOS. </summary>
+ public static readonly bool SUN_OS = OS_NAME.StartsWith("SunOS");
+
+ public static readonly System.String OS_ARCH = GetEnvironmentVariable("PROCESSOR_ARCHITECTURE","x86");
+ public static readonly System.String OS_VERSION = GetEnvironmentVariable("OS_VERSION", "?");
+ public static readonly System.String JAVA_VENDOR = AppSettings.Get("java.vendor", "");
+
+ // NOTE: this logic may not be correct; if you know of a
+ // more reliable approach please raise it on java-dev!
+ public static bool JRE_IS_64BIT;
+
+ // this method prevents inlining the final version constant in compiled
+ // classes,
+ // see: http://www.javaworld.com/community/node/3400
+ private static System.String Ident(System.String s)
+ {
+ return s.ToString();
+ }
+
+ public static readonly System.String LUCENE_MAIN_VERSION = Ident("3.0.3");
+
+ public static System.String LUCENE_VERSION="8.8.8.8";
+ static Constants()
+ {
+ if (IntPtr.Size == 8)
+ {
+ JRE_IS_64BIT = true;// 64 bit machine
+ }
+ else if (IntPtr.Size == 4)
+ {
+ JRE_IS_64BIT = false;// 32 bit machine
+ }
+
+ try
+ {
+ LUCENE_VERSION = System.Reflection.Assembly.GetExecutingAssembly().GetName().Version.ToString();
+ }
+ catch (System.Security.SecurityException) //Ignore in medium trust.
+ {
+ }
+
+ }
+
+ #region MEDIUM-TRUST Support
+ static string GetEnvironmentVariable(string variable, string defaultValueOnSecurityException)
+ {
+ try
+ {
+ if (variable == "OS_VERSION") return System.Environment.OSVersion.ToString();
+
+ return System.Environment.GetEnvironmentVariable(variable);
+ }
+ catch (System.Security.SecurityException)
+ {
+ return defaultValueOnSecurityException;
+ }
+
+ }
+ #endregion
+ }
+} \ No newline at end of file
diff --git a/src/core/Util/DocIdBitSet.cs b/src/core/Util/DocIdBitSet.cs
new file mode 100644
index 0000000..1601ad2
--- /dev/null
+++ b/src/core/Util/DocIdBitSet.cs
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections;
+using Lucene.Net.Support;
+using DocIdSet = Lucene.Net.Search.DocIdSet;
+using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+
+namespace Lucene.Net.Util
+{
+ /// <summary>Simple DocIdSet and DocIdSetIterator backed by a BitSet </summary>
+ public class DocIdBitSet:DocIdSet
+ {
+ private System.Collections.BitArray bitSet;
+
+ public DocIdBitSet(System.Collections.BitArray bitSet)
+ {
+ this.bitSet = bitSet;
+ }
+
+ public override DocIdSetIterator Iterator()
+ {
+ return new DocIdBitSetIterator(bitSet);
+ }
+
+ /// <summary>This DocIdSet implementation is cacheable.</summary>
+ public override bool IsCacheable
+ {
+ get { return true; }
+ }
+
+ /// <summary> Returns the underlying BitSet. </summary>
+ public virtual BitArray BitSet
+ {
+ get { return this.bitSet; }
+ }
+
+ private class DocIdBitSetIterator:DocIdSetIterator
+ {
+ private int docId;
+ private System.Collections.BitArray bitSet;
+
+ internal DocIdBitSetIterator(System.Collections.BitArray bitSet)
+ {
+ this.bitSet = bitSet;
+ this.docId = - 1;
+ }
+
+ public override int DocID()
+ {
+ return docId;
+ }
+
+ public override int NextDoc()
+ {
+ // (docId + 1) on next line requires -1 initial value for docNr:
+ int d = BitSetSupport.NextSetBit(bitSet, docId + 1);
+ // -1 returned by BitSet.nextSetBit() when exhausted
+ docId = d == - 1?NO_MORE_DOCS:d;
+ return docId;
+ }
+
+ public override int Advance(int target)
+ {
+ int d = BitSetSupport.NextSetBit(bitSet, target);
+ // -1 returned by BitSet.nextSetBit() when exhausted
+ docId = d == - 1?NO_MORE_DOCS:d;
+ return docId;
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Util/FieldCacheSanityChecker.cs b/src/core/Util/FieldCacheSanityChecker.cs
new file mode 100644
index 0000000..7456969
--- /dev/null
+++ b/src/core/Util/FieldCacheSanityChecker.cs
@@ -0,0 +1,439 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+
+using IndexReader = Lucene.Net.Index.IndexReader;
+using FieldCache = Lucene.Net.Search.FieldCache;
+using CacheEntry = Lucene.Net.Search.CacheEntry;
+
+namespace Lucene.Net.Util
+{
+
+ /// <summary> Provides methods for sanity checking that entries in the FieldCache
+ /// are not wasteful or inconsistent.
+ /// <p/>
+ /// <p/>
+ /// Lucene 2.9 Introduced numerous enhancements into how the FieldCache
+ /// is used by the low levels of Lucene searching (for Sorting and
+ /// ValueSourceQueries) to improve both the speed for Sorting, as well
+ /// as reopening of IndexReaders. But these changes have shifted the
+ /// usage of FieldCache from "top level" IndexReaders (frequently a
+ /// MultiReader or DirectoryReader) down to the leaf level SegmentReaders.
+ /// As a result, existing applications that directly access the FieldCache
+ /// may find RAM usage increase significantly when upgrading to 2.9 or
+ /// later. This class provides an API for these applications (or their
+ /// Unit tests) to check at run time if the FieldCache contains "insane"
+ /// usages of the FieldCache.
+ /// <p/>
+ /// <p/>
+ /// <b>EXPERIMENTAL API:</b> This API is considered extremely advanced and
+ /// experimental. It may be removed or altered w/o warning in future releases
+ /// of Lucene.
+ /// <p/>
+ /// </summary>
+ /// <seealso cref="FieldCache">
+ /// </seealso>
+ /// <seealso cref="FieldCacheSanityChecker.Insanity">
+ /// </seealso>
+ /// <seealso cref="FieldCacheSanityChecker.InsanityType">
+ /// </seealso>
+ public sealed class FieldCacheSanityChecker
+ {
+
+ private RamUsageEstimator ramCalc = null;
+ public FieldCacheSanityChecker()
+ {
+ /* NOOP */
+ }
+ /// <summary> If set, will be used to estimate size for all CacheEntry objects
+ /// dealt with.
+ /// </summary>
+ public void SetRamUsageEstimator(RamUsageEstimator r)
+ {
+ ramCalc = r;
+ }
+
+
+ /// <summary> Quick and dirty convenience method</summary>
+ /// <seealso cref="Check">
+ /// </seealso>
+ public static Insanity[] CheckSanity(FieldCache cache)
+ {
+ return CheckSanity(cache.GetCacheEntries());
+ }
+
+ /// <summary> Quick and dirty convenience method that instantiates an instance with
+ /// "good defaults" and uses it to test the given CacheEntry objects
+ /// </summary>
+ /// <seealso cref="Check">
+ /// </seealso>
+ public static Insanity[] CheckSanity(params CacheEntry[] cacheEntries)
+ {
+ FieldCacheSanityChecker sanityChecker = new FieldCacheSanityChecker();
+ // doesn't check for interned
+ sanityChecker.SetRamUsageEstimator(new RamUsageEstimator(false));
+ return sanityChecker.Check(cacheEntries);
+ }
+
+
+ /// <summary> Tests a CacheEntry[] for indication of "insane" cache usage.
+ /// <p/>
+ /// NOTE: FieldCache CreationPlaceholder objects are ignored.
+ /// (:TODO: is this a bad idea? are we masking a real problem?)
+ /// <p/>
+ /// </summary>
+ public Insanity[] Check(params CacheEntry[] cacheEntries)
+ {
+ if (null == cacheEntries || 0 == cacheEntries.Length)
+ return new Insanity[0];
+
+ if (null != ramCalc)
+ {
+ for (int i = 0; i < cacheEntries.Length; i++)
+ {
+ cacheEntries[i].EstimateSize(ramCalc);
+ }
+ }
+
+ // the indirect mapping lets MapOfSet dedup identical valIds for us
+ //
+ // maps the (valId) identityhashCode of cache values to
+ // sets of CacheEntry instances
+ MapOfSets<int,CacheEntry> valIdToItems = new MapOfSets<int,CacheEntry>(new Dictionary<int,HashSet<CacheEntry>>(17));
+ // maps ReaderField keys to Sets of ValueIds
+ MapOfSets<ReaderField,int> readerFieldToValIds = new MapOfSets<ReaderField,int>(new Dictionary<ReaderField,HashSet<int>>(17));
+ //
+
+ // any keys that we know result in more than one valId
+ HashSet<ReaderField> valMismatchKeys = new HashSet<ReaderField>();
+
+ // iterate over all the cacheEntries to get the mappings we'll need
+ for (int i = 0; i < cacheEntries.Length; i++)
+ {
+ CacheEntry item = cacheEntries[i];
+ System.Object val = item.Value;
+
+ if (val is Lucene.Net.Search.CreationPlaceholder)
+ continue;
+
+ ReaderField rf = new ReaderField(item.ReaderKey, item.FieldName);
+
+ System.Int32 valId = val.GetHashCode();
+
+ // indirect mapping, so the MapOfSet will dedup identical valIds for us
+ valIdToItems.Put(valId, item);
+ if (1 < readerFieldToValIds.Put(rf, valId))
+ {
+ valMismatchKeys.Add(rf);
+ }
+ }
+
+ List<Insanity> insanity = new List<Insanity>(valMismatchKeys.Count * 3);
+
+ insanity.AddRange(CheckValueMismatch(valIdToItems, readerFieldToValIds, valMismatchKeys));
+ insanity.AddRange(CheckSubreaders(valIdToItems, readerFieldToValIds));
+
+ return insanity.ToArray();
+ }
+
+ /// <summary> Internal helper method used by check that iterates over
+ /// valMismatchKeys and generates a Collection of Insanity
+ /// instances accordingly. The MapOfSets are used to populate
+ /// the Insanity objects.
+ /// </summary>
+ /// <seealso cref="InsanityType.VALUEMISMATCH">
+ /// </seealso>
+ private List<Insanity> CheckValueMismatch(MapOfSets<int,CacheEntry> valIdToItems,
+ MapOfSets<ReaderField,int> readerFieldToValIds,
+ HashSet<ReaderField> valMismatchKeys)
+ {
+
+ List<Insanity> insanity = new List<Insanity>(valMismatchKeys.Count * 3);
+
+ if (!(valMismatchKeys.Count == 0))
+ {
+ // we have multiple values for some ReaderFields
+
+ IDictionary<ReaderField,HashSet<int>> rfMap = readerFieldToValIds.Map;
+ IDictionary<int,HashSet<CacheEntry>> valMap = valIdToItems.Map;
+ foreach (ReaderField rf in valMismatchKeys)
+ {
+ List<CacheEntry> badEntries = new List<CacheEntry>(valMismatchKeys.Count * 2);
+ foreach (int val in rfMap[rf])
+ {
+ foreach (CacheEntry entry in valMap[val])
+ {
+ badEntries.Add(entry);
+ }
+ }
+
+ insanity.Add(new Insanity(InsanityType.VALUEMISMATCH, "Multiple distinct value objects for " + rf.ToString(), badEntries.ToArray()));
+ }
+ }
+ return insanity;
+ }
+
+ /// <summary> Internal helper method used by check that iterates over
+ /// the keys of readerFieldToValIds and generates a Collection
+ /// of Insanity instances whenever two (or more) ReaderField instances are
+ /// found that have an ancestry relationship.
+ ///
+ /// </summary>
+ /// <seealso cref="InsanityType.SUBREADER">
+ /// </seealso>
+ private List<Insanity> CheckSubreaders(MapOfSets<int,CacheEntry> valIdToItems,
+ MapOfSets<ReaderField,int> readerFieldToValIds)
+ {
+ List<Insanity> insanity = new List<Insanity>(23);
+
+ Dictionary<ReaderField, HashSet<ReaderField>> badChildren = new Dictionary<ReaderField, HashSet<ReaderField>>(17);
+ MapOfSets<ReaderField, ReaderField> badKids = new MapOfSets<ReaderField, ReaderField>(badChildren); // wrapper
+
+ IDictionary<int, HashSet<CacheEntry>> viToItemSets = valIdToItems.Map;
+ IDictionary<ReaderField, HashSet<int>> rfToValIdSets = readerFieldToValIds.Map;
+
+ HashSet<ReaderField> seen = new HashSet<ReaderField>();
+
+ foreach (ReaderField rf in rfToValIdSets.Keys)
+ {
+ if (seen.Contains(rf))
+ continue;
+
+ System.Collections.IList kids = GetAllDecendentReaderKeys(rf.readerKey);
+ foreach (Object kidKey in kids)
+ {
+ ReaderField kid = new ReaderField(kidKey, rf.fieldName);
+
+ if (badChildren.ContainsKey(kid))
+ {
+ // we've already processed this kid as an RF and found other problems
+ // track those problems as our own
+ badKids.Put(rf, kid);
+ badKids.PutAll(rf, badChildren[kid]);
+ badChildren.Remove(kid);
+ }
+ else if (rfToValIdSets.ContainsKey(kid))
+ {
+ // we have cache entries for the kid
+ badKids.Put(rf, kid);
+ }
+ seen.Add(kid);
+ }
+ seen.Add(rf);
+ }
+
+ // every mapping in badKids represents an Insanity
+ foreach (ReaderField parent in badChildren.Keys)
+ {
+ HashSet<ReaderField> kids = badChildren[parent];
+
+ List<CacheEntry> badEntries = new List<CacheEntry>(kids.Count * 2);
+
+ // put parent entr(ies) in first
+ {
+ foreach (int val in rfToValIdSets[parent])
+ {
+ badEntries.AddRange(viToItemSets[val]);
+ }
+ }
+
+ // now the entries for the descendants
+ foreach (ReaderField kid in kids)
+ {
+ foreach (int val in rfToValIdSets[kid])
+ {
+ badEntries.AddRange(viToItemSets[val]);
+ }
+ }
+
+ insanity.Add(new Insanity(InsanityType.SUBREADER, "Found caches for decendents of " + parent.ToString(), badEntries.ToArray()));
+ }
+
+ return insanity;
+ }
+
+ /// <summary> Checks if the seed is an IndexReader, and if so will walk
+ /// the hierarchy of subReaders building up a list of the objects
+ /// returned by obj.getFieldCacheKey()
+ /// </summary>
+ private System.Collections.IList GetAllDecendentReaderKeys(System.Object seed)
+ {
+ List<object> all = new List<object>(17); // will grow as we iter
+ all.Add(seed);
+ for (int i = 0; i < all.Count; i++)
+ {
+ System.Object obj = all[i];
+ if (obj is IndexReader)
+ {
+ IndexReader[] subs = ((IndexReader) obj).GetSequentialSubReaders();
+ for (int j = 0; (null != subs) && (j < subs.Length); j++)
+ {
+ all.Add(subs[j].FieldCacheKey);
+ }
+ }
+ }
+ // need to skip the first, because it was the seed
+ return all.GetRange(1, all.Count - 1);
+ }
+
+ /// <summary> Simple pair object for using "readerKey + fieldName" as a Map key</summary>
+ private sealed class ReaderField
+ {
+ public System.Object readerKey;
+ public System.String fieldName;
+ public ReaderField(System.Object readerKey, System.String fieldName)
+ {
+ this.readerKey = readerKey;
+ this.fieldName = fieldName;
+ }
+ public override int GetHashCode()
+ {
+ return readerKey.GetHashCode() * fieldName.GetHashCode();
+ }
+ public override bool Equals(System.Object that)
+ {
+ if (!(that is ReaderField))
+ return false;
+
+ ReaderField other = (ReaderField) that;
+ return (this.readerKey == other.readerKey && this.fieldName.Equals(other.fieldName));
+ }
+ public override System.String ToString()
+ {
+ return readerKey.ToString() + "+" + fieldName;
+ }
+ }
+
+ /// <summary> Simple container for a collection of related CacheEntry objects that
+ /// in conjunction with each other represent some "insane" usage of the
+ /// FieldCache.
+ /// </summary>
+ public sealed class Insanity
+ {
+ private InsanityType type;
+ private System.String msg;
+ private CacheEntry[] entries;
+ public Insanity(InsanityType type, System.String msg, params CacheEntry[] entries)
+ {
+ if (null == type)
+ {
+ throw new System.ArgumentException("Insanity requires non-null InsanityType");
+ }
+ if (null == entries || 0 == entries.Length)
+ {
+ throw new System.ArgumentException("Insanity requires non-null/non-empty CacheEntry[]");
+ }
+ this.type = type;
+ this.msg = msg;
+ this.entries = entries;
+ }
+
+ /// <summary> Type of insane behavior this object represents</summary>
+ public InsanityType Type
+ {
+ get { return type; }
+ }
+
+ /// <summary> Description of the insane behavior</summary>
+ public string Msg
+ {
+ get { return msg; }
+ }
+
+ /// <summary> CacheEntry objects which suggest a problem</summary>
+ public CacheEntry[] GetCacheEntries()
+ {
+ return entries;
+ }
+ /// <summary> Multi-Line representation of this Insanity object, starting with
+ /// the Type and Msg, followed by each CacheEntry.ToString() on its
+ /// own line prefaced by a tab character
+ /// </summary>
+ public override System.String ToString()
+ {
+ System.Text.StringBuilder buf = new System.Text.StringBuilder();
+ buf.Append(Type).Append(": ");
+
+ System.String m = Msg;
+ if (null != m)
+ buf.Append(m);
+
+ buf.Append('\n');
+
+ CacheEntry[] ce = GetCacheEntries();
+ for (int i = 0; i < ce.Length; i++)
+ {
+ buf.Append('\t').Append(ce[i].ToString()).Append('\n');
+ }
+
+ return buf.ToString();
+ }
+ }
+
+ /// <summary> An enumeration of the different types of "insane" behavior that
+ /// may be detected in a FieldCache.
+ ///
+ /// </summary>
+ /// <seealso cref="InsanityType.SUBREADER">
+ /// </seealso>
+ /// <seealso cref="InsanityType.VALUEMISMATCH">
+ /// </seealso>
+ /// <seealso cref="InsanityType.EXPECTED">
+ /// </seealso>
+ public sealed class InsanityType
+ {
+ private System.String label;
+ internal InsanityType(System.String label)
+ {
+ this.label = label;
+ }
+ public override System.String ToString()
+ {
+ return label;
+ }
+
+ /// <summary> Indicates an overlap in cache usage on a given field
+ /// in sub/super readers.
+ /// </summary>
+ public static readonly InsanityType SUBREADER = new InsanityType("SUBREADER");
+
+ /// <summary> <p/>
+ /// Indicates entries have the same reader+fieldname but
+ /// different cached values. This can happen if different datatypes,
+ /// or parsers are used -- and while it's not necessarily a bug
+ /// it's typically an indication of a possible problem.
+ /// <p/>
+ /// <p/>
+ /// NOTE: Only the reader, fieldname, and cached value are actually
+ /// tested -- if two cache entries have different parsers or datatypes but
+ /// the cached values are the same Object (== not just equal()) this method
+ /// does not consider that a red flag. This allows for subtle variations
+ /// in the way a Parser is specified (null vs DEFAULT_LONG_PARSER, etc...)
+ /// <p/>
+ /// </summary>
+ public static readonly InsanityType VALUEMISMATCH = new InsanityType("VALUEMISMATCH");
+
+ /// <summary> Indicates an expected bit of "insanity". This may be useful for
+ /// clients that wish to preserve/log information about insane usage
+ /// but indicate that it was expected.
+ /// </summary>
+ public static readonly InsanityType EXPECTED = new InsanityType("EXPECTED");
+ }
+ }
+} \ No newline at end of file
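A usage sketch for the checker above, assuming the application already holds a FieldCache instance in a variable named cache (how that instance is obtained is left out here):

    // Report any "insane" FieldCache usage detected across the cache's entries.
    FieldCacheSanityChecker.Insanity[] problems = FieldCacheSanityChecker.CheckSanity(cache);
    foreach (FieldCacheSanityChecker.Insanity insanity in problems)
    {
        // Each Insanity carries its InsanityType, message and the offending CacheEntry objects.
        System.Console.Error.WriteLine(insanity.ToString());
    }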
diff --git a/src/core/Util/IAttribute.cs b/src/core/Util/IAttribute.cs
new file mode 100644
index 0000000..e84313a
--- /dev/null
+++ b/src/core/Util/IAttribute.cs
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Util
+{
+
+ /// <summary> Base interface for attributes.</summary>
+ public interface IAttribute
+ {
+ }
+} \ No newline at end of file
diff --git a/src/core/Util/IdentityDictionary.cs b/src/core/Util/IdentityDictionary.cs
new file mode 100644
index 0000000..f23f91f
--- /dev/null
+++ b/src/core/Util/IdentityDictionary.cs
@@ -0,0 +1,64 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Runtime.Serialization;
+using System.Text;
+
+namespace Lucene.Net.Util
+{
+ /// <summary>
+ /// A class that mimics Java's IdentityHashMap in that it determines
+ /// object equality solely on ReferenceEquals rather than (possibly overridden)
+ /// object.Equals().
+ ///
+ /// NOTE: Java's documentation on IdentityHashMap says that it also uses
+ /// ReferenceEquals on its values as well. This class does not follow that behavior.
+ /// </summary>
+ /// <typeparam name="TKey">The type of the keys in the dictionary</typeparam>
+ /// <typeparam name="TValue">The type of the values in the dictionary</typeparam>
+ public class IdentityDictionary<TKey, TValue> : Dictionary<TKey, TValue>
+ {
+ public IdentityDictionary(IDictionary<TKey, TValue> other) : base(other, new IdentityComparer())
+ { }
+
+ public IdentityDictionary(int capacity) : base(capacity, new IdentityComparer())
+ { }
+
+ public IdentityDictionary() : this(16)
+ { }
+
+ class IdentityComparer : IEqualityComparer<TKey>
+ {
+ public bool Equals(TKey x, TKey y)
+ {
+ return ReferenceEquals(x, y);
+ }
+
+ public int GetHashCode(TKey obj)
+ {
+ return obj.GetHashCode();
+ }
+ }
+ }
+}
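A small sketch of the reference-equality semantics, assuming the usual System.Collections.Generic imports. Note that the comparer hashes with the key's own GetHashCode, so value-equal keys land in the same bucket yet still compare unequal:

    // Two distinct string instances with identical contents become two separate keys.
    var map = new IdentityDictionary<object, string>();
    object a = new string('x', 3);
    object b = new string('x', 3);   // equal by value, different reference
    map[a] = "first";
    map[b] = "second";
    // map.Count == 2, because lookups use ReferenceEquals rather than string.Equals.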
diff --git a/src/core/Util/IndexableBinaryStringTools.cs b/src/core/Util/IndexableBinaryStringTools.cs
new file mode 100644
index 0000000..c6c9f46
--- /dev/null
+++ b/src/core/Util/IndexableBinaryStringTools.cs
@@ -0,0 +1,342 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Support;
+
+// {{Aroush-2.9}} Port issue? Both of those were treated as: System.IO.MemoryStream
+//using CharBuffer = java.nio.CharBuffer;
+//using ByteBuffer = java.nio.ByteBuffer;
+
+namespace Lucene.Net.Util
+{
+
+ /// <summary> Provides support for converting byte sequences to Strings and back again.
+ /// The resulting Strings preserve the original byte sequences' sort order.
+ ///
+ /// The Strings are constructed using a Base 8000h encoding of the original
+ /// binary data - each char of an encoded String represents a 15-bit chunk
+ /// from the byte sequence. Base 8000h was chosen because it allows for all
+ /// lower 15 bits of char to be used without restriction; the surrogate range
+ /// [U+D800-U+DFFF] does not represent valid chars, and would require
+ /// complicated handling to avoid them and allow use of char's high bit.
+ ///
+ /// Although unset bits are used as padding in the final char, the original
+ /// byte sequence could contain trailing bytes with no set bits (null bytes):
+ /// padding is indistinguishable from valid information. To overcome this
+ /// problem, a char is appended, indicating the number of encoded bytes in the
+ /// final content char.
+ ///
+ /// This class's operations are defined over CharBuffers and ByteBuffers, to
+ /// allow for wrapped arrays to be reused, reducing memory allocation costs for
+ /// repeated operations. Note that this class calls array() and arrayOffset()
+ /// on the CharBuffers and ByteBuffers it uses, so only wrapped arrays may be
+ /// used. This class interprets the arrayOffset() and limit() values returned by
+ /// its input buffers as beginning and end+1 positions on the wrapped array,
+ /// respectively; similarly, on the output buffer, arrayOffset() is the first
+ /// position written to, and limit() is set to one past the final output array
+ /// position.
+ /// </summary>
+ public class IndexableBinaryStringTools
+ {
+
+ private static readonly CodingCase[] CODING_CASES = new CodingCase[]{new CodingCase(7, 1), new CodingCase(14, 6, 2), new CodingCase(13, 5, 3), new CodingCase(12, 4, 4), new CodingCase(11, 3, 5), new CodingCase(10, 2, 6), new CodingCase(9, 1, 7), new CodingCase(8, 0)};
+
+ // Export only static methods
+ private IndexableBinaryStringTools()
+ {
+ }
+
+ /// <summary> Returns the number of chars required to encode the given byte sequence.
+ ///
+ /// </summary>
+ /// <param name="original">The byte sequence to be encoded. Must be backed by an array.
+ /// </param>
+ /// <returns> The number of chars required to encode the given byte sequence
+ /// </returns>
+ /// <throws> IllegalArgumentException If the given ByteBuffer is not backed by an array </throws>
+ public static int GetEncodedLength(System.Collections.Generic.List<byte> original)
+ {
+ return (original.Count == 0) ? 0 : ((original.Count * 8 + 14) / 15) + 1;
+ }
+
+ /// <summary> Returns the number of bytes required to decode the given char sequence.
+ ///
+ /// </summary>
+ /// <param name="encoded">The char sequence to be decoded. Must be backed by an array.
+ /// </param>
+ /// <returns> The number of bytes required to decode the given char sequence
+ /// </returns>
+ /// <throws> IllegalArgumentException If the given CharBuffer is not backed by an array </throws>
+ public static int GetDecodedLength(System.Collections.Generic.List<char> encoded)
+ {
+ int numChars = encoded.Count - 1;
+ if (numChars <= 0)
+ {
+ return 0;
+ }
+ else
+ {
+ int numFullBytesInFinalChar = encoded[encoded.Count - 1];
+ int numEncodedChars = numChars - 1;
+ return ((numEncodedChars * 15 + 7) / 8 + numFullBytesInFinalChar);
+ }
+ }
+
+ /// <summary> Encodes the input byte sequence into the output char sequence. Before
+ /// calling this method, ensure that the output CharBuffer has sufficient
+ /// capacity by calling <see cref="GetEncodedLength(System.Collections.Generic.List{byte})" />.
+ ///
+ /// </summary>
+ /// <param name="input">The byte sequence to encode
+ /// </param>
+ /// <param name="output">Where the char sequence encoding result will go. The limit
+ /// is set to one past the position of the final char.
+ /// </param>
+ /// <throws> IllegalArgumentException If either the input or the output buffer </throws>
+ /// <summary> is not backed by an array
+ /// </summary>
+ public static void Encode(System.Collections.Generic.List<byte> input, System.Collections.Generic.List<char> output)
+ {
+ int outputLength = GetEncodedLength(input);
+ // only adjust capacity if needed
+ if (output.Capacity < outputLength)
+ {
+ output.Capacity = outputLength;
+ }
+
+ // ensure the buffer we are writing into is occupied with nulls
+ if (output.Count < outputLength)
+ {
+ for (int i = output.Count; i < outputLength; i++)
+ {
+ output.Add(Char.MinValue);
+ }
+ }
+
+ if (input.Count > 0)
+ {
+ int inputByteNum = 0;
+ int caseNum = 0;
+ int outputCharNum = 0;
+ CodingCase codingCase;
+ for (; inputByteNum + CODING_CASES[caseNum].numBytes <= input.Count; ++outputCharNum)
+ {
+ codingCase = CODING_CASES[caseNum];
+ if (2 == codingCase.numBytes)
+ {
+ output[outputCharNum] = (char)(((input[inputByteNum] & 0xFF) << codingCase.initialShift) + ((Number.URShift((input[inputByteNum + 1] & 0xFF), codingCase.finalShift)) & codingCase.finalMask) & (short)0x7FFF);
+ }
+ else
+ {
+ // numBytes is 3
+ output[outputCharNum] = (char)(((input[inputByteNum] & 0xFF) << codingCase.initialShift) + ((input[inputByteNum + 1] & 0xFF) << codingCase.middleShift) + ((Number.URShift((input[inputByteNum + 2] & 0xFF), codingCase.finalShift)) & codingCase.finalMask) & (short)0x7FFF);
+ }
+ inputByteNum += codingCase.advanceBytes;
+ if (++caseNum == CODING_CASES.Length)
+ {
+ caseNum = 0;
+ }
+ }
+ // Produce final char (if any) and trailing count chars.
+ codingCase = CODING_CASES[caseNum];
+
+ if (inputByteNum + 1 < input.Count)
+ {
+ // codingCase.numBytes must be 3
+ output[outputCharNum++] = (char) ((((input[inputByteNum] & 0xFF) << codingCase.initialShift) + ((input[inputByteNum + 1] & 0xFF) << codingCase.middleShift)) & (short) 0x7FFF);
+ // Add trailing char containing the number of full bytes in final char
+ output[outputCharNum++] = (char) 1;
+ }
+ else if (inputByteNum < input.Count)
+ {
+ output[outputCharNum++] = (char) (((input[inputByteNum] & 0xFF) << codingCase.initialShift) & (short) 0x7FFF);
+ // Add trailing char containing the number of full bytes in final char
+ output[outputCharNum++] = caseNum == 0?(char) 1:(char) 0;
+ }
+ else
+ {
+ // No left over bits - last char is completely filled.
+ // Add trailing char containing the number of full bytes in final char
+ output[outputCharNum++] = (char) 1;
+ }
+ }
+ }
+
+ /// <summary> Decodes the input char sequence into the output byte sequence. Before
+ /// calling this method, ensure that the output ByteBuffer has sufficient
+ /// capacity by calling <see cref="GetDecodedLength(System.Collections.Generic.List{char})" />.
+ ///
+ /// </summary>
+ /// <param name="input">The char sequence to decode
+ /// </param>
+ /// <param name="output">Where the byte sequence decoding result will go. The limit
+ /// is set to one past the position of the final decoded byte.
+ /// </param>
+ /// <throws> IllegalArgumentException If either the input or the output buffer </throws>
+ /// <summary> is not backed by an array
+ /// </summary>
+ public static void Decode(System.Collections.Generic.List<char> input, System.Collections.Generic.List<byte> output)
+ {
+ int numOutputBytes = GetDecodedLength(input);
+ if (output.Capacity < numOutputBytes)
+ {
+ output.Capacity = numOutputBytes;
+ }
+
+ // ensure the buffer we are writing into is occupied with nulls
+ if (output.Count < numOutputBytes)
+ {
+ for (int i = output.Count; i < numOutputBytes; i++)
+ {
+ output.Add(Byte.MinValue);
+ }
+ }
+
+ if (input.Count > 0)
+ {
+ int caseNum = 0;
+ int outputByteNum = 0;
+ int inputCharNum = 0;
+ short inputChar;
+ CodingCase codingCase;
+ for (; inputCharNum < input.Count - 2; ++inputCharNum)
+ {
+ codingCase = CODING_CASES[caseNum];
+ inputChar = (short) input[inputCharNum];
+ if (2 == codingCase.numBytes)
+ {
+ if (0 == caseNum)
+ {
+ output[outputByteNum] = (byte) (Number.URShift(inputChar, codingCase.initialShift));
+ }
+ else
+ {
+ output[outputByteNum] = (byte) (output[outputByteNum] + (byte) (Number.URShift(inputChar, codingCase.initialShift)));
+ }
+ output[outputByteNum + 1] = (byte) ((inputChar & codingCase.finalMask) << codingCase.finalShift);
+ }
+ else
+ {
+ // numBytes is 3
+ output[outputByteNum] = (byte) (output[outputByteNum] + (byte) (Number.URShift(inputChar, codingCase.initialShift)));
+ output[outputByteNum + 1] = (byte) (Number.URShift((inputChar & codingCase.middleMask), codingCase.middleShift));
+ output[outputByteNum + 2] = (byte) ((inputChar & codingCase.finalMask) << codingCase.finalShift);
+ }
+ outputByteNum += codingCase.advanceBytes;
+ if (++caseNum == CODING_CASES.Length)
+ {
+ caseNum = 0;
+ }
+ }
+ // Handle final char
+ inputChar = (short) input[inputCharNum];
+ codingCase = CODING_CASES[caseNum];
+ if (0 == caseNum)
+ {
+ output[outputByteNum] = 0;
+ }
+ output[outputByteNum] = (byte) (output[outputByteNum] + (byte) (Number.URShift(inputChar, codingCase.initialShift)));
+ long bytesLeft = numOutputBytes - outputByteNum;
+ if (bytesLeft > 1)
+ {
+ if (2 == codingCase.numBytes)
+ {
+ output[outputByteNum + 1] = (byte) (Number.URShift((inputChar & codingCase.finalMask), codingCase.finalShift));
+ }
+ else
+ {
+ // numBytes is 3
+ output[outputByteNum + 1] = (byte) (Number.URShift((inputChar & codingCase.middleMask), codingCase.middleShift));
+ if (bytesLeft > 2)
+ {
+ output[outputByteNum + 2] = (byte) ((inputChar & codingCase.finalMask) << codingCase.finalShift);
+ }
+ }
+ }
+ }
+ }
+
+ /// <summary> Decodes the given char sequence, which must have been encoded by
+ /// <see cref="Encode(System.Collections.Generic.List{byte})" /> or
+ /// <see cref="Encode(System.Collections.Generic.List{byte}, System.Collections.Generic.List{char})" />.
+ ///
+ /// </summary>
+ /// <param name="input">The char sequence to decode
+ /// </param>
+ /// <returns> A byte sequence containing the decoding result. The limit
+ /// is set to one past the position of the final decoded byte.
+ /// </returns>
+ /// <throws> IllegalArgumentException If the input buffer is not backed by an </throws>
+ /// <summary> array
+ /// </summary>
+ public static System.Collections.Generic.List<byte> Decode(System.Collections.Generic.List<char> input)
+ {
+ System.Collections.Generic.List<byte> output =
+ new System.Collections.Generic.List<byte>(new byte[GetDecodedLength(input)]);
+ Decode(input, output);
+ return output;
+ }
+
+ /// <summary> Encodes the input byte sequence.
+ ///
+ /// </summary>
+ /// <param name="input">The byte sequence to encode
+ /// </param>
+ /// <returns> A char sequence containing the encoding result. The limit is set
+ /// to one past the position of the final char.
+ /// </returns>
+ /// <throws> IllegalArgumentException If the input buffer is not backed by an </throws>
+ /// <summary> array
+ /// </summary>
+ public static System.Collections.Generic.List<char> Encode(System.Collections.Generic.List<byte> input)
+ {
+ System.Collections.Generic.List<char> output =
+ new System.Collections.Generic.List<char>(new char[GetEncodedLength(input)]);
+ Encode(input, output);
+ return output;
+ }
+
+ internal class CodingCase
+ {
+ internal int numBytes, initialShift, middleShift, finalShift, advanceBytes = 2;
+ internal short middleMask, finalMask;
+
+ internal CodingCase(int initialShift, int middleShift, int finalShift)
+ {
+ this.numBytes = 3;
+ this.initialShift = initialShift;
+ this.middleShift = middleShift;
+ this.finalShift = finalShift;
+ this.finalMask = (short) (Number.URShift((short) 0xFF, finalShift));
+ this.middleMask = (short) ((short) 0xFF << middleShift);
+ }
+
+ internal CodingCase(int initialShift, int finalShift)
+ {
+ this.numBytes = 2;
+ this.initialShift = initialShift;
+ this.finalShift = finalShift;
+ this.finalMask = (short) (Number.URShift((short) 0xFF, finalShift));
+ if (finalShift != 0)
+ {
+ advanceBytes = 1;
+ }
+ }
+ }
+ }
+} \ No newline at end of file
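A round-trip sketch of the encoder above, using only the convenience overloads defined in this file; the exact byte values are illustrative:

    // Encode arbitrary bytes into a sort-preserving string and decode them back.
    var original = new System.Collections.Generic.List<byte>(new byte[] { 0x00, 0x10, 0x7F, 0xFF });
    System.Collections.Generic.List<char> encoded = IndexableBinaryStringTools.Encode(original);
    string indexable = new string(encoded.ToArray());   // usable as a term or stored field value
    System.Collections.Generic.List<byte> decoded = IndexableBinaryStringTools.Decode(encoded);
    // decoded is expected to match original byte-for-byte; the trailing count char keeps
    // trailing zero bytes distinguishable from padding.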
diff --git a/src/core/Util/MapOfSets.cs b/src/core/Util/MapOfSets.cs
new file mode 100644
index 0000000..ee997f4
--- /dev/null
+++ b/src/core/Util/MapOfSets.cs
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Util
+{
+
+ /// <summary> Helper class for keeping sets of values associated with keys. <b>WARNING: THIS CLASS IS NOT THREAD SAFE</b></summary>
+ public class MapOfSets<TKey, TValue>
+ {
+ private IDictionary<TKey, HashSet<TValue>> theMap;
+
+ /// <param name="m">the backing store for this object
+ /// </param>
+ public MapOfSets(IDictionary<TKey, HashSet<TValue>> m)
+ {
+ theMap = m;
+ }
+
+ /// <value> direct access to the map backing this object. </value>
+ public virtual IDictionary<TKey, HashSet<TValue>> Map
+ {
+ get { return theMap; }
+ }
+
+ /// <summary> Adds val to the Set associated with key in the Map. If key is not
+ /// already in the map, a new Set will first be created.
+ /// </summary>
+ /// <returns> the size of the Set associated with key once val is added to it.
+ /// </returns>
+ public virtual int Put(TKey key, TValue val)
+ {
+ HashSet<TValue> theSet;
+ if (!theMap.TryGetValue(key, out theSet))
+ {
+ theSet = new HashSet<TValue>();
+ theMap[key] = theSet;
+ }
+ theSet.Add(val);
+ return theSet.Count;
+ }
+ /// <summary> Adds multiple vals to the Set associated with key in the Map.
+ /// If key is not
+ /// already in the map, a new Set will first be created.
+ /// </summary>
+ /// <returns> the size of the Set associated with key once the vals are added to it.
+ /// </returns>
+ public virtual int PutAll(TKey key, IEnumerable<TValue> vals)
+ {
+ HashSet<TValue> theSet;
+ if (!theMap.TryGetValue(key, out theSet))
+ {
+ theSet = new HashSet<TValue>();
+ theMap[key] = theSet;
+ }
+ theSet.UnionWith(vals);
+ return theSet.Count;
+ }
+ }
+} \ No newline at end of file
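A sketch of how the return value of Put is typically read, assuming the usual System.Collections.Generic imports; FieldCacheSanityChecker uses the same pattern to detect a second distinct value for one ReaderField:

    var groups = new MapOfSets<string, int>(new Dictionary<string, HashSet<int>>());
    int a = groups.Put("field", 42);                     // 1: a new set was created for "field"
    int b = groups.Put("field", 42);                     // still 1: the HashSet deduplicates
    int c = groups.Put("field", 7);                      // 2: a second distinct value was recorded
    int d = groups.PutAll("other", new[] { 1, 2, 3 });   // 3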
diff --git a/src/core/Util/MemoryModel.cs b/src/core/Util/MemoryModel.cs
new file mode 100644
index 0000000..ad5091a
--- /dev/null
+++ b/src/core/Util/MemoryModel.cs
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Util
+{
+
+ /// <summary> Returns primitive memory sizes for estimating RAM usage.
+ ///
+ /// </summary>
+ public abstract class MemoryModel
+ {
+ /// <value> size of array beyond contents </value>
+ public abstract int ArraySize { get; }
+
+ /// <value> Class size overhead </value>
+ public abstract int ClassSize { get; }
+
+ /// <param name="clazz">a primitive Class - bool, byte, char, short, long, float,
+ /// double, int
+ /// </param>
+ /// <returns> the size in bytes of given primitive Class
+ /// </returns>
+ public abstract int GetPrimitiveSize(System.Type clazz);
+
+ /// <value> size of reference </value>
+ public abstract int ReferenceSize { get; }
+ }
+} \ No newline at end of file
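A purely illustrative concrete model, with assumed (not measured) sizes for a 64-bit CLR; the constants below are placeholders, and a real implementation would derive them for the target runtime:

    public class Assumed64BitMemoryModel : MemoryModel
    {
        public override int ArraySize { get { return 24; } }      // assumed array header overhead
        public override int ClassSize { get { return 16; } }      // assumed object header overhead
        public override int ReferenceSize { get { return 8; } }   // 64-bit object references
        public override int GetPrimitiveSize(System.Type clazz)
        {
            if (clazz == typeof(bool) || clazz == typeof(byte)) return 1;
            if (clazz == typeof(char) || clazz == typeof(short)) return 2;
            if (clazz == typeof(int) || clazz == typeof(float)) return 4;
            return 8; // long and double
        }
    }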
diff --git a/src/core/Util/NumericUtils.cs b/src/core/Util/NumericUtils.cs
new file mode 100644
index 0000000..1bd68c2
--- /dev/null
+++ b/src/core/Util/NumericUtils.cs
@@ -0,0 +1,488 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Documents;
+using Lucene.Net.Search;
+using Lucene.Net.Support;
+using NumericTokenStream = Lucene.Net.Analysis.NumericTokenStream;
+
+namespace Lucene.Net.Util
+{
+
+ /// <summary> This is a helper class to generate prefix-encoded representations for numerical values
+ /// and supplies converters to represent float/double values as sortable integers/longs.
+ ///
+ /// <p/>To quickly execute range queries in Apache Lucene, a range is divided recursively
+ /// into multiple intervals for searching: The center of the range is searched only with
+ /// the lowest possible precision in the trie, while the boundaries are matched
+ /// more exactly. This reduces the number of terms dramatically.
+ ///
+ /// <p/>This class generates terms to achieve this: First, the numerical integer values need to
+ /// be converted to strings. For that, integer values (32 bit or 64 bit) are made unsigned
+ /// and the bits are converted to ASCII chars, 7 bits per char. The resulting string is
+ /// sortable like the original integer value. Each value is also prefixed
+ /// (in the first char) by the <c>shift</c> value (number of bits removed) used
+ /// during encoding.
+ ///
+ /// <p/>To also index floating point numbers, this class supplies two methods to convert them
+ /// to integer values by changing their bit layout: <see cref="DoubleToSortableLong" />,
+ /// <see cref="FloatToSortableInt" />. You will have no precision loss by
+ /// converting floating point numbers to integers and back (only that the integer form
+ /// is not usable). Other data types like dates can easily be converted to longs or ints (e.g.
+ /// date to long: <see cref="DateTime" />).
+ ///
+ /// <p/>For easy usage, the trie algorithm is implemented for indexing inside
+ /// <see cref="NumericTokenStream" /> that can index <c>int</c>, <c>long</c>,
+ /// <c>float</c>, and <c>double</c>. For querying,
+ /// <see cref="NumericRangeQuery{T}" /> and <see cref="NumericRangeFilter{T}" /> implement the query part
+ /// for the same data types.
+ ///
+ /// <p/>This class can also be used to generate lexicographically sortable (according to
+ /// <see cref="String.CompareTo(String)" />) representations of numeric data types for other
+ /// usages (e.g. sorting).
+ ///
+ /// <p/><font color="red"><b>NOTE:</b> This API is experimental and
+ /// might change in incompatible ways in the next release.</font>
+ ///
+ /// </summary>
+ /// <since> 2.9
+ /// </since>
+ public sealed class NumericUtils
+ {
+
+ private NumericUtils()
+ {
+ } // no instance!
+
+ /// <summary> The default precision step used by <see cref="NumericField" />, <see cref="NumericTokenStream" />,
+ /// <see cref="NumericRangeQuery{T}" />, and <see cref="NumericRangeFilter{T}" /> as default
+ /// </summary>
+ public const int PRECISION_STEP_DEFAULT = 4;
+
+ /// <summary> Expert: Longs are stored at lower precision by shifting off lower bits. The shift count is
+ /// stored as <c>SHIFT_START_LONG+shift</c> in the first character
+ /// </summary>
+ public static char SHIFT_START_LONG = (char) 0x20;
+
+ /// <summary> Expert: The maximum term length (used for <c>char[]</c> buffer size)
+ /// for encoding <c>long</c> values.
+ /// </summary>
+ /// <seealso cref="LongToPrefixCoded(long,int,char[])">
+ /// </seealso>
+ public const int BUF_SIZE_LONG = 63 / 7 + 2;
+
+ /// <summary> Expert: Integers are stored at lower precision by shifting off lower bits. The shift count is
+ /// stored as <c>SHIFT_START_INT+shift</c> in the first character
+ /// </summary>
+ public static char SHIFT_START_INT = (char) 0x60;
+
+ /// <summary> Expert: The maximum term length (used for <c>char[]</c> buffer size)
+ /// for encoding <c>int</c> values.
+ /// </summary>
+ /// <seealso cref="IntToPrefixCoded(int,int,char[])">
+ /// </seealso>
+ public const int BUF_SIZE_INT = 31 / 7 + 2;
+
+ /// <summary> Expert: Returns prefix coded bits after reducing the precision by <c>shift</c> bits.
+ /// This method is used by <see cref="NumericTokenStream" />.
+ /// </summary>
+ /// <param name="val">the numeric value
+ /// </param>
+ /// <param name="shift">how many bits to strip from the right
+ /// </param>
+ /// <param name="buffer">that will contain the encoded chars, must be at least of <see cref="BUF_SIZE_LONG" />
+ /// length
+ /// </param>
+ /// <returns> number of chars written to buffer
+ /// </returns>
+ public static int LongToPrefixCoded(long val, int shift, char[] buffer)
+ {
+ if (shift > 63 || shift < 0)
+ throw new System.ArgumentException("Illegal shift value, must be 0..63");
+ int nChars = (63 - shift) / 7 + 1, len = nChars + 1;
+ buffer[0] = (char) (SHIFT_START_LONG + shift);
+ ulong sortableBits = BitConverter.ToUInt64(BitConverter.GetBytes(val), 0) ^ 0x8000000000000000L;
+ sortableBits = sortableBits >> shift;
+ while (nChars >= 1)
+ {
+ // Store 7 bits per character for good efficiency when UTF-8 encoding.
+ // The whole number is right-justified so that lucene can prefix-encode
+ // the terms more efficiently.
+ buffer[nChars--] = (char) (sortableBits & 0x7f);
+ sortableBits = sortableBits >> 7;
+ }
+ return len;
+ }
+
+ /// <summary> Expert: Returns prefix coded bits after reducing the precision by <c>shift</c> bits.
+ /// This method is used by <see cref="LongRangeBuilder" />.
+ /// </summary>
+ /// <param name="val">the numeric value
+ /// </param>
+ /// <param name="shift">how many bits to strip from the right
+ /// </param>
+ public static System.String LongToPrefixCoded(long val, int shift)
+ {
+ char[] buffer = new char[BUF_SIZE_LONG];
+ int len = LongToPrefixCoded(val, shift, buffer);
+ return new System.String(buffer, 0, len);
+ }
+
+ /// <summary> This is a convenience method that returns prefix coded bits of a long without
+ /// reducing the precision. It can be used to store the full precision value as a
+ /// stored field in the index.
+ /// <p/>To decode, use <see cref="PrefixCodedToLong" />.
+ /// </summary>
+ public static System.String LongToPrefixCoded(long val)
+ {
+ return LongToPrefixCoded(val, 0);
+ }
+
+ /// <summary> Expert: Returns prefix coded bits after reducing the precision by <c>shift</c> bits.
+ /// This method is used by <see cref="NumericTokenStream" />.
+ /// </summary>
+ /// <param name="val">the numeric value
+ /// </param>
+ /// <param name="shift">how many bits to strip from the right
+ /// </param>
+ /// <param name="buffer">that will contain the encoded chars, must be at least of <see cref="BUF_SIZE_INT" />
+ /// length
+ /// </param>
+ /// <returns> number of chars written to buffer
+ /// </returns>
+ public static int IntToPrefixCoded(int val, int shift, char[] buffer)
+ {
+ if (shift > 31 || shift < 0)
+ throw new System.ArgumentException("Illegal shift value, must be 0..31");
+ int nChars = (31 - shift) / 7 + 1, len = nChars + 1;
+ buffer[0] = (char) (SHIFT_START_INT + shift);
+ int sortableBits = val ^ unchecked((int) 0x80000000);
+ sortableBits = Number.URShift(sortableBits, shift);
+ while (nChars >= 1)
+ {
+ // Store 7 bits per character for good efficiency when UTF-8 encoding.
+ // The whole number is right-justified so that lucene can prefix-encode
+ // the terms more efficiently.
+ buffer[nChars--] = (char) (sortableBits & 0x7f);
+ sortableBits = Number.URShift(sortableBits, 7);
+ }
+ return len;
+ }
+
+ /// <summary> Expert: Returns prefix coded bits after reducing the precision by <c>shift</c> bits.
+ /// This method is used by <see cref="IntRangeBuilder" />.
+ /// </summary>
+ /// <param name="val">the numeric value
+ /// </param>
+ /// <param name="shift">how many bits to strip from the right
+ /// </param>
+ public static System.String IntToPrefixCoded(int val, int shift)
+ {
+ char[] buffer = new char[BUF_SIZE_INT];
+ int len = IntToPrefixCoded(val, shift, buffer);
+ return new System.String(buffer, 0, len);
+ }
+
+ /// <summary> This is a convenience method that returns prefix coded bits of an int without
+ /// reducing the precision. It can be used to store the full precision value as a
+ /// stored field in the index.
+ /// <p/>To decode, use <see cref="PrefixCodedToInt" />.
+ /// </summary>
+ public static System.String IntToPrefixCoded(int val)
+ {
+ return IntToPrefixCoded(val, 0);
+ }
+
+ /// <summary> Returns a long from prefixCoded characters.
+ /// Rightmost bits will be zero for lower precision codes.
+ /// This method can be used to decode e.g. a stored field.
+ /// </summary>
+ /// <throws> NumberFormatException if the supplied string is </throws>
+ /// <summary> not correctly prefix encoded.
+ /// </summary>
+ /// <seealso cref="LongToPrefixCoded(long)">
+ /// </seealso>
+ public static long PrefixCodedToLong(System.String prefixCoded)
+ {
+ int shift = prefixCoded[0] - SHIFT_START_LONG;
+ if (shift > 63 || shift < 0)
+ throw new System.FormatException("Invalid shift value in prefixCoded string (is encoded value really a LONG?)");
+ ulong sortableBits = 0UL;
+ for (int i = 1, len = prefixCoded.Length; i < len; i++)
+ {
+ sortableBits <<= 7;
+ char ch = prefixCoded[i];
+ if (ch > 0x7f)
+ {
+ throw new System.FormatException("Invalid prefixCoded numerical value representation (char " + System.Convert.ToString((int) ch, 16) + " at position " + i + " is invalid)");
+ }
+ sortableBits |= (ulong) ch;
+ }
+ return BitConverter.ToInt64(BitConverter.GetBytes((sortableBits << shift) ^ 0x8000000000000000L), 0);
+ }
+
+ /// <summary> Returns an int from prefixCoded characters.
+ /// Rightmost bits will be zero for lower precision codes.
+ /// This method can be used to decode e.g. a stored field.
+ /// </summary>
+ /// <throws> NumberFormatException if the supplied string is </throws>
+ /// <summary> not correctly prefix encoded.
+ /// </summary>
+ /// <seealso cref="IntToPrefixCoded(int)">
+ /// </seealso>
+ public static int PrefixCodedToInt(System.String prefixCoded)
+ {
+ int shift = prefixCoded[0] - SHIFT_START_INT;
+ if (shift > 31 || shift < 0)
+ throw new System.FormatException("Invalid shift value in prefixCoded string (is encoded value really an INT?)");
+ int sortableBits = 0;
+ for (int i = 1, len = prefixCoded.Length; i < len; i++)
+ {
+ sortableBits <<= 7;
+ char ch = prefixCoded[i];
+ if (ch > 0x7f)
+ {
+ throw new System.FormatException("Invalid prefixCoded numerical value representation (char " + System.Convert.ToString((int) ch, 16) + " at position " + i + " is invalid)");
+ }
+ sortableBits |= (int) ch;
+ }
+ return (sortableBits << shift) ^ unchecked((int) 0x80000000);
+ }
+
+ /// <summary> Converts a <c>double</c> value to a sortable signed <c>long</c>.
+ /// The value is converted by getting its IEEE 754 floating-point &quot;double format&quot;
+ /// bit layout and then some bits are swapped, to be able to compare the result as long.
+ /// By this the precision is not reduced, but the value can easily be used as a long.
+ /// </summary>
+ /// <seealso cref="SortableLongToDouble">
+ /// </seealso>
+ public static long DoubleToSortableLong(double val)
+ {
+ long f = BitConverter.DoubleToInt64Bits(val); // {{Aroush-2.9}} will this work the same as 'java.lang.Double.doubleToRawLongBits()'?
+ if (f < 0)
+ f ^= 0x7fffffffffffffffL;
+ return f;
+ }
+
+ /// <summary> Convenience method: this just returns:
+ /// longToPrefixCoded(doubleToSortableLong(val))
+ /// </summary>
+ public static System.String DoubleToPrefixCoded(double val)
+ {
+ return LongToPrefixCoded(DoubleToSortableLong(val));
+ }
+
+ /// <summary> Converts a sortable <c>long</c> back to a <c>double</c>.</summary>
+ /// <seealso cref="DoubleToSortableLong">
+ /// </seealso>
+ public static double SortableLongToDouble(long val)
+ {
+ if (val < 0)
+ val ^= 0x7fffffffffffffffL;
+ return BitConverter.Int64BitsToDouble(val);
+ }
+
+ /// <summary> Convenience method: this just returns:
+ /// sortableLongToDouble(prefixCodedToLong(val))
+ /// </summary>
+ public static double PrefixCodedToDouble(System.String val)
+ {
+ return SortableLongToDouble(PrefixCodedToLong(val));
+ }
+
+ /// <summary> Converts a <c>float</c> value to a sortable signed <c>int</c>.
+ /// The value is converted by getting its IEEE 754 floating-point &quot;float format&quot;
+ /// bit layout and then some bits are swapped, to be able to compare the result as int.
+ /// By this the precision is not reduced, but the value can easily be used as an int.
+ /// </summary>
+ /// <seealso cref="SortableIntToFloat">
+ /// </seealso>
+ public static int FloatToSortableInt(float val)
+ {
+ int f = BitConverter.ToInt32(BitConverter.GetBytes(val), 0);
+ if (f < 0)
+ f ^= 0x7fffffff;
+ return f;
+ }
+
+ /// <summary> Convenience method: this just returns:
+ /// intToPrefixCoded(floatToSortableInt(val))
+ /// </summary>
+ public static System.String FloatToPrefixCoded(float val)
+ {
+ return IntToPrefixCoded(FloatToSortableInt(val));
+ }
+
+ /// <summary> Converts a sortable <c>int</c> back to a <c>float</c>.</summary>
+ /// <seealso cref="FloatToSortableInt">
+ /// </seealso>
+ public static float SortableIntToFloat(int val)
+ {
+ if (val < 0)
+ val ^= 0x7fffffff;
+ return BitConverter.ToSingle(BitConverter.GetBytes(val), 0);
+ }
+
+ /// <summary> Convenience method: this just returns:
+ /// sortableIntToFloat(prefixCodedToInt(val))
+ /// </summary>
+ public static float PrefixCodedToFloat(System.String val)
+ {
+ return SortableIntToFloat(PrefixCodedToInt(val));
+ }
+
+ /// <summary> Expert: Splits a long range recursively.
+ /// You may implement a builder that adds clauses to a
+ /// <see cref="Lucene.Net.Search.BooleanQuery" /> for each call to its
+ /// <see cref="LongRangeBuilder.AddRange(String,String)" />
+ /// method.
+ /// <p/>This method is used by <see cref="NumericRangeQuery{T}" />.
+ /// </summary>
+ public static void SplitLongRange(LongRangeBuilder builder, int precisionStep, long minBound, long maxBound)
+ {
+ SplitRange(builder, 64, precisionStep, minBound, maxBound);
+ }
+
+ /// <summary> Expert: Splits an int range recursively.
+ /// You may implement a builder that adds clauses to a
+ /// <see cref="Lucene.Net.Search.BooleanQuery" /> for each call to its
+ /// <see cref="IntRangeBuilder.AddRange(String,String)" />
+ /// method.
+ /// <p/>This method is used by <see cref="NumericRangeQuery{T}" />.
+ /// </summary>
+ public static void SplitIntRange(IntRangeBuilder builder, int precisionStep, int minBound, int maxBound)
+ {
+ SplitRange(builder, 32, precisionStep, (long) minBound, (long) maxBound);
+ }
+
+ /// <summary>This helper does the splitting for both 32 and 64 bit. </summary>
+ private static void SplitRange(System.Object builder, int valSize, int precisionStep, long minBound, long maxBound)
+ {
+ if (precisionStep < 1)
+ throw new System.ArgumentException("precisionStep must be >=1");
+ if (minBound > maxBound)
+ return ;
+ for (int shift = 0; ; shift += precisionStep)
+ {
+ // calculate new bounds for inner precision
+ long diff = 1L << (shift + precisionStep);
+ long mask = ((1L << precisionStep) - 1L) << shift;
+ bool hasLower = (minBound & mask) != 0L;
+ bool hasUpper = (maxBound & mask) != mask;
+ long nextMinBound = (hasLower?(minBound + diff):minBound) & ~ mask;
+ long nextMaxBound = (hasUpper?(maxBound - diff):maxBound) & ~ mask;
+ bool lowerWrapped = nextMinBound < minBound,
+ upperWrapped = nextMaxBound > maxBound;
+
+ if (shift+precisionStep>=valSize || nextMinBound>nextMaxBound || lowerWrapped || upperWrapped)
+ {
+ // We are in the lowest precision or the next precision is not available.
+ AddRange(builder, valSize, minBound, maxBound, shift);
+ // exit the split recursion loop
+ break;
+ }
+
+ if (hasLower)
+ AddRange(builder, valSize, minBound, minBound | mask, shift);
+ if (hasUpper)
+ AddRange(builder, valSize, maxBound & ~ mask, maxBound, shift);
+
+ // recurse to next precision
+ minBound = nextMinBound;
+ maxBound = nextMaxBound;
+ }
+ }
+
+ /// <summary>Helper that delegates to correct range builder </summary>
+ private static void AddRange(System.Object builder, int valSize, long minBound, long maxBound, int shift)
+ {
+ // for the max bound set all lower bits (that were shifted away):
+ // this is important for testing or other usages of the split range
+ // (e.g. to reconstruct the full range). The prefixEncoding will remove
+ // the bits anyway, so they do not hurt!
+ maxBound |= (1L << shift) - 1L;
+ // delegate to correct range builder
+ switch (valSize)
+ {
+
+ case 64:
+ ((LongRangeBuilder) builder).AddRange(minBound, maxBound, shift);
+ break;
+
+ case 32:
+ ((IntRangeBuilder) builder).AddRange((int) minBound, (int) maxBound, shift);
+ break;
+
+ default:
+ // Should not happen!
+ throw new System.ArgumentException("valSize must be 32 or 64.");
+
+ }
+ }
+
+ /// <summary> Expert: Callback for <see cref="SplitLongRange" />.
+ /// You need to override only one of the methods.
+ /// <p/><font color="red"><b>NOTE:</b> This is a very low-level interface,
+ /// the method signatures may change in later versions.</font>
+ /// </summary>
+ public abstract class LongRangeBuilder
+ {
+
+ /// <summary> Override this method if you would like to receive the already prefix-encoded range bounds.
+ /// You can directly build classical (inclusive) range queries from them.
+ /// </summary>
+ public virtual void AddRange(System.String minPrefixCoded, System.String maxPrefixCoded)
+ {
+ throw new System.NotSupportedException();
+ }
+
+ /// <summary> Override this method if you would like to receive the raw long range bounds.
+ /// You can use this for e.g. debugging purposes (print out range bounds).
+ /// </summary>
+ public virtual void AddRange(long min, long max, int shift)
+ {
+ AddRange(Lucene.Net.Util.NumericUtils.LongToPrefixCoded(min, shift), Lucene.Net.Util.NumericUtils.LongToPrefixCoded(max, shift));
+ }
+ }
+
+ /// <summary> Expert: Callback for <see cref="SplitIntRange" />.
+ /// You need to override only one of the methods.
+ /// <p/><font color="red"><b>NOTE:</b> This is a very low-level interface,
+ /// the method signatures may change in later versions.</font>
+ /// </summary>
+ public abstract class IntRangeBuilder
+ {
+
+ /// <summary> Override this method if you would like to receive the already prefix-encoded range bounds.
+ /// You can directly build classical range (inclusive) queries from them.
+ /// </summary>
+ public virtual void AddRange(System.String minPrefixCoded, System.String maxPrefixCoded)
+ {
+ throw new System.NotSupportedException();
+ }
+
+ /// <summary> Override this method if you would like to receive the raw int range bounds.
+ /// You can use this for e.g. debugging purposes (print out range bounds).
+ /// </summary>
+ public virtual void AddRange(int min, int max, int shift)
+ {
+ AddRange(Lucene.Net.Util.NumericUtils.IntToPrefixCoded(min, shift), Lucene.Net.Util.NumericUtils.IntToPrefixCoded(max, shift));
+ }
+ }
+ }
+} \ No newline at end of file
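A sketch of the trie splitting described above, using only the API defined in this file; the bounds 1000..5000 are arbitrary:

    // Collect the prefix-coded sub-ranges SplitLongRange produces for a query window.
    class CollectingRangeBuilder : NumericUtils.LongRangeBuilder
    {
        public readonly System.Collections.Generic.List<string> Bounds =
            new System.Collections.Generic.List<string>();
        public override void AddRange(string minPrefixCoded, string maxPrefixCoded)
        {
            Bounds.Add(minPrefixCoded + " .. " + maxPrefixCoded);
        }
    }

    var builder = new CollectingRangeBuilder();
    NumericUtils.SplitLongRange(builder, NumericUtils.PRECISION_STEP_DEFAULT, 1000L, 5000L);
    // Each entry in builder.Bounds is one inclusive term range a NumericRangeQuery would search.
    // A full-precision value also round-trips: PrefixCodedToLong(LongToPrefixCoded(v)) == v.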
diff --git a/src/core/Util/OpenBitSet.cs b/src/core/Util/OpenBitSet.cs
new file mode 100644
index 0000000..d7c0979
--- /dev/null
+++ b/src/core/Util/OpenBitSet.cs
@@ -0,0 +1,944 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Support;
+using DocIdSet = Lucene.Net.Search.DocIdSet;
+using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+
+namespace Lucene.Net.Util
+{
+
+ /// <summary>An "open" BitSet implementation that allows direct access to the array of words
+ /// storing the bits.
+ /// <p/>
+ /// Unlike java.util.BitSet, the fact that bits are packed into an array of longs
+ /// is part of the interface. This allows efficient implementation of other algorithms
+ /// by someone other than the author. It also allows one to efficiently implement
+ /// alternate serialization or interchange formats.
+ /// <p/>
+ /// <c>OpenBitSet</c> is faster than <c>java.util.BitSet</c> in most operations
+ /// and *much* faster at calculating cardinality of sets and results of set operations.
+ /// It can also handle sets of larger cardinality (up to 64 * 2**32-1)
+ /// <p/>
+ /// The goals of <c>OpenBitSet</c> are the fastest implementation possible, and
+ /// maximum code reuse. Extra safety and encapsulation
+ /// may always be built on top, but if that's built in, the cost can never be removed (and
+ /// hence people re-implement their own version in order to get better performance).
+ /// If you want a "safe", totally encapsulated (and slower and limited) BitSet
+ /// class, use <c>java.util.BitSet</c>.
+ /// <p/>
+ /// <h3>Performance Results</h3>
+ ///
+ /// Test system: Pentium 4, Sun Java 1.5_06 -server -Xbatch -Xmx64M
+ /// <br/>BitSet size = 1,000,000
+ /// <br/>Results are java.util.BitSet time divided by OpenBitSet time.
+ /// <table border="1">
+ /// <tr>
+ /// <th></th> <th>cardinality</th> <th>intersect_count</th> <th>union</th> <th>nextSetBit</th> <th>get</th> <th>iterator</th>
+ /// </tr>
+ /// <tr>
+ /// <th>50% full</th> <td>3.36</td> <td>3.96</td> <td>1.44</td> <td>1.46</td> <td>1.99</td> <td>1.58</td>
+ /// </tr>
+ /// <tr>
+ /// <th>1% full</th> <td>3.31</td> <td>3.90</td> <td>&#160;</td> <td>1.04</td> <td>&#160;</td> <td>0.99</td>
+ /// </tr>
+ /// </table>
+ /// <br/>
+ /// Test system: AMD Opteron, 64 bit linux, Sun Java 1.5_06 -server -Xbatch -Xmx64M
+ /// <br/>BitSet size = 1,000,000
+ /// <br/>Results are java.util.BitSet time divided by OpenBitSet time.
+ /// <table border="1">
+ /// <tr>
+ /// <th></th> <th>cardinality</th> <th>intersect_count</th> <th>union</th> <th>nextSetBit</th> <th>get</th> <th>iterator</th>
+ /// </tr>
+ /// <tr>
+ /// <th>50% full</th> <td>2.50</td> <td>3.50</td> <td>1.00</td> <td>1.03</td> <td>1.12</td> <td>1.25</td>
+ /// </tr>
+ /// <tr>
+ /// <th>1% full</th> <td>2.51</td> <td>3.49</td> <td>&#160;</td> <td>1.00</td> <td>&#160;</td> <td>1.02</td>
+ /// </tr>
+ /// </table>
+ /// </summary>
+ /// <version> $Id$
+ /// </version>
+
+ [Serializable]
+ public class OpenBitSet:DocIdSet, System.ICloneable
+ {
+ protected internal long[] internalbits;
+ protected internal int wlen; // number of words (elements) used in the array
+
+ /// <summary>Constructs an OpenBitSet large enough to hold numBits.
+ ///
+ /// </summary>
+ /// <param name="numBits">
+ /// </param>
+ public OpenBitSet(long numBits)
+ {
+ internalbits = new long[Bits2words(numBits)];
+ wlen = internalbits.Length;
+ }
+
+ public OpenBitSet():this(64)
+ {
+ }
+
+ /// <summary>Constructs an OpenBitSet from an existing long[].
+ /// <br/>
+ /// The first 64 bits are in long[0],
+ /// with bit index 0 at the least significant bit, and bit index 63 at the most significant.
+ /// Given a bit index,
+ /// the word containing it is long[index/64], and it is at bit number index%64 within that word.
+ /// <p/>
+ /// numWords are the number of elements in the array that contain
+ /// set bits (non-zero longs).
+ /// numWords should be &lt;= bits.length, and
+ /// any existing words in the array at position &gt;= numWords should be zero.
+ ///
+ /// </summary>
+ public OpenBitSet(long[] bits, int numWords)
+ {
+ this.internalbits = bits;
+ this.wlen = numWords;
+ }
+
+ public override DocIdSetIterator Iterator()
+ {
+ return new OpenBitSetIterator(internalbits, wlen);
+ }
+
+ /// <summary>This DocIdSet implementation is cacheable. </summary>
+ public override bool IsCacheable
+ {
+ get { return true; }
+ }
+
+ /// <summary>Returns the current capacity in bits (1 greater than the index of the last bit) </summary>
+ public virtual long Capacity()
+ {
+ return internalbits.Length << 6;
+ }
+
+ /// <summary> Returns the current capacity of this set. Included for
+ /// compatibility. This is *not* equal to <see cref="Cardinality" />
+ /// </summary>
+ public virtual long Size()
+ {
+ return Capacity();
+ }
+
+ /// <summary>Returns true if there are no set bits </summary>
+ public virtual bool IsEmpty()
+ {
+ return Cardinality() == 0;
+ }
+
+ /// <summary>Expert: Gets or sets the long[] storing the bits </summary>
+ [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Performance", "CA1819:PropertiesShouldNotReturnArrays")]
+ public virtual long[] Bits
+ {
+ set { this.internalbits = value; }
+ get { return internalbits; }
+ }
+
+ /// <summary>Expert: gets or sets the number of longs in the array that are in use </summary>
+ public virtual int NumWords
+ {
+ get { return wlen; }
+ set { this.wlen = value; }
+ }
+
+
+ /// <summary>Returns true or false for the specified bit index. </summary>
+ public virtual bool Get(int index)
+ {
+ int i = index >> 6; // div 64
+ // signed shift will keep a negative index and force an
+ // array-index-out-of-bounds-exception, removing the need for an explicit check.
+ if (i >= internalbits.Length)
+ return false;
+
+ int bit = index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ return (internalbits[i] & bitmask) != 0;
+ }
+
+
+ /// <summary>Returns true or false for the specified bit index.
+ /// The index should be less than the OpenBitSet size
+ /// </summary>
+ public virtual bool FastGet(int index)
+ {
+ int i = index >> 6; // div 64
+ // signed shift will keep a negative index and force an
+ // array-index-out-of-bounds-exception, removing the need for an explicit check.
+ int bit = index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ return (internalbits[i] & bitmask) != 0;
+ }
+
+
+
+ /// <summary>Returns true or false for the specified bit index</summary>
+ public virtual bool Get(long index)
+ {
+ int i = (int) (index >> 6); // div 64
+ if (i >= internalbits.Length)
+ return false;
+ int bit = (int) index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ return (internalbits[i] & bitmask) != 0;
+ }
+
+ /// <summary>Returns true or false for the specified bit index.
+ /// The index should be less than the OpenBitSet size.
+ /// </summary>
+ public virtual bool FastGet(long index)
+ {
+ int i = (int) (index >> 6); // div 64
+ int bit = (int) index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ return (internalbits[i] & bitmask) != 0;
+ }
+
+ /*
+ // alternate implementation of get()
+ public boolean get1(int index) {
+ int i = index >> 6; // div 64
+ int bit = index & 0x3f; // mod 64
+ return ((bits[i]>>>bit) & 0x01) != 0;
+ // this does a long shift and a bittest (on x86) vs
+ // a long shift, and a long AND, (the test for zero is prob a no-op)
+ // testing on a P4 indicates this is slower than (bits[i] & bitmask) != 0;
+ }
+ */
+
+
+ /// <summary>returns 1 if the bit is set, 0 if not.
+ /// The index should be less than the OpenBitSet size
+ /// </summary>
+ public virtual int GetBit(int index)
+ {
+ int i = index >> 6; // div 64
+ int bit = index & 0x3f; // mod 64
+ return ((int )((ulong) (internalbits[i]) >> bit)) & 0x01;
+ }
+
+
+ /*
+ public boolean get2(int index) {
+ int word = index >> 6; // div 64
+ int bit = index & 0x0000003f; // mod 64
+ return (bits[word] << bit) < 0; // hmmm, this would work if bit order were reversed
+ // we could right shift and check for parity bit, if it was available to us.
+ }
+ */
+
+ /// <summary>sets a bit, expanding the set size if necessary </summary>
+ public virtual void Set(long index)
+ {
+ int wordNum = ExpandingWordNum(index);
+ int bit = (int) index & 0x3f;
+ long bitmask = 1L << bit;
+ internalbits[wordNum] |= bitmask;
+ }
+
+
+ /// <summary>Sets the bit at the specified index.
+ /// The index should be less than the OpenBitSet size.
+ /// </summary>
+ public virtual void FastSet(int index)
+ {
+ int wordNum = index >> 6; // div 64
+ int bit = index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ internalbits[wordNum] |= bitmask;
+ }
+
+ /// <summary>Sets the bit at the specified index.
+ /// The index should be less than the OpenBitSet size.
+ /// </summary>
+ public virtual void FastSet(long index)
+ {
+ int wordNum = (int) (index >> 6);
+ int bit = (int) index & 0x3f;
+ long bitmask = 1L << bit;
+ internalbits[wordNum] |= bitmask;
+ }
+
+ /// <summary>Sets a range of bits, expanding the set size if necessary
+ ///
+ /// </summary>
+ /// <param name="startIndex">lower index
+ /// </param>
+ /// <param name="endIndex">one-past the last bit to set
+ /// </param>
+ public virtual void Set(long startIndex, long endIndex)
+ {
+ if (endIndex <= startIndex)
+ return ;
+
+ int startWord = (int) (startIndex >> 6);
+
+ // since endIndex is one past the end, this is index of the last
+ // word to be changed.
+ int endWord = ExpandingWordNum(endIndex - 1);
+
+ long startmask = - 1L << (int) startIndex;
+ long endmask = (long) (0xffffffffffffffffUL >> (int) - endIndex); // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
+
+ if (startWord == endWord)
+ {
+ internalbits[startWord] |= (startmask & endmask);
+ return ;
+ }
+
+ internalbits[startWord] |= startmask;
+ for (int i = startWord + 1; i < endWord; i++)
+ internalbits[i] = -1L;
+ internalbits[endWord] |= endmask;
+ }
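+
+ /*
+ // Range semantics sketch (illustrative, not part of the original source):
+ OpenBitSet bits = new OpenBitSet(64);
+ bits.Set(3, 6);                          // endIndex is exclusive: sets bits 3, 4 and 5 only
+ long count = bits.Cardinality();         // 3
+ bits.Set(10, 10);                        // no-op, since endIndex <= startIndex
+ */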
+
+
+
+ protected internal virtual int ExpandingWordNum(long index)
+ {
+ int wordNum = (int) (index >> 6);
+ if (wordNum >= wlen)
+ {
+ EnsureCapacity(index + 1);
+ wlen = wordNum + 1;
+ }
+ return wordNum;
+ }
+
+
+ /// <summary>clears a bit.
+ /// The index should be less than the OpenBitSet size.
+ /// </summary>
+ public virtual void FastClear(int index)
+ {
+ int wordNum = index >> 6;
+ int bit = index & 0x03f;
+ long bitmask = 1L << bit;
+ internalbits[wordNum] &= ~ bitmask;
+ // hmmm, it takes one more instruction to clear than it does to set... any
+ // way to work around this? If there were only 63 bits per word, we could
+ // use a right shift of 10111111...111 in binary to position the 0 in the
+ // correct place (using sign extension).
+ // Could also use Long.rotateRight() or rotateLeft() *if* they were converted
+ // by the JVM into a native instruction.
+ // bits[word] &= Long.rotateLeft(0xfffffffe,bit);
+ }
+
+ /// <summary>clears a bit.
+ /// The index should be less than the OpenBitSet size.
+ /// </summary>
+ public virtual void FastClear(long index)
+ {
+ int wordNum = (int) (index >> 6); // div 64
+ int bit = (int) index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ internalbits[wordNum] &= ~ bitmask;
+ }
+
+ /// <summary>clears a bit, allowing access beyond the current set size without changing the size.</summary>
+ public virtual void Clear(long index)
+ {
+ int wordNum = (int) (index >> 6); // div 64
+ if (wordNum >= wlen)
+ return ;
+ int bit = (int) index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ internalbits[wordNum] &= ~ bitmask;
+ }
+
+ /// <summary>Clears a range of bits. Clearing past the end does not change the size of the set.
+ ///
+ /// </summary>
+ /// <param name="startIndex">lower index
+ /// </param>
+ /// <param name="endIndex">one-past the last bit to clear
+ /// </param>
+ public virtual void Clear(int startIndex, int endIndex)
+ {
+ if (endIndex <= startIndex)
+ return ;
+
+ int startWord = (startIndex >> 6);
+ if (startWord >= wlen)
+ return ;
+
+ // since endIndex is one past the end, this is index of the last
+ // word to be changed.
+ int endWord = ((endIndex - 1) >> 6);
+
+ long startmask = - 1L << startIndex;
+ long endmask = (long) (0xffffffffffffffffUL >> - endIndex); // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
+
+ // invert masks since we are clearing
+ startmask = ~ startmask;
+ endmask = ~ endmask;
+
+ if (startWord == endWord)
+ {
+ internalbits[startWord] &= (startmask | endmask);
+ return ;
+ }
+
+ internalbits[startWord] &= startmask;
+
+ int middle = System.Math.Min(wlen, endWord);
+ for (int i = startWord + 1; i < middle; i++)
+ internalbits[i] = 0L;
+ if (endWord < wlen)
+ {
+ internalbits[endWord] &= endmask;
+ }
+ }
+
+
+ /// <summary>Clears a range of bits. Clearing past the end does not change the size of the set.
+ ///
+ /// </summary>
+ /// <param name="startIndex">lower index
+ /// </param>
+ /// <param name="endIndex">one-past the last bit to clear
+ /// </param>
+ public virtual void Clear(long startIndex, long endIndex)
+ {
+ if (endIndex <= startIndex)
+ return ;
+
+ int startWord = (int) (startIndex >> 6);
+ if (startWord >= wlen)
+ return ;
+
+ // since endIndex is one past the end, this is index of the last
+ // word to be changed.
+ int endWord = (int) ((endIndex - 1) >> 6);
+
+ long startmask = - 1L << (int) startIndex;
+ long endmask = (long) (0xffffffffffffffffUL >> (int) - endIndex); // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
+
+ // invert masks since we are clearing
+ startmask = ~ startmask;
+ endmask = ~ endmask;
+
+ if (startWord == endWord)
+ {
+ internalbits[startWord] &= (startmask | endmask);
+ return ;
+ }
+
+ internalbits[startWord] &= startmask;
+
+ int middle = System.Math.Min(wlen, endWord);
+ for (int i = startWord + 1; i < middle; i++)
+ internalbits[i] = 0L;
+ if (endWord < wlen)
+ {
+ internalbits[endWord] &= endmask;
+ }
+ }
+
+
+
+ /// <summary>Sets a bit and returns the previous value.
+ /// The index should be less than the OpenBitSet size.
+ /// </summary>
+ public virtual bool GetAndSet(int index)
+ {
+ int wordNum = index >> 6; // div 64
+ int bit = index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ bool val = (internalbits[wordNum] & bitmask) != 0;
+ internalbits[wordNum] |= bitmask;
+ return val;
+ }
+
+ /// <summary>Sets a bit and returns the previous value.
+ /// The index should be less than the OpenBitSet size.
+ /// </summary>
+ public virtual bool GetAndSet(long index)
+ {
+ int wordNum = (int) (index >> 6); // div 64
+ int bit = (int) index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ bool val = (internalbits[wordNum] & bitmask) != 0;
+ internalbits[wordNum] |= bitmask;
+ return val;
+ }
+
+ /// <summary>flips a bit.
+ /// The index should be less than the OpenBitSet size.
+ /// </summary>
+ public virtual void FastFlip(int index)
+ {
+ int wordNum = index >> 6; // div 64
+ int bit = index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ internalbits[wordNum] ^= bitmask;
+ }
+
+ /// <summary>flips a bit.
+ /// The index should be less than the OpenBitSet size.
+ /// </summary>
+ public virtual void FastFlip(long index)
+ {
+ int wordNum = (int) (index >> 6); // div 64
+ int bit = (int) index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ internalbits[wordNum] ^= bitmask;
+ }
+
+ /// <summary>flips a bit, expanding the set size if necessary </summary>
+ public virtual void Flip(long index)
+ {
+ int wordNum = ExpandingWordNum(index);
+ int bit = (int) index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ internalbits[wordNum] ^= bitmask;
+ }
+
+ /// <summary>flips a bit and returns the resulting bit value.
+ /// The index should be less than the OpenBitSet size.
+ /// </summary>
+ public virtual bool FlipAndGet(int index)
+ {
+ int wordNum = index >> 6; // div 64
+ int bit = index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ internalbits[wordNum] ^= bitmask;
+ return (internalbits[wordNum] & bitmask) != 0;
+ }
+
+ /// <summary>flips a bit and returns the resulting bit value.
+ /// The index should be less than the OpenBitSet size.
+ /// </summary>
+ public virtual bool FlipAndGet(long index)
+ {
+ int wordNum = (int) (index >> 6); // div 64
+ int bit = (int) index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ internalbits[wordNum] ^= bitmask;
+ return (internalbits[wordNum] & bitmask) != 0;
+ }
+
+ /// <summary>Flips a range of bits, expanding the set size if necessary
+ ///
+ /// </summary>
+ /// <param name="startIndex">lower index
+ /// </param>
+ /// <param name="endIndex">one-past the last bit to flip
+ /// </param>
+ public virtual void Flip(long startIndex, long endIndex)
+ {
+ if (endIndex <= startIndex)
+ return ;
+ int startWord = (int) (startIndex >> 6);
+
+ // since endIndex is one past the end, this is index of the last
+ // word to be changed.
+ int endWord = ExpandingWordNum(endIndex - 1);
+
+ /* Grrr, java shifting wraps around so -1L>>>64 == -1
+ * for that reason, make sure not to use endmask if the bits to flip will
+ * be zero in the last word (redefine endWord to be the last changed...)
+ long startmask = -1L << (startIndex & 0x3f); // example: 11111...111000
+ long endmask = -1L >>> (64-(endIndex & 0x3f)); // example: 00111...111111
+ ***/
+
+ long startmask = - 1L << (int) startIndex;
+ long endmask = (long) (0xffffffffffffffffUL >> (int) - endIndex); // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
+
+ if (startWord == endWord)
+ {
+ internalbits[startWord] ^= (startmask & endmask);
+ return ;
+ }
+
+ internalbits[startWord] ^= startmask;
+
+ for (int i = startWord + 1; i < endWord; i++)
+ {
+ internalbits[i] = ~ internalbits[i];
+ }
+
+ internalbits[endWord] ^= endmask;
+ }
+
+
+ /*
+ public static int pop(long v0, long v1, long v2, long v3) {
+ // derived from pop_array by setting last four elems to 0.
+ // exchanges one pop() call for 10 elementary operations
+ // saving about 7 instructions... is there a better way?
+ long twosA=v0 & v1;
+ long ones=v0^v1;
+
+ long u2=ones^v2;
+ long twosB =(ones&v2)|(u2&v3);
+ ones=u2^v3;
+
+ long fours=(twosA&twosB);
+ long twos=twosA^twosB;
+
+ return (pop(fours)<<2)
+ + (pop(twos)<<1)
+ + pop(ones);
+
+ }
+ */
+
+
+ /// <returns> the number of set bits
+ /// </returns>
+ public virtual long Cardinality()
+ {
+ return BitUtil.Pop_array(internalbits, 0, wlen);
+ }
+
+ /// <summary>Returns the popcount or cardinality of the intersection of the two sets.
+ /// Neither set is modified.
+ /// </summary>
+ public static long IntersectionCount(OpenBitSet a, OpenBitSet b)
+ {
+ return BitUtil.Pop_intersect(a.internalbits, b.internalbits, 0, System.Math.Min(a.wlen, b.wlen));
+ }
+
+ /// <summary>Returns the popcount or cardinality of the union of the two sets.
+ /// Neither set is modified.
+ /// </summary>
+ public static long UnionCount(OpenBitSet a, OpenBitSet b)
+ {
+ long tot = BitUtil.Pop_union(a.internalbits, b.internalbits, 0, System.Math.Min(a.wlen, b.wlen));
+ if (a.wlen < b.wlen)
+ {
+ tot += BitUtil.Pop_array(b.internalbits, a.wlen, b.wlen - a.wlen);
+ }
+ else if (a.wlen > b.wlen)
+ {
+ tot += BitUtil.Pop_array(a.internalbits, b.wlen, a.wlen - b.wlen);
+ }
+ return tot;
+ }
+
+ /// <summary>Returns the popcount or cardinality of "a and not b"
+ /// or "intersection(a, not(b))".
+ /// Neither set is modified.
+ /// </summary>
+ public static long AndNotCount(OpenBitSet a, OpenBitSet b)
+ {
+ long tot = BitUtil.Pop_andnot(a.internalbits, b.internalbits, 0, System.Math.Min(a.wlen, b.wlen));
+ if (a.wlen > b.wlen)
+ {
+ tot += BitUtil.Pop_array(a.internalbits, b.wlen, a.wlen - b.wlen);
+ }
+ return tot;
+ }
+
+ /// <summary>Returns the popcount or cardinality of the exclusive-or of the two sets.
+ /// Neither set is modified.
+ /// </summary>
+ public static long XorCount(OpenBitSet a, OpenBitSet b)
+ {
+ long tot = BitUtil.Pop_xor(a.internalbits, b.internalbits, 0, System.Math.Min(a.wlen, b.wlen));
+ if (a.wlen < b.wlen)
+ {
+ tot += BitUtil.Pop_array(b.internalbits, a.wlen, b.wlen - a.wlen);
+ }
+ else if (a.wlen > b.wlen)
+ {
+ tot += BitUtil.Pop_array(a.internalbits, b.wlen, a.wlen - b.wlen);
+ }
+ return tot;
+ }
+
+
+ /// <summary>Returns the index of the first set bit starting at the index specified.
+ /// -1 is returned if there are no more set bits.
+ /// </summary>
+ public virtual int NextSetBit(int index)
+ {
+ int i = index >> 6;
+ if (i >= wlen)
+ return - 1;
+ int subIndex = index & 0x3f; // index within the word
+ long word = internalbits[i] >> subIndex; // skip all the bits to the right of index
+
+ if (word != 0)
+ {
+ return (i << 6) + subIndex + BitUtil.Ntz(word);
+ }
+
+ while (++i < wlen)
+ {
+ word = internalbits[i];
+ if (word != 0)
+ return (i << 6) + BitUtil.Ntz(word);
+ }
+
+ return - 1;
+ }
+
+ /// <summary>Returns the index of the first set bit starting at the index specified.
+ /// -1 is returned if there are no more set bits.
+ /// </summary>
+ public virtual long NextSetBit(long index)
+ {
+ int i = (int) (index >> 6);
+ if (i >= wlen)
+ return - 1;
+ int subIndex = (int) index & 0x3f; // index within the word
+ long word = (long) ((ulong) internalbits[i] >> subIndex); // skip all the bits to the right of index
+
+ if (word != 0)
+ {
+ return (((long) i) << 6) + (subIndex + BitUtil.Ntz(word));
+ }
+
+ while (++i < wlen)
+ {
+ word = internalbits[i];
+ if (word != 0)
+ return (((long) i) << 6) + BitUtil.Ntz(word);
+ }
+
+ return - 1;
+ }
+
+
+
+
+ public virtual System.Object Clone()
+ {
+ try
+ {
+ OpenBitSet obs = new OpenBitSet((long[]) internalbits.Clone(), wlen);
+ //obs.bits = new long[obs.bits.Length];
+ //obs.bits.CopyTo(obs.bits, 0); // hopefully an array clone is as fast(er) than arraycopy
+ return obs;
+ }
+ catch (System.Exception e)
+ {
+ throw new System.SystemException(e.Message, e);
+ }
+ }
+
+ /// <summary>this = this AND other </summary>
+ public virtual void Intersect(OpenBitSet other)
+ {
+ int newLen = System.Math.Min(this.wlen, other.wlen);
+ long[] thisArr = this.internalbits;
+ long[] otherArr = other.internalbits;
+ // testing against zero can be more efficient
+ int pos = newLen;
+ while (--pos >= 0)
+ {
+ thisArr[pos] &= otherArr[pos];
+ }
+ if (this.wlen > newLen)
+ {
+ // fill zeros from the new shorter length to the old length
+ for (int i = newLen; i < this.wlen; i++)
+ internalbits[i] = 0L;
+ }
+ this.wlen = newLen;
+ }
+
+ /// <summary>this = this OR other </summary>
+ public virtual void Union(OpenBitSet other)
+ {
+ int newLen = System.Math.Max(wlen, other.wlen);
+ EnsureCapacityWords(newLen);
+
+ long[] thisArr = this.internalbits;
+ long[] otherArr = other.internalbits;
+ int pos = System.Math.Min(wlen, other.wlen);
+ while (--pos >= 0)
+ {
+ thisArr[pos] |= otherArr[pos];
+ }
+ if (this.wlen < newLen)
+ {
+ Array.Copy(otherArr, this.wlen, thisArr, this.wlen, newLen - this.wlen);
+ }
+ this.wlen = newLen;
+ }
+
+
+ /// <summary>Remove all elements set in other. this = this AND_NOT other </summary>
+ public virtual void Remove(OpenBitSet other)
+ {
+ int idx = System.Math.Min(wlen, other.wlen);
+ long[] thisArr = this.internalbits;
+ long[] otherArr = other.internalbits;
+ while (--idx >= 0)
+ {
+ thisArr[idx] &= ~ otherArr[idx];
+ }
+ }
+
+ /// <summary>this = this XOR other </summary>
+ public virtual void Xor(OpenBitSet other)
+ {
+ int newLen = System.Math.Max(wlen, other.wlen);
+ EnsureCapacityWords(newLen);
+
+ long[] thisArr = this.internalbits;
+ long[] otherArr = other.internalbits;
+ int pos = System.Math.Min(wlen, other.wlen);
+ while (--pos >= 0)
+ {
+ thisArr[pos] ^= otherArr[pos];
+ }
+ if (this.wlen < newLen)
+ {
+ Array.Copy(otherArr, this.wlen, thisArr, this.wlen, newLen - this.wlen);
+ }
+ this.wlen = newLen;
+ }
+
+
+ // some BitSet compatibility methods
+
+ /// <summary>see <see cref="Intersect" /></summary>
+ public virtual void And(OpenBitSet other)
+ {
+ Intersect(other);
+ }
+
+ /// <summary>see <see cref="Union" /></summary>
+ public virtual void Or(OpenBitSet other)
+ {
+ Union(other);
+ }
+
+ /// <summary>see <see cref="Remove" /></summary>
+ public virtual void AndNot(OpenBitSet other)
+ {
+ Remove(other);
+ }
+
+ /// <summary>returns true if the sets have any elements in common </summary>
+ public virtual bool Intersects(OpenBitSet other)
+ {
+ int pos = System.Math.Min(this.wlen, other.wlen);
+ long[] thisArr = this.internalbits;
+ long[] otherArr = other.internalbits;
+ while (--pos >= 0)
+ {
+ if ((thisArr[pos] & otherArr[pos]) != 0)
+ return true;
+ }
+ return false;
+ }
+
+
+
+ /// <summary>Expand the long[] with the size given as a number of words (64 bit longs).
+ /// <see cref="NumWords" /> is unchanged by this call.
+ /// </summary>
+ public virtual void EnsureCapacityWords(int numWords)
+ {
+ if (internalbits.Length < numWords)
+ {
+ internalbits = ArrayUtil.Grow(internalbits, numWords);
+ }
+ }
+
+ /// <summary>Ensure that the long[] is big enough to hold numBits, expanding it if necessary.
+ /// <see cref="NumWords" /> is unchanged by this call.
+ /// </summary>
+ public virtual void EnsureCapacity(long numBits)
+ {
+ EnsureCapacityWords(Bits2words(numBits));
+ }
+
+ /// <summary>Lowers numWords, the number of words in use,
+ /// by checking for trailing zero words.
+ /// </summary>
+ public virtual void TrimTrailingZeros()
+ {
+ int idx = wlen - 1;
+ while (idx >= 0 && internalbits[idx] == 0)
+ idx--;
+ wlen = idx + 1;
+ }
+
+ /// <summary>returns the number of 64 bit words it would take to hold numBits </summary>
+ public static int Bits2words(long numBits)
+ {
+ return (int) ((((numBits - 1) >> 6)) + 1);
+ }
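+
+ /*
+ // Worked examples (illustrative, not part of the original source):
+ // Bits2words(0) == 0, Bits2words(1) == 1, Bits2words(64) == 1, Bits2words(65) == 2
+ */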
+
+
+ /// <summary>returns true if both sets have the same bits set </summary>
+ public override bool Equals(System.Object o)
+ {
+ if (this == o)
+ return true;
+ if (!(o is OpenBitSet))
+ return false;
+ OpenBitSet a;
+ OpenBitSet b = (OpenBitSet) o;
+ // make a the larger set.
+ if (b.wlen > this.wlen)
+ {
+ a = b; b = this;
+ }
+ else
+ {
+ a = this;
+ }
+
+ // check for any set bits out of the range of b
+ for (int i = a.wlen - 1; i >= b.wlen; i--)
+ {
+ if (a.internalbits[i] != 0)
+ return false;
+ }
+
+ for (int i = b.wlen - 1; i >= 0; i--)
+ {
+ if (a.internalbits[i] != b.internalbits[i])
+ return false;
+ }
+
+ return true;
+ }
+
+ public override int GetHashCode()
+ {
+ // Start with a zero hash and use a mix that results in zero if the input is zero.
+ // This effectively truncates trailing zeros without an explicit check.
+ long h = 0;
+ for (int i = internalbits.Length; --i >= 0; )
+ {
+ h ^= internalbits[i];
+ h = (h << 1) | (Number.URShift(h, 63)); // rotate left
+ }
+ // fold leftmost bits into right and add a constant to prevent
+ // empty sets from returning 0, which is too common.
+ return (int)(((h >> 32) ^ h) + 0x98761234);
+ }
+
+
+ }
+} \ No newline at end of file
diff --git a/src/core/Util/OpenBitSetDISI.cs b/src/core/Util/OpenBitSetDISI.cs
new file mode 100644
index 0000000..41d9fa9
--- /dev/null
+++ b/src/core/Util/OpenBitSetDISI.cs
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+
+namespace Lucene.Net.Util
+{
+
+ [Serializable]
+ public class OpenBitSetDISI:OpenBitSet
+ {
+
+ /// <summary>Construct an OpenBitSetDISI with its bits set
+ /// from the doc ids of the given DocIdSetIterator.
+ /// Also give a maximum size one larger than the largest doc id for which a
+ /// bit may ever be set on this OpenBitSetDISI.
+ /// </summary>
+ public OpenBitSetDISI(DocIdSetIterator disi, int maxSize):base(maxSize)
+ {
+ InPlaceOr(disi);
+ }
+
+ /// <summary>Construct an OpenBitSetDISI with no bits set, and a given maximum size
+ /// one larger than the largest doc id for which a bit may ever be set
+ /// on this OpenBitSetDISI.
+ /// </summary>
+ public OpenBitSetDISI(int maxSize):base(maxSize)
+ {
+ }
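+
+ /*
+ // Illustrative usage sketch (not part of the original source); "filterDocIdSet",
+ // "otherDocIdSet" and "reader" are assumed placeholders:
+ OpenBitSetDISI acc = new OpenBitSetDISI(filterDocIdSet.Iterator(), reader.MaxDoc);
+ acc.InPlaceAnd(otherDocIdSet.Iterator());   // keep only doc ids present in both sets
+ */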
+
+ /// <summary> Perform an inplace OR with the doc ids from a given DocIdSetIterator,
+ /// setting the bit for each such doc id.
+ /// These doc ids should be smaller than the maximum size passed to the
+ /// constructor.
+ /// </summary>
+ public virtual void InPlaceOr(DocIdSetIterator disi)
+ {
+ int doc;
+ long size = Size();
+ while ((doc = disi.NextDoc()) < size)
+ {
+ FastSet(doc);
+ }
+ }
+
+ /// <summary> Perform an inplace AND with the doc ids from a given DocIdSetIterator,
+ /// leaving only the bits set for which the doc ids are in common.
+ /// These doc ids should be smaller than the maximum size passed to the
+ /// constructor.
+ /// </summary>
+ public virtual void InPlaceAnd(DocIdSetIterator disi)
+ {
+ int bitSetDoc = NextSetBit(0);
+ int disiDoc;
+ while (bitSetDoc != - 1 && (disiDoc = disi.Advance(bitSetDoc)) != DocIdSetIterator.NO_MORE_DOCS)
+ {
+ Clear(bitSetDoc, disiDoc);
+ bitSetDoc = NextSetBit(disiDoc + 1);
+ }
+ if (bitSetDoc != - 1)
+ {
+ Clear(bitSetDoc, Size());
+ }
+ }
+
+ /// <summary> Perform an inplace NOT with the doc ids from a given DocIdSetIterator,
+ /// clearing all the bits for each such doc id.
+ /// These doc ids should be smaller than the maximum size passed to the
+ /// constructor.
+ /// </summary>
+ public virtual void InPlaceNot(DocIdSetIterator disi)
+ {
+ int doc;
+ long size = Size();
+ while ((doc = disi.NextDoc()) < size)
+ {
+ FastClear(doc);
+ }
+ }
+
+ /// <summary> Perform an inplace XOR with the doc ids from a given DocIdSetIterator,
+ /// flipping all the bits for each such doc id.
+ /// These doc ids should be smaller than the maximum size passed to the
+ /// constructor.
+ /// </summary>
+ public virtual void InPlaceXor(DocIdSetIterator disi)
+ {
+ int doc;
+ long size = Size();
+ while ((doc = disi.NextDoc()) < size)
+ {
+ FastFlip(doc);
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Util/OpenBitSetIterator.cs b/src/core/Util/OpenBitSetIterator.cs
new file mode 100644
index 0000000..110dba6
--- /dev/null
+++ b/src/core/Util/OpenBitSetIterator.cs
@@ -0,0 +1,233 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+
+namespace Lucene.Net.Util
+{
+
+ /// <summary>An iterator to iterate over set bits in an OpenBitSet.
+ /// This is faster than nextSetBit() for iterating over the complete set of bits,
+ /// especially when the density of the bits set is high.
+ ///
+ /// </summary>
+ /// <version> $Id$
+ /// </version>
+ public class OpenBitSetIterator:DocIdSetIterator
+ {
+
+ // The General Idea: instead of having an array per byte that has
+ // the offsets of the next set bit, that array could be
+ // packed inside a 32 bit integer (8 4 bit numbers). That
+ // should be faster than accessing an array for each index, and
+ // the total array size is kept smaller (256*sizeof(int))=1K
+ // NOTE: Removed protected access for CLS-Compliance
+ /*protected*/ internal static readonly uint[] bitlist = new uint[]
+ {
+ 0x0, 0x1, 0x2, 0x21, 0x3, 0x31, 0x32, 0x321, 0x4, 0x41,
+ 0x42, 0x421, 0x43, 0x431, 0x432, 0x4321, 0x5, 0x51,
+ 0x52, 0x521, 0x53, 0x531, 0x532, 0x5321, 0x54, 0x541,
+ 0x542, 0x5421, 0x543, 0x5431, 0x5432, 0x54321, 0x6,
+ 0x61, 0x62, 0x621, 0x63, 0x631, 0x632, 0x6321, 0x64,
+ 0x641, 0x642, 0x6421, 0x643, 0x6431, 0x6432, 0x64321,
+ 0x65, 0x651, 0x652, 0x6521, 0x653, 0x6531, 0x6532,
+ 0x65321, 0x654, 0x6541, 0x6542, 0x65421, 0x6543,
+ 0x65431, 0x65432, 0x654321, 0x7, 0x71, 0x72, 0x721,
+ 0x73, 0x731, 0x732, 0x7321, 0x74, 0x741, 0x742, 0x7421,
+ 0x743, 0x7431, 0x7432, 0x74321, 0x75, 0x751, 0x752,
+ 0x7521, 0x753, 0x7531, 0x7532, 0x75321, 0x754, 0x7541,
+ 0x7542, 0x75421, 0x7543, 0x75431, 0x75432, 0x754321,
+ 0x76, 0x761, 0x762, 0x7621, 0x763, 0x7631, 0x7632,
+ 0x76321, 0x764, 0x7641, 0x7642, 0x76421, 0x7643,
+ 0x76431, 0x76432, 0x764321, 0x765, 0x7651, 0x7652,
+ 0x76521, 0x7653, 0x76531, 0x76532, 0x765321, 0x7654,
+ 0x76541, 0x76542, 0x765421, 0x76543, 0x765431, 0x765432
+ , 0x7654321, 0x8, 0x81, 0x82, 0x821, 0x83, 0x831, 0x832
+ , 0x8321, 0x84, 0x841, 0x842, 0x8421, 0x843, 0x8431,
+ 0x8432, 0x84321, 0x85, 0x851, 0x852, 0x8521, 0x853,
+ 0x8531, 0x8532, 0x85321, 0x854, 0x8541, 0x8542, 0x85421
+ , 0x8543, 0x85431, 0x85432, 0x854321, 0x86, 0x861,
+ 0x862, 0x8621, 0x863, 0x8631, 0x8632, 0x86321, 0x864,
+ 0x8641, 0x8642, 0x86421, 0x8643, 0x86431, 0x86432,
+ 0x864321, 0x865, 0x8651, 0x8652, 0x86521, 0x8653,
+ 0x86531, 0x86532, 0x865321, 0x8654, 0x86541, 0x86542,
+ 0x865421, 0x86543, 0x865431, 0x865432, 0x8654321, 0x87,
+ 0x871, 0x872, 0x8721, 0x873, 0x8731, 0x8732, 0x87321,
+ 0x874, 0x8741, 0x8742, 0x87421, 0x8743, 0x87431,
+ 0x87432, 0x874321, 0x875, 0x8751, 0x8752, 0x87521,
+ 0x8753, 0x87531, 0x87532, 0x875321, 0x8754, 0x87541,
+ 0x87542, 0x875421, 0x87543, 0x875431, 0x875432,
+ 0x8754321, 0x876, 0x8761, 0x8762, 0x87621, 0x8763,
+ 0x87631, 0x87632, 0x876321, 0x8764, 0x87641, 0x87642,
+ 0x876421, 0x87643, 0x876431, 0x876432, 0x8764321,
+ 0x8765, 0x87651, 0x87652, 0x876521, 0x87653, 0x876531,
+ 0x876532, 0x8765321, 0x87654,
+ 0x876541, 0x876542, 0x8765421, 0x876543, 0x8765431,
+ 0x8765432, 0x87654321
+ };
+ /// <summary>** the python code that generated bitlist
+ /// def bits2int(val):
+ /// arr=0
+ /// for shift in range(8,0,-1):
+ /// if val &amp; 0x80:
+ /// arr = (arr &lt;&lt; 4) | shift
+ /// val = val &lt;&lt; 1
+ /// return arr
+ /// def int_table():
+ /// tbl = [ hex(bits2int(val)).strip('L') for val in range(256) ]
+ /// return ','.join(tbl)
+ /// ****
+ /// </summary>
+
+ // hmmm, what about an iterator that finds zeros though,
+ // or a reverse iterator... should they be separate classes
+ // for efficiency, or have a common root interface? (or
+ // maybe both? could ask for a SetBitsIterator, etc...
+
+ private readonly long[] arr;
+ private readonly int words;
+ private int i = - 1;
+ private long word;
+ private int wordShift;
+ private int indexArray;
+ private int curDocId = - 1;
+
+ public OpenBitSetIterator(OpenBitSet obs):this(obs.Bits, obs.NumWords)
+ {
+ }
+
+ public OpenBitSetIterator(long[] bits, int numWords)
+ {
+ arr = bits;
+ words = numWords;
+ }
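+
+ /*
+ // Illustrative usage sketch (not part of the original source): iterate the set
+ // bits of an OpenBitSet "bits" as ascending doc ids.
+ OpenBitSetIterator it = new OpenBitSetIterator(bits);
+ for (int doc = it.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.NextDoc())
+ {
+     // process doc
+ }
+ */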
+
+ // 64 bit shifts
+ private void Shift()
+ {
+ if ((int) word == 0)
+ {
+ wordShift += 32; word = (long) ((ulong) word >> 32);
+ }
+ if ((word & 0x0000FFFF) == 0)
+ {
+ wordShift += 16; word = (long) ((ulong) word >> 16);
+ }
+ if ((word & 0x000000FF) == 0)
+ {
+ wordShift += 8; word = (long) ((ulong) word >> 8);
+ }
+ indexArray = (int) bitlist[word & 0xff];
+ }
+
+ /*/// <summary>** alternate shift implementations
+ /// // 32 bit shifts, but a long shift needed at the end
+ /// private void shift2() {
+ /// int y = (int)word;
+ /// if (y==0) {wordShift +=32; y = (int)(word >>>32); }
+ /// if ((y & 0x0000FFFF) == 0) { wordShift +=16; y>>>=16; }
+ /// if ((y & 0x000000FF) == 0) { wordShift +=8; y>>>=8; }
+ /// indexArray = bitlist[y & 0xff];
+ /// word >>>= (wordShift +1);
+ /// }
+ /// private void shift3() {
+ /// int lower = (int)word;
+ /// int lowByte = lower & 0xff;
+ /// if (lowByte != 0) {
+ /// indexArray=bitlist[lowByte];
+ /// return;
+ /// }
+ /// shift();
+ /// }
+ /// ****
+ /// </summary>*/
+
+ public override int NextDoc()
+ {
+ if (indexArray == 0)
+ {
+ if (word != 0)
+ {
+ word = (long) ((ulong) word >> 8);
+ wordShift += 8;
+ }
+
+ while (word == 0)
+ {
+ if (++i >= words)
+ {
+ return curDocId = NO_MORE_DOCS;
+ }
+ word = arr[i];
+ wordShift = - 1; // loop invariant code motion should move this
+ }
+
+ // after the first time, should I go with a linear search, or
+ // stick with the binary search in shift?
+ Shift();
+ }
+
+ int bitIndex = (indexArray & 0x0f) + wordShift;
+ indexArray = (int) ((uint) indexArray >> 4);
+ // should i<<6 be cached as a separate variable?
+ // it would only save one cycle in the best circumstances.
+ return curDocId = (i << 6) + bitIndex;
+ }
+
+ public override int Advance(int target)
+ {
+ indexArray = 0;
+ i = target >> 6;
+ if (i >= words)
+ {
+ word = 0; // setup so next() will also return -1
+ return curDocId = NO_MORE_DOCS;
+ }
+ wordShift = target & 0x3f;
+ word = (long) ((ulong) arr[i] >> wordShift);
+ if (word != 0)
+ {
+ wordShift--; // compensate for 1 based arrIndex
+ }
+ else
+ {
+ while (word == 0)
+ {
+ if (++i >= words)
+ {
+ return curDocId = NO_MORE_DOCS;
+ }
+ word = arr[i];
+ }
+ wordShift = - 1;
+ }
+
+ Shift();
+
+ int bitIndex = (indexArray & 0x0f) + wordShift;
+ indexArray = (int) ((uint) indexArray >> 4);
+ // should i<<6 be cached as a separate variable?
+ // it would only save one cycle in the best circumstances.
+ return curDocId = (i << 6) + bitIndex;
+ }
+
+ public override int DocID()
+ {
+ return curDocId;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Util/PriorityQueue.cs b/src/core/Util/PriorityQueue.cs
new file mode 100644
index 0000000..77a682b
--- /dev/null
+++ b/src/core/Util/PriorityQueue.cs
@@ -0,0 +1,280 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Util
+{
+
+ /// <summary>A PriorityQueue maintains a partial ordering of its elements such that the
+ /// least element can always be found in constant time. Put()'s and pop()'s
+ /// require log(size) time.
+ ///
+ /// <p/><b>NOTE</b>: This class pre-allocates a full array of
+ /// length <c>maxSize+1</c>, in <see cref="Initialize" />.
+ ///
+ /// </summary>
+ // TODO: T needs to be able to return null. Behavior might be unexpected otherwise, since it returns default(T)
+ // I only see a non-nullable type used in PriorityQueue in the tests. may be possible to re-write tests to
+ // use an IComparable class, and this can be changed back to constraining on class, to return null, or should
+ // we leave as is?
+ public abstract class PriorityQueue<T> //where T : class
+ {
+ private int size;
+ private int maxSize;
+ protected internal T[] heap;
+
+ /// <summary>Determines the ordering of objects in this priority queue. Subclasses
+ /// must define this one method.
+ /// </summary>
+ public abstract bool LessThan(T a, T b);
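+
+ /*
+ // Minimal subclass sketch (illustrative, not part of the original source):
+ // a queue whose top is always the shortest string seen.
+ class ShortestStringQueue : PriorityQueue<string>
+ {
+     public ShortestStringQueue(int maxSize) { Initialize(maxSize); }
+     public override bool LessThan(string a, string b) { return a.Length < b.Length; }
+ }
+
+ ShortestStringQueue pq = new ShortestStringQueue(10);
+ pq.Add("bb");
+ pq.Add("a");
+ pq.Add("ccc");
+ string top = pq.Top();   // "a", the least element under LessThan
+ */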
+
+ /// <summary> This method can be overridden by extending classes to return a sentinel
+ /// object which will be used by <see cref="Initialize(int)" /> to fill the queue, so
+ /// that the code which uses that queue can always assume it's full and only
+ /// change the top without attempting to insert any new object.<br/>
+ ///
+ /// Those sentinel values should always compare worse than any non-sentinel
+ /// value (i.e., <see cref="LessThan" /> should always favor the
+ /// non-sentinel values).<br/>
+ ///
+ /// By default, this method returns null, which means the queue will not be
+ /// filled with sentinel values. Otherwise, the value returned will be used to
+ /// pre-populate the queue with sentinel values.<br/>
+ ///
+ /// If this method is extended to return a non-null value, then the following
+ /// usage pattern is recommended:
+ ///
+ /// <code>
+ /// // extends getSentinelObject() to return a non-null value.
+ /// PriorityQueue&lt;MyObject&gt; pq = new MyQueue&lt;MyObject&gt;(numHits);
+ /// // save the 'top' element, which is guaranteed to not be null.
+ /// MyObject pqTop = pq.top();
+ /// &lt;...&gt;
+ /// // now in order to add a new element, which is 'better' than top (after
+ /// // you've verified it is better), it is as simple as:
+ /// pqTop.change();
+ /// pqTop = pq.updateTop();
+ /// </code>
+ ///
+ /// <b>NOTE:</b> if this method returns a non-null value, it will be called by
+ /// <see cref="Initialize(int)" /> <see cref="Size()" /> times, relying on a new object to
+ /// be returned and will not check if it's null again. Therefore you should
+ /// ensure any call to this method creates a new instance and behaves
+ /// consistently, e.g., it cannot return null if it previously returned
+ /// non-null.
+ ///
+ /// </summary>
+ /// <value> the sentinel object to use to pre-populate the queue, or null if sentinel objects are not supported. </value>
+ protected internal virtual T SentinelObject
+ {
+ get { return default(T); }
+ }
+
+ /// <summary>Subclass constructors must call this. </summary>
+ protected internal void Initialize(int maxSize)
+ {
+ size = 0;
+ int heapSize;
+ if (0 == maxSize)
+ // We allocate 1 extra to avoid if statement in top()
+ heapSize = 2;
+ else
+ {
+ if (maxSize == Int32.MaxValue)
+ {
+ // Don't wrap heapSize to -1, in this case, which
+ // causes a confusing NegativeArraySizeException.
+ // Note that very likely this will simply then hit
+ // an OOME, but at least that's more indicative to
+ // caller that this value is too big. We don't +1
+ // in this case, but it's very unlikely in practice
+ // one will actually insert this many objects into
+ // the PQ:
+ heapSize = Int32.MaxValue;
+ }
+ else
+ {
+ // NOTE: we add +1 because all access to heap is
+ // 1-based not 0-based. heap[0] is unused.
+ heapSize = maxSize + 1;
+ }
+ }
+ heap = new T[heapSize];
+ this.maxSize = maxSize;
+
+ // If sentinel objects are supported, populate the queue with them
+ T sentinel = SentinelObject;
+ if (sentinel != null)
+ {
+ heap[1] = sentinel;
+ for (int i = 2; i < heap.Length; i++)
+ {
+ heap[i] = SentinelObject;
+ }
+ size = maxSize;
+ }
+ }
+
+ /// <summary>
+ /// Adds an Object to a PriorityQueue in log(size) time. If one tries to add
+ /// more objects than the maxSize passed to <see cref="Initialize(int)" />, an
+ /// <see cref="System.IndexOutOfRangeException" /> is thrown.
+ /// </summary>
+ /// <returns> the new 'top' element in the queue.
+ /// </returns>
+ public T Add(T element)
+ {
+ size++;
+ heap[size] = element;
+ UpHeap();
+ return heap[1];
+ }
+
+ /// <summary> Adds an Object to a PriorityQueue in log(size) time.
+ /// It returns the object (if any) that was
+ /// dropped off the heap because it was full. This can be
+ /// the given parameter (in case it is smaller than the
+ /// full heap's minimum, and couldn't be added), or another
+ /// object that was previously the smallest value in the
+ /// heap and now has been replaced by a larger one, or null
+ /// if the queue wasn't yet full with maxSize elements.
+ /// </summary>
+ public virtual T InsertWithOverflow(T element)
+ {
+ if (size < maxSize)
+ {
+ Add(element);
+ return default(T);
+ }
+ else if (size > 0 && !LessThan(element, heap[1]))
+ {
+ T ret = heap[1];
+ heap[1] = element;
+ UpdateTop();
+ return ret;
+ }
+ else
+ {
+ return element;
+ }
+ }
+
+ /// <summary>Returns the least element of the PriorityQueue in constant time. </summary>
+ public T Top()
+ {
+ // We don't need to check size here: if maxSize is 0,
+ // then heap is length 2 array with both entries null.
+ // If size is 0 then heap[1] is already null.
+ return heap[1];
+ }
+
+ /// <summary>
+ /// Removes and returns the least element of the
+ /// PriorityQueue in log(size) time.
+ /// </summary>
+ public T Pop()
+ {
+ if (size > 0)
+ {
+ T result = heap[1]; // save first value
+ heap[1] = heap[size]; // move last to first
+ heap[size] = default(T); // permit GC of objects
+ size--;
+ DownHeap(); // adjust heap
+ return result;
+ }
+ else
+ return default(T);
+ }
+
+ /// <summary> Should be called when the Object at top changes values.
+ /// Still log(n) worst case, but it's at least twice as fast to
+ /// <code>
+ /// pq.top().change();
+ /// pq.updateTop();
+ /// </code>
+ /// instead of
+ /// <code>
+ /// o = pq.pop();
+ /// o.change();
+ /// pq.push(o);
+ /// </code>
+ /// </summary>
+ /// <returns> the new 'top' element.</returns>
+ public T UpdateTop()
+ {
+ DownHeap();
+ return heap[1];
+ }
+
+ /// <summary>Returns the number of elements currently stored in the PriorityQueue. </summary>
+ public int Size()
+ {
+ return size;
+ }
+
+ /// <summary>Removes all entries from the PriorityQueue. </summary>
+ public void Clear()
+ {
+ for (int i = 0; i <= size; i++)
+ {
+ heap[i] = default(T);
+ }
+ size = 0;
+ }
+
+ private void UpHeap()
+ {
+ int i = size;
+ T node = heap[i]; // save bottom node
+ int j = Number.URShift(i, 1);
+ while (j > 0 && LessThan(node, heap[j]))
+ {
+ heap[i] = heap[j]; // shift parents down
+ i = j;
+ j = Number.URShift(j, 1);
+ }
+ heap[i] = node; // install saved node
+ }
+
+ private void DownHeap()
+ {
+ int i = 1;
+ T node = heap[i]; // save top node
+ int j = i << 1; // find smaller child
+ int k = j + 1;
+ if (k <= size && LessThan(heap[k], heap[j]))
+ {
+ j = k;
+ }
+ while (j <= size && LessThan(heap[j], node))
+ {
+ heap[i] = heap[j]; // shift up child
+ i = j;
+ j = i << 1;
+ k = j + 1;
+ if (k <= size && LessThan(heap[k], heap[j]))
+ {
+ j = k;
+ }
+ }
+ heap[i] = node; // install saved node
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Util/RamUsageEstimator.cs b/src/core/Util/RamUsageEstimator.cs
new file mode 100644
index 0000000..343f7bb
--- /dev/null
+++ b/src/core/Util/RamUsageEstimator.cs
@@ -0,0 +1,220 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Util
+{
+
+ /// <summary> Estimates the size of a given Object using a given MemoryModel for primitive
+ /// size information.
+ ///
+ /// Resource Usage:
+ ///
+ /// Internally uses a Map to temporarily hold a reference to every
+ /// object seen.
+ ///
+ /// If checkInterned is true, all Strings checked will be interned, but those
+ /// that were not already interned will be released for GC when the
+ /// estimate is complete.
+ /// </summary>
+ public sealed class RamUsageEstimator
+ {
+ private MemoryModel memoryModel;
+
+ private IDictionary<object, object> seen;
+
+ private int refSize;
+ private int arraySize;
+ private int classSize;
+
+ private bool checkInterned;
+
+ /// <summary> Constructs this object with an AverageGuessMemoryModel and
+ /// checkInterned = true.
+ /// </summary>
+ public RamUsageEstimator():this(new AverageGuessMemoryModel())
+ {
+ }
+
+ /// <param name="checkInterned">check if Strings are interned and don't add to size
+ /// if they are. Defaults to true but if you know the objects you are checking
+ /// won't likely contain many interned Strings, it will be faster to turn off
+ /// intern checking.
+ /// </param>
+ public RamUsageEstimator(bool checkInterned):this(new AverageGuessMemoryModel(), checkInterned)
+ {
+ }
+
+ /// <param name="memoryModel">MemoryModel to use for primitive object sizes.
+ /// </param>
+ public RamUsageEstimator(MemoryModel memoryModel):this(memoryModel, true)
+ {
+ }
+
+ /// <param name="memoryModel">MemoryModel to use for primitive object sizes.
+ /// </param>
+ /// <param name="checkInterned">check if Strings are interned and don't add to size
+ /// if they are. Defaults to true but if you know the objects you are checking
+ /// won't likely contain many interned Strings, it will be faster to turn off
+ /// intern checking.
+ /// </param>
+ public RamUsageEstimator(MemoryModel memoryModel, bool checkInterned)
+ {
+ this.memoryModel = memoryModel;
+ this.checkInterned = checkInterned;
+ // Use Map rather than Set so that we can use an IdentityHashMap - not
+ // seeing an IdentityHashSet
+ seen = new IdentityDictionary<object, object>(64);
+ this.refSize = memoryModel.ReferenceSize;
+ this.arraySize = memoryModel.ArraySize;
+ this.classSize = memoryModel.ClassSize;
+ }
+
+ public long EstimateRamUsage(System.Object obj)
+ {
+ long size = Size(obj);
+ seen.Clear();
+ return size;
+ }
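+
+ /*
+ // Illustrative usage sketch (not part of the original source); "someObject"
+ // is an assumed placeholder:
+ RamUsageEstimator estimator = new RamUsageEstimator();   // AverageGuessMemoryModel, intern checking on
+ long bytes = estimator.EstimateRamUsage(someObject);
+ string pretty = RamUsageEstimator.HumanReadableUnits(bytes, System.Globalization.CultureInfo.InvariantCulture);
+ */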
+
+ private long Size(System.Object obj)
+ {
+ if (obj == null)
+ {
+ return 0;
+ }
+ // interned not part of this object
+ if (checkInterned && obj is System.String && obj == (System.Object) String.Intern(((System.String) obj)))
+ {
+ // interned string will be eligible
+ // for GC on
+ // estimateRamUsage(Object) return
+ return 0;
+ }
+
+ // skip if we have seen before
+ if (seen.ContainsKey(obj))
+ {
+ return 0;
+ }
+
+ // add to seen
+ seen[obj] = null;
+
+ System.Type clazz = obj.GetType();
+ if (clazz.IsArray)
+ {
+ return SizeOfArray(obj);
+ }
+
+ long size = 0;
+
+ // walk type hierarchy
+ while (clazz != null)
+ {
+ System.Reflection.FieldInfo[] fields = clazz.GetFields(System.Reflection.BindingFlags.Instance | System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Public | System.Reflection.BindingFlags.DeclaredOnly | System.Reflection.BindingFlags.Static);
+ for (int i = 0; i < fields.Length; i++)
+ {
+ if (fields[i].IsStatic)
+ {
+ continue;
+ }
+
+ if (fields[i].FieldType.IsPrimitive)
+ {
+ size += memoryModel.GetPrimitiveSize(fields[i].FieldType);
+ }
+ else
+ {
+ size += refSize;
+ fields[i].GetType();
+ try
+ {
+ System.Object value_Renamed = fields[i].GetValue(obj);
+ if (value_Renamed != null)
+ {
+ size += Size(value_Renamed);
+ }
+ }
+ catch (System.UnauthorizedAccessException)
+ {
+ // ignore for now?
+ }
+ }
+ }
+ clazz = clazz.BaseType;
+ }
+ size += classSize;
+ return size;
+ }
+
+ private long SizeOfArray(System.Object obj)
+ {
+ int len = ((System.Array) obj).Length;
+ if (len == 0)
+ {
+ return 0;
+ }
+ long size = arraySize;
+ System.Type arrayElementClazz = obj.GetType().GetElementType();
+ if (arrayElementClazz.IsPrimitive)
+ {
+ size += len * memoryModel.GetPrimitiveSize(arrayElementClazz);
+ }
+ else
+ {
+ for (int i = 0; i < len; i++)
+ {
+ size += refSize + Size(((System.Array) obj).GetValue(i));
+ }
+ }
+
+ return size;
+ }
+
+ private const long ONE_KB = 1024;
+ private static readonly long ONE_MB = ONE_KB * ONE_KB;
+ private static readonly long ONE_GB = ONE_KB * ONE_MB;
+
+ /// <summary> Return good default units based on byte size.</summary>
+ public static System.String HumanReadableUnits(long bytes, System.IFormatProvider df)
+ {
+ System.String newSizeAndUnits;
+
+ if (bytes / ONE_GB > 0)
+ {
+ newSizeAndUnits = System.Convert.ToString(((float) bytes / ONE_GB), df) + " GB";
+ }
+ else if (bytes / ONE_MB > 0)
+ {
+ newSizeAndUnits = System.Convert.ToString((float) bytes / ONE_MB, df) + " MB";
+ }
+ else if (bytes / ONE_KB > 0)
+ {
+ newSizeAndUnits = System.Convert.ToString((float) bytes / ONE_KB, df) + " KB";
+ }
+ else
+ {
+ newSizeAndUnits = System.Convert.ToString(bytes) + " bytes";
+ }
+
+ return newSizeAndUnits;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Util/ReaderUtil.cs b/src/core/Util/ReaderUtil.cs
new file mode 100644
index 0000000..7c716eb
--- /dev/null
+++ b/src/core/Util/ReaderUtil.cs
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Support;
+using IndexReader = Lucene.Net.Index.IndexReader;
+
+namespace Lucene.Net.Util
+{
+ /// <summary>
+ /// Common util methods for dealing with <see cref="IndexReader" />s.
+ /// </summary>
+ public class ReaderUtil
+ {
+ /// <summary>Gathers sub-readers from reader into a List.</summary>
+ /// <param name="allSubReaders">list to which all encountered sub-readers are added</param>
+ /// <param name="reader">reader whose sub-readers are gathered (recursively)</param>
+ public static void GatherSubReaders(System.Collections.Generic.IList<IndexReader> allSubReaders, IndexReader reader)
+ {
+ IndexReader[] subReaders = reader.GetSequentialSubReaders();
+ if (subReaders == null)
+ {
+ // Add the reader itself, and do not recurse
+ allSubReaders.Add(reader);
+ }
+ else
+ {
+ for (int i = 0; i < subReaders.Length; i++)
+ {
+ GatherSubReaders(allSubReaders, subReaders[i]);
+ }
+ }
+ }
+
+ /// <summary> Returns sub IndexReader that contains the given document id.
+ ///
+ /// </summary>
+ /// <param name="doc">id of document
+ /// </param>
+ /// <param name="reader">parent reader
+ /// </param>
+ /// <returns> sub reader of parent which contains the specified doc id
+ /// </returns>
+ public static IndexReader SubReader(int doc, IndexReader reader)
+ {
+ var subReadersList = new System.Collections.Generic.List<IndexReader>();
+ ReaderUtil.GatherSubReaders(subReadersList, reader);
+ IndexReader[] subReaders = subReadersList.ToArray();
+ int[] docStarts = new int[subReaders.Length];
+ int maxDoc = 0;
+ for (int i = 0; i < subReaders.Length; i++)
+ {
+ docStarts[i] = maxDoc;
+ maxDoc += subReaders[i].MaxDoc;
+ }
+ return subReaders[ReaderUtil.SubIndex(doc, docStarts)];
+ }
+
+ /// <summary> Returns sub-reader subIndex from reader.
+ ///
+ /// </summary>
+ /// <param name="reader">parent reader
+ /// </param>
+ /// <param name="subIndex">index of desired sub reader
+ /// </param>
+ /// <returns> the sub-reader at subIndex
+ /// </returns>
+ public static IndexReader SubReader(IndexReader reader, int subIndex)
+ {
+ var subReadersList = new System.Collections.Generic.List<IndexReader>();
+ ReaderUtil.GatherSubReaders(subReadersList, reader);
+ IndexReader[] subReaders = subReadersList.ToArray();
+ return subReaders[subIndex];
+ }
+
+
+ /// <summary> Returns index of the searcher/reader for document <c>n</c> in the
+ /// array used to construct this searcher/reader.
+ /// </summary>
+ public static int SubIndex(int n, int[] docStarts)
+ {
+ // find
+ // searcher/reader for doc n:
+ int size = docStarts.Length;
+ int lo = 0; // search starts array
+ int hi = size - 1; // for first element less than n, return its index
+ while (hi >= lo)
+ {
+ int mid = Number.URShift((lo + hi), 1);
+ int midValue = docStarts[mid];
+ if (n < midValue)
+ hi = mid - 1;
+ else if (n > midValue)
+ lo = mid + 1;
+ else
+ {
+ // found a match
+ while (mid + 1 < size && docStarts[mid + 1] == midValue)
+ {
+ mid++; // scan to last match
+ }
+ return mid;
+ }
+ }
+ return hi;
+ }
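+
+ /*
+ // Worked example (illustrative, not part of the original source): with
+ // docStarts = {0, 10, 25} (sub-readers of 10 and 15 docs, then the rest),
+ // SubIndex(9, docStarts) == 0, SubIndex(10, docStarts) == 1, SubIndex(25, docStarts) == 2.
+ */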
+ }
+} \ No newline at end of file
diff --git a/src/core/Util/ScorerDocQueue.cs b/src/core/Util/ScorerDocQueue.cs
new file mode 100644
index 0000000..ee6c259
--- /dev/null
+++ b/src/core/Util/ScorerDocQueue.cs
@@ -0,0 +1,275 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/* Derived from Lucene.Net.Util.PriorityQueue of March 2005 */
+using System;
+using Lucene.Net.Support;
+using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+using Scorer = Lucene.Net.Search.Scorer;
+
+namespace Lucene.Net.Util
+{
+
+ /// <summary>A ScorerDocQueue maintains a partial ordering of its Scorers such that the
+ /// least Scorer can always be found in constant time. Put()'s and pop()'s
+ /// require log(size) time. The ordering is by Scorer.DocID().
+ /// </summary>
+ public class ScorerDocQueue
+ {
+ // later: SpansQueue for spans with doc and term positions
+ private HeapedScorerDoc[] heap;
+ private int maxSize;
+ private int size;
+
+ private class HeapedScorerDoc
+ {
+ private void InitBlock(ScorerDocQueue enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private ScorerDocQueue enclosingInstance;
+ public ScorerDocQueue Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ internal Scorer scorer;
+ internal int doc;
+
+ internal HeapedScorerDoc(ScorerDocQueue enclosingInstance, Scorer s):this(enclosingInstance, s, s.DocID())
+ {
+ }
+
+ internal HeapedScorerDoc(ScorerDocQueue enclosingInstance, Scorer scorer, int doc)
+ {
+ InitBlock(enclosingInstance);
+ this.scorer = scorer;
+ this.doc = doc;
+ }
+
+ internal virtual void Adjust()
+ {
+ doc = scorer.DocID();
+ }
+ }
+
+ private HeapedScorerDoc topHSD; // same as heap[1], only for speed
+
+ /// <summary>Create a ScorerDocQueue with a maximum size. </summary>
+ public ScorerDocQueue(int maxSize)
+ {
+ // assert maxSize >= 0;
+ size = 0;
+ int heapSize = maxSize + 1;
+ heap = new HeapedScorerDoc[heapSize];
+ this.maxSize = maxSize;
+ topHSD = heap[1]; // initially null
+ }
+
+ /// <summary> Adds a Scorer to a ScorerDocQueue in log(size) time.
+ /// If one tries to add more Scorers than maxSize,
+ /// an <see cref="System.IndexOutOfRangeException" /> is thrown.
+ /// </summary>
+ public void Put(Scorer scorer)
+ {
+ size++;
+ heap[size] = new HeapedScorerDoc(this, scorer);
+ UpHeap();
+ }
+
+ /// <summary> Adds a Scorer to the ScorerDocQueue in log(size) time if either
+ /// the ScorerDocQueue is not full, or not lessThan(scorer, top()).
+ /// </summary>
+ /// <param name="scorer">
+ /// </param>
+ /// <returns> true if scorer is added, false otherwise.
+ /// </returns>
+ public virtual bool Insert(Scorer scorer)
+ {
+ if (size < maxSize)
+ {
+ Put(scorer);
+ return true;
+ }
+ else
+ {
+ int docNr = scorer.DocID();
+ if ((size > 0) && (!(docNr < topHSD.doc)))
+ {
+ // heap[1] is top()
+ heap[1] = new HeapedScorerDoc(this, scorer, docNr);
+ DownHeap();
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+ }
+ }
+
+ /// <summary>Returns the least Scorer of the ScorerDocQueue in constant time.
+ /// Should not be used when the queue is empty.
+ /// </summary>
+ public Scorer Top()
+ {
+ // assert size > 0;
+ return topHSD.scorer;
+ }
+
+ /// <summary>Returns document number of the least Scorer of the ScorerDocQueue
+ /// in constant time.
+ /// Should not be used when the queue is empty.
+ /// </summary>
+ public int TopDoc()
+ {
+ // assert size > 0;
+ return topHSD.doc;
+ }
+
+ public float TopScore()
+ {
+ // assert size > 0;
+ return topHSD.scorer.Score();
+ }
+
+ public bool TopNextAndAdjustElsePop()
+ {
+ return CheckAdjustElsePop(topHSD.scorer.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+ }
+
+ public bool TopSkipToAndAdjustElsePop(int target)
+ {
+ return CheckAdjustElsePop(topHSD.scorer.Advance(target) != DocIdSetIterator.NO_MORE_DOCS);
+ }
+
+ private bool CheckAdjustElsePop(bool cond)
+ {
+ if (cond)
+ {
+ // see also adjustTop
+ topHSD.doc = topHSD.scorer.DocID();
+ }
+ else
+ {
+ // see also popNoResult
+ heap[1] = heap[size]; // move last to first
+ heap[size] = null;
+ size--;
+ }
+ DownHeap();
+ return cond;
+ }
+
+ /// <summary>Removes and returns the least scorer of the ScorerDocQueue in log(size)
+ /// time.
+ /// Should not be used when the queue is empty.
+ /// </summary>
+ public Scorer Pop()
+ {
+ // assert size > 0;
+ Scorer result = topHSD.scorer;
+ PopNoResult();
+ return result;
+ }
+
+ /// <summary>Removes the least scorer of the ScorerDocQueue in log(size) time.
+ /// Should not be used when the queue is empty.
+ /// </summary>
+ private void PopNoResult()
+ {
+ heap[1] = heap[size]; // move last to first
+ heap[size] = null;
+ size--;
+ DownHeap(); // adjust heap
+ }
+
+ /// <summary>Should be called when the scorer at the top changes its DocID() value.
+ /// Still log(n) worst case, but it is at least twice as fast to use <c>
+ /// { pq.Top().Change(); pq.AdjustTop(); }
+ /// </c> rather than <c>
+ /// { o = pq.Pop(); o.Change(); pq.Put(o); }
+ /// </c>
+ /// </summary>
+ public void AdjustTop()
+ {
+ // assert size > 0;
+ topHSD.Adjust();
+ DownHeap();
+ }
+
+ /// <summary>Returns the number of scorers currently stored in the ScorerDocQueue. </summary>
+ public int Size()
+ {
+ return size;
+ }
+
+ /// <summary>Removes all entries from the ScorerDocQueue. </summary>
+ public void Clear()
+ {
+ for (int i = 0; i <= size; i++)
+ {
+ heap[i] = null;
+ }
+ size = 0;
+ }
+
+ private void UpHeap()
+ {
+ int i = size;
+ HeapedScorerDoc node = heap[i]; // save bottom node
+ int j = Number.URShift(i, 1);
+ while ((j > 0) && (node.doc < heap[j].doc))
+ {
+ heap[i] = heap[j]; // shift parents down
+ i = j;
+ j = Number.URShift(j, 1);
+ }
+ heap[i] = node; // install saved node
+ topHSD = heap[1];
+ }
+
+ private void DownHeap()
+ {
+ int i = 1;
+ HeapedScorerDoc node = heap[i]; // save top node
+ int j = i << 1; // find smaller child
+ int k = j + 1;
+ if ((k <= size) && (heap[k].doc < heap[j].doc))
+ {
+ j = k;
+ }
+ while ((j <= size) && (heap[j].doc < node.doc))
+ {
+ heap[i] = heap[j]; // shift up child
+ i = j;
+ j = i << 1;
+ k = j + 1;
+ if (k <= size && (heap[k].doc < heap[j].doc))
+ {
+ j = k;
+ }
+ }
+ heap[i] = node; // install saved node
+ topHSD = heap[1];
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Util/SimpleStringInterner.cs b/src/core/Util/SimpleStringInterner.cs
new file mode 100644
index 0000000..eea707a
--- /dev/null
+++ b/src/core/Util/SimpleStringInterner.cs
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Util
+{
+
+
+ /// <summary> Simple lock-free, memory-barrier-free String intern cache that is guaranteed
+ /// to return the same String instance as String.Intern() does.
+ /// </summary>
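+ /// <example>
+ /// A minimal usage sketch (illustrative only):
+ /// <code>
+ /// StringInterner interner = new SimpleStringInterner(1024, 8);
+ /// string a = interner.Intern("field");
+ /// string b = interner.Intern(new string(new[] { 'f', 'i', 'e', 'l', 'd' }));
+ /// // Both calls return the same interned instance: ReferenceEquals(a, b) is true.
+ /// </code>
+ /// </example>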
+ public class SimpleStringInterner:StringInterner
+ {
+
+ internal /*private*/ class Entry
+ {
+ internal /*private*/ System.String str;
+ internal /*private*/ int hash;
+ internal /*private*/ Entry next;
+ internal Entry(System.String str, int hash, Entry next)
+ {
+ this.str = str;
+ this.hash = hash;
+ this.next = next;
+ }
+ }
+
+ private Entry[] cache;
+ private int maxChainLength;
+
+ /// <param name="tableSize"> Size of the hash table, should be a power of two.
+ /// </param>
+ /// <param name="maxChainLength"> Maximum length of each bucket, after which the oldest item inserted is dropped.
+ /// </param>
+ public SimpleStringInterner(int tableSize, int maxChainLength)
+ {
+ cache = new Entry[System.Math.Max(1, BitUtil.NextHighestPowerOfTwo(tableSize))];
+ this.maxChainLength = System.Math.Max(2, maxChainLength);
+ }
+
+ // @Override
+ public override System.String Intern(System.String s)
+ {
+ int h = s.GetHashCode();
+ // In the future, it may be worth augmenting the string hash
+ // if the lower bits need better distribution.
+ int slot = h & (cache.Length - 1);
+
+ Entry first = this.cache[slot];
+ Entry nextToLast = null;
+
+ int chainLength = 0;
+
+ for (Entry e = first; e != null; e = e.next)
+ {
+ if (e.hash == h && (ReferenceEquals(e.str, s) || String.CompareOrdinal(e.str, s) == 0))
+ {
+ // if (e.str == s || (e.hash == h && e.str.compareTo(s)==0)) {
+ return e.str;
+ }
+
+ chainLength++;
+ if (e.next != null)
+ {
+ nextToLast = e;
+ }
+ }
+
+ // insertion-order cache: add new entry at head
+ s = String.Intern(s);
+ this.cache[slot] = new Entry(s, h, first);
+ if (chainLength >= maxChainLength)
+ {
+ // prune last entry
+ nextToLast.next = null;
+ }
+ return s;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Util/SmallFloat.cs b/src/core/Util/SmallFloat.cs
new file mode 100644
index 0000000..848fb13
--- /dev/null
+++ b/src/core/Util/SmallFloat.cs
@@ -0,0 +1,152 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Util
+{
+
+
+ /// <summary>Floating point numbers smaller than 32 bits.
+ ///
+ /// </summary>
+ /// <version> $Id$
+ /// </version>
+ public class SmallFloat
+ {
+
+ /// <summary>Converts a 32 bit float to an 8 bit float.
+ /// <br/>Values less than zero are all mapped to zero.
+ /// <br/>Values are truncated (rounded down) to the nearest 8 bit value.
+ /// <br/>Values between zero and the smallest representable value
+ /// are rounded up.
+ ///
+ /// </summary>
+ /// <param name="f">the 32 bit float to be converted to an 8 bit float (byte)
+ /// </param>
+ /// <param name="numMantissaBits">the number of mantissa bits to use in the byte, with the remainder to be used in the exponent
+ /// </param>
+ /// <param name="zeroExp">the zero-point in the range of exponent values
+ /// </param>
+ /// <returns> the 8 bit float representation
+ /// </returns>
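+ /// <example>
+ /// A minimal round-trip sketch (illustrative only; the decoded value is a lossy
+ /// approximation of the input, not an exact reconstruction in general):
+ /// <code>
+ /// sbyte encoded = SmallFloat.FloatToByte(0.5f, 3, 15);
+ /// float approx  = SmallFloat.ByteToFloat((byte) encoded, 3, 15);
+ /// // approx is close to 0.5f, within the precision of a 3-bit mantissa
+ /// </code>
+ /// </example>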
+ public static sbyte FloatToByte(float f, int numMantissaBits, int zeroExp)
+ {
+ // Adjustment from a float zero exponent to our zero exponent,
+ // shifted over to our exponent position.
+ int fzero = (63 - zeroExp) << numMantissaBits;
+ int bits = System.BitConverter.ToInt32(System.BitConverter.GetBytes(f), 0);
+ int smallfloat = bits >> (24 - numMantissaBits);
+ if (smallfloat < fzero)
+ {
+ return (bits <= 0) ? (sbyte) 0 : (sbyte) 1; // zero and negative inputs map to 0; positive underflow maps to the smallest non-zero value.
+ }
+ else if (smallfloat >= fzero + 0x100)
+ {
+ return - 1; // overflow maps to largest number
+ }
+ else
+ {
+ return (sbyte) (smallfloat - fzero);
+ }
+ }
+
+ /// <summary>Converts an 8 bit float to a 32 bit float. </summary>
+ public static float ByteToFloat(byte b, int numMantissaBits, int zeroExp)
+ {
+ // on Java1.5 & 1.6 JVMs, prebuilding a decoding array and doing a lookup
+ // is only a little bit faster (anywhere from 0% to 7%)
+ if (b == 0)
+ return 0.0f;
+ int bits = (b & 0xff) << (24 - numMantissaBits);
+ bits += ((63 - zeroExp) << 24);
+ return BitConverter.ToSingle(BitConverter.GetBytes(bits), 0);
+ }
+
+
+ //
+ // Some specializations of the generic functions follow.
+ // The generic functions are just as fast with current (1.5)
+ // -server JVMs, but still slower with client JVMs.
+ //
+
+ /// <summary>floatToByte(b, mantissaBits=3, zeroExponent=15)
+ /// <br/>smallest non-zero value = 5.820766E-10
+ /// <br/>largest value = 7.5161928E9
+ /// <br/>epsilon = 0.125
+ /// </summary>
+ public static sbyte FloatToByte315(float f)
+ {
+ int bits = System.BitConverter.ToInt32(System.BitConverter.GetBytes(f), 0);
+ int smallfloat = bits >> (24 - 3);
+ if (smallfloat < (63 - 15) << 3)
+ {
+ return (bits <= 0)?(sbyte) 0:(sbyte) 1;
+ }
+ if (smallfloat >= ((63 - 15) << 3) + 0x100)
+ {
+ return - 1;
+ }
+ return (sbyte) (smallfloat - ((63 - 15) << 3));
+ }
+
+ /// <summary>byteToFloat(b, mantissaBits=3, zeroExponent=15) </summary>
+ public static float Byte315ToFloat(byte b)
+ {
+ // on Java1.5 & 1.6 JVMs, prebuilding a decoding array and doing a lookup
+ // is only a little bit faster (anywhere from 0% to 7%)
+ if (b == 0)
+ return 0.0f;
+ int bits = (b & 0xff) << (24 - 3);
+ bits += ((63 - 15) << 24);
+ return BitConverter.ToSingle(BitConverter.GetBytes(bits), 0);
+ }
+
+
+ /// <summary>floatToByte(b, mantissaBits=5, zeroExponent=2)
+ /// <br/>smallest nonzero value = 0.033203125
+ /// <br/>largest value = 1984.0
+ /// <br/>epsilon = 0.03125
+ /// </summary>
+ public static sbyte FloatToByte52(float f)
+ {
+ int bits = System.BitConverter.ToInt32(System.BitConverter.GetBytes(f), 0);
+ int smallfloat = bits >> (24 - 5);
+ if (smallfloat < (63 - 2) << 5)
+ {
+ return (bits <= 0)?(sbyte) 0:(sbyte) 1;
+ }
+ if (smallfloat >= ((63 - 2) << 5) + 0x100)
+ {
+ return - 1;
+ }
+ return (sbyte) (smallfloat - ((63 - 2) << 5));
+ }
+
+ /// <summary>byteToFloat(b, mantissaBits=5, zeroExponent=2) </summary>
+ public static float Byte52ToFloat(byte b)
+ {
+ // on Java1.5 & 1.6 JVMs, prebuilding a decoding array and doing a lookup
+ // is only a little bit faster (anywhere from 0% to 7%)
+ if (b == 0)
+ return 0.0f;
+ int bits = (b & 0xff) << (24 - 5);
+ bits += ((63 - 2) << 24);
+ return BitConverter.ToSingle(BitConverter.GetBytes(bits), 0);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Util/SortedVIntList.cs b/src/core/Util/SortedVIntList.cs
new file mode 100644
index 0000000..5e8e8d4
--- /dev/null
+++ b/src/core/Util/SortedVIntList.cs
@@ -0,0 +1,289 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Support;
+using DocIdSet = Lucene.Net.Search.DocIdSet;
+using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+
+namespace Lucene.Net.Util
+{
+
+ /// <summary> Stores and iterates over sorted integers in compressed form in RAM. <br/>
+ /// The code for compressing the differences between ascending integers was
+ /// borrowed from <see cref="Lucene.Net.Store.IndexInput" /> and
+ /// <see cref="Lucene.Net.Store.IndexOutput" />.<p/>
+ /// <b>NOTE:</b> this class assumes the stored integers are doc Ids (which is why it
+ /// extends <see cref="DocIdSet" />). Therefore its <see cref="Iterator()" /> assumes <see cref="DocIdSetIterator.NO_MORE_DOCS" />
+ /// can be used as a sentinel. If you intend to store this value,
+ /// make sure it is not used during the search flow.
+ /// </summary>
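+ /// <example>
+ /// A minimal usage sketch (illustrative only):
+ /// <code>
+ /// SortedVIntList list = new SortedVIntList(3, 7, 12, 40);   // ascending, non-negative
+ /// DocIdSetIterator it = list.Iterator();
+ /// while (it.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
+ /// {
+ ///     int doc = it.DocID();   // yields 3, 7, 12, 40 in turn
+ /// }
+ /// </code>
+ /// </example>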
+ public class SortedVIntList:DocIdSet
+ {
+ private class AnonymousClassDocIdSetIterator:DocIdSetIterator
+ {
+ public AnonymousClassDocIdSetIterator(SortedVIntList enclosingInstance)
+ {
+ InitBlock(enclosingInstance);
+ }
+ private void InitBlock(SortedVIntList enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private SortedVIntList enclosingInstance;
+ public SortedVIntList Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ internal int bytePos = 0;
+ internal int lastInt = 0;
+ internal int doc = - 1;
+
+ private void Advance()
+ {
+ // See Lucene.Net.Store.IndexInput.readVInt()
+ sbyte b = Enclosing_Instance.bytes[bytePos++];
+ lastInt += (b & Lucene.Net.Util.SortedVIntList.VB1);
+ for (int s = Lucene.Net.Util.SortedVIntList.BIT_SHIFT; (b & ~ Lucene.Net.Util.SortedVIntList.VB1) != 0; s += Lucene.Net.Util.SortedVIntList.BIT_SHIFT)
+ {
+ b = Enclosing_Instance.bytes[bytePos++];
+ lastInt += ((b & Lucene.Net.Util.SortedVIntList.VB1) << s);
+ }
+ }
+
+ public override int DocID()
+ {
+ return doc;
+ }
+
+ public override int NextDoc()
+ {
+ if (bytePos >= Enclosing_Instance.lastBytePos)
+ {
+ doc = NO_MORE_DOCS;
+ }
+ else
+ {
+ Advance();
+ doc = lastInt;
+ }
+ return doc;
+ }
+
+ public override int Advance(int target)
+ {
+ while (bytePos < Enclosing_Instance.lastBytePos)
+ {
+ Advance();
+ if (lastInt >= target)
+ {
+ return doc = lastInt;
+ }
+ }
+ return doc = NO_MORE_DOCS;
+ }
+ }
+ /// <summary>When a BitSet has fewer than 1 in BITS2VINTLIST_SIZE of its bits set,
+ /// a SortedVIntList representing the index numbers of the set bits
+ /// will be smaller than that BitSet.
+ /// </summary>
+ internal const int BITS2VINTLIST_SIZE = 8;
+
+ private int size;
+ private sbyte[] bytes;
+ private int lastBytePos;
+
+ /// <summary> Create a SortedVIntList from all elements of an array of integers.
+ ///
+ /// </summary>
+ /// <param name="sortedInts"> A sorted array of non-negative integers.
+ /// </param>
+ public SortedVIntList(params int[] sortedInts):this(sortedInts, sortedInts.Length)
+ {
+ }
+
+ /// <summary> Create a SortedVIntList from an array of integers.</summary>
+ /// <param name="sortedInts"> An array of sorted non-negative integers.
+ /// </param>
+ /// <param name="inputSize"> The number of integers to be used from the array.
+ /// </param>
+ public SortedVIntList(int[] sortedInts, int inputSize)
+ {
+ SortedVIntListBuilder builder = new SortedVIntListBuilder(this);
+ for (int i = 0; i < inputSize; i++)
+ {
+ builder.AddInt(sortedInts[i]);
+ }
+ builder.Done();
+ }
+
+ /// <summary> Create a SortedVIntList from a BitSet.</summary>
+ /// <param name="bits"> A bit set representing a set of integers.
+ /// </param>
+ public SortedVIntList(System.Collections.BitArray bits)
+ {
+ SortedVIntListBuilder builder = new SortedVIntListBuilder(this);
+ int nextInt = BitSetSupport.NextSetBit(bits, 0);
+ while (nextInt != - 1)
+ {
+ builder.AddInt(nextInt);
+ nextInt = BitSetSupport.NextSetBit(bits, nextInt + 1);
+ }
+ builder.Done();
+ }
+
+ /// <summary> Create a SortedVIntList from an OpenBitSet.</summary>
+ /// <param name="bits"> A bit set representing a set of integers.
+ /// </param>
+ public SortedVIntList(OpenBitSet bits)
+ {
+ SortedVIntListBuilder builder = new SortedVIntListBuilder(this);
+ int nextInt = bits.NextSetBit(0);
+ while (nextInt != - 1)
+ {
+ builder.AddInt(nextInt);
+ nextInt = bits.NextSetBit(nextInt + 1);
+ }
+ builder.Done();
+ }
+
+ /// <summary> Create a SortedVIntList.</summary>
+ /// <param name="docIdSetIterator"> An iterator providing document numbers as a set of integers.
+ /// This DocIdSetIterator is iterated completely when this constructor
+ /// is called and it must provide the integers in non-decreasing order.
+ /// </param>
+ public SortedVIntList(DocIdSetIterator docIdSetIterator)
+ {
+ SortedVIntListBuilder builder = new SortedVIntListBuilder(this);
+ int doc;
+ while ((doc = docIdSetIterator.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
+ {
+ builder.AddInt(doc);
+ }
+ builder.Done();
+ }
+
+
+ private class SortedVIntListBuilder
+ {
+ private void InitBlock(SortedVIntList enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private SortedVIntList enclosingInstance;
+ public SortedVIntList Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ private int lastInt = 0;
+
+ internal SortedVIntListBuilder(SortedVIntList enclosingInstance)
+ {
+ InitBlock(enclosingInstance);
+ Enclosing_Instance.InitBytes();
+ lastInt = 0;
+ }
+
+ internal virtual void AddInt(int nextInt)
+ {
+ int diff = nextInt - lastInt;
+ if (diff < 0)
+ {
+ throw new System.ArgumentException("Input not sorted or first element negative.");
+ }
+
+ if ((Enclosing_Instance.lastBytePos + Enclosing_Instance.MAX_BYTES_PER_INT) > Enclosing_Instance.bytes.Length)
+ {
+ // biggest possible int does not fit
+ Enclosing_Instance.ResizeBytes((Enclosing_Instance.bytes.Length * 2) + Enclosing_Instance.MAX_BYTES_PER_INT);
+ }
+
+ // See Lucene.Net.Store.IndexOutput.writeVInt()
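+ // e.g. a diff of 300 (binary 1 0010 1100) is written as two bytes:
+ // 0xAC (the low 7 bits with the continuation bit set), then 0x02 (the remaining bits).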
+ while ((diff & ~ Lucene.Net.Util.SortedVIntList.VB1) != 0)
+ {
+ // The high bit of the next byte needs to be set.
+ Enclosing_Instance.bytes[Enclosing_Instance.lastBytePos++] = (sbyte) ((diff & Lucene.Net.Util.SortedVIntList.VB1) | ~ Lucene.Net.Util.SortedVIntList.VB1);
+ diff = Number.URShift(diff, Lucene.Net.Util.SortedVIntList.BIT_SHIFT);
+ }
+ Enclosing_Instance.bytes[Enclosing_Instance.lastBytePos++] = (sbyte) diff; // Last byte, high bit not set.
+ Enclosing_Instance.size++;
+ lastInt = nextInt;
+ }
+
+ internal virtual void Done()
+ {
+ Enclosing_Instance.ResizeBytes(Enclosing_Instance.lastBytePos);
+ }
+ }
+
+
+ private void InitBytes()
+ {
+ size = 0;
+ bytes = new sbyte[128]; // initial byte size
+ lastBytePos = 0;
+ }
+
+ private void ResizeBytes(int newSize)
+ {
+ if (newSize != bytes.Length)
+ {
+ sbyte[] newBytes = new sbyte[newSize];
+ Array.Copy(bytes, 0, newBytes, 0, lastBytePos);
+ bytes = newBytes;
+ }
+ }
+
+ private const int VB1 = 0x7F;
+ private const int BIT_SHIFT = 7;
+ private int MAX_BYTES_PER_INT = (31 / BIT_SHIFT) + 1;
+
+ /// <value> The total number of sorted integers. </value>
+ public virtual int Size
+ {
+ get { return size; }
+ }
+
+ /// <value> The size of the byte array storing the compressed sorted integers. </value>
+ public virtual int ByteSize
+ {
+ get { return bytes.Length; }
+ }
+
+ /// <summary>This DocIdSet implementation is cacheable. </summary>
+ public override bool IsCacheable
+ {
+ get { return true; }
+ }
+
+ /// <returns> An iterator over the sorted integers.
+ /// </returns>
+ public override DocIdSetIterator Iterator()
+ {
+ return new AnonymousClassDocIdSetIterator(this);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Util/SorterTemplate.cs b/src/core/Util/SorterTemplate.cs
new file mode 100644
index 0000000..a44f229
--- /dev/null
+++ b/src/core/Util/SorterTemplate.cs
@@ -0,0 +1,224 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Util
+{
+
+ /// <summary> Borrowed from Cglib. Allows custom swap so that two arrays can be sorted
+ /// at the same time.
+ /// </summary>
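+ /// <example>
+ /// A minimal sketch of a subclass that keeps two parallel arrays in sync while sorting
+ /// (illustrative only; ParallelArraySorter is not part of this file, and it is assumed
+ /// to live in a consuming assembly, hence the protected override):
+ /// <code>
+ /// class ParallelArraySorter : SorterTemplate
+ /// {
+ ///     private readonly int[] keys;
+ ///     private readonly string[] values;
+ ///     public ParallelArraySorter(int[] keys, string[] values) { this.keys = keys; this.values = values; }
+ ///     protected override int Compare(int i, int j) { return keys[i].CompareTo(keys[j]); }
+ ///     protected override void Swap(int i, int j)
+ ///     {
+ ///         int k = keys[i]; keys[i] = keys[j]; keys[j] = k;
+ ///         string v = values[i]; values[i] = values[j]; values[j] = v;
+ ///     }
+ /// }
+ /// // new ParallelArraySorter(keys, values).QuickSort(0, keys.Length - 1);
+ /// </code>
+ /// </example>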
+ public abstract class SorterTemplate
+ {
+ private const int MERGESORT_THRESHOLD = 12;
+ private const int QUICKSORT_THRESHOLD = 7;
+
+ abstract protected internal void Swap(int i, int j);
+ abstract protected internal int Compare(int i, int j);
+
+ public virtual void QuickSort(int lo, int hi)
+ {
+ QuickSortHelper(lo, hi);
+ InsertionSort(lo, hi);
+ }
+
+ private void QuickSortHelper(int lo, int hi)
+ {
+ for (; ; )
+ {
+ int diff = hi - lo;
+ if (diff <= QUICKSORT_THRESHOLD)
+ {
+ break;
+ }
+ int i = (hi + lo) / 2;
+ if (Compare(lo, i) > 0)
+ {
+ Swap(lo, i);
+ }
+ if (Compare(lo, hi) > 0)
+ {
+ Swap(lo, hi);
+ }
+ if (Compare(i, hi) > 0)
+ {
+ Swap(i, hi);
+ }
+ int j = hi - 1;
+ Swap(i, j);
+ i = lo;
+ int v = j;
+ for (; ; )
+ {
+ while (Compare(++i, v) < 0)
+ {
+ /* nothing */ ;
+ }
+ while (Compare(--j, v) > 0)
+ {
+ /* nothing */ ;
+ }
+ if (j < i)
+ {
+ break;
+ }
+ Swap(i, j);
+ }
+ Swap(i, hi - 1);
+ if (j - lo <= hi - i + 1)
+ {
+ QuickSortHelper(lo, j);
+ lo = i + 1;
+ }
+ else
+ {
+ QuickSortHelper(i + 1, hi);
+ hi = j;
+ }
+ }
+ }
+
+ private void InsertionSort(int lo, int hi)
+ {
+ for (int i = lo + 1; i <= hi; i++)
+ {
+ for (int j = i; j > lo; j--)
+ {
+ if (Compare(j - 1, j) > 0)
+ {
+ Swap(j - 1, j);
+ }
+ else
+ {
+ break;
+ }
+ }
+ }
+ }
+
+ protected internal virtual void MergeSort(int lo, int hi)
+ {
+ int diff = hi - lo;
+ if (diff <= MERGESORT_THRESHOLD)
+ {
+ InsertionSort(lo, hi);
+ return ;
+ }
+ int mid = lo + diff / 2;
+ MergeSort(lo, mid);
+ MergeSort(mid, hi);
+ Merge(lo, mid, hi, mid - lo, hi - mid);
+ }
+
+ private void Merge(int lo, int pivot, int hi, int len1, int len2)
+ {
+ if (len1 == 0 || len2 == 0)
+ {
+ return ;
+ }
+ if (len1 + len2 == 2)
+ {
+ if (Compare(pivot, lo) < 0)
+ {
+ Swap(pivot, lo);
+ }
+ return ;
+ }
+ int first_cut, second_cut;
+ int len11, len22;
+ if (len1 > len2)
+ {
+ len11 = len1 / 2;
+ first_cut = lo + len11;
+ second_cut = Lower(pivot, hi, first_cut);
+ len22 = second_cut - pivot;
+ }
+ else
+ {
+ len22 = len2 / 2;
+ second_cut = pivot + len22;
+ first_cut = Upper(lo, pivot, second_cut);
+ len11 = first_cut - lo;
+ }
+ Rotate(first_cut, pivot, second_cut);
+ int new_mid = first_cut + len22;
+ Merge(lo, first_cut, new_mid, len11, len22);
+ Merge(new_mid, second_cut, hi, len1 - len11, len2 - len22);
+ }
+
+ private void Rotate(int lo, int mid, int hi)
+ {
+ int lot = lo;
+ int hit = mid - 1;
+ while (lot < hit)
+ {
+ Swap(lot++, hit--);
+ }
+ lot = mid; hit = hi - 1;
+ while (lot < hit)
+ {
+ Swap(lot++, hit--);
+ }
+ lot = lo; hit = hi - 1;
+ while (lot < hit)
+ {
+ Swap(lot++, hit--);
+ }
+ }
+
+ private int Lower(int lo, int hi, int val)
+ {
+ int len = hi - lo;
+ while (len > 0)
+ {
+ int half = len / 2;
+ int mid = lo + half;
+ if (Compare(mid, val) < 0)
+ {
+ lo = mid + 1;
+ len = len - half - 1;
+ }
+ else
+ {
+ len = half;
+ }
+ }
+ return lo;
+ }
+
+ private int Upper(int lo, int hi, int val)
+ {
+ int len = hi - lo;
+ while (len > 0)
+ {
+ int half = len / 2;
+ int mid = lo + half;
+ if (Compare(val, mid) < 0)
+ {
+ len = half;
+ }
+ else
+ {
+ lo = mid + 1;
+ len = len - half - 1;
+ }
+ }
+ return lo;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Util/StringHelper.cs b/src/core/Util/StringHelper.cs
new file mode 100644
index 0000000..3851087
--- /dev/null
+++ b/src/core/Util/StringHelper.cs
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Util
+{
+
+
+ /// <summary> Methods for manipulating strings.</summary>
+ public abstract class StringHelper
+ {
+ /// <summary> Expert:
+ /// The StringInterner implementation used by Lucene.
+ /// This shouldn't be changed to an incompatible implementation after other Lucene APIs have been used.
+ /// </summary>
+ public static StringInterner interner = new SimpleStringInterner(1024, 8);
+
+ /// <summary>Return the same string object for all equal strings </summary>
+ public static System.String Intern(System.String s)
+ {
+ return interner.Intern(s);
+ }
+
+ /// <summary> Compares two byte[] arrays, element by element, and returns the
+ /// length of their common prefix, i.e. the number of leading elements
+ /// common to both arrays. For example, for {1, 2, 3} and {1, 2, 4} the result is 2.
+ /// </summary>
+ /// <param name="bytes1">The first byte[] to compare
+ /// </param>
+ /// <param name="len1">The number of elements of bytes1 to compare</param>
+ /// <param name="bytes2">The second byte[] to compare
+ /// </param>
+ /// <param name="len2">The number of elements of bytes2 to compare</param>
+ /// <returns> The number of common leading elements.
+ /// </returns>
+ public static int BytesDifference(byte[] bytes1, int len1, byte[] bytes2, int len2)
+ {
+ int len = len1 < len2?len1:len2;
+ for (int i = 0; i < len; i++)
+ if (bytes1[i] != bytes2[i])
+ return i;
+ return len;
+ }
+
+ /// <summary> Compares two strings, character by character, and returns the
+ /// first position where the two strings differ from one another.
+ ///
+ /// </summary>
+ /// <param name="s1">The first string to compare
+ /// </param>
+ /// <param name="s2">The second string to compare
+ /// </param>
+ /// <returns> The first position where the two strings differ; if one string is a prefix
+ /// of the other, the length of the shorter string is returned.
+ /// </returns>
+ public static int StringDifference(System.String s1, System.String s2)
+ {
+ int len1 = s1.Length;
+ int len2 = s2.Length;
+ int len = len1 < len2?len1:len2;
+ for (int i = 0; i < len; i++)
+ {
+ if (s1[i] != s2[i])
+ {
+ return i;
+ }
+ }
+ return len;
+ }
+
+ private StringHelper()
+ {
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Util/StringInterner.cs b/src/core/Util/StringInterner.cs
new file mode 100644
index 0000000..b9efe5a
--- /dev/null
+++ b/src/core/Util/StringInterner.cs
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Util
+{
+
+ /// <summary> Subclasses of StringInterner are required to
+ /// return the same single String object for all equal strings.
+ /// Depending on the implementation, this may not be
+ /// the same object returned by String.Intern().
+ ///
+ /// This StringInterner base class simply delegates to String.Intern().
+ /// </summary>
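+ /// <example>
+ /// A minimal usage sketch (illustrative only):
+ /// <code>
+ /// StringInterner interner = new StringInterner();
+ /// string a = interner.Intern("title");
+ /// string b = interner.Intern(new char[] { 't', 'i', 't', 'l', 'e' }, 0, 5);
+ /// // Both calls return the single interned instance: ReferenceEquals(a, b) is true.
+ /// </code>
+ /// </example>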
+ public class StringInterner
+ {
+ /// <summary>Returns a single object instance for each equal string. </summary>
+ public virtual System.String Intern(System.String s)
+ {
+ return String.Intern(s);
+ }
+
+ /// <summary>Returns a single object instance for each equal string. </summary>
+ public virtual System.String Intern(char[] arr, int offset, int len)
+ {
+ return Intern(new System.String(arr, offset, len));
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Util/ToStringUtils.cs b/src/core/Util/ToStringUtils.cs
new file mode 100644
index 0000000..c1ba665
--- /dev/null
+++ b/src/core/Util/ToStringUtils.cs
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Util
+{
+
+ /// <summary> Helper methods to ease implementing <see cref="Object.ToString()" />.</summary>
+ public class ToStringUtils
+ {
+ /// <summary>For printing a boost value only if it is not 1.0. </summary>
+ public static System.String Boost(float boost)
+ {
+ if (boost != 1.0f)
+ {
+ float boostAsLong = (long) boost;
+ if (boostAsLong == boost)
+ return "^" + boost.ToString(".0").Replace(System.Globalization.CultureInfo.CurrentCulture.NumberFormat.NumberDecimalSeparator, ".");
+ return "^" + boost.ToString().Replace(System.Globalization.CultureInfo.CurrentCulture.NumberFormat.NumberDecimalSeparator, ".");
+ }
+ else
+ return "";
+ }
+ }
+} \ No newline at end of file
diff --git a/src/core/Util/UnicodeUtil.cs b/src/core/Util/UnicodeUtil.cs
new file mode 100644
index 0000000..9a34992
--- /dev/null
+++ b/src/core/Util/UnicodeUtil.cs
@@ -0,0 +1,505 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Util
+{
+
+
+ /*
+ * Some of this code came from the excellent Unicode
+ * conversion examples from:
+ *
+ * http://www.unicode.org/Public/PROGRAMS/CVTUTF
+ *
+ * Full Copyright for that code follows:*/
+
+ /*
+ * Copyright 2001-2004 Unicode, Inc.
+ *
+ * Disclaimer
+ *
+ * This source code is provided as is by Unicode, Inc. No claims are
+ * made as to fitness for any particular purpose. No warranties of any
+ * kind are expressed or implied. The recipient agrees to determine
+ * applicability of information provided. If this file has been
+ * purchased on magnetic or optical media from Unicode, Inc., the
+ * sole remedy for any claim will be exchange of defective media
+ * within 90 days of receipt.
+ *
+ * Limitations on Rights to Redistribute This Code
+ *
+ * Unicode, Inc. hereby grants the right to freely use the information
+ * supplied in this file in the creation of products supporting the
+ * Unicode Standard, and to make copies of this file in any form
+ * for internal or external distribution as long as this notice
+ * remains attached.
+ */
+
+ /// <summary> Class to encode a UTF-16 char[] into a UTF-8 byte[]
+ /// without always allocating a new byte[], as Java's
+ /// String.getBytes("UTF-8") does.
+ ///
+ /// <p/><b>WARNING</b>: This API is new and experimental and
+ /// may change suddenly. <p/>
+ /// </summary>
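+ /// <example>
+ /// A minimal usage sketch (illustrative only):
+ /// <code>
+ /// UnicodeUtil.UTF8Result utf8 = new UnicodeUtil.UTF8Result();
+ /// UnicodeUtil.UTF16toUTF8("héllo", 0, 5, utf8);
+ /// // utf8.result[0 .. utf8.length - 1] now holds the UTF-8 bytes; the buffer is reused across calls
+ /// </code>
+ /// </example>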
+
+ public static class UnicodeUtil
+ {
+
+ public const int UNI_SUR_HIGH_START = 0xD800;
+ public const int UNI_SUR_HIGH_END = 0xDBFF;
+ public const int UNI_SUR_LOW_START = 0xDC00;
+ public const int UNI_SUR_LOW_END = 0xDFFF;
+ public const int UNI_REPLACEMENT_CHAR = 0xFFFD;
+
+ private const long UNI_MAX_BMP = 0x0000FFFF;
+
+ private const int HALF_BASE = 0x0010000;
+ private const long HALF_SHIFT = 10;
+ private const long HALF_MASK = 0x3FFL;
+
+ public sealed class UTF8Result
+ {
+ public byte[] result = new byte[10];
+ public int length;
+
+ public void SetLength(int newLength)
+ {
+ if (result.Length < newLength)
+ {
+ byte[] newArray = new byte[(int) (1.5 * newLength)];
+ Array.Copy(result, 0, newArray, 0, length);
+ result = newArray;
+ }
+ length = newLength;
+ }
+ }
+
+ public sealed class UTF16Result
+ {
+ public char[] result = new char[10];
+ public int[] offsets = new int[10];
+ public int length;
+
+ public void SetLength(int newLength)
+ {
+ if (result.Length < newLength)
+ {
+ char[] newArray = new char[(int) (1.5 * newLength)];
+ Array.Copy(result, 0, newArray, 0, length);
+ result = newArray;
+ }
+ length = newLength;
+ }
+
+ public void CopyText(UTF16Result other)
+ {
+ SetLength(other.length);
+ Array.Copy(other.result, 0, result, 0, length);
+ }
+ }
+
+ /// <summary>Encode characters from a char[] source, starting at
+ /// offset and stopping when the character 0xffff is seen.
+ /// The number of bytes written is stored in result.length.
+ /// </summary>
+ public static void UTF16toUTF8(char[] source, int offset, UTF8Result result)
+ {
+
+ int upto = 0;
+ int i = offset;
+ byte[] out_Renamed = result.result;
+
+ while (true)
+ {
+
+ int code = (int) source[i++];
+
+ if (upto + 4 > out_Renamed.Length)
+ {
+ byte[] newOut = new byte[2 * out_Renamed.Length];
+ System.Diagnostics.Debug.Assert(newOut.Length >= upto + 4);
+ Array.Copy(out_Renamed, 0, newOut, 0, upto);
+ result.result = out_Renamed = newOut;
+ }
+ if (code < 0x80)
+ out_Renamed[upto++] = (byte) code;
+ else if (code < 0x800)
+ {
+ out_Renamed[upto++] = (byte) (0xC0 | (code >> 6));
+ out_Renamed[upto++] = (byte) (0x80 | (code & 0x3F));
+ }
+ else if (code < 0xD800 || code > 0xDFFF)
+ {
+ if (code == 0xffff)
+ // END
+ break;
+ out_Renamed[upto++] = (byte) (0xE0 | (code >> 12));
+ out_Renamed[upto++] = (byte) (0x80 | ((code >> 6) & 0x3F));
+ out_Renamed[upto++] = (byte) (0x80 | (code & 0x3F));
+ }
+ else
+ {
+ // surrogate pair
+ // confirm valid high surrogate
+ if (code < 0xDC00 && source[i] != 0xffff)
+ {
+ int utf32 = (int) source[i];
+ // confirm valid low surrogate and write pair
+ if (utf32 >= 0xDC00 && utf32 <= 0xDFFF)
+ {
+ utf32 = ((code - 0xD7C0) << 10) + (utf32 & 0x3FF);
+ i++;
+ out_Renamed[upto++] = (byte) (0xF0 | (utf32 >> 18));
+ out_Renamed[upto++] = (byte) (0x80 | ((utf32 >> 12) & 0x3F));
+ out_Renamed[upto++] = (byte) (0x80 | ((utf32 >> 6) & 0x3F));
+ out_Renamed[upto++] = (byte) (0x80 | (utf32 & 0x3F));
+ continue;
+ }
+ }
+ // replace unpaired surrogate or out-of-order low surrogate
+ // with substitution character
+ out_Renamed[upto++] = (byte) (0xEF);
+ out_Renamed[upto++] = (byte) (0xBF);
+ out_Renamed[upto++] = (byte) (0xBD);
+ }
+ }
+ //assert matches(source, offset, i-offset-1, out, upto);
+ result.length = upto;
+ }
+
+ /// <summary>Encode characters from a char[] source, starting at
+ /// offset, for length chars.
+ /// The number of bytes written is stored in result.length.
+ /// </summary>
+ public static void UTF16toUTF8(char[] source, int offset, int length, UTF8Result result)
+ {
+
+ int upto = 0;
+ int i = offset;
+ int end = offset + length;
+ byte[] out_Renamed = result.result;
+
+ while (i < end)
+ {
+
+ int code = (int) source[i++];
+
+ if (upto + 4 > out_Renamed.Length)
+ {
+ byte[] newOut = new byte[2 * out_Renamed.Length];
+ System.Diagnostics.Debug.Assert(newOut.Length >= upto + 4);
+ Array.Copy(out_Renamed, 0, newOut, 0, upto);
+ result.result = out_Renamed = newOut;
+ }
+ if (code < 0x80)
+ out_Renamed[upto++] = (byte) code;
+ else if (code < 0x800)
+ {
+ out_Renamed[upto++] = (byte) (0xC0 | (code >> 6));
+ out_Renamed[upto++] = (byte) (0x80 | (code & 0x3F));
+ }
+ else if (code < 0xD800 || code > 0xDFFF)
+ {
+ out_Renamed[upto++] = (byte) (0xE0 | (code >> 12));
+ out_Renamed[upto++] = (byte) (0x80 | ((code >> 6) & 0x3F));
+ out_Renamed[upto++] = (byte) (0x80 | (code & 0x3F));
+ }
+ else
+ {
+ // surrogate pair
+ // confirm valid high surrogate
+ if (code < 0xDC00 && i < end && source[i] != 0xffff)
+ {
+ int utf32 = (int) source[i];
+ // confirm valid low surrogate and write pair
+ if (utf32 >= 0xDC00 && utf32 <= 0xDFFF)
+ {
+ utf32 = ((code - 0xD7C0) << 10) + (utf32 & 0x3FF);
+ i++;
+ out_Renamed[upto++] = (byte) (0xF0 | (utf32 >> 18));
+ out_Renamed[upto++] = (byte) (0x80 | ((utf32 >> 12) & 0x3F));
+ out_Renamed[upto++] = (byte) (0x80 | ((utf32 >> 6) & 0x3F));
+ out_Renamed[upto++] = (byte) (0x80 | (utf32 & 0x3F));
+ continue;
+ }
+ }
+ // replace unpaired surrogate or out-of-order low surrogate
+ // with substitution character
+ out_Renamed[upto++] = (byte) (0xEF);
+ out_Renamed[upto++] = (byte) (0xBF);
+ out_Renamed[upto++] = (byte) (0xBD);
+ }
+ }
+ //assert matches(source, offset, length, out, upto);
+ result.length = upto;
+ }
+
+ /// <summary>Encode characters from this String, starting at offset
+ /// for length characters.
+ /// The number of bytes written is stored in result.length.
+ /// </summary>
+ public static void UTF16toUTF8(System.String s, int offset, int length, UTF8Result result)
+ {
+ int end = offset + length;
+
+ byte[] out_Renamed = result.result;
+
+ int upto = 0;
+ for (int i = offset; i < end; i++)
+ {
+ int code = (int) s[i];
+
+ if (upto + 4 > out_Renamed.Length)
+ {
+ byte[] newOut = new byte[2 * out_Renamed.Length];
+ System.Diagnostics.Debug.Assert(newOut.Length >= upto + 4);
+ Array.Copy(out_Renamed, 0, newOut, 0, upto);
+ result.result = out_Renamed = newOut;
+ }
+ if (code < 0x80)
+ out_Renamed[upto++] = (byte) code;
+ else if (code < 0x800)
+ {
+ out_Renamed[upto++] = (byte) (0xC0 | (code >> 6));
+ out_Renamed[upto++] = (byte) (0x80 | (code & 0x3F));
+ }
+ else if (code < 0xD800 || code > 0xDFFF)
+ {
+ out_Renamed[upto++] = (byte) (0xE0 | (code >> 12));
+ out_Renamed[upto++] = (byte) (0x80 | ((code >> 6) & 0x3F));
+ out_Renamed[upto++] = (byte) (0x80 | (code & 0x3F));
+ }
+ else
+ {
+ // surrogate pair
+ // confirm valid high surrogate
+ if (code < 0xDC00 && (i < end - 1))
+ {
+ int utf32 = (int) s[i + 1];
+ // confirm valid low surrogate and write pair
+ if (utf32 >= 0xDC00 && utf32 <= 0xDFFF)
+ {
+ utf32 = ((code - 0xD7C0) << 10) + (utf32 & 0x3FF);
+ i++;
+ out_Renamed[upto++] = (byte) (0xF0 | (utf32 >> 18));
+ out_Renamed[upto++] = (byte) (0x80 | ((utf32 >> 12) & 0x3F));
+ out_Renamed[upto++] = (byte) (0x80 | ((utf32 >> 6) & 0x3F));
+ out_Renamed[upto++] = (byte) (0x80 | (utf32 & 0x3F));
+ continue;
+ }
+ }
+ // replace unpaired surrogate or out-of-order low surrogate
+ // with substitution character
+ out_Renamed[upto++] = (byte) (0xEF);
+ out_Renamed[upto++] = (byte) (0xBF);
+ out_Renamed[upto++] = (byte) (0xBD);
+ }
+ }
+ //assert matches(s, offset, length, out, upto);
+ result.length = upto;
+ }
+
+ /// <summary>Convert UTF-8 bytes into UTF-16 characters. If offset
+ /// is non-zero, conversion starts at that point
+ /// in utf8, re-using the results from the previous call
+ /// up to offset.
+ /// </summary>
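+ /// <example>
+ /// A minimal usage sketch for a fresh (offset 0) decode (illustrative only):
+ /// <code>
+ /// byte[] utf8Bytes = System.Text.Encoding.UTF8.GetBytes("hello");
+ /// UnicodeUtil.UTF16Result utf16 = new UnicodeUtil.UTF16Result();
+ /// UnicodeUtil.UTF8toUTF16(utf8Bytes, 0, utf8Bytes.Length, utf16);
+ /// string decoded = new string(utf16.result, 0, utf16.length);   // "hello"
+ /// </code>
+ /// </example>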
+ public static void UTF8toUTF16(byte[] utf8, int offset, int length, UTF16Result result)
+ {
+
+ int end = offset + length;
+ char[] out_Renamed = result.result;
+ if (result.offsets.Length <= end)
+ {
+ int[] newOffsets = new int[2 * end];
+ Array.Copy(result.offsets, 0, newOffsets, 0, result.offsets.Length);
+ result.offsets = newOffsets;
+ }
+ int[] offsets = result.offsets;
+
+ // If incremental decoding fell in the middle of a
+ // single unicode character, rollback to its start:
+ int upto = offset;
+ while (offsets[upto] == - 1)
+ upto--;
+
+ int outUpto = offsets[upto];
+
+ // Pre-allocate for worst case 1-for-1
+ if (outUpto + length >= out_Renamed.Length)
+ {
+ char[] newOut = new char[2 * (outUpto + length)];
+ Array.Copy(out_Renamed, 0, newOut, 0, outUpto);
+ result.result = out_Renamed = newOut;
+ }
+
+ while (upto < end)
+ {
+
+ int b = utf8[upto] & 0xff;
+ int ch;
+
+ offsets[upto++] = outUpto;
+
+ if (b < 0xc0)
+ {
+ System.Diagnostics.Debug.Assert(b < 0x80);
+ ch = b;
+ }
+ else if (b < 0xe0)
+ {
+ ch = ((b & 0x1f) << 6) + (utf8[upto] & 0x3f);
+ offsets[upto++] = - 1;
+ }
+ else if (b < 0xf0)
+ {
+ ch = ((b & 0xf) << 12) + ((utf8[upto] & 0x3f) << 6) + (utf8[upto + 1] & 0x3f);
+ offsets[upto++] = - 1;
+ offsets[upto++] = - 1;
+ }
+ else
+ {
+ System.Diagnostics.Debug.Assert(b < 0xf8);
+ ch = ((b & 0x7) << 18) + ((utf8[upto] & 0x3f) << 12) + ((utf8[upto + 1] & 0x3f) << 6) + (utf8[upto + 2] & 0x3f);
+ offsets[upto++] = - 1;
+ offsets[upto++] = - 1;
+ offsets[upto++] = - 1;
+ }
+
+ if (ch <= UNI_MAX_BMP)
+ {
+ // target is a character <= 0xFFFF
+ out_Renamed[outUpto++] = (char) ch;
+ }
+ else
+ {
+ // target is a character in range 0xFFFF - 0x10FFFF
+ int chHalf = ch - HALF_BASE;
+ out_Renamed[outUpto++] = (char) ((chHalf >> (int) HALF_SHIFT) + UNI_SUR_HIGH_START);
+ out_Renamed[outUpto++] = (char) ((chHalf & HALF_MASK) + UNI_SUR_LOW_START);
+ }
+ }
+
+ offsets[upto] = outUpto;
+ result.length = outUpto;
+ }
+
+ // Only called from assert
+ /*
+ private static boolean matches(char[] source, int offset, int length, byte[] result, int upto) {
+ try {
+ String s1 = new String(source, offset, length);
+ String s2 = new String(result, 0, upto, "UTF-8");
+ if (!s1.equals(s2)) {
+ //System.out.println("DIFF: s1 len=" + s1.length());
+ //for(int i=0;i<s1.length();i++)
+ // System.out.println(" " + i + ": " + (int) s1.charAt(i));
+ //System.out.println("s2 len=" + s2.length());
+ //for(int i=0;i<s2.length();i++)
+ // System.out.println(" " + i + ": " + (int) s2.charAt(i));
+
+ // If the input string was invalid, then the
+ // difference is OK
+ if (!validUTF16String(s1))
+ return true;
+
+ return false;
+ }
+ return s1.equals(s2);
+ } catch (UnsupportedEncodingException uee) {
+ return false;
+ }
+ }
+
+ // Only called from assert
+ private static boolean matches(String source, int offset, int length, byte[] result, int upto) {
+ try {
+ String s1 = source.substring(offset, offset+length);
+ String s2 = new String(result, 0, upto, "UTF-8");
+ if (!s1.equals(s2)) {
+ // Allow a difference if s1 is not valid UTF-16
+
+ //System.out.println("DIFF: s1 len=" + s1.length());
+ //for(int i=0;i<s1.length();i++)
+ // System.out.println(" " + i + ": " + (int) s1.charAt(i));
+ //System.out.println(" s2 len=" + s2.length());
+ //for(int i=0;i<s2.length();i++)
+ // System.out.println(" " + i + ": " + (int) s2.charAt(i));
+
+ // If the input string was invalid, then the
+ // difference is OK
+ if (!validUTF16String(s1))
+ return true;
+
+ return false;
+ }
+ return s1.equals(s2);
+ } catch (UnsupportedEncodingException uee) {
+ return false;
+ }
+ }
+
+ public static final boolean validUTF16String(String s) {
+ final int size = s.length();
+ for(int i=0;i<size;i++) {
+ char ch = s.charAt(i);
+ if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
+ if (i < size-1) {
+ i++;
+ char nextCH = s.charAt(i);
+ if (nextCH >= UNI_SUR_LOW_START && nextCH <= UNI_SUR_LOW_END) {
+ // Valid surrogate pair
+ } else
+ // Unmatched high surrogate
+ return false;
+ } else
+ // Unmatched high surrogate
+ return false;
+ } else if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END)
+ // Unmatched low surrogate
+ return false;
+ }
+
+ return true;
+ }
+
+ public static final boolean validUTF16String(char[] s, int size) {
+ for(int i=0;i<size;i++) {
+ char ch = s[i];
+ if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
+ if (i < size-1) {
+ i++;
+ char nextCH = s[i];
+ if (nextCH >= UNI_SUR_LOW_START && nextCH <= UNI_SUR_LOW_END) {
+ // Valid surrogate pair
+ } else
+ return false;
+ } else
+ return false;
+ } else if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END)
+ // Unmatched low surrogate
+ return false;
+ }
+
+ return true;
+ }
+ */
+ }
+} \ No newline at end of file
diff --git a/src/core/Util/Version.cs b/src/core/Util/Version.cs
new file mode 100644
index 0000000..abc8f20
--- /dev/null
+++ b/src/core/Util/Version.cs
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Util
+{
+ /// <summary> Used by certain classes to match version compatibility
+ /// across releases of Lucene.
+ /// <p/>
+ /// <b>WARNING</b>: When changing the version parameter
+ /// that you supply to components in Lucene, do not simply
+ /// change the version at search-time, but instead also adjust
+ /// your indexing code to match, and re-index.
+ /// </summary>
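+ /// <example>
+ /// A minimal usage sketch (illustrative only; it assumes an analyzer constructor that
+ /// takes a Version, as StandardAnalyzer's does in this code base):
+ /// <code>
+ /// Version matchVersion = Version.LUCENE_30;
+ /// var analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(matchVersion);
+ /// bool recent = matchVersion.OnOrAfter(Version.LUCENE_29);   // true
+ /// </code>
+ /// </example>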
+ public enum Version
+ {
+ /// <summary>Match settings and bugs in Lucene's 2.0 release.</summary>
+ LUCENE_20,
+
+ /// <summary>Match settings and bugs in Lucene's 2.1 release. </summary>
+ LUCENE_21,
+
+ /// <summary>Match settings and bugs in Lucene's 2.2 release. </summary>
+ LUCENE_22,
+
+ /// <summary>Match settings and bugs in Lucene's 2.3 release.</summary>
+ LUCENE_23,
+
+ /// <summary>Match settings and bugs in Lucene's 2.4 release.</summary>
+ LUCENE_24,
+
+ /// <summary>Match settings and bugs in Lucene's 2.9 release.</summary>
+ LUCENE_29,
+
+ /// <summary>
+ /// Match settings and bugs in Lucene's 3.0 release.
+ /// <para>
+ /// Use this to get the latest and greatest settings, bug fixes,
+ /// etc, for Lucene.
+ /// </para>
+ /// </summary>
+ LUCENE_30,
+
+ // NOTE: Add new constants for later versions **here** to respect order!
+
+ /// <summary>
+ /// <p/><b>WARNING</b>: if you use this setting, and then
+ /// upgrade to a newer release of Lucene, sizable changes
+ /// may happen. If precise back compatibility is important
+ /// then you should instead explicitly specify an actual
+ /// version.
+ /// If you use this constant then you may need to
+ /// <b>re-index all of your documents</b> when upgrading
+ /// Lucene, as the way text is indexed may have changed.
+ /// Additionally, you may need to <b>re-test your entire
+ /// application</b> to ensure it behaves as expected, as
+ /// some defaults may have changed and may break functionality
+ /// in your application.
+ /// </summary>
+ [Obsolete("Use an actual version instead.")]
+ LUCENE_CURRENT,
+ }
+
+ public static class VersionEnumExtensions
+ {
+ public static bool OnOrAfter(this Version first, Version other)
+ {
+ return first.CompareTo(other) >= 0;
+ }
+ }
+} \ No newline at end of file