Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mono/Lucene.Net.Light.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/Index/CompoundFileWriter.cs')
-rw-r--r-- src/core/Index/CompoundFileWriter.cs 275
1 files changed, 275 insertions, 0 deletions
diff --git a/src/core/Index/CompoundFileWriter.cs b/src/core/Index/CompoundFileWriter.cs
new file mode 100644
index 0000000..e2905e1
--- /dev/null
+++ b/src/core/Index/CompoundFileWriter.cs
@@ -0,0 +1,275 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using Directory = Lucene.Net.Store.Directory;
+using IndexInput = Lucene.Net.Store.IndexInput;
+using IndexOutput = Lucene.Net.Store.IndexOutput;
+
+namespace Lucene.Net.Index
+{
+
+
+ /// <summary> Combines multiple files into a single compound file.
+ /// The file format:<br/>
+ /// <list type="bullet">
+ /// <item>VInt fileCount</item>
+ /// <item>{Directory}
+ /// fileCount entries with the following structure:</item>
+ /// <list type="bullet">
+ /// <item>long dataOffset</item>
+ /// <item>String fileName</item>
+ /// </list>
+ /// <item>{File Data}
+ /// fileCount entries with the raw data of the corresponding file</item>
+ /// </list>
+ ///
+ /// The fileCount integer indicates how many files are contained in this compound
+ /// file. The {directory} that follows has that many entries. Each directory entry
+ /// contains a long pointer to the start of this file's data section, and a String
+ /// with that file's name.
+ /// </summary>
+ public sealed class CompoundFileWriter : IDisposable
+ {
+
+ /// <summary>Bookkeeping record for one source file queued for the compound file.</summary>
+ private sealed class FileEntry
+ {
+     /// <summary>Name of the source file.</summary>
+     internal string file;
+
+     /// <summary>Scratch value: position at which this file's directory entry starts.</summary>
+     internal long directoryOffset;
+
+     /// <summary>Scratch value: position at which this file's data section starts.</summary>
+     internal long dataOffset;
+ }
+
+
+ // Directory used both to read the source files and to create the compound file.
+ private readonly Directory directory;
+
+ // Name of the compound file to create.
+ private readonly string fileName;
+
+ // Names of the files passed to AddFile.
+ private readonly HashSet<string> ids;
+
+ // Files to merge, kept in the order they were added.
+ private readonly LinkedList<FileEntry> entries;
+
+ // Becomes true once Dispose has started the merge; checked by AddFile and Dispose.
+ private bool merged;
+
+ // Optional abort checker; null when the public constructor is used.
+ private readonly SegmentMerger.CheckAbort checkAbort;
+
+ /// <summary>Creates a writer that builds the compound stream in the file
+ /// <c>name</c>. The name is used as given (no extensions are added).
+ /// No abort checker is attached.
+ /// </summary>
+ /// <param name="dir">directory holding the source files and the compound file</param>
+ /// <param name="name">full name of the compound file</param>
+ /// <exception cref="ArgumentNullException">if <c>dir</c> or <c>name</c> is null</exception>
+ public CompoundFileWriter(Directory dir, string name)
+     : this(dir, name, null)
+ {
+ }
+
+ /// <summary>Creates a writer that builds the compound stream in the file
+ /// <c>name</c>, optionally reporting progress to <c>checkAbort</c> while copying.
+ /// </summary>
+ /// <param name="dir">directory holding the source files and the compound file</param>
+ /// <param name="name">full name of the compound file</param>
+ /// <param name="checkAbort">abort checker notified during copying; may be null</param>
+ /// <exception cref="ArgumentNullException">if <c>dir</c> or <c>name</c> is null</exception>
+ internal CompoundFileWriter(Directory dir, string name, SegmentMerger.CheckAbort checkAbort)
+ {
+     if (dir == null)
+     {
+         throw new ArgumentNullException("dir");
+     }
+
+     if (name == null)
+     {
+         throw new ArgumentNullException("name");
+     }
+
+     this.directory = dir;
+     this.fileName = name;
+     this.checkAbort = checkAbort;
+     this.ids = new HashSet<string>();
+     this.entries = new LinkedList<FileEntry>();
+ }
+
+ /// <summary>Gets the directory this writer was constructed with.</summary>
+ public Directory Directory
+ {
+     get
+     {
+         return this.directory;
+     }
+ }
+
+ /// <summary>Gets the name of the compound file this writer was constructed with.</summary>
+ public string Name
+ {
+     get
+     {
+         return this.fileName;
+     }
+ }
+
+ /// <summary>Add a source stream. <c>file</c> is the string by which the
+ /// sub-stream will be known in the compound stream.
+ /// </summary>
+ /// <param name="file">name of the source file to include in the compound file</param>
+ /// <exception cref="InvalidOperationException">if the merge has already been performed</exception>
+ /// <exception cref="ArgumentNullException">if <c>file</c> is null</exception>
+ /// <exception cref="ArgumentException">if a file with the same name has been added already</exception>
+ public void AddFile(String file)
+ {
+     if (merged)
+     {
+         throw new InvalidOperationException("Can't add extensions after merge has been called");
+     }
+
+     if (file == null)
+     {
+         throw new ArgumentNullException("file");
+     }
+
+     // HashSet<T>.Add signals a duplicate by returning false; it does not throw,
+     // so the result must be checked explicitly to reject a repeated name.
+     if (!ids.Add(file))
+     {
+         throw new ArgumentException("File " + file + " already added");
+     }
+
+     var entry = new FileEntry {file = file};
+     entries.AddLast(entry);
+ }
+
+ /// <summary>Obsolete entry point that simply forwards to <c>Dispose()</c>.</summary>
+ [Obsolete("Use Dispose() instead")]
+ public void Close()
+ {
+     this.Dispose();
+ }
+
+ /// <summary>Merges the files added so far into the compound stream, in the
+ /// order in which they were added. The stream receives the entry count, then a
+ /// directory of (data offset, file name) pairs, then the raw data of each file.
+ /// This method only writes the compound file; it does not itself delete the
+ /// source files.
+ /// </summary>
+ /// <exception cref="InvalidOperationException">if the merge has already been
+ /// performed, or if no file has been added to this object</exception>
+ public void Dispose()
+ {
+     // Extract into protected method if class ever becomes unsealed
+
+     // TODO: Dispose shouldn't throw exceptions!
+     // InvalidOperationException derives from SystemException, so callers that
+     // catch SystemException continue to work.
+     if (merged)
+     {
+         throw new InvalidOperationException("Merge already performed");
+     }
+
+     if (entries.Count == 0)
+     {
+         throw new InvalidOperationException("No entries to merge have been defined");
+     }
+
+     merged = true;
+
+     // open the compound stream
+     IndexOutput os = null;
+     try
+     {
+         os = directory.CreateOutput(fileName);
+
+         // Write the number of entries
+         os.WriteVInt(entries.Count);
+
+         // Write the directory with all offsets at 0.
+         // Remember the positions of directory entries so that we can
+         // adjust the offsets later
+         long totalSize = 0;
+         foreach (FileEntry fe in entries)
+         {
+             fe.directoryOffset = os.FilePointer;
+             os.WriteLong(0); // for now
+             os.WriteString(fe.file);
+             totalSize += directory.FileLength(fe.file);
+         }
+
+         // Pre-allocate size of file as optimization --
+         // this can potentially help IO performance as
+         // we write the file and also later during
+         // searching. It also uncovers a disk-full
+         // situation earlier and hopefully without
+         // actually filling disk to 100%:
+         long finalLength = totalSize + os.FilePointer;
+         os.SetLength(finalLength);
+
+         // Open the files and copy their data into the stream.
+         // Remember the locations of each file's data section.
+         var buffer = new byte[16384];
+         foreach (FileEntry fe in entries)
+         {
+             fe.dataOffset = os.FilePointer;
+             CopyFile(fe, os, buffer);
+         }
+
+         // Write the data offsets into the directory of the compound stream
+         foreach (FileEntry fe in entries)
+         {
+             os.Seek(fe.directoryOffset);
+             os.WriteLong(fe.dataOffset);
+         }
+
+         System.Diagnostics.Debug.Assert(finalLength == os.Length);
+
+         // Close the output stream. Set the os to null before trying to
+         // close so that if an exception occurs during the close, the
+         // finally clause below will not attempt to close the stream
+         // the second time.
+         IndexOutput tmp = os;
+         os = null;
+         tmp.Close();
+     }
+     finally
+     {
+         // os is non-null here only when the try block failed part-way.
+         if (os != null)
+         {
+             try
+             {
+                 os.Close();
+             }
+             catch (System.IO.IOException)
+             {
+                 // Deliberately ignored: this is a best-effort close, and the
+                 // exception already in flight from the try block is the one
+                 // the caller should see.
+             }
+         }
+     }
+ }
+
+
+ /// <summary>Appends the entire contents of the file named by <c>source</c> to
+ /// <c>os</c>, moving the data through the caller-supplied <c>buffer</c> so no
+ /// per-file buffer has to be allocated.
+ /// </summary>
+ /// <param name="source">entry naming the file to copy</param>
+ /// <param name="os">output stream that receives the data</param>
+ /// <param name="buffer">scratch buffer; its length is the copy chunk size</param>
+ /// <exception cref="System.IO.IOException">if the number of bytes written to
+ /// <c>os</c> differs from the length of the source file</exception>
+ private void CopyFile(FileEntry source, IndexOutput os, byte[] buffer)
+ {
+     IndexInput input = null;
+     try
+     {
+         long outputStart = os.FilePointer;
+
+         input = directory.OpenInput(source.file);
+         long length = input.Length();
+         int chunk = buffer.Length;
+         long remainder = length;
+
+         while (remainder > 0)
+         {
+             // Copy a full chunk, or whatever is left for the final piece.
+             int len = remainder < chunk ? (int) remainder : chunk;
+             input.ReadBytes(buffer, 0, len, false);
+             os.WriteBytes(buffer, len);
+             remainder -= len;
+
+             // Report progress after every chunk so a pending abort can be
+             // noticed (original note: roughly every 2 MB we will check if
+             // it's time to abort).
+             if (checkAbort != null)
+             {
+                 checkAbort.Work(80);
+             }
+         }
+
+         // Verify that remainder is 0
+         if (remainder != 0)
+         {
+             throw new System.IO.IOException("Non-zero remainder length after copying: " + remainder + " (id: " + source.file + ", length: " + length + ", buffer size: " + chunk + ")");
+         }
+
+         // Verify that the output grew by exactly the source file's length
+         long outputEnd = os.FilePointer;
+         long written = outputEnd - outputStart;
+         if (written != length)
+         {
+             throw new System.IO.IOException("Difference in the output file offsets " + written + " does not match the original file length " + length);
+         }
+     }
+     finally
+     {
+         if (input != null)
+         {
+             input.Close();
+         }
+     }
+ }
+ }
+} \ No newline at end of file