diff options
Diffstat (limited to 'src/core/Index/CompoundFileWriter.cs')
-rw-r--r-- | src/core/Index/CompoundFileWriter.cs | 275 |
1 files changed, 275 insertions, 0 deletions
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using System.Collections.Generic;
using Directory = Lucene.Net.Store.Directory;
using IndexInput = Lucene.Net.Store.IndexInput;
using IndexOutput = Lucene.Net.Store.IndexOutput;

namespace Lucene.Net.Index
{
    /// <summary> Combines multiple files into a single compound file.
    /// The file format:<br/>
    /// <list type="bullet">
    /// <item>VInt fileCount</item>
    /// <item>{Directory}
    /// fileCount entries with the following structure:
    /// <list type="bullet">
    /// <item>long dataOffset</item>
    /// <item>String fileName</item>
    /// </list>
    /// </item>
    /// <item>{File Data}
    /// fileCount entries with the raw data of the corresponding file</item>
    /// </list>
    ///
    /// The fileCount integer indicates how many files are contained in this compound
    /// file. The {directory} that follows has that many entries. Each directory entry
    /// contains a long pointer to the start of this file's data section, and a String
    /// with that file's name.
    /// </summary>
    public sealed class CompoundFileWriter : IDisposable
    {
        /// <summary>Size, in bytes, of the scratch buffer used while copying file data.</summary>
        private const int CopyBufferSize = 16384;

        /// <summary>Book-keeping record for one file that will be written into the compound file.</summary>
        private sealed class FileEntry
        {
            /// <summary>source file </summary>
            internal System.String file;

            /// <summary>temporary holder for the start of directory entry for this file </summary>
            internal long directoryOffset;

            /// <summary>temporary holder for the start of this file's data section </summary>
            internal long dataOffset;
        }

        private readonly Directory directory;
        private readonly String fileName;

        // Names already registered through AddFile; used to reject duplicates.
        private readonly HashSet<string> ids;

        // Registered files in insertion order; this is the order they are written in.
        private readonly LinkedList<FileEntry> entries;

        // Set once Dispose() has started the merge; no files may be added afterwards.
        private bool merged = false;

        // Optional progress/abort hook; null when the public constructor is used.
        private readonly SegmentMerger.CheckAbort checkAbort;

        /// <summary>Create the compound stream in the specified file. The file name is the
        /// entire name (no extensions are added).
        /// </summary>
        /// <param name="dir">directory in which the compound file is created and from which the source files are read</param>
        /// <param name="name">full name of the compound file</param>
        /// <exception cref="ArgumentNullException">if <c>dir</c> or <c>name</c> is null</exception>
        public CompoundFileWriter(Directory dir, System.String name) : this(dir, name, null)
        {
        }

        /// <summary>Create the compound stream in the specified file, reporting copy
        /// progress to <c>checkAbort</c> when it is not null.
        /// </summary>
        /// <param name="dir">directory in which the compound file is created and from which the source files are read</param>
        /// <param name="name">full name of the compound file</param>
        /// <param name="checkAbort">optional hook notified after every copied chunk; may be null</param>
        /// <exception cref="ArgumentNullException">if <c>dir</c> or <c>name</c> is null</exception>
        internal CompoundFileWriter(Directory dir, System.String name, SegmentMerger.CheckAbort checkAbort)
        {
            if (dir == null)
            {
                throw new ArgumentNullException("dir");
            }
            if (name == null)
            {
                throw new ArgumentNullException("name");
            }

            this.checkAbort = checkAbort;
            directory = dir;
            fileName = name;
            ids = new HashSet<string>();
            entries = new LinkedList<FileEntry>();
        }

        /// <summary>Returns the directory of the compound file. </summary>
        public Directory Directory
        {
            get { return directory; }
        }

        /// <summary>Returns the name of the compound file. </summary>
        public string Name
        {
            get { return fileName; }
        }

        /// <summary>Add a source stream. <c>file</c> is the string by which the
        /// sub-stream will be known in the compound stream; it is also the name
        /// under which the source file is opened from the directory during the merge.
        /// </summary>
        /// <param name="file">name of the source file to include</param>
        /// <exception cref="InvalidOperationException">if the merge has already been performed</exception>
        /// <exception cref="ArgumentNullException">if <c>file</c> is null</exception>
        /// <exception cref="ArgumentException">if a file with the same name has been added already</exception>
        public void AddFile(String file)
        {
            if (merged)
            {
                throw new InvalidOperationException("Can't add extensions after merge has been called");
            }

            if (file == null)
            {
                throw new ArgumentNullException("file");
            }

            // HashSet<T>.Add reports a duplicate by returning false; it does not
            // throw, so the return value must be checked to reject duplicates.
            if (!ids.Add(file))
            {
                throw new ArgumentException("File " + file + " already added");
            }

            var entry = new FileEntry {file = file};
            entries.AddLast(entry);
        }

        /// <summary>Performs the merge; equivalent to calling <see cref="Dispose"/>.</summary>
        [Obsolete("Use Dispose() instead")]
        public void Close()
        {
            Dispose();
        }

        /// <summary>Merge the files added up to now.
        /// All added files are combined sequentially, in the order they were added,
        /// into the compound stream. This method does not delete the source files.
        /// </summary>
        /// <exception cref="InvalidOperationException">if the merge has already been
        /// performed or if no file has been added to this object</exception>
        public void Dispose()
        {
            // Extract into protected method if class ever becomes unsealed

            // TODO: Dispose shouldn't throw exceptions!
            // InvalidOperationException derives from SystemException, so callers that
            // catch SystemException keep working.
            if (merged)
            {
                throw new InvalidOperationException("Merge already performed");
            }

            if (entries.Count == 0)
            {
                throw new InvalidOperationException("No entries to merge have been defined");
            }

            merged = true;

            // open the compound stream
            IndexOutput os = null;
            try
            {
                os = directory.CreateOutput(fileName);

                // Write the number of entries
                os.WriteVInt(entries.Count);

                // Write the directory with all offsets at 0.
                // Remember the positions of directory entries so that we can
                // adjust the offsets later
                long totalSize = 0;
                foreach (FileEntry fe in entries)
                {
                    fe.directoryOffset = os.FilePointer;
                    os.WriteLong(0); // for now
                    os.WriteString(fe.file);
                    totalSize += directory.FileLength(fe.file);
                }

                // Pre-allocate size of file as optimization --
                // this can potentially help IO performance as
                // we write the file and also later during
                // searching. It also uncovers a disk-full
                // situation earlier and hopefully without
                // actually filling disk to 100%:
                long finalLength = totalSize + os.FilePointer;
                os.SetLength(finalLength);

                // Open the files and copy their data into the stream.
                // Remember the locations of each file's data section.
                var buffer = new byte[CopyBufferSize];
                foreach (FileEntry fe in entries)
                {
                    fe.dataOffset = os.FilePointer;
                    CopyFile(fe, os, buffer);
                }

                // Write the data offsets into the directory of the compound stream
                foreach (FileEntry fe in entries)
                {
                    os.Seek(fe.directoryOffset);
                    os.WriteLong(fe.dataOffset);
                }

                System.Diagnostics.Debug.Assert(finalLength == os.Length);

                // Close the output stream. Set the os to null before trying to
                // close so that if an exception occurs during the close, the
                // finally clause below will not attempt to close the stream
                // the second time.
                IndexOutput tmp = os;
                os = null;
                tmp.Close();
            }
            finally
            {
                // os is still non-null only when an exception interrupted the merge.
                // The close is best-effort: an IOException from it is ignored so that
                // the exception already in flight is the one that propagates.
                if (os != null)
                {
                    try
                    {
                        os.Close();
                    }
                    catch (System.IO.IOException)
                    {
                    }
                }
            }
        }

        /// <summary>Copy the contents of the source file named by <c>source</c> into the
        /// provided output stream. Use the provided buffer for moving data
        /// to reduce memory allocation.
        /// </summary>
        /// <param name="source">entry naming the file to copy</param>
        /// <param name="os">output stream the file data is appended to</param>
        /// <param name="buffer">scratch buffer; its length is the copy chunk size</param>
        /// <exception cref="System.IO.IOException">if the number of bytes written does not
        /// match the length of the source file</exception>
        private void CopyFile(FileEntry source, IndexOutput os, byte[] buffer)
        {
            IndexInput input = null;
            try
            {
                long startPtr = os.FilePointer;

                input = directory.OpenInput(source.file);
                long length = input.Length();
                long remainder = length;
                int chunk = buffer.Length;

                while (remainder > 0)
                {
                    var len = (int) Math.Min(chunk, remainder);
                    input.ReadBytes(buffer, 0, len, false);
                    os.WriteBytes(buffer, len);
                    remainder -= len;
                    if (checkAbort != null)
                    {
                        // Original author's note: roughly every 2 MB we will check if
                        // it's time to abort
                        checkAbort.Work(80);
                    }
                }

                // Verify that remainder is 0
                if (remainder != 0)
                {
                    throw new System.IO.IOException("Non-zero remainder length after copying: " + remainder + " (id: " + source.file + ", length: " + length + ", buffer size: " + chunk + ")");
                }

                // Verify that the output length diff is equal to original file
                long endPtr = os.FilePointer;
                long diff = endPtr - startPtr;
                if (diff != length)
                {
                    throw new System.IO.IOException("Difference in the output file offsets " + diff + " does not match the original file length " + length);
                }
            }
            finally
            {
                if (input != null)
                {
                    input.Close();
                }
            }
        }
    }
}
\ No newline at end of file |