diff options
author | vng <viktor.govako@gmail.com> | 2012-01-03 17:07:55 +0400 |
---|---|---|
committer | Alex Zolotarev <alex@maps.me> | 2015-09-23 01:31:16 +0300 |
commit | 1e5a568fcdc10584c226cd8e5cd0f05fc936eba1 (patch) | |
tree | 2676fc3d25c14e94c89bf6806ea878b3fa04dff4 /indexer/string_file.cpp | |
parent | edc3df3fc6c02baf8b318364ffeccf19ca726291 (diff) |
[search] New index generation algorithm.
Diffstat (limited to 'indexer/string_file.cpp')
-rw-r--r-- | indexer/string_file.cpp | 91 |
1 files changed, 71 insertions, 20 deletions
```diff
diff --git a/indexer/string_file.cpp b/indexer/string_file.cpp
index 38c8efbe13..853f635cf2 100644
--- a/indexer/string_file.cpp
+++ b/indexer/string_file.cpp
@@ -1,10 +1,13 @@
 #include "string_file.hpp"
 
 #include "../coding/read_write_utils.hpp"
-#include "../coding/reader.hpp"
-#include "../coding/writer.hpp"
+#include "../coding/file_reader.hpp"
+#include "../coding/file_writer.hpp"
+
+#include "../base/logging.hpp"
 
 #include "../std/algorithm.hpp"
+#include "../std/bind.hpp"
 
 
 template <class TWriter>
@@ -21,11 +24,8 @@ StringsFile::IdT StringsFile::StringT::Write(TWriter & writer) const
 }
 
 template <class TReader>
-void StringsFile::StringT::Read(IdT id, TReader & reader)
+void StringsFile::StringT::Read(TReader & src)
 {
-  ReaderSource<TReader> src(reader);
-  src.Skip(id);
-
   rw::Read(src, m_name);
   m_pos = ReadVarUint<uint32_t>(src);
   m_rank = ReadPrimitiveFromSource<uint8_t>(src);
@@ -47,30 +47,81 @@ bool StringsFile::StringT::operator == (StringT const & name) const
   return (m_name == name.m_name && m_pos == name.m_pos && m_rank == name.m_rank);
 }
 
+StringsFile::~StringsFile()
+{
+  m_readers.clear();
+
+  for (int i = 0; i < m_index; ++i)
+    FileWriter::DeleteFileX(FormatFilePath(i));
+}
+
 void StringsFile::AddString(StringT const & s)
 {
-  ASSERT ( m_writer != 0, () );
-  m_ids.push_back(s.Write(*m_writer));
+  if (m_strings.size() >= 30000)
+    Flush();
+
+  m_strings.push_back(s);
 }
 
-bool StringsFile::StringCompare::operator() (IdT const & id1, IdT const & id2) const
+StringsFile::StringT StringsFile::IteratorT::dereference() const
 {
-  StringT str[2];
-  str[0].Read(id1, m_file.m_reader);
-  str[1].Read(id2, m_file.m_reader);
-  return (str[0] < str[1]);
+  ASSERT ( !m_file.m_queue.empty(), () );
+  return m_file.m_queue.top().m_string;
 }
 
-void StringsFile::SortStrings()
+void StringsFile::IteratorT::increment()
 {
-  stable_sort(m_ids.begin(), m_ids.end(), StringCompare(*this));
+  ASSERT ( !m_file.m_queue.empty(), () );
+  int const index = m_file.m_queue.top().m_index;
+
+  m_file.m_queue.pop();
+
+  if (!m_file.PushNextValue(index))
+    m_end = m_file.m_queue.empty();
 }
 
-StringsFile::StringT StringsFile::IteratorT::dereference() const
+string StringsFile::FormatFilePath(int i) const
+{
+  return m_filePath + string(".") + strings::to_string(i);
+}
+
+void StringsFile::Flush()
+{
+  sort(m_strings.begin(), m_strings.end());
+
+  FileWriter w(FormatFilePath(m_index++));
+  for_each(m_strings.begin(), m_strings.end(), bind(&StringT::Write<FileWriter>, _1, ref(w)));
+
+  m_strings.clear();
+}
+
+bool StringsFile::PushNextValue(int i)
+{
+  try
+  {
+    StringT s;
+    s.Read(m_readers[i]);
+
+    m_queue.push(QValue(s, i));
+    return true;
+  }
+  catch (SourceOutOfBoundsException const &)
+  {
+    return false;
+  }
+}
+
+void StringsFile::EndAdding()
+{
+  Flush();
+}
+
+void StringsFile::OpenForRead()
 {
-  ASSERT_LESS ( m_index, m_file->m_ids.size(), () );
+  for (int i = 0; i < m_index; ++i)
+  {
+    m_readers.push_back(ReaderT(new FileReader(FormatFilePath(i), 6, 1)));
 
-  StringT s;
-  s.Read(m_file->m_ids[m_index], m_file->m_reader);
-  return s;
+    CHECK ( PushNextValue(i), () );
+  }
 }
```